diff options
161 files changed, 4776 insertions, 1465 deletions
diff --git a/.ci/scripts/common/post-upload.sh b/.ci/scripts/common/post-upload.sh new file mode 100644 index 000000000..bb4e9d328 --- /dev/null +++ b/.ci/scripts/common/post-upload.sh | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | |||
| 3 | # Copy documentation | ||
| 4 | cp license.txt "$REV_NAME" | ||
| 5 | cp README.md "$REV_NAME" | ||
| 6 | |||
| 7 | tar $COMPRESSION_FLAGS "$ARCHIVE_NAME" "$REV_NAME" | ||
| 8 | |||
| 9 | mv "$REV_NAME" $RELEASE_NAME | ||
| 10 | |||
| 11 | 7z a "$REV_NAME.7z" $RELEASE_NAME | ||
| 12 | |||
| 13 | # move the compiled archive into the artifacts directory to be uploaded by travis releases | ||
| 14 | mv "$ARCHIVE_NAME" artifacts/ | ||
| 15 | mv "$REV_NAME.7z" artifacts/ | ||
diff --git a/.ci/scripts/common/pre-upload.sh b/.ci/scripts/common/pre-upload.sh new file mode 100644 index 000000000..3c2fc79a2 --- /dev/null +++ b/.ci/scripts/common/pre-upload.sh | |||
| @@ -0,0 +1,6 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | |||
| 3 | GITDATE="`git show -s --date=short --format='%ad' | sed 's/-//g'`" | ||
| 4 | GITREV="`git show -s --format='%h'`" | ||
| 5 | |||
| 6 | mkdir -p artifacts | ||
diff --git a/.ci/scripts/format/docker.sh b/.ci/scripts/format/docker.sh new file mode 100644 index 000000000..778411e4a --- /dev/null +++ b/.ci/scripts/format/docker.sh | |||
| @@ -0,0 +1,6 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | |||
| 3 | # Run clang-format | ||
| 4 | cd /yuzu | ||
| 5 | chmod a+x ./.ci/scripts/format/script.sh | ||
| 6 | ./.ci/scripts/format/script.sh | ||
diff --git a/.ci/scripts/format/exec.sh b/.ci/scripts/format/exec.sh new file mode 100644 index 000000000..5d6393b38 --- /dev/null +++ b/.ci/scripts/format/exec.sh | |||
| @@ -0,0 +1,4 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | |||
| 3 | chmod a+x ./.ci/scripts/format/docker.sh | ||
| 4 | docker run -v $(pwd):/yuzu yuzuemu/build-environments:linux-clang-format /bin/bash -ex /yuzu/.ci/scripts/format/docker.sh | ||
diff --git a/.ci/scripts/format/script.sh b/.ci/scripts/format/script.sh new file mode 100644 index 000000000..5ab828d5e --- /dev/null +++ b/.ci/scripts/format/script.sh | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | |||
| 3 | if grep -nrI '\s$' src *.yml *.txt *.md Doxyfile .gitignore .gitmodules .ci* dist/*.desktop \ | ||
| 4 | dist/*.svg dist/*.xml; then | ||
| 5 | echo Trailing whitespace found, aborting | ||
| 6 | exit 1 | ||
| 7 | fi | ||
| 8 | |||
| 9 | # The unversioned clang-format binary defaults to the old 3.5 release, so pin 6.0 | ||
| 10 | CLANG_FORMAT=clang-format-6.0 | ||
| 11 | $CLANG_FORMAT --version | ||
| 12 | |||
| 13 | if [ "$TRAVIS_EVENT_TYPE" = "pull_request" ]; then | ||
| 14 | # Get list of every file modified in this pull request | ||
| 15 | files_to_lint="$(git diff --name-only --diff-filter=ACMRTUXB $TRAVIS_COMMIT_RANGE | grep '^src/[^.]*[.]\(cpp\|h\)$' || true)" | ||
| 16 | else | ||
| 17 | # Check everything for branch pushes | ||
| 18 | files_to_lint="$(find src/ -name '*.cpp' -or -name '*.h')" | ||
| 19 | fi | ||
| 20 | |||
| 21 | # Turn off tracing for this because it's too verbose | ||
| 22 | set +x | ||
| 23 | |||
| 24 | for f in $files_to_lint; do | ||
| 25 | d=$(diff -u "$f" <($CLANG_FORMAT "$f") || true) | ||
| 26 | if ! [ -z "$d" ]; then | ||
| 27 | echo "!!! $f not compliant to coding style, here is the fix:" | ||
| 28 | echo "$d" | ||
| 29 | fail=1 | ||
| 30 | fi | ||
| 31 | done | ||
| 32 | |||
| 33 | set -x | ||
| 34 | |||
| 35 | if [ "$fail" = 1 ]; then | ||
| 36 | exit 1 | ||
| 37 | fi | ||
diff --git a/.ci/scripts/linux/docker.sh b/.ci/scripts/linux/docker.sh new file mode 100644 index 000000000..f538a4081 --- /dev/null +++ b/.ci/scripts/linux/docker.sh | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | |||
| 3 | cd /yuzu | ||
| 4 | |||
| 5 | ccache -s | ||
| 6 | |||
| 7 | mkdir build || true && cd build | ||
| 8 | cmake .. -G Ninja -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON | ||
| 9 | |||
| 10 | ninja | ||
| 11 | |||
| 12 | ccache -s | ||
| 13 | |||
| 14 | ctest -VV -C Release | ||
diff --git a/.ci/scripts/linux/exec.sh b/.ci/scripts/linux/exec.sh new file mode 100644 index 000000000..a5a6c34b9 --- /dev/null +++ b/.ci/scripts/linux/exec.sh | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | |||
| 3 | mkdir -p "ccache" || true | ||
| 4 | chmod a+x ./.ci/scripts/linux/docker.sh | ||
| 5 | docker run -e ENABLE_COMPATIBILITY_REPORTING -e CCACHE_DIR=/yuzu/ccache -v $(pwd):/yuzu yuzuemu/build-environments:linux-fresh /bin/bash /yuzu/.ci/scripts/linux/docker.sh | ||
diff --git a/.ci/scripts/linux/upload.sh b/.ci/scripts/linux/upload.sh new file mode 100644 index 000000000..0d131d1dd --- /dev/null +++ b/.ci/scripts/linux/upload.sh | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | |||
| 3 | . .ci/scripts/common/pre-upload.sh | ||
| 4 | |||
| 5 | REV_NAME="yuzu-linux-${GITDATE}-${GITREV}" | ||
| 6 | ARCHIVE_NAME="${REV_NAME}.tar.xz" | ||
| 7 | COMPRESSION_FLAGS="-cJvf" | ||
| 8 | |||
| 9 | mkdir "$REV_NAME" | ||
| 10 | |||
| 11 | cp build/bin/yuzu-cmd "$REV_NAME" | ||
| 12 | cp build/bin/yuzu "$REV_NAME" | ||
| 13 | |||
| 14 | . .ci/scripts/common/post-upload.sh | ||
diff --git a/.ci/scripts/merge/apply-patches-by-label.py b/.ci/scripts/merge/apply-patches-by-label.py new file mode 100644 index 000000000..b346001a5 --- /dev/null +++ b/.ci/scripts/merge/apply-patches-by-label.py | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | # Fetch and squash-merge every open pull request that carries a specific label | ||
| 2 | # Usage: python apply-patches-by-label.py <Label to Match> <Root Path Folder to DL to> | ||
| 3 | |||
| 4 | import requests, sys, json, urllib3.request, shutil, subprocess | ||
| 5 | |||
| 6 | http = urllib3.PoolManager() | ||
| 7 | dl_list = {} | ||
| 8 | |||
| 9 | def check_individual(labels): | ||
| 10 | for label in labels: | ||
| 11 | if (label["name"] == sys.argv[1]): | ||
| 12 | return True | ||
| 13 | return False | ||
| 14 | |||
| 15 | try: | ||
| 16 | url = 'https://api.github.com/repos/yuzu-emu/yuzu/pulls' | ||
| 17 | response = requests.get(url) | ||
| 18 | if (response.ok): | ||
| 19 | j = json.loads(response.content) | ||
| 20 | for pr in j: | ||
| 21 | if (check_individual(pr["labels"])): | ||
| 22 | pn = pr["number"] | ||
| 23 | print("Matched PR# %s" % pn) | ||
| 24 | print(subprocess.check_output(["git", "fetch", "https://github.com/yuzu-emu/yuzu.git", "pull/%s/head:pr-%s" % (pn, pn), "-f"])) | ||
| 25 | print(subprocess.check_output(["git", "merge", "--squash", "pr-%s" % pn])) | ||
| 26 | print(subprocess.check_output(["git", "commit", "-m\"Merge PR %s\"" % pn])) | ||
| 27 | except: | ||
| 28 | sys.exit(-1) | ||
diff --git a/.ci/scripts/merge/check-label-presence.py b/.ci/scripts/merge/check-label-presence.py new file mode 100644 index 000000000..048466d7e --- /dev/null +++ b/.ci/scripts/merge/check-label-presence.py | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | # Checks to see if the specified pull request # has the specified label | ||
| 2 | # Usage: python check-label-presence.py <Pull Request ID> <Name of Label> | ||
| 3 | |||
| 4 | import requests, json, sys | ||
| 5 | |||
| 6 | try: | ||
| 7 | url = 'https://api.github.com/repos/yuzu-emu/yuzu/issues/%s' % sys.argv[1] | ||
| 8 | response = requests.get(url) | ||
| 9 | if (response.ok): | ||
| 10 | j = json.loads(response.content) | ||
| 11 | for label in j["labels"]: | ||
| 12 | if label["name"] == sys.argv[2]: | ||
| 13 | print('##vso[task.setvariable variable=enabletesting;]true') | ||
| 14 | sys.exit() | ||
| 15 | except: | ||
| 16 | sys.exit(-1) | ||
| 17 | |||
| 18 | print('##vso[task.setvariable variable=enabletesting;]false') | ||
diff --git a/.ci/scripts/merge/yuzubot-git-config.sh b/.ci/scripts/merge/yuzubot-git-config.sh new file mode 100644 index 000000000..d9d595bbc --- /dev/null +++ b/.ci/scripts/merge/yuzubot-git-config.sh | |||
| @@ -0,0 +1,2 @@ | |||
| 1 | git config --global user.email "yuzu@yuzu-emu.org" | ||
| 2 | git config --global user.name "yuzubot" \ No newline at end of file | ||
diff --git a/.ci/scripts/windows/docker.sh b/.ci/scripts/windows/docker.sh new file mode 100644 index 000000000..f7093363b --- /dev/null +++ b/.ci/scripts/windows/docker.sh | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | |||
| 3 | cd /yuzu | ||
| 4 | |||
| 5 | ccache -s | ||
| 6 | |||
| 7 | # Dirty hack to trick unicorn makefile into believing we are in a MINGW system | ||
| 8 | mv /bin/uname /bin/uname1 && echo -e '#!/bin/sh\necho MINGW64' >> /bin/uname | ||
| 9 | chmod +x /bin/uname | ||
| 10 | |||
| 11 | # Dirty hack to trick unicorn makefile into believing we have cmd | ||
| 12 | echo '' >> /bin/cmd | ||
| 13 | chmod +x /bin/cmd | ||
| 14 | |||
| 15 | mkdir build || true && cd build | ||
| 16 | cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release | ||
| 17 | ninja | ||
| 18 | |||
| 19 | # Clean up the dirty hacks | ||
| 20 | rm /bin/uname && mv /bin/uname1 /bin/uname | ||
| 21 | rm /bin/cmd | ||
| 22 | |||
| 23 | ccache -s | ||
| 24 | |||
| 25 | echo "Tests skipped" | ||
| 26 | #ctest -VV -C Release | ||
| 27 | |||
| 28 | echo 'Prepare binaries...' | ||
| 29 | cd .. | ||
| 30 | mkdir package | ||
| 31 | |||
| 32 | QT_PLATFORM_DLL_PATH='/usr/x86_64-w64-mingw32/lib/qt5/plugins/platforms/' | ||
| 33 | find build/ -name "yuzu*.exe" -exec cp {} 'package' \; | ||
| 34 | |||
| 35 | # copy Qt plugins | ||
| 36 | mkdir package/platforms | ||
| 37 | cp "${QT_PLATFORM_DLL_PATH}/qwindows.dll" package/platforms/ | ||
| 38 | cp -rv "${QT_PLATFORM_DLL_PATH}/../mediaservice/" package/ | ||
| 39 | cp -rv "${QT_PLATFORM_DLL_PATH}/../imageformats/" package/ | ||
| 40 | rm -f package/mediaservice/*d.dll | ||
| 41 | |||
| 42 | for i in package/*.exe; do | ||
| 43 | # we need to process pdb here, however, cv2pdb | ||
| 44 | # does not work here, so we just simply strip all the debug symbols | ||
| 45 | x86_64-w64-mingw32-strip "${i}" | ||
| 46 | done | ||
| 47 | |||
| 48 | pip3 install pefile | ||
| 49 | python3 .ci/scripts/windows/scan_dll.py package/*.exe "package/" | ||
| 50 | python3 .ci/scripts/windows/scan_dll.py package/imageformats/*.dll "package/" | ||
diff --git a/.ci/scripts/windows/exec.sh b/.ci/scripts/windows/exec.sh new file mode 100644 index 000000000..d6a994856 --- /dev/null +++ b/.ci/scripts/windows/exec.sh | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | |||
| 3 | mkdir -p "ccache" || true | ||
| 4 | chmod a+x ./.ci/scripts/windows/docker.sh | ||
| 5 | docker run -e CCACHE_DIR=/yuzu/ccache -v $(pwd):/yuzu yuzuemu/build-environments:linux-mingw /bin/bash -ex /yuzu/.ci/scripts/windows/docker.sh | ||
diff --git a/.ci/scripts/windows/scan_dll.py b/.ci/scripts/windows/scan_dll.py new file mode 100644 index 000000000..163183f2e --- /dev/null +++ b/.ci/scripts/windows/scan_dll.py | |||
| @@ -0,0 +1,106 @@ | |||
| 1 | import pefile | ||
| 2 | import sys | ||
| 3 | import re | ||
| 4 | import os | ||
| 5 | import queue | ||
| 6 | import shutil | ||
| 7 | |||
| 8 | # constant definitions | ||
| 9 | KNOWN_SYS_DLLS = ['WINMM.DLL', 'MSVCRT.DLL', 'VERSION.DLL', 'MPR.DLL', | ||
| 10 | 'DWMAPI.DLL', 'UXTHEME.DLL', 'DNSAPI.DLL', 'IPHLPAPI.DLL'] | ||
| 11 | # below is for Ubuntu 18.04 with specified PPA enabled, if you are using | ||
| 12 | # other distro or different repositories, change the following accordingly | ||
| 13 | DLL_PATH = [ | ||
| 14 | '/usr/x86_64-w64-mingw32/bin/', | ||
| 15 | '/usr/x86_64-w64-mingw32/lib/', | ||
| 16 | '/usr/lib/gcc/x86_64-w64-mingw32/7.3-posix/' | ||
| 17 | ] | ||
| 18 | |||
| 19 | missing = [] | ||
| 20 | |||
| 21 | |||
| 22 | def parse_imports(file_name): | ||
| 23 | results = [] | ||
| 24 | pe = pefile.PE(file_name, fast_load=True) | ||
| 25 | pe.parse_data_directories() | ||
| 26 | |||
| 27 | for entry in pe.DIRECTORY_ENTRY_IMPORT: | ||
| 28 | current = entry.dll.decode() | ||
| 29 | current_u = current.upper() # b/c Windows is often case insensitive | ||
| 30 | # here we filter out system dlls | ||
| 31 | # dll w/ names like *32.dll are likely to be system dlls | ||
| 32 | if current_u.upper() not in KNOWN_SYS_DLLS and not re.match(string=current_u, pattern=r'.*32\.DLL'): | ||
| 33 | results.append(current) | ||
| 34 | |||
| 35 | return results | ||
| 36 | |||
| 37 | |||
| 38 | def parse_imports_recursive(file_name, path_list=[]): | ||
| 39 | q = queue.Queue() # create a FIFO queue | ||
| 40 | # file_name can be a string or a list, for convenience | ||
| 41 | if isinstance(file_name, str): | ||
| 42 | q.put(file_name) | ||
| 43 | elif isinstance(file_name, list): | ||
| 44 | for i in file_name: | ||
| 45 | q.put(i) | ||
| 46 | full_list = [] | ||
| 47 | while q.qsize(): | ||
| 48 | current = q.get_nowait() | ||
| 49 | print('> %s' % current) | ||
| 50 | deps = parse_imports(current) | ||
| 51 | # if this dll does not have any import, ignore it | ||
| 52 | if not deps: | ||
| 53 | continue | ||
| 54 | for dep in deps: | ||
| 55 | # the dependency already included in the list, skip | ||
| 56 | if dep in full_list: | ||
| 57 | continue | ||
| 58 | # find the requested dll in the provided paths | ||
| 59 | full_path = find_dll(dep) | ||
| 60 | if not full_path: | ||
| 61 | missing.append(dep) | ||
| 62 | continue | ||
| 63 | full_list.append(dep) | ||
| 64 | q.put(full_path) | ||
| 65 | path_list.append(full_path) | ||
| 66 | return full_list | ||
| 67 | |||
| 68 | |||
| 69 | def find_dll(name): | ||
| 70 | for path in DLL_PATH: | ||
| 71 | for root, _, files in os.walk(path): | ||
| 72 | for f in files: | ||
| 73 | if name.lower() == f.lower(): | ||
| 74 | return os.path.join(root, f) | ||
| 75 | |||
| 76 | |||
| 77 | def deploy(name, dst, dry_run=False): | ||
| 78 | dlls_path = [] | ||
| 79 | parse_imports_recursive(name, dlls_path) | ||
| 80 | for dll_entry in dlls_path: | ||
| 81 | if not dry_run: | ||
| 82 | shutil.copy(dll_entry, dst) | ||
| 83 | else: | ||
| 84 | print('[Dry-Run] Copy %s to %s' % (dll_entry, dst)) | ||
| 85 | print('Deploy completed.') | ||
| 86 | return dlls_path | ||
| 87 | |||
| 88 | |||
| 89 | def main(): | ||
| 90 | if len(sys.argv) < 3: | ||
| 91 | print('Usage: %s [files to examine ...] [target deploy directory]') | ||
| 92 | return 1 | ||
| 93 | to_deploy = sys.argv[1:-1] | ||
| 94 | tgt_dir = sys.argv[-1] | ||
| 95 | if not os.path.isdir(tgt_dir): | ||
| 96 | print('%s is not a directory.' % tgt_dir) | ||
| 97 | return 1 | ||
| 98 | print('Scanning dependencies...') | ||
| 99 | deploy(to_deploy, tgt_dir) | ||
| 100 | if missing: | ||
| 101 | print('Following DLLs are not found: %s' % ('\n'.join(missing))) | ||
| 102 | return 0 | ||
| 103 | |||
| 104 | |||
| 105 | if __name__ == '__main__': | ||
| 106 | main() | ||
diff --git a/.ci/scripts/windows/upload.sh b/.ci/scripts/windows/upload.sh new file mode 100644 index 000000000..de73d3541 --- /dev/null +++ b/.ci/scripts/windows/upload.sh | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | |||
| 3 | . .ci/scripts/common/pre-upload.sh | ||
| 4 | |||
| 5 | REV_NAME="yuzu-windows-mingw-${GITDATE}-${GITREV}" | ||
| 6 | ARCHIVE_NAME="${REV_NAME}.tar.gz" | ||
| 7 | COMPRESSION_FLAGS="-czvf" | ||
| 8 | |||
| 9 | mkdir "$REV_NAME" | ||
| 10 | # get around the permission issues | ||
| 11 | cp -r package/* "$REV_NAME" | ||
| 12 | |||
| 13 | . .ci/scripts/common/post-upload.sh | ||
diff --git a/.ci/templates/build-single.yml b/.ci/templates/build-single.yml new file mode 100644 index 000000000..c411e25d1 --- /dev/null +++ b/.ci/templates/build-single.yml | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | parameters: | ||
| 2 | artifactSource: 'true' | ||
| 3 | |||
| 4 | steps: | ||
| 5 | - task: DockerInstaller@0 | ||
| 6 | displayName: 'Prepare Environment' | ||
| 7 | inputs: | ||
| 8 | dockerVersion: '17.09.0-ce' | ||
| 9 | - task: CacheBeta@0 | ||
| 10 | displayName: 'Cache Build System' | ||
| 11 | inputs: | ||
| 12 | key: yuzu-v1-$(BuildName)-$(BuildSuffix)-$(CacheSuffix) | ||
| 13 | path: $(System.DefaultWorkingDirectory)/ccache | ||
| 14 | cacheHitVar: CACHE_RESTORED | ||
| 15 | - script: chmod a+x ./.ci/scripts/$(ScriptFolder)/exec.sh && ./.ci/scripts/$(ScriptFolder)/exec.sh | ||
| 16 | displayName: 'Build' | ||
| 17 | - script: chmod a+x ./.ci/scripts/$(ScriptFolder)/upload.sh && RELEASE_NAME=$(BuildName) ./.ci/scripts/$(ScriptFolder)/upload.sh | ||
| 18 | displayName: 'Package Artifacts' | ||
| 19 | - publish: artifacts | ||
| 20 | artifact: 'yuzu-$(BuildName)-$(BuildSuffix)' | ||
| 21 | displayName: 'Upload Artifacts' | ||
diff --git a/.ci/templates/build-standard.yml b/.ci/templates/build-standard.yml new file mode 100644 index 000000000..6cd209dbf --- /dev/null +++ b/.ci/templates/build-standard.yml | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | jobs: | ||
| 2 | - job: build | ||
| 3 | displayName: 'standard' | ||
| 4 | pool: | ||
| 5 | vmImage: ubuntu-latest | ||
| 6 | strategy: | ||
| 7 | maxParallel: 10 | ||
| 8 | matrix: | ||
| 9 | windows: | ||
| 10 | BuildSuffix: 'windows-mingw' | ||
| 11 | ScriptFolder: 'windows' | ||
| 12 | linux: | ||
| 13 | BuildSuffix: 'linux' | ||
| 14 | ScriptFolder: 'linux' | ||
| 15 | steps: | ||
| 16 | - template: ./sync-source.yml | ||
| 17 | parameters: | ||
| 18 | artifactSource: $(parameters.artifactSource) | ||
| 19 | needSubmodules: 'true' | ||
| 20 | - template: ./build-single.yml | ||
| 21 | parameters: | ||
| 22 | artifactSource: 'false' \ No newline at end of file | ||
diff --git a/.ci/templates/build-testing.yml b/.ci/templates/build-testing.yml new file mode 100644 index 000000000..cb7736205 --- /dev/null +++ b/.ci/templates/build-testing.yml | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | jobs: | ||
| 2 | - job: build_test | ||
| 3 | displayName: 'testing' | ||
| 4 | pool: | ||
| 5 | vmImage: ubuntu-latest | ||
| 6 | strategy: | ||
| 7 | maxParallel: 5 | ||
| 8 | matrix: | ||
| 9 | windows: | ||
| 10 | BuildSuffix: 'windows-testing' | ||
| 11 | ScriptFolder: 'windows' | ||
| 12 | steps: | ||
| 13 | - script: pip install requests urllib3 | ||
| 14 | displayName: 'Prepare Environment' | ||
| 15 | - task: PythonScript@0 | ||
| 16 | condition: eq(variables['Build.Reason'], 'PullRequest') | ||
| 17 | displayName: 'Determine Testing Status' | ||
| 18 | inputs: | ||
| 19 | scriptSource: 'filePath' | ||
| 20 | scriptPath: '.ci/scripts/merge/check-label-presence.py' | ||
| 21 | arguments: '$(System.PullRequest.PullRequestNumber) create-testing-build' | ||
| 22 | - ${{ if eq(variables.enabletesting, 'true') }}: | ||
| 23 | - template: ./sync-source.yml | ||
| 24 | parameters: | ||
| 25 | artifactSource: $(parameters.artifactSource) | ||
| 26 | needSubmodules: 'true' | ||
| 27 | - template: ./mergebot.yml | ||
| 28 | parameters: | ||
| 29 | matchLabel: 'testing-merge' | ||
| 30 | - template: ./build-single.yml | ||
| 31 | parameters: | ||
| 32 | artifactSource: 'false' | ||
diff --git a/.ci/templates/format-check.yml b/.ci/templates/format-check.yml new file mode 100644 index 000000000..5061f1cb8 --- /dev/null +++ b/.ci/templates/format-check.yml | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | parameters: | ||
| 2 | artifactSource: 'true' | ||
| 3 | |||
| 4 | steps: | ||
| 5 | - template: ./sync-source.yml | ||
| 6 | parameters: | ||
| 7 | artifactSource: $(parameters.artifactSource) | ||
| 8 | needSubmodules: 'false' | ||
| 9 | - task: DockerInstaller@0 | ||
| 10 | displayName: 'Prepare Environment' | ||
| 11 | inputs: | ||
| 12 | dockerVersion: '17.09.0-ce' | ||
| 13 | - script: chmod a+x ./.ci/scripts/format/exec.sh && ./.ci/scripts/format/exec.sh | ||
| 14 | displayName: 'Verify Formatting' | ||
diff --git a/.ci/templates/merge.yml b/.ci/templates/merge.yml new file mode 100644 index 000000000..efc82778a --- /dev/null +++ b/.ci/templates/merge.yml | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | jobs: | ||
| 2 | - job: merge | ||
| 3 | displayName: 'pull requests' | ||
| 4 | steps: | ||
| 5 | - checkout: self | ||
| 6 | submodules: recursive | ||
| 7 | - template: ./mergebot.yml | ||
| 8 | parameters: | ||
| 9 | matchLabel: '$(BuildName)-merge' | ||
| 10 | - task: ArchiveFiles@2 | ||
| 11 | displayName: 'Package Source' | ||
| 12 | inputs: | ||
| 13 | rootFolderOrFile: '$(System.DefaultWorkingDirectory)' | ||
| 14 | includeRootFolder: false | ||
| 15 | archiveType: '7z' | ||
| 16 | archiveFile: '$(Build.ArtifactStagingDirectory)/yuzu-$(BuildName)-source.7z' | ||
| 17 | - task: PublishPipelineArtifact@1 | ||
| 18 | displayName: 'Upload Artifacts' | ||
| 19 | inputs: | ||
| 20 | targetPath: '$(Build.ArtifactStagingDirectory)/yuzu-$(BuildName)-source.7z' | ||
| 21 | artifact: 'yuzu-$(BuildName)-source' | ||
| 22 | replaceExistingArchive: true | ||
| 23 | - job: upload_source | ||
| 24 | displayName: 'upload' | ||
| 25 | dependsOn: merge | ||
| 26 | steps: | ||
| 27 | - template: ./sync-source.yml | ||
| 28 | parameters: | ||
| 29 | artifactSource: 'true' | ||
| 30 | needSubmodules: 'true' | ||
| 31 | - script: chmod a+x $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh && $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh | ||
| 32 | displayName: 'Apply Git Configuration' | ||
| 33 | - script: git tag -a $(BuildName)-$(Build.BuildId) -m "yuzu $(BuildName) $(Build.BuildNumber) $(Build.DefinitionName)" | ||
| 34 | displayName: 'Tag Source' | ||
| 35 | - script: git remote add other $(GitRepoPushChangesURL) | ||
| 36 | displayName: 'Register Repository' | ||
| 37 | - script: git push --follow-tags --force other HEAD:$(GitPushBranch) | ||
| 38 | displayName: 'Update Code' | ||
| 39 | - script: git rev-list -n 1 $(BuildName)-$(Build.BuildId) > $(Build.ArtifactStagingDirectory)/tag-commit.sha | ||
| 40 | displayName: 'Calculate Release Point' | ||
| 41 | - task: PublishPipelineArtifact@1 | ||
| 42 | displayName: 'Upload Release Point' | ||
| 43 | inputs: | ||
| 44 | targetPath: '$(Build.ArtifactStagingDirectory)/tag-commit.sha' | ||
| 45 | artifact: 'yuzu-$(BuildName)-release-point' | ||
| 46 | replaceExistingArchive: true \ No newline at end of file | ||
diff --git a/.ci/templates/mergebot.yml b/.ci/templates/mergebot.yml new file mode 100644 index 000000000..5211efcc6 --- /dev/null +++ b/.ci/templates/mergebot.yml | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | parameters: | ||
| 2 | matchLabel: 'dummy-merge' | ||
| 3 | |||
| 4 | steps: | ||
| 5 | - script: mkdir $(System.DefaultWorkingDirectory)/patches && pip install requests urllib3 | ||
| 6 | displayName: 'Prepare Environment' | ||
| 7 | - script: chmod a+x $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh && $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh | ||
| 8 | displayName: 'Apply Git Configuration' | ||
| 9 | - task: PythonScript@0 | ||
| 10 | displayName: 'Discover, Download, and Apply Patches' | ||
| 11 | inputs: | ||
| 12 | scriptSource: 'filePath' | ||
| 13 | scriptPath: '.ci/scripts/merge/apply-patches-by-label.py' | ||
| 14 | arguments: '${{ parameters.matchLabel }} patches' | ||
| 15 | workingDirectory: '$(System.DefaultWorkingDirectory)' | ||
diff --git a/.ci/templates/retrieve-artifact-source.yml b/.ci/templates/retrieve-artifact-source.yml new file mode 100644 index 000000000..47d217e7b --- /dev/null +++ b/.ci/templates/retrieve-artifact-source.yml | |||
| @@ -0,0 +1,16 @@ | |||
| 1 | steps: | ||
| 2 | - checkout: none | ||
| 3 | - task: DownloadPipelineArtifact@2 | ||
| 4 | displayName: 'Download Source' | ||
| 5 | inputs: | ||
| 6 | artifactName: 'yuzu-$(BuildName)-source' | ||
| 7 | buildType: 'current' | ||
| 8 | targetPath: '$(Build.ArtifactStagingDirectory)' | ||
| 9 | - script: rm -rf $(System.DefaultWorkingDirectory) && mkdir $(System.DefaultWorkingDirectory) | ||
| 10 | displayName: 'Clean Working Directory' | ||
| 11 | - task: ExtractFiles@1 | ||
| 12 | displayName: 'Prepare Source' | ||
| 13 | inputs: | ||
| 14 | archiveFilePatterns: '$(Build.ArtifactStagingDirectory)/*.7z' | ||
| 15 | destinationFolder: '$(System.DefaultWorkingDirectory)' | ||
| 16 | cleanDestinationFolder: false \ No newline at end of file | ||
diff --git a/.ci/templates/retrieve-master-source.yml b/.ci/templates/retrieve-master-source.yml new file mode 100644 index 000000000..a08a3f926 --- /dev/null +++ b/.ci/templates/retrieve-master-source.yml | |||
| @@ -0,0 +1,11 @@ | |||
| 1 | parameters: | ||
| 2 | needSubmodules: 'true' | ||
| 3 | |||
| 4 | steps: | ||
| 5 | - checkout: self | ||
| 6 | displayName: 'Checkout Recursive' | ||
| 7 | submodules: recursive | ||
| 8 | # condition: eq(parameters.needSubmodules, 'true') | ||
| 9 | #- checkout: self | ||
| 10 | # displayName: 'Checkout Fast' | ||
| 11 | # condition: ne(parameters.needSubmodules, 'true') | ||
diff --git a/.ci/templates/sync-source.yml b/.ci/templates/sync-source.yml new file mode 100644 index 000000000..409e1cd83 --- /dev/null +++ b/.ci/templates/sync-source.yml | |||
| @@ -0,0 +1,7 @@ | |||
| 1 | steps: | ||
| 2 | - ${{ if eq(parameters.artifactSource, 'true') }}: | ||
| 3 | - template: ./retrieve-artifact-source.yml | ||
| 4 | - ${{ if ne(parameters.artifactSource, 'true') }}: | ||
| 5 | - template: ./retrieve-master-source.yml | ||
| 6 | parameters: | ||
| 7 | needSubmodules: $(parameters.needSubmodules) \ No newline at end of file | ||
diff --git a/.ci/yuzu-mainline.yml b/.ci/yuzu-mainline.yml new file mode 100644 index 000000000..164bcb165 --- /dev/null +++ b/.ci/yuzu-mainline.yml | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | trigger: | ||
| 2 | - master | ||
| 3 | |||
| 4 | stages: | ||
| 5 | - stage: merge | ||
| 6 | displayName: 'merge' | ||
| 7 | jobs: | ||
| 8 | - template: ./templates/merge.yml | ||
| 9 | - stage: format | ||
| 10 | dependsOn: merge | ||
| 11 | displayName: 'format' | ||
| 12 | jobs: | ||
| 13 | - job: format | ||
| 14 | displayName: 'clang' | ||
| 15 | pool: | ||
| 16 | vmImage: ubuntu-latest | ||
| 17 | steps: | ||
| 18 | - template: ./templates/format-check.yml | ||
| 19 | - stage: build | ||
| 20 | displayName: 'build' | ||
| 21 | dependsOn: format | ||
| 22 | jobs: | ||
| 23 | - template: ./templates/build-standard.yml | ||
diff --git a/.ci/yuzu-patreon.yml b/.ci/yuzu-patreon.yml new file mode 100644 index 000000000..aa912913d --- /dev/null +++ b/.ci/yuzu-patreon.yml | |||
| @@ -0,0 +1,19 @@ | |||
| 1 | # Starter pipeline | ||
| 2 | # Start with a minimal pipeline that you can customize to build and deploy your code. | ||
| 3 | # Add steps that build, run tests, deploy, and more: | ||
| 4 | # https://aka.ms/yaml | ||
| 5 | |||
| 6 | trigger: | ||
| 7 | - master | ||
| 8 | |||
| 9 | pool: | ||
| 10 | vmImage: 'ubuntu-latest' | ||
| 11 | |||
| 12 | steps: | ||
| 13 | - script: echo Hello, world! | ||
| 14 | displayName: 'Run a one-line script' | ||
| 15 | |||
| 16 | - script: | | ||
| 17 | echo Add other tasks to build, test, and deploy your project. | ||
| 18 | echo See https://aka.ms/yaml | ||
| 19 | displayName: 'Run a multi-line script' | ||
diff --git a/.ci/yuzu-repo-sync.yml b/.ci/yuzu-repo-sync.yml new file mode 100644 index 000000000..602e298a6 --- /dev/null +++ b/.ci/yuzu-repo-sync.yml | |||
| @@ -0,0 +1,19 @@ | |||
| 1 | trigger: | ||
| 2 | - master | ||
| 3 | |||
| 4 | jobs: | ||
| 5 | - job: copy | ||
| 6 | displayName: 'Sync Repository' | ||
| 7 | pool: | ||
| 8 | vmImage: 'ubuntu-latest' | ||
| 9 | steps: | ||
| 10 | - script: echo 'https://$(GitUsername):$(GitAccessToken)@dev.azure.com' > $HOME/.git-credentials | ||
| 11 | displayName: 'Load Credentials' | ||
| 12 | - script: git config --global credential.helper store | ||
| 13 | displayName: 'Register Credential Helper' | ||
| 14 | - script: git remote add other $(GitRepoPushChangesURL) | ||
| 15 | displayName: 'Register Repository' | ||
| 16 | - script: git push --force other HEAD:$(GitPushBranch) | ||
| 17 | displayName: 'Update Code' | ||
| 18 | - script: rm -rf $HOME/.git-credentials | ||
| 19 | displayName: 'Clear Cached Credentials' | ||
diff --git a/.ci/yuzu-verify.yml b/.ci/yuzu-verify.yml new file mode 100644 index 000000000..d01c1feed --- /dev/null +++ b/.ci/yuzu-verify.yml | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | stages: | ||
| 2 | - stage: format | ||
| 3 | displayName: 'format' | ||
| 4 | jobs: | ||
| 5 | - job: format | ||
| 6 | displayName: 'clang' | ||
| 7 | pool: | ||
| 8 | vmImage: ubuntu-latest | ||
| 9 | steps: | ||
| 10 | - template: ./templates/format-check.yml | ||
| 11 | parameters: | ||
| 12 | artifactSource: 'false' | ||
| 13 | - stage: build | ||
| 14 | displayName: 'build' | ||
| 15 | dependsOn: format | ||
| 16 | jobs: | ||
| 17 | - template: ./templates/build-standard.yml | ||
| 18 | - template: ./templates/build-testing.yml \ No newline at end of file | ||
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index dd65cfe42..abdc74428 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake | |||
| @@ -82,6 +82,8 @@ set(HASH_FILES | |||
| 82 | "${VIDEO_CORE}/shader/decode/shift.cpp" | 82 | "${VIDEO_CORE}/shader/decode/shift.cpp" |
| 83 | "${VIDEO_CORE}/shader/decode/video.cpp" | 83 | "${VIDEO_CORE}/shader/decode/video.cpp" |
| 84 | "${VIDEO_CORE}/shader/decode/xmad.cpp" | 84 | "${VIDEO_CORE}/shader/decode/xmad.cpp" |
| 85 | "${VIDEO_CORE}/shader/control_flow.cpp" | ||
| 86 | "${VIDEO_CORE}/shader/control_flow.h" | ||
| 85 | "${VIDEO_CORE}/shader/decode.cpp" | 87 | "${VIDEO_CORE}/shader/decode.cpp" |
| 86 | "${VIDEO_CORE}/shader/node.h" | 88 | "${VIDEO_CORE}/shader/node.h" |
| 87 | "${VIDEO_CORE}/shader/node_helper.cpp" | 89 | "${VIDEO_CORE}/shader/node_helper.cpp" |
| @@ -2,6 +2,7 @@ yuzu emulator | |||
| 2 | ============= | 2 | ============= |
| 3 | [](https://travis-ci.org/yuzu-emu/yuzu) | 3 | [](https://travis-ci.org/yuzu-emu/yuzu) |
| 4 | [](https://ci.appveyor.com/project/bunnei/yuzu) | 4 | [](https://ci.appveyor.com/project/bunnei/yuzu) |
| 5 | [](https://dev.azure.com/yuzu-emu/yuzu/) | ||
| 5 | 6 | ||
| 6 | yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/). | 7 | yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/). |
| 7 | 8 | ||
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index 9a0939883..da50a0bbc 100644 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp | |||
| @@ -73,13 +73,15 @@ private: | |||
| 73 | EffectInStatus info{}; | 73 | EffectInStatus info{}; |
| 74 | }; | 74 | }; |
| 75 | AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params, | 75 | AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params, |
| 76 | Kernel::SharedPtr<Kernel::WritableEvent> buffer_event) | 76 | Kernel::SharedPtr<Kernel::WritableEvent> buffer_event, |
| 77 | std::size_t instance_number) | ||
| 77 | : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count), | 78 | : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count), |
| 78 | effects(params.effect_count) { | 79 | effects(params.effect_count) { |
| 79 | 80 | ||
| 80 | audio_out = std::make_unique<AudioCore::AudioOut>(); | 81 | audio_out = std::make_unique<AudioCore::AudioOut>(); |
| 81 | stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, | 82 | stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, |
| 82 | "AudioRenderer", [=]() { buffer_event->Signal(); }); | 83 | fmt::format("AudioRenderer-Instance{}", instance_number), |
| 84 | [=]() { buffer_event->Signal(); }); | ||
| 83 | audio_out->StartStream(stream); | 85 | audio_out->StartStream(stream); |
| 84 | 86 | ||
| 85 | QueueMixedBuffer(0); | 87 | QueueMixedBuffer(0); |
| @@ -217,13 +219,15 @@ std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(std::size_t sample_co | |||
| 217 | if (offset == samples.size()) { | 219 | if (offset == samples.size()) { |
| 218 | offset = 0; | 220 | offset = 0; |
| 219 | 221 | ||
| 220 | if (!wave_buffer.is_looping) { | 222 | if (!wave_buffer.is_looping && wave_buffer.buffer_sz) { |
| 221 | SetWaveIndex(wave_index + 1); | 223 | SetWaveIndex(wave_index + 1); |
| 222 | } | 224 | } |
| 223 | 225 | ||
| 224 | out_status.wave_buffer_consumed++; | 226 | if (wave_buffer.buffer_sz) { |
| 227 | out_status.wave_buffer_consumed++; | ||
| 228 | } | ||
| 225 | 229 | ||
| 226 | if (wave_buffer.end_of_stream) { | 230 | if (wave_buffer.end_of_stream || wave_buffer.buffer_sz == 0) { |
| 227 | info.play_state = PlayState::Paused; | 231 | info.play_state = PlayState::Paused; |
| 228 | } | 232 | } |
| 229 | } | 233 | } |
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index b2e5d336c..45afbe759 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h | |||
| @@ -215,7 +215,8 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size | |||
| 215 | class AudioRenderer { | 215 | class AudioRenderer { |
| 216 | public: | 216 | public: |
| 217 | AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params, | 217 | AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params, |
| 218 | Kernel::SharedPtr<Kernel::WritableEvent> buffer_event); | 218 | Kernel::SharedPtr<Kernel::WritableEvent> buffer_event, |
| 219 | std::size_t instance_number); | ||
| 219 | ~AudioRenderer(); | 220 | ~AudioRenderer(); |
| 220 | 221 | ||
| 221 | std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params); | 222 | std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params); |
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2554add28..2b4266f29 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -56,6 +56,8 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 56 | "${VIDEO_CORE}/shader/decode/shift.cpp" | 56 | "${VIDEO_CORE}/shader/decode/shift.cpp" |
| 57 | "${VIDEO_CORE}/shader/decode/video.cpp" | 57 | "${VIDEO_CORE}/shader/decode/video.cpp" |
| 58 | "${VIDEO_CORE}/shader/decode/xmad.cpp" | 58 | "${VIDEO_CORE}/shader/decode/xmad.cpp" |
| 59 | "${VIDEO_CORE}/shader/control_flow.cpp" | ||
| 60 | "${VIDEO_CORE}/shader/control_flow.h" | ||
| 59 | "${VIDEO_CORE}/shader/decode.cpp" | 61 | "${VIDEO_CORE}/shader/decode.cpp" |
| 60 | "${VIDEO_CORE}/shader/node.h" | 62 | "${VIDEO_CORE}/shader/node.h" |
| 61 | "${VIDEO_CORE}/shader/node_helper.cpp" | 63 | "${VIDEO_CORE}/shader/node_helper.cpp" |
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 12f06a189..5462decee 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt | |||
| @@ -210,6 +210,8 @@ add_library(core STATIC | |||
| 210 | hle/service/aoc/aoc_u.h | 210 | hle/service/aoc/aoc_u.h |
| 211 | hle/service/apm/apm.cpp | 211 | hle/service/apm/apm.cpp |
| 212 | hle/service/apm/apm.h | 212 | hle/service/apm/apm.h |
| 213 | hle/service/apm/controller.cpp | ||
| 214 | hle/service/apm/controller.h | ||
| 213 | hle/service/apm/interface.cpp | 215 | hle/service/apm/interface.cpp |
| 214 | hle/service/apm/interface.h | 216 | hle/service/apm/interface.h |
| 215 | hle/service/audio/audctl.cpp | 217 | hle/service/audio/audctl.cpp |
| @@ -295,6 +297,7 @@ add_library(core STATIC | |||
| 295 | hle/service/hid/irs.h | 297 | hle/service/hid/irs.h |
| 296 | hle/service/hid/xcd.cpp | 298 | hle/service/hid/xcd.cpp |
| 297 | hle/service/hid/xcd.h | 299 | hle/service/hid/xcd.h |
| 300 | hle/service/hid/errors.h | ||
| 298 | hle/service/hid/controllers/controller_base.cpp | 301 | hle/service/hid/controllers/controller_base.cpp |
| 299 | hle/service/hid/controllers/controller_base.h | 302 | hle/service/hid/controllers/controller_base.h |
| 300 | hle/service/hid/controllers/debug_pad.cpp | 303 | hle/service/hid/controllers/debug_pad.cpp |
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index c6691a8e1..45e94e625 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h | |||
| @@ -44,13 +44,6 @@ public: | |||
| 44 | /// Step CPU by one instruction | 44 | /// Step CPU by one instruction |
| 45 | virtual void Step() = 0; | 45 | virtual void Step() = 0; |
| 46 | 46 | ||
| 47 | /// Maps a backing memory region for the CPU | ||
| 48 | virtual void MapBackingMemory(VAddr address, std::size_t size, u8* memory, | ||
| 49 | Kernel::VMAPermission perms) = 0; | ||
| 50 | |||
| 51 | /// Unmaps a region of memory that was previously mapped using MapBackingMemory | ||
| 52 | virtual void UnmapMemory(VAddr address, std::size_t size) = 0; | ||
| 53 | |||
| 54 | /// Clear all instruction cache | 47 | /// Clear all instruction cache |
| 55 | virtual void ClearInstructionCache() = 0; | 48 | virtual void ClearInstructionCache() = 0; |
| 56 | 49 | ||
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index 44307fa19..f1506b372 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp | |||
| @@ -177,15 +177,6 @@ ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, | |||
| 177 | 177 | ||
| 178 | ARM_Dynarmic::~ARM_Dynarmic() = default; | 178 | ARM_Dynarmic::~ARM_Dynarmic() = default; |
| 179 | 179 | ||
| 180 | void ARM_Dynarmic::MapBackingMemory(u64 address, std::size_t size, u8* memory, | ||
| 181 | Kernel::VMAPermission perms) { | ||
| 182 | inner_unicorn.MapBackingMemory(address, size, memory, perms); | ||
| 183 | } | ||
| 184 | |||
| 185 | void ARM_Dynarmic::UnmapMemory(u64 address, std::size_t size) { | ||
| 186 | inner_unicorn.UnmapMemory(address, size); | ||
| 187 | } | ||
| 188 | |||
| 189 | void ARM_Dynarmic::SetPC(u64 pc) { | 180 | void ARM_Dynarmic::SetPC(u64 pc) { |
| 190 | jit->SetPC(pc); | 181 | jit->SetPC(pc); |
| 191 | } | 182 | } |
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h index b701e97a3..504d46c68 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic.h | |||
| @@ -23,9 +23,6 @@ public: | |||
| 23 | ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); | 23 | ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); |
| 24 | ~ARM_Dynarmic() override; | 24 | ~ARM_Dynarmic() override; |
| 25 | 25 | ||
| 26 | void MapBackingMemory(VAddr address, std::size_t size, u8* memory, | ||
| 27 | Kernel::VMAPermission perms) override; | ||
| 28 | void UnmapMemory(u64 address, std::size_t size) override; | ||
| 29 | void SetPC(u64 pc) override; | 26 | void SetPC(u64 pc) override; |
| 30 | u64 GetPC() const override; | 27 | u64 GetPC() const override; |
| 31 | u64 GetReg(int index) const override; | 28 | u64 GetReg(int index) const override; |
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index 4e07fe8b5..97d5c2a8a 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp | |||
| @@ -50,11 +50,14 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_ | |||
| 50 | 50 | ||
| 51 | static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value, | 51 | static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value, |
| 52 | void* user_data) { | 52 | void* user_data) { |
| 53 | auto* const system = static_cast<System*>(user_data); | ||
| 54 | |||
| 53 | ARM_Interface::ThreadContext ctx{}; | 55 | ARM_Interface::ThreadContext ctx{}; |
| 54 | Core::CurrentArmInterface().SaveContext(ctx); | 56 | system->CurrentArmInterface().SaveContext(ctx); |
| 55 | ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr, | 57 | ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr, |
| 56 | ctx.pc, ctx.cpu_registers[30]); | 58 | ctx.pc, ctx.cpu_registers[30]); |
| 57 | return {}; | 59 | |
| 60 | return false; | ||
| 58 | } | 61 | } |
| 59 | 62 | ||
| 60 | ARM_Unicorn::ARM_Unicorn(System& system) : system{system} { | 63 | ARM_Unicorn::ARM_Unicorn(System& system) : system{system} { |
| @@ -65,7 +68,7 @@ ARM_Unicorn::ARM_Unicorn(System& system) : system{system} { | |||
| 65 | 68 | ||
| 66 | uc_hook hook{}; | 69 | uc_hook hook{}; |
| 67 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1)); | 70 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1)); |
| 68 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1)); | 71 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, &system, 0, -1)); |
| 69 | if (GDBStub::IsServerEnabled()) { | 72 | if (GDBStub::IsServerEnabled()) { |
| 70 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1)); | 73 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1)); |
| 71 | last_bkpt_hit = false; | 74 | last_bkpt_hit = false; |
| @@ -76,15 +79,6 @@ ARM_Unicorn::~ARM_Unicorn() { | |||
| 76 | CHECKED(uc_close(uc)); | 79 | CHECKED(uc_close(uc)); |
| 77 | } | 80 | } |
| 78 | 81 | ||
| 79 | void ARM_Unicorn::MapBackingMemory(VAddr address, std::size_t size, u8* memory, | ||
| 80 | Kernel::VMAPermission perms) { | ||
| 81 | CHECKED(uc_mem_map_ptr(uc, address, size, static_cast<u32>(perms), memory)); | ||
| 82 | } | ||
| 83 | |||
| 84 | void ARM_Unicorn::UnmapMemory(VAddr address, std::size_t size) { | ||
| 85 | CHECKED(uc_mem_unmap(uc, address, size)); | ||
| 86 | } | ||
| 87 | |||
| 88 | void ARM_Unicorn::SetPC(u64 pc) { | 82 | void ARM_Unicorn::SetPC(u64 pc) { |
| 89 | CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &pc)); | 83 | CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &pc)); |
| 90 | } | 84 | } |
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index 34e974b4d..fe2ffd70c 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h | |||
| @@ -18,9 +18,6 @@ public: | |||
| 18 | explicit ARM_Unicorn(System& system); | 18 | explicit ARM_Unicorn(System& system); |
| 19 | ~ARM_Unicorn() override; | 19 | ~ARM_Unicorn() override; |
| 20 | 20 | ||
| 21 | void MapBackingMemory(VAddr address, std::size_t size, u8* memory, | ||
| 22 | Kernel::VMAPermission perms) override; | ||
| 23 | void UnmapMemory(VAddr address, std::size_t size) override; | ||
| 24 | void SetPC(u64 pc) override; | 21 | void SetPC(u64 pc) override; |
| 25 | u64 GetPC() const override; | 22 | u64 GetPC() const override; |
| 26 | u64 GetReg(int index) const override; | 23 | u64 GetReg(int index) const override; |
diff --git a/src/core/core.cpp b/src/core/core.cpp index d7f43f5ec..20d64f3b0 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include "core/hle/kernel/scheduler.h" | 26 | #include "core/hle/kernel/scheduler.h" |
| 27 | #include "core/hle/kernel/thread.h" | 27 | #include "core/hle/kernel/thread.h" |
| 28 | #include "core/hle/service/am/applets/applets.h" | 28 | #include "core/hle/service/am/applets/applets.h" |
| 29 | #include "core/hle/service/apm/controller.h" | ||
| 29 | #include "core/hle/service/glue/manager.h" | 30 | #include "core/hle/service/glue/manager.h" |
| 30 | #include "core/hle/service/service.h" | 31 | #include "core/hle/service/service.h" |
| 31 | #include "core/hle/service/sm/sm.h" | 32 | #include "core/hle/service/sm/sm.h" |
| @@ -144,7 +145,7 @@ struct System::Impl { | |||
| 144 | telemetry_session = std::make_unique<Core::TelemetrySession>(); | 145 | telemetry_session = std::make_unique<Core::TelemetrySession>(); |
| 145 | service_manager = std::make_shared<Service::SM::ServiceManager>(); | 146 | service_manager = std::make_shared<Service::SM::ServiceManager>(); |
| 146 | 147 | ||
| 147 | Service::Init(service_manager, system, *virtual_filesystem); | 148 | Service::Init(service_manager, system); |
| 148 | GDBStub::Init(); | 149 | GDBStub::Init(); |
| 149 | 150 | ||
| 150 | renderer = VideoCore::CreateRenderer(emu_window, system); | 151 | renderer = VideoCore::CreateRenderer(emu_window, system); |
| @@ -308,6 +309,9 @@ struct System::Impl { | |||
| 308 | /// Frontend applets | 309 | /// Frontend applets |
| 309 | Service::AM::Applets::AppletManager applet_manager; | 310 | Service::AM::Applets::AppletManager applet_manager; |
| 310 | 311 | ||
| 312 | /// APM (Performance) services | ||
| 313 | Service::APM::Controller apm_controller{core_timing}; | ||
| 314 | |||
| 311 | /// Glue services | 315 | /// Glue services |
| 312 | Service::Glue::ARPManager arp_manager; | 316 | Service::Glue::ARPManager arp_manager; |
| 313 | 317 | ||
| @@ -578,6 +582,14 @@ const Service::Glue::ARPManager& System::GetARPManager() const { | |||
| 578 | return impl->arp_manager; | 582 | return impl->arp_manager; |
| 579 | } | 583 | } |
| 580 | 584 | ||
| 585 | Service::APM::Controller& System::GetAPMController() { | ||
| 586 | return impl->apm_controller; | ||
| 587 | } | ||
| 588 | |||
| 589 | const Service::APM::Controller& System::GetAPMController() const { | ||
| 590 | return impl->apm_controller; | ||
| 591 | } | ||
| 592 | |||
| 581 | System::ResultStatus System::Init(Frontend::EmuWindow& emu_window) { | 593 | System::ResultStatus System::Init(Frontend::EmuWindow& emu_window) { |
| 582 | return impl->Init(*this, emu_window); | 594 | return impl->Init(*this, emu_window); |
| 583 | } | 595 | } |
diff --git a/src/core/core.h b/src/core/core.h index 53e6fdb7b..0138d93b0 100644 --- a/src/core/core.h +++ b/src/core/core.h | |||
| @@ -43,6 +43,10 @@ struct AppletFrontendSet; | |||
| 43 | class AppletManager; | 43 | class AppletManager; |
| 44 | } // namespace AM::Applets | 44 | } // namespace AM::Applets |
| 45 | 45 | ||
| 46 | namespace APM { | ||
| 47 | class Controller; | ||
| 48 | } | ||
| 49 | |||
| 46 | namespace Glue { | 50 | namespace Glue { |
| 47 | class ARPManager; | 51 | class ARPManager; |
| 48 | } | 52 | } |
| @@ -306,6 +310,10 @@ public: | |||
| 306 | 310 | ||
| 307 | const Service::Glue::ARPManager& GetARPManager() const; | 311 | const Service::Glue::ARPManager& GetARPManager() const; |
| 308 | 312 | ||
| 313 | Service::APM::Controller& GetAPMController(); | ||
| 314 | |||
| 315 | const Service::APM::Controller& GetAPMController() const; | ||
| 316 | |||
| 309 | private: | 317 | private: |
| 310 | System(); | 318 | System(); |
| 311 | 319 | ||
| @@ -329,10 +337,6 @@ private: | |||
| 329 | static System s_instance; | 337 | static System s_instance; |
| 330 | }; | 338 | }; |
| 331 | 339 | ||
| 332 | inline ARM_Interface& CurrentArmInterface() { | ||
| 333 | return System::GetInstance().CurrentArmInterface(); | ||
| 334 | } | ||
| 335 | |||
| 336 | inline Kernel::Process* CurrentProcess() { | 340 | inline Kernel::Process* CurrentProcess() { |
| 337 | return System::GetInstance().CurrentProcess(); | 341 | return System::GetInstance().CurrentProcess(); |
| 338 | } | 342 | } |
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp index 99b7d387d..21c410e34 100644 --- a/src/core/core_cpu.cpp +++ b/src/core/core_cpu.cpp | |||
| @@ -53,16 +53,12 @@ bool CpuBarrier::Rendezvous() { | |||
| 53 | Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, | 53 | Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, |
| 54 | std::size_t core_index) | 54 | std::size_t core_index) |
| 55 | : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} { | 55 | : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} { |
| 56 | if (Settings::values.cpu_jit_enabled) { | ||
| 57 | #ifdef ARCHITECTURE_x86_64 | 56 | #ifdef ARCHITECTURE_x86_64 |
| 58 | arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index); | 57 | arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index); |
| 59 | #else | 58 | #else |
| 60 | arm_interface = std::make_unique<ARM_Unicorn>(system); | 59 | arm_interface = std::make_unique<ARM_Unicorn>(system); |
| 61 | LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); | 60 | LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); |
| 62 | #endif | 61 | #endif |
| 63 | } else { | ||
| 64 | arm_interface = std::make_unique<ARM_Unicorn>(system); | ||
| 65 | } | ||
| 66 | 62 | ||
| 67 | scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface); | 63 | scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface); |
| 68 | } | 64 | } |
| @@ -70,15 +66,12 @@ Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_ba | |||
| 70 | Cpu::~Cpu() = default; | 66 | Cpu::~Cpu() = default; |
| 71 | 67 | ||
| 72 | std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(std::size_t num_cores) { | 68 | std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(std::size_t num_cores) { |
| 73 | if (Settings::values.cpu_jit_enabled) { | ||
| 74 | #ifdef ARCHITECTURE_x86_64 | 69 | #ifdef ARCHITECTURE_x86_64 |
| 75 | return std::make_unique<DynarmicExclusiveMonitor>(num_cores); | 70 | return std::make_unique<DynarmicExclusiveMonitor>(num_cores); |
| 76 | #else | 71 | #else |
| 77 | return nullptr; // TODO(merry): Passthrough exclusive monitor | 72 | // TODO(merry): Passthrough exclusive monitor |
| 73 | return nullptr; | ||
| 78 | #endif | 74 | #endif |
| 79 | } else { | ||
| 80 | return nullptr; // TODO(merry): Passthrough exclusive monitor | ||
| 81 | } | ||
| 82 | } | 75 | } |
| 83 | 76 | ||
| 84 | void Cpu::RunLoop(bool tight_loop) { | 77 | void Cpu::RunLoop(bool tight_loop) { |
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp index eb76174c5..7310b3602 100644 --- a/src/core/file_sys/program_metadata.cpp +++ b/src/core/file_sys/program_metadata.cpp | |||
| @@ -94,6 +94,10 @@ u64 ProgramMetadata::GetFilesystemPermissions() const { | |||
| 94 | return aci_file_access.permissions; | 94 | return aci_file_access.permissions; |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | u32 ProgramMetadata::GetSystemResourceSize() const { | ||
| 98 | return npdm_header.system_resource_size; | ||
| 99 | } | ||
| 100 | |||
| 97 | const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const { | 101 | const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const { |
| 98 | return aci_kernel_capabilities; | 102 | return aci_kernel_capabilities; |
| 99 | } | 103 | } |
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h index 43bf2820a..88ec97d85 100644 --- a/src/core/file_sys/program_metadata.h +++ b/src/core/file_sys/program_metadata.h | |||
| @@ -58,6 +58,7 @@ public: | |||
| 58 | u32 GetMainThreadStackSize() const; | 58 | u32 GetMainThreadStackSize() const; |
| 59 | u64 GetTitleID() const; | 59 | u64 GetTitleID() const; |
| 60 | u64 GetFilesystemPermissions() const; | 60 | u64 GetFilesystemPermissions() const; |
| 61 | u32 GetSystemResourceSize() const; | ||
| 61 | const KernelCapabilityDescriptors& GetKernelCapabilities() const; | 62 | const KernelCapabilityDescriptors& GetKernelCapabilities() const; |
| 62 | 63 | ||
| 63 | void Print() const; | 64 | void Print() const; |
| @@ -76,7 +77,8 @@ private: | |||
| 76 | u8 reserved_3; | 77 | u8 reserved_3; |
| 77 | u8 main_thread_priority; | 78 | u8 main_thread_priority; |
| 78 | u8 main_thread_cpu; | 79 | u8 main_thread_cpu; |
| 79 | std::array<u8, 8> reserved_4; | 80 | std::array<u8, 4> reserved_4; |
| 81 | u32_le system_resource_size; | ||
| 80 | u32_le process_category; | 82 | u32_le process_category; |
| 81 | u32_le main_stack_size; | 83 | u32_le main_stack_size; |
| 82 | std::array<u8, 0x10> application_name; | 84 | std::array<u8, 0x10> application_name; |
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index f45ef05f6..92169a97b 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp | |||
| @@ -129,20 +129,17 @@ u64 Process::GetTotalPhysicalMemoryAvailable() const { | |||
| 129 | return vm_manager.GetTotalPhysicalMemoryAvailable(); | 129 | return vm_manager.GetTotalPhysicalMemoryAvailable(); |
| 130 | } | 130 | } |
| 131 | 131 | ||
| 132 | u64 Process::GetTotalPhysicalMemoryAvailableWithoutMmHeap() const { | 132 | u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const { |
| 133 | // TODO: Subtract the personal heap size from this when the | 133 | return GetTotalPhysicalMemoryAvailable() - GetSystemResourceSize(); |
| 134 | // personal heap is implemented. | ||
| 135 | return GetTotalPhysicalMemoryAvailable(); | ||
| 136 | } | 134 | } |
| 137 | 135 | ||
| 138 | u64 Process::GetTotalPhysicalMemoryUsed() const { | 136 | u64 Process::GetTotalPhysicalMemoryUsed() const { |
| 139 | return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size; | 137 | return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size + |
| 138 | GetSystemResourceUsage(); | ||
| 140 | } | 139 | } |
| 141 | 140 | ||
| 142 | u64 Process::GetTotalPhysicalMemoryUsedWithoutMmHeap() const { | 141 | u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const { |
| 143 | // TODO: Subtract the personal heap size from this when the | 142 | return GetTotalPhysicalMemoryUsed() - GetSystemResourceUsage(); |
| 144 | // personal heap is implemented. | ||
| 145 | return GetTotalPhysicalMemoryUsed(); | ||
| 146 | } | 143 | } |
| 147 | 144 | ||
| 148 | void Process::RegisterThread(const Thread* thread) { | 145 | void Process::RegisterThread(const Thread* thread) { |
| @@ -172,6 +169,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { | |||
| 172 | program_id = metadata.GetTitleID(); | 169 | program_id = metadata.GetTitleID(); |
| 173 | ideal_core = metadata.GetMainThreadCore(); | 170 | ideal_core = metadata.GetMainThreadCore(); |
| 174 | is_64bit_process = metadata.Is64BitProgram(); | 171 | is_64bit_process = metadata.Is64BitProgram(); |
| 172 | system_resource_size = metadata.GetSystemResourceSize(); | ||
| 175 | 173 | ||
| 176 | vm_manager.Reset(metadata.GetAddressSpaceType()); | 174 | vm_manager.Reset(metadata.GetAddressSpaceType()); |
| 177 | 175 | ||
| @@ -186,19 +184,11 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { | |||
| 186 | } | 184 | } |
| 187 | 185 | ||
| 188 | void Process::Run(s32 main_thread_priority, u64 stack_size) { | 186 | void Process::Run(s32 main_thread_priority, u64 stack_size) { |
| 189 | // The kernel always ensures that the given stack size is page aligned. | 187 | AllocateMainThreadStack(stack_size); |
| 190 | main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); | 188 | tls_region_address = CreateTLSRegion(); |
| 191 | |||
| 192 | // Allocate and map the main thread stack | ||
| 193 | // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part | ||
| 194 | // of the user address space. | ||
| 195 | const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size; | ||
| 196 | vm_manager | ||
| 197 | .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size), | ||
| 198 | 0, main_thread_stack_size, MemoryState::Stack) | ||
| 199 | .Unwrap(); | ||
| 200 | 189 | ||
| 201 | vm_manager.LogLayout(); | 190 | vm_manager.LogLayout(); |
| 191 | |||
| 202 | ChangeStatus(ProcessStatus::Running); | 192 | ChangeStatus(ProcessStatus::Running); |
| 203 | 193 | ||
| 204 | SetupMainThread(*this, kernel, main_thread_priority); | 194 | SetupMainThread(*this, kernel, main_thread_priority); |
| @@ -228,6 +218,9 @@ void Process::PrepareForTermination() { | |||
| 228 | stop_threads(system.Scheduler(2).GetThreadList()); | 218 | stop_threads(system.Scheduler(2).GetThreadList()); |
| 229 | stop_threads(system.Scheduler(3).GetThreadList()); | 219 | stop_threads(system.Scheduler(3).GetThreadList()); |
| 230 | 220 | ||
| 221 | FreeTLSRegion(tls_region_address); | ||
| 222 | tls_region_address = 0; | ||
| 223 | |||
| 231 | ChangeStatus(ProcessStatus::Exited); | 224 | ChangeStatus(ProcessStatus::Exited); |
| 232 | } | 225 | } |
| 233 | 226 | ||
| @@ -327,4 +320,16 @@ void Process::ChangeStatus(ProcessStatus new_status) { | |||
| 327 | WakeupAllWaitingThreads(); | 320 | WakeupAllWaitingThreads(); |
| 328 | } | 321 | } |
| 329 | 322 | ||
| 323 | void Process::AllocateMainThreadStack(u64 stack_size) { | ||
| 324 | // The kernel always ensures that the given stack size is page aligned. | ||
| 325 | main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); | ||
| 326 | |||
| 327 | // Allocate and map the main thread stack | ||
| 328 | const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size; | ||
| 329 | vm_manager | ||
| 330 | .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size), | ||
| 331 | 0, main_thread_stack_size, MemoryState::Stack) | ||
| 332 | .Unwrap(); | ||
| 333 | } | ||
| 334 | |||
| 330 | } // namespace Kernel | 335 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index 83ea02bee..c2df451f3 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h | |||
| @@ -135,6 +135,11 @@ public: | |||
| 135 | return mutex; | 135 | return mutex; |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | /// Gets the address to the process' dedicated TLS region. | ||
| 139 | VAddr GetTLSRegionAddress() const { | ||
| 140 | return tls_region_address; | ||
| 141 | } | ||
| 142 | |||
| 138 | /// Gets the current status of the process | 143 | /// Gets the current status of the process |
| 139 | ProcessStatus GetStatus() const { | 144 | ProcessStatus GetStatus() const { |
| 140 | return status; | 145 | return status; |
| @@ -168,8 +173,24 @@ public: | |||
| 168 | return capabilities.GetPriorityMask(); | 173 | return capabilities.GetPriorityMask(); |
| 169 | } | 174 | } |
| 170 | 175 | ||
| 171 | u32 IsVirtualMemoryEnabled() const { | 176 | /// Gets the amount of secure memory to allocate for memory management. |
| 172 | return is_virtual_address_memory_enabled; | 177 | u32 GetSystemResourceSize() const { |
| 178 | return system_resource_size; | ||
| 179 | } | ||
| 180 | |||
| 181 | /// Gets the amount of secure memory currently in use for memory management. | ||
| 182 | u32 GetSystemResourceUsage() const { | ||
| 183 | // On hardware, this returns the amount of system resource memory that has | ||
| 184 | // been used by the kernel. This is problematic for Yuzu to emulate, because | ||
| 185 | // system resource memory is used for page tables -- and yuzu doesn't really | ||
| 186 | // have a way to calculate how much memory is required for page tables for | ||
| 187 | // the current process at any given time. | ||
| 188 | // TODO: Is this even worth implementing? Games may retrieve this value via | ||
| 189 | // an SDK function that gets used + available system resource size for debug | ||
| 190 | // or diagnostic purposes. However, it seems unlikely that a game would make | ||
| 191 | // decisions based on how much system memory is dedicated to its page tables. | ||
| 192 | // Is returning a value other than zero wise? | ||
| 193 | return 0; | ||
| 173 | } | 194 | } |
| 174 | 195 | ||
| 175 | /// Whether this process is an AArch64 or AArch32 process. | 196 | /// Whether this process is an AArch64 or AArch32 process. |
| @@ -196,15 +217,15 @@ public: | |||
| 196 | u64 GetTotalPhysicalMemoryAvailable() const; | 217 | u64 GetTotalPhysicalMemoryAvailable() const; |
| 197 | 218 | ||
| 198 | /// Retrieves the total physical memory available to this process in bytes, | 219 | /// Retrieves the total physical memory available to this process in bytes, |
| 199 | /// without the size of the personal heap added to it. | 220 | /// without the size of the personal system resource heap added to it. |
| 200 | u64 GetTotalPhysicalMemoryAvailableWithoutMmHeap() const; | 221 | u64 GetTotalPhysicalMemoryAvailableWithoutSystemResource() const; |
| 201 | 222 | ||
| 202 | /// Retrieves the total physical memory used by this process in bytes. | 223 | /// Retrieves the total physical memory used by this process in bytes. |
| 203 | u64 GetTotalPhysicalMemoryUsed() const; | 224 | u64 GetTotalPhysicalMemoryUsed() const; |
| 204 | 225 | ||
| 205 | /// Retrieves the total physical memory used by this process in bytes, | 226 | /// Retrieves the total physical memory used by this process in bytes, |
| 206 | /// without the size of the personal heap added to it. | 227 | /// without the size of the personal system resource heap added to it. |
| 207 | u64 GetTotalPhysicalMemoryUsedWithoutMmHeap() const; | 228 | u64 GetTotalPhysicalMemoryUsedWithoutSystemResource() const; |
| 208 | 229 | ||
| 209 | /// Gets the list of all threads created with this process as their owner. | 230 | /// Gets the list of all threads created with this process as their owner. |
| 210 | const std::list<const Thread*>& GetThreadList() const { | 231 | const std::list<const Thread*>& GetThreadList() const { |
| @@ -280,6 +301,9 @@ private: | |||
| 280 | /// a process signal. | 301 | /// a process signal. |
| 281 | void ChangeStatus(ProcessStatus new_status); | 302 | void ChangeStatus(ProcessStatus new_status); |
| 282 | 303 | ||
| 304 | /// Allocates the main thread stack for the process, given the stack size in bytes. | ||
| 305 | void AllocateMainThreadStack(u64 stack_size); | ||
| 306 | |||
| 283 | /// Memory manager for this process. | 307 | /// Memory manager for this process. |
| 284 | Kernel::VMManager vm_manager; | 308 | Kernel::VMManager vm_manager; |
| 285 | 309 | ||
| @@ -298,12 +322,16 @@ private: | |||
| 298 | /// Title ID corresponding to the process | 322 | /// Title ID corresponding to the process |
| 299 | u64 program_id = 0; | 323 | u64 program_id = 0; |
| 300 | 324 | ||
| 325 | /// Specifies additional memory to be reserved for the process's memory management by the | ||
| 326 | /// system. When this is non-zero, secure memory is allocated and used for page table allocation | ||
| 327 | /// instead of using the normal global page tables/memory block management. | ||
| 328 | u32 system_resource_size = 0; | ||
| 329 | |||
| 301 | /// Resource limit descriptor for this process | 330 | /// Resource limit descriptor for this process |
| 302 | SharedPtr<ResourceLimit> resource_limit; | 331 | SharedPtr<ResourceLimit> resource_limit; |
| 303 | 332 | ||
| 304 | /// The ideal CPU core for this process, threads are scheduled on this core by default. | 333 | /// The ideal CPU core for this process, threads are scheduled on this core by default. |
| 305 | u8 ideal_core = 0; | 334 | u8 ideal_core = 0; |
| 306 | u32 is_virtual_address_memory_enabled = 0; | ||
| 307 | 335 | ||
| 308 | /// The Thread Local Storage area is allocated as processes create threads, | 336 | /// The Thread Local Storage area is allocated as processes create threads, |
| 309 | /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part | 337 | /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part |
| @@ -338,6 +366,9 @@ private: | |||
| 338 | /// variable related facilities. | 366 | /// variable related facilities. |
| 339 | Mutex mutex; | 367 | Mutex mutex; |
| 340 | 368 | ||
| 369 | /// Address indicating the location of the process' dedicated TLS region. | ||
| 370 | VAddr tls_region_address = 0; | ||
| 371 | |||
| 341 | /// Random values for svcGetInfo RandomEntropy | 372 | /// Random values for svcGetInfo RandomEntropy |
| 342 | std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{}; | 373 | std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{}; |
| 343 | 374 | ||
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index de6363ff2..1fd1a732a 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -98,9 +98,9 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add | |||
| 98 | return ERR_INVALID_ADDRESS_STATE; | 98 | return ERR_INVALID_ADDRESS_STATE; |
| 99 | } | 99 | } |
| 100 | 100 | ||
| 101 | if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) { | 101 | if (!vm_manager.IsWithinStackRegion(dst_addr, size)) { |
| 102 | LOG_ERROR(Kernel_SVC, | 102 | LOG_ERROR(Kernel_SVC, |
| 103 | "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}", | 103 | "Destination is not within the stack region, addr=0x{:016X}, size=0x{:016X}", |
| 104 | dst_addr, size); | 104 | dst_addr, size); |
| 105 | return ERR_INVALID_MEMORY_RANGE; | 105 | return ERR_INVALID_MEMORY_RANGE; |
| 106 | } | 106 | } |
| @@ -318,7 +318,14 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad | |||
| 318 | return result; | 318 | return result; |
| 319 | } | 319 | } |
| 320 | 320 | ||
| 321 | return vm_manager.UnmapRange(dst_addr, size); | 321 | const auto unmap_res = vm_manager.UnmapRange(dst_addr, size); |
| 322 | |||
| 323 | // Reprotect the source mapping on success | ||
| 324 | if (unmap_res.IsSuccess()) { | ||
| 325 | ASSERT(vm_manager.ReprotectRange(src_addr, size, VMAPermission::ReadWrite).IsSuccess()); | ||
| 326 | } | ||
| 327 | |||
| 328 | return unmap_res; | ||
| 322 | } | 329 | } |
| 323 | 330 | ||
| 324 | /// Connect to an OS service given the port name, returns the handle to the port to out | 331 | /// Connect to an OS service given the port name, returns the handle to the port to out |
| @@ -726,19 +733,19 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 726 | // 2.0.0+ | 733 | // 2.0.0+ |
| 727 | ASLRRegionBaseAddr = 12, | 734 | ASLRRegionBaseAddr = 12, |
| 728 | ASLRRegionSize = 13, | 735 | ASLRRegionSize = 13, |
| 729 | NewMapRegionBaseAddr = 14, | 736 | StackRegionBaseAddr = 14, |
| 730 | NewMapRegionSize = 15, | 737 | StackRegionSize = 15, |
| 731 | // 3.0.0+ | 738 | // 3.0.0+ |
| 732 | IsVirtualAddressMemoryEnabled = 16, | 739 | SystemResourceSize = 16, |
| 733 | PersonalMmHeapUsage = 17, | 740 | SystemResourceUsage = 17, |
| 734 | TitleId = 18, | 741 | TitleId = 18, |
| 735 | // 4.0.0+ | 742 | // 4.0.0+ |
| 736 | PrivilegedProcessId = 19, | 743 | PrivilegedProcessId = 19, |
| 737 | // 5.0.0+ | 744 | // 5.0.0+ |
| 738 | UserExceptionContextAddr = 20, | 745 | UserExceptionContextAddr = 20, |
| 739 | // 6.0.0+ | 746 | // 6.0.0+ |
| 740 | TotalPhysicalMemoryAvailableWithoutMmHeap = 21, | 747 | TotalPhysicalMemoryAvailableWithoutSystemResource = 21, |
| 741 | TotalPhysicalMemoryUsedWithoutMmHeap = 22, | 748 | TotalPhysicalMemoryUsedWithoutSystemResource = 22, |
| 742 | }; | 749 | }; |
| 743 | 750 | ||
| 744 | const auto info_id_type = static_cast<GetInfoType>(info_id); | 751 | const auto info_id_type = static_cast<GetInfoType>(info_id); |
| @@ -752,16 +759,16 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 752 | case GetInfoType::HeapRegionSize: | 759 | case GetInfoType::HeapRegionSize: |
| 753 | case GetInfoType::ASLRRegionBaseAddr: | 760 | case GetInfoType::ASLRRegionBaseAddr: |
| 754 | case GetInfoType::ASLRRegionSize: | 761 | case GetInfoType::ASLRRegionSize: |
| 755 | case GetInfoType::NewMapRegionBaseAddr: | 762 | case GetInfoType::StackRegionBaseAddr: |
| 756 | case GetInfoType::NewMapRegionSize: | 763 | case GetInfoType::StackRegionSize: |
| 757 | case GetInfoType::TotalPhysicalMemoryAvailable: | 764 | case GetInfoType::TotalPhysicalMemoryAvailable: |
| 758 | case GetInfoType::TotalPhysicalMemoryUsed: | 765 | case GetInfoType::TotalPhysicalMemoryUsed: |
| 759 | case GetInfoType::IsVirtualAddressMemoryEnabled: | 766 | case GetInfoType::SystemResourceSize: |
| 760 | case GetInfoType::PersonalMmHeapUsage: | 767 | case GetInfoType::SystemResourceUsage: |
| 761 | case GetInfoType::TitleId: | 768 | case GetInfoType::TitleId: |
| 762 | case GetInfoType::UserExceptionContextAddr: | 769 | case GetInfoType::UserExceptionContextAddr: |
| 763 | case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap: | 770 | case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource: |
| 764 | case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: { | 771 | case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource: { |
| 765 | if (info_sub_id != 0) { | 772 | if (info_sub_id != 0) { |
| 766 | return ERR_INVALID_ENUM_VALUE; | 773 | return ERR_INVALID_ENUM_VALUE; |
| 767 | } | 774 | } |
| @@ -806,12 +813,12 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 806 | *result = process->VMManager().GetASLRRegionSize(); | 813 | *result = process->VMManager().GetASLRRegionSize(); |
| 807 | return RESULT_SUCCESS; | 814 | return RESULT_SUCCESS; |
| 808 | 815 | ||
| 809 | case GetInfoType::NewMapRegionBaseAddr: | 816 | case GetInfoType::StackRegionBaseAddr: |
| 810 | *result = process->VMManager().GetNewMapRegionBaseAddress(); | 817 | *result = process->VMManager().GetStackRegionBaseAddress(); |
| 811 | return RESULT_SUCCESS; | 818 | return RESULT_SUCCESS; |
| 812 | 819 | ||
| 813 | case GetInfoType::NewMapRegionSize: | 820 | case GetInfoType::StackRegionSize: |
| 814 | *result = process->VMManager().GetNewMapRegionSize(); | 821 | *result = process->VMManager().GetStackRegionSize(); |
| 815 | return RESULT_SUCCESS; | 822 | return RESULT_SUCCESS; |
| 816 | 823 | ||
| 817 | case GetInfoType::TotalPhysicalMemoryAvailable: | 824 | case GetInfoType::TotalPhysicalMemoryAvailable: |
| @@ -822,8 +829,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 822 | *result = process->GetTotalPhysicalMemoryUsed(); | 829 | *result = process->GetTotalPhysicalMemoryUsed(); |
| 823 | return RESULT_SUCCESS; | 830 | return RESULT_SUCCESS; |
| 824 | 831 | ||
| 825 | case GetInfoType::IsVirtualAddressMemoryEnabled: | 832 | case GetInfoType::SystemResourceSize: |
| 826 | *result = process->IsVirtualMemoryEnabled(); | 833 | *result = process->GetSystemResourceSize(); |
| 834 | return RESULT_SUCCESS; | ||
| 835 | |||
| 836 | case GetInfoType::SystemResourceUsage: | ||
| 837 | LOG_WARNING(Kernel_SVC, "(STUBBED) Attempted to query system resource usage"); | ||
| 838 | *result = process->GetSystemResourceUsage(); | ||
| 827 | return RESULT_SUCCESS; | 839 | return RESULT_SUCCESS; |
| 828 | 840 | ||
| 829 | case GetInfoType::TitleId: | 841 | case GetInfoType::TitleId: |
| @@ -831,17 +843,15 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 831 | return RESULT_SUCCESS; | 843 | return RESULT_SUCCESS; |
| 832 | 844 | ||
| 833 | case GetInfoType::UserExceptionContextAddr: | 845 | case GetInfoType::UserExceptionContextAddr: |
| 834 | LOG_WARNING(Kernel_SVC, | 846 | *result = process->GetTLSRegionAddress(); |
| 835 | "(STUBBED) Attempted to query user exception context address, returned 0"); | ||
| 836 | *result = 0; | ||
| 837 | return RESULT_SUCCESS; | 847 | return RESULT_SUCCESS; |
| 838 | 848 | ||
| 839 | case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap: | 849 | case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource: |
| 840 | *result = process->GetTotalPhysicalMemoryAvailable(); | 850 | *result = process->GetTotalPhysicalMemoryAvailableWithoutSystemResource(); |
| 841 | return RESULT_SUCCESS; | 851 | return RESULT_SUCCESS; |
| 842 | 852 | ||
| 843 | case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: | 853 | case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource: |
| 844 | *result = process->GetTotalPhysicalMemoryUsedWithoutMmHeap(); | 854 | *result = process->GetTotalPhysicalMemoryUsedWithoutSystemResource(); |
| 845 | return RESULT_SUCCESS; | 855 | return RESULT_SUCCESS; |
| 846 | 856 | ||
| 847 | default: | 857 | default: |
| @@ -946,6 +956,86 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 946 | } | 956 | } |
| 947 | } | 957 | } |
| 948 | 958 | ||
| 959 | /// Maps memory at a desired address | ||
| 960 | static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { | ||
| 961 | LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); | ||
| 962 | |||
| 963 | if (!Common::Is4KBAligned(addr)) { | ||
| 964 | LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr); | ||
| 965 | return ERR_INVALID_ADDRESS; | ||
| 966 | } | ||
| 967 | |||
| 968 | if (!Common::Is4KBAligned(size)) { | ||
| 969 | LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); | ||
| 970 | return ERR_INVALID_SIZE; | ||
| 971 | } | ||
| 972 | |||
| 973 | if (size == 0) { | ||
| 974 | LOG_ERROR(Kernel_SVC, "Size is zero"); | ||
| 975 | return ERR_INVALID_SIZE; | ||
| 976 | } | ||
| 977 | |||
| 978 | if (!(addr < addr + size)) { | ||
| 979 | LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); | ||
| 980 | return ERR_INVALID_MEMORY_RANGE; | ||
| 981 | } | ||
| 982 | |||
| 983 | Process* const current_process = system.Kernel().CurrentProcess(); | ||
| 984 | auto& vm_manager = current_process->VMManager(); | ||
| 985 | |||
| 986 | if (current_process->GetSystemResourceSize() == 0) { | ||
| 987 | LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); | ||
| 988 | return ERR_INVALID_STATE; | ||
| 989 | } | ||
| 990 | |||
| 991 | if (!vm_manager.IsWithinMapRegion(addr, size)) { | ||
| 992 | LOG_ERROR(Kernel_SVC, "Range not within map region"); | ||
| 993 | return ERR_INVALID_MEMORY_RANGE; | ||
| 994 | } | ||
| 995 | |||
| 996 | return vm_manager.MapPhysicalMemory(addr, size); | ||
| 997 | } | ||
| 998 | |||
| 999 | /// Unmaps memory previously mapped via MapPhysicalMemory | ||
| 1000 | static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { | ||
| 1001 | LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); | ||
| 1002 | |||
| 1003 | if (!Common::Is4KBAligned(addr)) { | ||
| 1004 | LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr); | ||
| 1005 | return ERR_INVALID_ADDRESS; | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | if (!Common::Is4KBAligned(size)) { | ||
| 1009 | LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); | ||
| 1010 | return ERR_INVALID_SIZE; | ||
| 1011 | } | ||
| 1012 | |||
| 1013 | if (size == 0) { | ||
| 1014 | LOG_ERROR(Kernel_SVC, "Size is zero"); | ||
| 1015 | return ERR_INVALID_SIZE; | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | if (!(addr < addr + size)) { | ||
| 1019 | LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); | ||
| 1020 | return ERR_INVALID_MEMORY_RANGE; | ||
| 1021 | } | ||
| 1022 | |||
| 1023 | Process* const current_process = system.Kernel().CurrentProcess(); | ||
| 1024 | auto& vm_manager = current_process->VMManager(); | ||
| 1025 | |||
| 1026 | if (current_process->GetSystemResourceSize() == 0) { | ||
| 1027 | LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); | ||
| 1028 | return ERR_INVALID_STATE; | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | if (!vm_manager.IsWithinMapRegion(addr, size)) { | ||
| 1032 | LOG_ERROR(Kernel_SVC, "Range not within map region"); | ||
| 1033 | return ERR_INVALID_MEMORY_RANGE; | ||
| 1034 | } | ||
| 1035 | |||
| 1036 | return vm_manager.UnmapPhysicalMemory(addr, size); | ||
| 1037 | } | ||
| 1038 | |||
| 949 | /// Sets the thread activity | 1039 | /// Sets the thread activity |
| 950 | static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) { | 1040 | static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) { |
| 951 | LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity); | 1041 | LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity); |
| @@ -1647,8 +1737,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var | |||
| 1647 | // Wait for an address (via Address Arbiter) | 1737 | // Wait for an address (via Address Arbiter) |
| 1648 | static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, | 1738 | static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, |
| 1649 | s64 timeout) { | 1739 | s64 timeout) { |
| 1650 | LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", | 1740 | LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address, |
| 1651 | address, type, value, timeout); | 1741 | type, value, timeout); |
| 1652 | 1742 | ||
| 1653 | // If the passed address is a kernel virtual address, return invalid memory state. | 1743 | // If the passed address is a kernel virtual address, return invalid memory state. |
| 1654 | if (Memory::IsKernelVirtualAddress(address)) { | 1744 | if (Memory::IsKernelVirtualAddress(address)) { |
| @@ -1670,8 +1760,8 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, | |||
| 1670 | // Signals to an address (via Address Arbiter) | 1760 | // Signals to an address (via Address Arbiter) |
| 1671 | static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value, | 1761 | static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value, |
| 1672 | s32 num_to_wake) { | 1762 | s32 num_to_wake) { |
| 1673 | LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}", | 1763 | LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}", |
| 1674 | address, type, value, num_to_wake); | 1764 | address, type, value, num_to_wake); |
| 1675 | 1765 | ||
| 1676 | // If the passed address is a kernel virtual address, return invalid memory state. | 1766 | // If the passed address is a kernel virtual address, return invalid memory state. |
| 1677 | if (Memory::IsKernelVirtualAddress(address)) { | 1767 | if (Memory::IsKernelVirtualAddress(address)) { |
| @@ -2303,8 +2393,8 @@ static const FunctionDef SVC_Table[] = { | |||
| 2303 | {0x29, SvcWrap<GetInfo>, "GetInfo"}, | 2393 | {0x29, SvcWrap<GetInfo>, "GetInfo"}, |
| 2304 | {0x2A, nullptr, "FlushEntireDataCache"}, | 2394 | {0x2A, nullptr, "FlushEntireDataCache"}, |
| 2305 | {0x2B, nullptr, "FlushDataCache"}, | 2395 | {0x2B, nullptr, "FlushDataCache"}, |
| 2306 | {0x2C, nullptr, "MapPhysicalMemory"}, | 2396 | {0x2C, SvcWrap<MapPhysicalMemory>, "MapPhysicalMemory"}, |
| 2307 | {0x2D, nullptr, "UnmapPhysicalMemory"}, | 2397 | {0x2D, SvcWrap<UnmapPhysicalMemory>, "UnmapPhysicalMemory"}, |
| 2308 | {0x2E, nullptr, "GetFutureThreadInfo"}, | 2398 | {0x2E, nullptr, "GetFutureThreadInfo"}, |
| 2309 | {0x2F, nullptr, "GetLastThreadInfo"}, | 2399 | {0x2F, nullptr, "GetLastThreadInfo"}, |
| 2310 | {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, | 2400 | {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, |
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index 865473c6f..c2d8d0dc3 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h | |||
| @@ -32,6 +32,11 @@ void SvcWrap(Core::System& system) { | |||
| 32 | FuncReturn(system, func(system, Param(system, 0)).raw); | 32 | FuncReturn(system, func(system, Param(system, 0)).raw); |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | template <ResultCode func(Core::System&, u64, u64)> | ||
| 36 | void SvcWrap(Core::System& system) { | ||
| 37 | FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw); | ||
| 38 | } | ||
| 39 | |||
| 35 | template <ResultCode func(Core::System&, u32)> | 40 | template <ResultCode func(Core::System&, u32)> |
| 36 | void SvcWrap(Core::System& system) { | 41 | void SvcWrap(Core::System& system) { |
| 37 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); | 42 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); |
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index 3df5ccb7f..4f45fb03b 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp | |||
| @@ -8,10 +8,11 @@ | |||
| 8 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 9 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 10 | #include "common/memory_hook.h" | 10 | #include "common/memory_hook.h" |
| 11 | #include "core/arm/arm_interface.h" | ||
| 12 | #include "core/core.h" | 11 | #include "core/core.h" |
| 13 | #include "core/file_sys/program_metadata.h" | 12 | #include "core/file_sys/program_metadata.h" |
| 14 | #include "core/hle/kernel/errors.h" | 13 | #include "core/hle/kernel/errors.h" |
| 14 | #include "core/hle/kernel/process.h" | ||
| 15 | #include "core/hle/kernel/resource_limit.h" | ||
| 15 | #include "core/hle/kernel/vm_manager.h" | 16 | #include "core/hle/kernel/vm_manager.h" |
| 16 | #include "core/memory.h" | 17 | #include "core/memory.h" |
| 17 | #include "core/memory_setup.h" | 18 | #include "core/memory_setup.h" |
| @@ -49,10 +50,14 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { | |||
| 49 | type != next.type) { | 50 | type != next.type) { |
| 50 | return false; | 51 | return false; |
| 51 | } | 52 | } |
| 52 | if (type == VMAType::AllocatedMemoryBlock && | 53 | if ((attribute & MemoryAttribute::DeviceMapped) == MemoryAttribute::DeviceMapped) { |
| 53 | (backing_block != next.backing_block || offset + size != next.offset)) { | 54 | // TODO: Can device mapped memory be merged sanely? |
| 55 | // Not merging it may cause inaccuracies versus hardware when memory layout is queried. | ||
| 54 | return false; | 56 | return false; |
| 55 | } | 57 | } |
| 58 | if (type == VMAType::AllocatedMemoryBlock) { | ||
| 59 | return true; | ||
| 60 | } | ||
| 56 | if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) { | 61 | if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) { |
| 57 | return false; | 62 | return false; |
| 58 | } | 63 | } |
| @@ -100,7 +105,7 @@ bool VMManager::IsValidHandle(VMAHandle handle) const { | |||
| 100 | ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, | 105 | ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, |
| 101 | std::shared_ptr<std::vector<u8>> block, | 106 | std::shared_ptr<std::vector<u8>> block, |
| 102 | std::size_t offset, u64 size, | 107 | std::size_t offset, u64 size, |
| 103 | MemoryState state) { | 108 | MemoryState state, VMAPermission perm) { |
| 104 | ASSERT(block != nullptr); | 109 | ASSERT(block != nullptr); |
| 105 | ASSERT(offset + size <= block->size()); | 110 | ASSERT(offset + size <= block->size()); |
| 106 | 111 | ||
| @@ -109,17 +114,8 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, | |||
| 109 | VirtualMemoryArea& final_vma = vma_handle->second; | 114 | VirtualMemoryArea& final_vma = vma_handle->second; |
| 110 | ASSERT(final_vma.size == size); | 115 | ASSERT(final_vma.size == size); |
| 111 | 116 | ||
| 112 | system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset, | ||
| 113 | VMAPermission::ReadWriteExecute); | ||
| 114 | system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset, | ||
| 115 | VMAPermission::ReadWriteExecute); | ||
| 116 | system.ArmInterface(2).MapBackingMemory(target, size, block->data() + offset, | ||
| 117 | VMAPermission::ReadWriteExecute); | ||
| 118 | system.ArmInterface(3).MapBackingMemory(target, size, block->data() + offset, | ||
| 119 | VMAPermission::ReadWriteExecute); | ||
| 120 | |||
| 121 | final_vma.type = VMAType::AllocatedMemoryBlock; | 117 | final_vma.type = VMAType::AllocatedMemoryBlock; |
| 122 | final_vma.permissions = VMAPermission::ReadWrite; | 118 | final_vma.permissions = perm; |
| 123 | final_vma.state = state; | 119 | final_vma.state = state; |
| 124 | final_vma.backing_block = std::move(block); | 120 | final_vma.backing_block = std::move(block); |
| 125 | final_vma.offset = offset; | 121 | final_vma.offset = offset; |
| @@ -137,11 +133,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me | |||
| 137 | VirtualMemoryArea& final_vma = vma_handle->second; | 133 | VirtualMemoryArea& final_vma = vma_handle->second; |
| 138 | ASSERT(final_vma.size == size); | 134 | ASSERT(final_vma.size == size); |
| 139 | 135 | ||
| 140 | system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); | ||
| 141 | system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); | ||
| 142 | system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); | ||
| 143 | system.ArmInterface(3).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); | ||
| 144 | |||
| 145 | final_vma.type = VMAType::BackingMemory; | 136 | final_vma.type = VMAType::BackingMemory; |
| 146 | final_vma.permissions = VMAPermission::ReadWrite; | 137 | final_vma.permissions = VMAPermission::ReadWrite; |
| 147 | final_vma.state = state; | 138 | final_vma.state = state; |
| @@ -230,11 +221,6 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) { | |||
| 230 | 221 | ||
| 231 | ASSERT(FindVMA(target)->second.size >= size); | 222 | ASSERT(FindVMA(target)->second.size >= size); |
| 232 | 223 | ||
| 233 | system.ArmInterface(0).UnmapMemory(target, size); | ||
| 234 | system.ArmInterface(1).UnmapMemory(target, size); | ||
| 235 | system.ArmInterface(2).UnmapMemory(target, size); | ||
| 236 | system.ArmInterface(3).UnmapMemory(target, size); | ||
| 237 | |||
| 238 | return RESULT_SUCCESS; | 224 | return RESULT_SUCCESS; |
| 239 | } | 225 | } |
| 240 | 226 | ||
| @@ -308,6 +294,166 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) { | |||
| 308 | return MakeResult<VAddr>(heap_region_base); | 294 | return MakeResult<VAddr>(heap_region_base); |
| 309 | } | 295 | } |
| 310 | 296 | ||
| 297 | ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) { | ||
| 298 | const auto end_addr = target + size; | ||
| 299 | const auto last_addr = end_addr - 1; | ||
| 300 | VAddr cur_addr = target; | ||
| 301 | |||
| 302 | ResultCode result = RESULT_SUCCESS; | ||
| 303 | |||
| 304 | // Check how much memory we've already mapped. | ||
| 305 | const auto mapped_size_result = SizeOfAllocatedVMAsInRange(target, size); | ||
| 306 | if (mapped_size_result.Failed()) { | ||
| 307 | return mapped_size_result.Code(); | ||
| 308 | } | ||
| 309 | |||
| 310 | // If we've already mapped the desired amount, return early. | ||
| 311 | const std::size_t mapped_size = *mapped_size_result; | ||
| 312 | if (mapped_size == size) { | ||
| 313 | return RESULT_SUCCESS; | ||
| 314 | } | ||
| 315 | |||
| 316 | // Check that we can map the memory we want. | ||
| 317 | const auto res_limit = system.CurrentProcess()->GetResourceLimit(); | ||
| 318 | const u64 physmem_remaining = res_limit->GetMaxResourceValue(ResourceType::PhysicalMemory) - | ||
| 319 | res_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory); | ||
| 320 | if (physmem_remaining < (size - mapped_size)) { | ||
| 321 | return ERR_RESOURCE_LIMIT_EXCEEDED; | ||
| 322 | } | ||
| 323 | |||
| 324 | // Keep track of the memory regions we unmap. | ||
| 325 | std::vector<std::pair<u64, u64>> mapped_regions; | ||
| 326 | |||
| 327 | // Iterate, trying to map memory. | ||
| 328 | { | ||
| 329 | cur_addr = target; | ||
| 330 | |||
| 331 | auto iter = FindVMA(target); | ||
| 332 | ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end"); | ||
| 333 | |||
| 334 | while (true) { | ||
| 335 | const auto& vma = iter->second; | ||
| 336 | const auto vma_start = vma.base; | ||
| 337 | const auto vma_end = vma_start + vma.size; | ||
| 338 | const auto vma_last = vma_end - 1; | ||
| 339 | |||
| 340 | // Map the memory block | ||
| 341 | const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr); | ||
| 342 | if (vma.state == MemoryState::Unmapped) { | ||
| 343 | const auto map_res = | ||
| 344 | MapMemoryBlock(cur_addr, std::make_shared<std::vector<u8>>(map_size, 0), 0, | ||
| 345 | map_size, MemoryState::Heap, VMAPermission::ReadWrite); | ||
| 346 | result = map_res.Code(); | ||
| 347 | if (result.IsError()) { | ||
| 348 | break; | ||
| 349 | } | ||
| 350 | |||
| 351 | mapped_regions.emplace_back(cur_addr, map_size); | ||
| 352 | } | ||
| 353 | |||
| 354 | // Break once we hit the end of the range. | ||
| 355 | if (last_addr <= vma_last) { | ||
| 356 | break; | ||
| 357 | } | ||
| 358 | |||
| 359 | // Advance to the next block. | ||
| 360 | cur_addr = vma_end; | ||
| 361 | iter = FindVMA(cur_addr); | ||
| 362 | ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end"); | ||
| 363 | } | ||
| 364 | } | ||
| 365 | |||
| 366 | // If we failed, unmap memory. | ||
| 367 | if (result.IsError()) { | ||
| 368 | for (const auto [unmap_address, unmap_size] : mapped_regions) { | ||
| 369 | ASSERT_MSG(UnmapRange(unmap_address, unmap_size).IsSuccess(), | ||
| 370 | "MapPhysicalMemory un-map on error"); | ||
| 371 | } | ||
| 372 | |||
| 373 | return result; | ||
| 374 | } | ||
| 375 | |||
| 376 | // Update amount of mapped physical memory. | ||
| 377 | physical_memory_mapped += size - mapped_size; | ||
| 378 | |||
| 379 | return RESULT_SUCCESS; | ||
| 380 | } | ||
| 381 | |||
| 382 | ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) { | ||
| 383 | const auto end_addr = target + size; | ||
| 384 | const auto last_addr = end_addr - 1; | ||
| 385 | VAddr cur_addr = target; | ||
| 386 | |||
| 387 | ResultCode result = RESULT_SUCCESS; | ||
| 388 | |||
| 389 | // Check how much memory is currently mapped. | ||
| 390 | const auto mapped_size_result = SizeOfUnmappablePhysicalMemoryInRange(target, size); | ||
| 391 | if (mapped_size_result.Failed()) { | ||
| 392 | return mapped_size_result.Code(); | ||
| 393 | } | ||
| 394 | |||
| 395 | // If we've already unmapped all the memory, return early. | ||
| 396 | const std::size_t mapped_size = *mapped_size_result; | ||
| 397 | if (mapped_size == 0) { | ||
| 398 | return RESULT_SUCCESS; | ||
| 399 | } | ||
| 400 | |||
| 401 | // Keep track of the memory regions we unmap. | ||
| 402 | std::vector<std::pair<u64, u64>> unmapped_regions; | ||
| 403 | |||
| 404 | // Try to unmap regions. | ||
| 405 | { | ||
| 406 | cur_addr = target; | ||
| 407 | |||
| 408 | auto iter = FindVMA(target); | ||
| 409 | ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end"); | ||
| 410 | |||
| 411 | while (true) { | ||
| 412 | const auto& vma = iter->second; | ||
| 413 | const auto vma_start = vma.base; | ||
| 414 | const auto vma_end = vma_start + vma.size; | ||
| 415 | const auto vma_last = vma_end - 1; | ||
| 416 | |||
| 417 | // Unmap the memory block | ||
| 418 | const auto unmap_size = std::min(end_addr - cur_addr, vma_end - cur_addr); | ||
| 419 | if (vma.state == MemoryState::Heap) { | ||
| 420 | result = UnmapRange(cur_addr, unmap_size); | ||
| 421 | if (result.IsError()) { | ||
| 422 | break; | ||
| 423 | } | ||
| 424 | |||
| 425 | unmapped_regions.emplace_back(cur_addr, unmap_size); | ||
| 426 | } | ||
| 427 | |||
| 428 | // Break once we hit the end of the range. | ||
| 429 | if (last_addr <= vma_last) { | ||
| 430 | break; | ||
| 431 | } | ||
| 432 | |||
| 433 | // Advance to the next block. | ||
| 434 | cur_addr = vma_end; | ||
| 435 | iter = FindVMA(cur_addr); | ||
| 436 | ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end"); | ||
| 437 | } | ||
| 438 | } | ||
| 439 | |||
| 440 | // If we failed, re-map regions. | ||
| 441 | // TODO: Preserve memory contents? | ||
| 442 | if (result.IsError()) { | ||
| 443 | for (const auto [map_address, map_size] : unmapped_regions) { | ||
| 444 | const auto remap_res = | ||
| 445 | MapMemoryBlock(map_address, std::make_shared<std::vector<u8>>(map_size, 0), 0, | ||
| 446 | map_size, MemoryState::Heap, VMAPermission::None); | ||
| 447 | ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error"); | ||
| 448 | } | ||
| 449 | } | ||
| 450 | |||
| 451 | // Update mapped amount | ||
| 452 | physical_memory_mapped -= mapped_size; | ||
| 453 | |||
| 454 | return RESULT_SUCCESS; | ||
| 455 | } | ||
| 456 | |||
| 311 | ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) { | 457 | ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) { |
| 312 | constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped; | 458 | constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped; |
| 313 | const auto src_check_result = CheckRangeState( | 459 | const auto src_check_result = CheckRangeState( |
| @@ -455,7 +601,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem | |||
| 455 | // Protect mirror with permissions from old region | 601 | // Protect mirror with permissions from old region |
| 456 | Reprotect(new_vma, vma->second.permissions); | 602 | Reprotect(new_vma, vma->second.permissions); |
| 457 | // Remove permissions from old region | 603 | // Remove permissions from old region |
| 458 | Reprotect(vma, VMAPermission::None); | 604 | ReprotectRange(src_addr, size, VMAPermission::None); |
| 459 | 605 | ||
| 460 | return RESULT_SUCCESS; | 606 | return RESULT_SUCCESS; |
| 461 | } | 607 | } |
| @@ -588,14 +734,14 @@ VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) { | |||
| 588 | VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { | 734 | VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { |
| 589 | const VMAIter next_vma = std::next(iter); | 735 | const VMAIter next_vma = std::next(iter); |
| 590 | if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) { | 736 | if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) { |
| 591 | iter->second.size += next_vma->second.size; | 737 | MergeAdjacentVMA(iter->second, next_vma->second); |
| 592 | vma_map.erase(next_vma); | 738 | vma_map.erase(next_vma); |
| 593 | } | 739 | } |
| 594 | 740 | ||
| 595 | if (iter != vma_map.begin()) { | 741 | if (iter != vma_map.begin()) { |
| 596 | VMAIter prev_vma = std::prev(iter); | 742 | VMAIter prev_vma = std::prev(iter); |
| 597 | if (prev_vma->second.CanBeMergedWith(iter->second)) { | 743 | if (prev_vma->second.CanBeMergedWith(iter->second)) { |
| 598 | prev_vma->second.size += iter->second.size; | 744 | MergeAdjacentVMA(prev_vma->second, iter->second); |
| 599 | vma_map.erase(iter); | 745 | vma_map.erase(iter); |
| 600 | iter = prev_vma; | 746 | iter = prev_vma; |
| 601 | } | 747 | } |
| @@ -604,6 +750,38 @@ VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { | |||
| 604 | return iter; | 750 | return iter; |
| 605 | } | 751 | } |
| 606 | 752 | ||
| 753 | void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right) { | ||
| 754 | ASSERT(left.CanBeMergedWith(right)); | ||
| 755 | |||
| 756 | // Always merge allocated memory blocks, even when they don't share the same backing block. | ||
| 757 | if (left.type == VMAType::AllocatedMemoryBlock && | ||
| 758 | (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) { | ||
| 759 | // Check if we can save work. | ||
| 760 | if (left.offset == 0 && left.size == left.backing_block->size()) { | ||
| 761 | // Fast case: left is an entire backing block. | ||
| 762 | left.backing_block->insert(left.backing_block->end(), | ||
| 763 | right.backing_block->begin() + right.offset, | ||
| 764 | right.backing_block->begin() + right.offset + right.size); | ||
| 765 | } else { | ||
| 766 | // Slow case: make a new memory block for left and right. | ||
| 767 | auto new_memory = std::make_shared<std::vector<u8>>(); | ||
| 768 | new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset, | ||
| 769 | left.backing_block->begin() + left.offset + left.size); | ||
| 770 | new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset, | ||
| 771 | right.backing_block->begin() + right.offset + right.size); | ||
| 772 | left.backing_block = new_memory; | ||
| 773 | left.offset = 0; | ||
| 774 | } | ||
| 775 | |||
| 776 | // Page table update is needed, because backing memory changed. | ||
| 777 | left.size += right.size; | ||
| 778 | UpdatePageTableForVMA(left); | ||
| 779 | } else { | ||
| 780 | // Just update the size. | ||
| 781 | left.size += right.size; | ||
| 782 | } | ||
| 783 | } | ||
| 784 | |||
| 607 | void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { | 785 | void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { |
| 608 | switch (vma.type) { | 786 | switch (vma.type) { |
| 609 | case VMAType::Free: | 787 | case VMAType::Free: |
| @@ -625,9 +803,11 @@ void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { | |||
| 625 | void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType type) { | 803 | void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType type) { |
| 626 | u64 map_region_size = 0; | 804 | u64 map_region_size = 0; |
| 627 | u64 heap_region_size = 0; | 805 | u64 heap_region_size = 0; |
| 628 | u64 new_map_region_size = 0; | 806 | u64 stack_region_size = 0; |
| 629 | u64 tls_io_region_size = 0; | 807 | u64 tls_io_region_size = 0; |
| 630 | 808 | ||
| 809 | u64 stack_and_tls_io_end = 0; | ||
| 810 | |||
| 631 | switch (type) { | 811 | switch (type) { |
| 632 | case FileSys::ProgramAddressSpaceType::Is32Bit: | 812 | case FileSys::ProgramAddressSpaceType::Is32Bit: |
| 633 | case FileSys::ProgramAddressSpaceType::Is32BitNoMap: | 813 | case FileSys::ProgramAddressSpaceType::Is32BitNoMap: |
| @@ -643,6 +823,7 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty | |||
| 643 | map_region_size = 0; | 823 | map_region_size = 0; |
| 644 | heap_region_size = 0x80000000; | 824 | heap_region_size = 0x80000000; |
| 645 | } | 825 | } |
| 826 | stack_and_tls_io_end = 0x40000000; | ||
| 646 | break; | 827 | break; |
| 647 | case FileSys::ProgramAddressSpaceType::Is36Bit: | 828 | case FileSys::ProgramAddressSpaceType::Is36Bit: |
| 648 | address_space_width = 36; | 829 | address_space_width = 36; |
| @@ -652,6 +833,7 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty | |||
| 652 | aslr_region_end = aslr_region_base + 0xFF8000000; | 833 | aslr_region_end = aslr_region_base + 0xFF8000000; |
| 653 | map_region_size = 0x180000000; | 834 | map_region_size = 0x180000000; |
| 654 | heap_region_size = 0x180000000; | 835 | heap_region_size = 0x180000000; |
| 836 | stack_and_tls_io_end = 0x80000000; | ||
| 655 | break; | 837 | break; |
| 656 | case FileSys::ProgramAddressSpaceType::Is39Bit: | 838 | case FileSys::ProgramAddressSpaceType::Is39Bit: |
| 657 | address_space_width = 39; | 839 | address_space_width = 39; |
| @@ -661,7 +843,7 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty | |||
| 661 | aslr_region_end = aslr_region_base + 0x7FF8000000; | 843 | aslr_region_end = aslr_region_base + 0x7FF8000000; |
| 662 | map_region_size = 0x1000000000; | 844 | map_region_size = 0x1000000000; |
| 663 | heap_region_size = 0x180000000; | 845 | heap_region_size = 0x180000000; |
| 664 | new_map_region_size = 0x80000000; | 846 | stack_region_size = 0x80000000; |
| 665 | tls_io_region_size = 0x1000000000; | 847 | tls_io_region_size = 0x1000000000; |
| 666 | break; | 848 | break; |
| 667 | default: | 849 | default: |
| @@ -669,6 +851,8 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty | |||
| 669 | return; | 851 | return; |
| 670 | } | 852 | } |
| 671 | 853 | ||
| 854 | const u64 stack_and_tls_io_begin = aslr_region_base; | ||
| 855 | |||
| 672 | address_space_base = 0; | 856 | address_space_base = 0; |
| 673 | address_space_end = 1ULL << address_space_width; | 857 | address_space_end = 1ULL << address_space_width; |
| 674 | 858 | ||
| @@ -679,15 +863,20 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty | |||
| 679 | heap_region_end = heap_region_base + heap_region_size; | 863 | heap_region_end = heap_region_base + heap_region_size; |
| 680 | heap_end = heap_region_base; | 864 | heap_end = heap_region_base; |
| 681 | 865 | ||
| 682 | new_map_region_base = heap_region_end; | 866 | stack_region_base = heap_region_end; |
| 683 | new_map_region_end = new_map_region_base + new_map_region_size; | 867 | stack_region_end = stack_region_base + stack_region_size; |
| 684 | 868 | ||
| 685 | tls_io_region_base = new_map_region_end; | 869 | tls_io_region_base = stack_region_end; |
| 686 | tls_io_region_end = tls_io_region_base + tls_io_region_size; | 870 | tls_io_region_end = tls_io_region_base + tls_io_region_size; |
| 687 | 871 | ||
| 688 | if (new_map_region_size == 0) { | 872 | if (stack_region_size == 0) { |
| 689 | new_map_region_base = address_space_base; | 873 | stack_region_base = stack_and_tls_io_begin; |
| 690 | new_map_region_end = address_space_end; | 874 | stack_region_end = stack_and_tls_io_end; |
| 875 | } | ||
| 876 | |||
| 877 | if (tls_io_region_size == 0) { | ||
| 878 | tls_io_region_base = stack_and_tls_io_begin; | ||
| 879 | tls_io_region_end = stack_and_tls_io_end; | ||
| 691 | } | 880 | } |
| 692 | } | 881 | } |
| 693 | 882 | ||
| @@ -767,6 +956,84 @@ VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, Memo | |||
| 767 | std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask)); | 956 | std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask)); |
| 768 | } | 957 | } |
| 769 | 958 | ||
| 959 | ResultVal<std::size_t> VMManager::SizeOfAllocatedVMAsInRange(VAddr address, | ||
| 960 | std::size_t size) const { | ||
| 961 | const VAddr end_addr = address + size; | ||
| 962 | const VAddr last_addr = end_addr - 1; | ||
| 963 | std::size_t mapped_size = 0; | ||
| 964 | |||
| 965 | VAddr cur_addr = address; | ||
| 966 | auto iter = FindVMA(cur_addr); | ||
| 967 | ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end"); | ||
| 968 | |||
| 969 | while (true) { | ||
| 970 | const auto& vma = iter->second; | ||
| 971 | const VAddr vma_start = vma.base; | ||
| 972 | const VAddr vma_end = vma_start + vma.size; | ||
| 973 | const VAddr vma_last = vma_end - 1; | ||
| 974 | |||
| 975 | // Add size if relevant. | ||
| 976 | if (vma.state != MemoryState::Unmapped) { | ||
| 977 | mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr); | ||
| 978 | } | ||
| 979 | |||
| 980 | // Break once we hit the end of the range. | ||
| 981 | if (last_addr <= vma_last) { | ||
| 982 | break; | ||
| 983 | } | ||
| 984 | |||
| 985 | // Advance to the next block. | ||
| 986 | cur_addr = vma_end; | ||
| 987 | iter = std::next(iter); | ||
| 988 | ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end"); | ||
| 989 | } | ||
| 990 | |||
| 991 | return MakeResult(mapped_size); | ||
| 992 | } | ||
| 993 | |||
| 994 | ResultVal<std::size_t> VMManager::SizeOfUnmappablePhysicalMemoryInRange(VAddr address, | ||
| 995 | std::size_t size) const { | ||
| 996 | const VAddr end_addr = address + size; | ||
| 997 | const VAddr last_addr = end_addr - 1; | ||
| 998 | std::size_t mapped_size = 0; | ||
| 999 | |||
| 1000 | VAddr cur_addr = address; | ||
| 1001 | auto iter = FindVMA(cur_addr); | ||
| 1002 | ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end"); | ||
| 1003 | |||
| 1004 | while (true) { | ||
| 1005 | const auto& vma = iter->second; | ||
| 1006 | const auto vma_start = vma.base; | ||
| 1007 | const auto vma_end = vma_start + vma.size; | ||
| 1008 | const auto vma_last = vma_end - 1; | ||
| 1009 | const auto state = vma.state; | ||
| 1010 | const auto attr = vma.attribute; | ||
| 1011 | |||
| 1012 | // Memory within region must be free or mapped heap. | ||
| 1013 | if (!((state == MemoryState::Heap && attr == MemoryAttribute::None) || | ||
| 1014 | (state == MemoryState::Unmapped))) { | ||
| 1015 | return ERR_INVALID_ADDRESS_STATE; | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | // Add size if relevant. | ||
| 1019 | if (state != MemoryState::Unmapped) { | ||
| 1020 | mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr); | ||
| 1021 | } | ||
| 1022 | |||
| 1023 | // Break once we hit the end of the range. | ||
| 1024 | if (last_addr <= vma_last) { | ||
| 1025 | break; | ||
| 1026 | } | ||
| 1027 | |||
| 1028 | // Advance to the next block. | ||
| 1029 | cur_addr = vma_end; | ||
| 1030 | iter = std::next(iter); | ||
| 1031 | ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end"); | ||
| 1032 | } | ||
| 1033 | |||
| 1034 | return MakeResult(mapped_size); | ||
| 1035 | } | ||
| 1036 | |||
| 770 | u64 VMManager::GetTotalPhysicalMemoryAvailable() const { | 1037 | u64 VMManager::GetTotalPhysicalMemoryAvailable() const { |
| 771 | LOG_WARNING(Kernel, "(STUBBED) called"); | 1038 | LOG_WARNING(Kernel, "(STUBBED) called"); |
| 772 | return 0xF8000000; | 1039 | return 0xF8000000; |
| @@ -879,21 +1146,21 @@ bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const { | |||
| 879 | return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress()); | 1146 | return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress()); |
| 880 | } | 1147 | } |
| 881 | 1148 | ||
| 882 | VAddr VMManager::GetNewMapRegionBaseAddress() const { | 1149 | VAddr VMManager::GetStackRegionBaseAddress() const { |
| 883 | return new_map_region_base; | 1150 | return stack_region_base; |
| 884 | } | 1151 | } |
| 885 | 1152 | ||
| 886 | VAddr VMManager::GetNewMapRegionEndAddress() const { | 1153 | VAddr VMManager::GetStackRegionEndAddress() const { |
| 887 | return new_map_region_end; | 1154 | return stack_region_end; |
| 888 | } | 1155 | } |
| 889 | 1156 | ||
| 890 | u64 VMManager::GetNewMapRegionSize() const { | 1157 | u64 VMManager::GetStackRegionSize() const { |
| 891 | return new_map_region_end - new_map_region_base; | 1158 | return stack_region_end - stack_region_base; |
| 892 | } | 1159 | } |
| 893 | 1160 | ||
| 894 | bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const { | 1161 | bool VMManager::IsWithinStackRegion(VAddr address, u64 size) const { |
| 895 | return IsInsideAddressRange(address, size, GetNewMapRegionBaseAddress(), | 1162 | return IsInsideAddressRange(address, size, GetStackRegionBaseAddress(), |
| 896 | GetNewMapRegionEndAddress()); | 1163 | GetStackRegionEndAddress()); |
| 897 | } | 1164 | } |
| 898 | 1165 | ||
| 899 | VAddr VMManager::GetTLSIORegionBaseAddress() const { | 1166 | VAddr VMManager::GetTLSIORegionBaseAddress() const { |
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index 752ae62f9..0aecb7499 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h | |||
| @@ -349,7 +349,8 @@ public: | |||
| 349 | * @param state MemoryState tag to attach to the VMA. | 349 | * @param state MemoryState tag to attach to the VMA. |
| 350 | */ | 350 | */ |
| 351 | ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, | 351 | ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, |
| 352 | std::size_t offset, u64 size, MemoryState state); | 352 | std::size_t offset, u64 size, MemoryState state, |
| 353 | VMAPermission perm = VMAPermission::ReadWrite); | ||
| 353 | 354 | ||
| 354 | /** | 355 | /** |
| 355 | * Maps an unmanaged host memory pointer at a given address. | 356 | * Maps an unmanaged host memory pointer at a given address. |
| @@ -450,6 +451,34 @@ public: | |||
| 450 | /// | 451 | /// |
| 451 | ResultVal<VAddr> SetHeapSize(u64 size); | 452 | ResultVal<VAddr> SetHeapSize(u64 size); |
| 452 | 453 | ||
| 454 | /// Maps memory at a given address. | ||
| 455 | /// | ||
| 456 | /// @param addr The virtual address to map memory at. | ||
| 457 | /// @param size The amount of memory to map. | ||
| 458 | /// | ||
| 459 | /// @note The destination address must lie within the Map region. | ||
| 460 | /// | ||
| 461 | /// @note This function requires that SystemResourceSize be non-zero, | ||
| 462 | /// however, this is just because if it were not then the | ||
| 463 | /// resulting page tables could be exploited on hardware by | ||
| 464 | /// a malicious program. SystemResource usage does not need | ||
| 465 | /// to be explicitly checked or updated here. | ||
| 466 | ResultCode MapPhysicalMemory(VAddr target, u64 size); | ||
| 467 | |||
| 468 | /// Unmaps memory at a given address. | ||
| 469 | /// | ||
| 470 | /// @param addr The virtual address to unmap memory at. | ||
| 471 | /// @param size The amount of memory to unmap. | ||
| 472 | /// | ||
| 473 | /// @note The destination address must lie within the Map region. | ||
| 474 | /// | ||
| 475 | /// @note This function requires that SystemResourceSize be non-zero, | ||
| 476 | /// however, this is just because if it were not then the | ||
| 477 | /// resulting page tables could be exploited on hardware by | ||
| 478 | /// a malicious program. SystemResource usage does not need | ||
| 479 | /// to be explicitly checked or updated here. | ||
| 480 | ResultCode UnmapPhysicalMemory(VAddr target, u64 size); | ||
| 481 | |||
| 453 | /// Maps a region of memory as code memory. | 482 | /// Maps a region of memory as code memory. |
| 454 | /// | 483 | /// |
| 455 | /// @param dst_address The base address of the region to create the aliasing memory region. | 484 | /// @param dst_address The base address of the region to create the aliasing memory region. |
| @@ -596,17 +625,17 @@ public: | |||
| 596 | /// Determines whether or not the specified range is within the map region. | 625 | /// Determines whether or not the specified range is within the map region. |
| 597 | bool IsWithinMapRegion(VAddr address, u64 size) const; | 626 | bool IsWithinMapRegion(VAddr address, u64 size) const; |
| 598 | 627 | ||
| 599 | /// Gets the base address of the new map region. | 628 | /// Gets the base address of the stack region. |
| 600 | VAddr GetNewMapRegionBaseAddress() const; | 629 | VAddr GetStackRegionBaseAddress() const; |
| 601 | 630 | ||
| 602 | /// Gets the end address of the new map region. | 631 | /// Gets the end address of the stack region. |
| 603 | VAddr GetNewMapRegionEndAddress() const; | 632 | VAddr GetStackRegionEndAddress() const; |
| 604 | 633 | ||
| 605 | /// Gets the total size of the new map region in bytes. | 634 | /// Gets the total size of the stack region in bytes. |
| 606 | u64 GetNewMapRegionSize() const; | 635 | u64 GetStackRegionSize() const; |
| 607 | 636 | ||
| 608 | /// Determines whether or not the given address range is within the new map region | 637 | /// Determines whether or not the given address range is within the stack region |
| 609 | bool IsWithinNewMapRegion(VAddr address, u64 size) const; | 638 | bool IsWithinStackRegion(VAddr address, u64 size) const; |
| 610 | 639 | ||
| 611 | /// Gets the base address of the TLS IO region. | 640 | /// Gets the base address of the TLS IO region. |
| 612 | VAddr GetTLSIORegionBaseAddress() const; | 641 | VAddr GetTLSIORegionBaseAddress() const; |
| @@ -657,6 +686,11 @@ private: | |||
| 657 | */ | 686 | */ |
| 658 | VMAIter MergeAdjacent(VMAIter vma); | 687 | VMAIter MergeAdjacent(VMAIter vma); |
| 659 | 688 | ||
| 689 | /** | ||
| 690 | * Merges two adjacent VMAs. | ||
| 691 | */ | ||
| 692 | void MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right); | ||
| 693 | |||
| 660 | /// Updates the pages corresponding to this VMA so they match the VMA's attributes. | 694 | /// Updates the pages corresponding to this VMA so they match the VMA's attributes. |
| 661 | void UpdatePageTableForVMA(const VirtualMemoryArea& vma); | 695 | void UpdatePageTableForVMA(const VirtualMemoryArea& vma); |
| 662 | 696 | ||
| @@ -701,6 +735,13 @@ private: | |||
| 701 | MemoryAttribute attribute_mask, MemoryAttribute attribute, | 735 | MemoryAttribute attribute_mask, MemoryAttribute attribute, |
| 702 | MemoryAttribute ignore_mask) const; | 736 | MemoryAttribute ignore_mask) const; |
| 703 | 737 | ||
| 738 | /// Gets the amount of memory currently mapped (state != Unmapped) in a range. | ||
| 739 | ResultVal<std::size_t> SizeOfAllocatedVMAsInRange(VAddr address, std::size_t size) const; | ||
| 740 | |||
| 741 | /// Gets the amount of memory unmappable by UnmapPhysicalMemory in a range. | ||
| 742 | ResultVal<std::size_t> SizeOfUnmappablePhysicalMemoryInRange(VAddr address, | ||
| 743 | std::size_t size) const; | ||
| 744 | |||
| 704 | /** | 745 | /** |
| 705 | * A map covering the entirety of the managed address space, keyed by the `base` field of each | 746 | * A map covering the entirety of the managed address space, keyed by the `base` field of each |
| 706 | * VMA. It must always be modified by splitting or merging VMAs, so that the invariant | 747 | * VMA. It must always be modified by splitting or merging VMAs, so that the invariant |
| @@ -726,8 +767,8 @@ private: | |||
| 726 | VAddr map_region_base = 0; | 767 | VAddr map_region_base = 0; |
| 727 | VAddr map_region_end = 0; | 768 | VAddr map_region_end = 0; |
| 728 | 769 | ||
| 729 | VAddr new_map_region_base = 0; | 770 | VAddr stack_region_base = 0; |
| 730 | VAddr new_map_region_end = 0; | 771 | VAddr stack_region_end = 0; |
| 731 | 772 | ||
| 732 | VAddr tls_io_region_base = 0; | 773 | VAddr tls_io_region_base = 0; |
| 733 | VAddr tls_io_region_end = 0; | 774 | VAddr tls_io_region_end = 0; |
| @@ -742,6 +783,11 @@ private: | |||
| 742 | // end of the range. This is essentially 'base_address + current_size'. | 783 | // end of the range. This is essentially 'base_address + current_size'. |
| 743 | VAddr heap_end = 0; | 784 | VAddr heap_end = 0; |
| 744 | 785 | ||
| 786 | // The current amount of memory mapped via MapPhysicalMemory. | ||
| 787 | // This is used here (and in Nintendo's kernel) only for debugging, and does not impact | ||
| 788 | // any behavior. | ||
| 789 | u64 physical_memory_mapped = 0; | ||
| 790 | |||
| 745 | Core::System& system; | 791 | Core::System& system; |
| 746 | }; | 792 | }; |
| 747 | } // namespace Kernel | 793 | } // namespace Kernel |
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 33cebb48b..a192a1f5f 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp | |||
| @@ -29,7 +29,8 @@ | |||
| 29 | #include "core/hle/service/am/omm.h" | 29 | #include "core/hle/service/am/omm.h" |
| 30 | #include "core/hle/service/am/spsm.h" | 30 | #include "core/hle/service/am/spsm.h" |
| 31 | #include "core/hle/service/am/tcap.h" | 31 | #include "core/hle/service/am/tcap.h" |
| 32 | #include "core/hle/service/apm/apm.h" | 32 | #include "core/hle/service/apm/controller.h" |
| 33 | #include "core/hle/service/apm/interface.h" | ||
| 33 | #include "core/hle/service/filesystem/filesystem.h" | 34 | #include "core/hle/service/filesystem/filesystem.h" |
| 34 | #include "core/hle/service/ns/ns.h" | 35 | #include "core/hle/service/ns/ns.h" |
| 35 | #include "core/hle/service/nvflinger/nvflinger.h" | 36 | #include "core/hle/service/nvflinger/nvflinger.h" |
| @@ -265,12 +266,12 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger | |||
| 265 | {65, nullptr, "ReportUserIsActive"}, | 266 | {65, nullptr, "ReportUserIsActive"}, |
| 266 | {66, nullptr, "GetCurrentIlluminance"}, | 267 | {66, nullptr, "GetCurrentIlluminance"}, |
| 267 | {67, nullptr, "IsIlluminanceAvailable"}, | 268 | {67, nullptr, "IsIlluminanceAvailable"}, |
| 268 | {68, nullptr, "SetAutoSleepDisabled"}, | 269 | {68, &ISelfController::SetAutoSleepDisabled, "SetAutoSleepDisabled"}, |
| 269 | {69, nullptr, "IsAutoSleepDisabled"}, | 270 | {69, &ISelfController::IsAutoSleepDisabled, "IsAutoSleepDisabled"}, |
| 270 | {70, nullptr, "ReportMultimediaError"}, | 271 | {70, nullptr, "ReportMultimediaError"}, |
| 271 | {71, nullptr, "GetCurrentIlluminanceEx"}, | 272 | {71, nullptr, "GetCurrentIlluminanceEx"}, |
| 272 | {80, nullptr, "SetWirelessPriorityMode"}, | 273 | {80, nullptr, "SetWirelessPriorityMode"}, |
| 273 | {90, nullptr, "GetAccumulatedSuspendedTickValue"}, | 274 | {90, &ISelfController::GetAccumulatedSuspendedTickValue, "GetAccumulatedSuspendedTickValue"}, |
| 274 | {91, &ISelfController::GetAccumulatedSuspendedTickChangedEvent, "GetAccumulatedSuspendedTickChangedEvent"}, | 275 | {91, &ISelfController::GetAccumulatedSuspendedTickChangedEvent, "GetAccumulatedSuspendedTickChangedEvent"}, |
| 275 | {100, nullptr, "SetAlbumImageTakenNotificationEnabled"}, | 276 | {100, nullptr, "SetAlbumImageTakenNotificationEnabled"}, |
| 276 | {1000, nullptr, "GetDebugStorageChannel"}, | 277 | {1000, nullptr, "GetDebugStorageChannel"}, |
| @@ -283,10 +284,14 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger | |||
| 283 | launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, | 284 | launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, |
| 284 | "ISelfController:LaunchableEvent"); | 285 | "ISelfController:LaunchableEvent"); |
| 285 | 286 | ||
| 286 | // TODO(ogniK): Figure out where, when and why this event gets signalled | 287 | // This event is created by AM on the first time GetAccumulatedSuspendedTickChangedEvent() is |
| 288 | // called. Yuzu can just create it unconditionally, since it doesn't need to support multiple | ||
| 289 | // ISelfControllers. The event is signaled on creation, and on transition from suspended -> not | ||
| 290 | // suspended if the event has previously been created by a call to | ||
| 291 | // GetAccumulatedSuspendedTickChangedEvent. | ||
| 287 | accumulated_suspended_tick_changed_event = Kernel::WritableEvent::CreateEventPair( | 292 | accumulated_suspended_tick_changed_event = Kernel::WritableEvent::CreateEventPair( |
| 288 | kernel, Kernel::ResetType::Manual, "ISelfController:AccumulatedSuspendedTickChangedEvent"); | 293 | kernel, Kernel::ResetType::Manual, "ISelfController:AccumulatedSuspendedTickChangedEvent"); |
| 289 | accumulated_suspended_tick_changed_event.writable->Signal(); // Is signalled on creation | 294 | accumulated_suspended_tick_changed_event.writable->Signal(); |
| 290 | } | 295 | } |
| 291 | 296 | ||
| 292 | ISelfController::~ISelfController() = default; | 297 | ISelfController::~ISelfController() = default; |
| @@ -449,11 +454,47 @@ void ISelfController::GetIdleTimeDetectionExtension(Kernel::HLERequestContext& c | |||
| 449 | rb.Push<u32>(idle_time_detection_extension); | 454 | rb.Push<u32>(idle_time_detection_extension); |
| 450 | } | 455 | } |
| 451 | 456 | ||
| 457 | void ISelfController::SetAutoSleepDisabled(Kernel::HLERequestContext& ctx) { | ||
| 458 | IPC::RequestParser rp{ctx}; | ||
| 459 | is_auto_sleep_disabled = rp.Pop<bool>(); | ||
| 460 | |||
| 461 | // On the system itself, if the previous state of is_auto_sleep_disabled | ||
| 462 | // differed from the current value passed in, it'd signify the internal | ||
| 463 | // window manager to update (and also increment some statistics like update counts) | ||
| 464 | // | ||
| 465 | // It'd also indicate this change to an idle handling context. | ||
| 466 | // | ||
| 467 | // However, given we're emulating this behavior, most of this can be ignored | ||
| 468 | // and it's sufficient to simply set the member variable for querying via | ||
| 469 | // IsAutoSleepDisabled(). | ||
| 470 | |||
| 471 | LOG_DEBUG(Service_AM, "called. is_auto_sleep_disabled={}", is_auto_sleep_disabled); | ||
| 472 | |||
| 473 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 474 | rb.Push(RESULT_SUCCESS); | ||
| 475 | } | ||
| 476 | |||
| 477 | void ISelfController::IsAutoSleepDisabled(Kernel::HLERequestContext& ctx) { | ||
| 478 | LOG_DEBUG(Service_AM, "called."); | ||
| 479 | |||
| 480 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 481 | rb.Push(RESULT_SUCCESS); | ||
| 482 | rb.Push(is_auto_sleep_disabled); | ||
| 483 | } | ||
| 484 | |||
| 485 | void ISelfController::GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx) { | ||
| 486 | LOG_DEBUG(Service_AM, "called."); | ||
| 487 | |||
| 488 | // This command returns the total number of system ticks since ISelfController creation | ||
| 489 | // where the game was suspended. Since Yuzu doesn't implement game suspension, this command | ||
| 490 | // can just always return 0 ticks. | ||
| 491 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 492 | rb.Push(RESULT_SUCCESS); | ||
| 493 | rb.Push<u64>(0); | ||
| 494 | } | ||
| 495 | |||
| 452 | void ISelfController::GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx) { | 496 | void ISelfController::GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx) { |
| 453 | // The implementation of this function is fine as is, the reason we're labelling it as stubbed | 497 | LOG_DEBUG(Service_AM, "called."); |
| 454 | // is because we're currently unsure when and where accumulated_suspended_tick_changed_event is | ||
| 455 | // actually signalled for the time being. | ||
| 456 | LOG_WARNING(Service_AM, "(STUBBED) called"); | ||
| 457 | 498 | ||
| 458 | IPC::ResponseBuilder rb{ctx, 2, 1}; | 499 | IPC::ResponseBuilder rb{ctx, 2, 1}; |
| 459 | rb.Push(RESULT_SUCCESS); | 500 | rb.Push(RESULT_SUCCESS); |
| @@ -508,8 +549,9 @@ void AppletMessageQueue::OperationModeChanged() { | |||
| 508 | on_operation_mode_changed.writable->Signal(); | 549 | on_operation_mode_changed.writable->Signal(); |
| 509 | } | 550 | } |
| 510 | 551 | ||
| 511 | ICommonStateGetter::ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_queue) | 552 | ICommonStateGetter::ICommonStateGetter(Core::System& system, |
| 512 | : ServiceFramework("ICommonStateGetter"), msg_queue(std::move(msg_queue)) { | 553 | std::shared_ptr<AppletMessageQueue> msg_queue) |
| 554 | : ServiceFramework("ICommonStateGetter"), system(system), msg_queue(std::move(msg_queue)) { | ||
| 513 | // clang-format off | 555 | // clang-format off |
| 514 | static const FunctionInfo functions[] = { | 556 | static const FunctionInfo functions[] = { |
| 515 | {0, &ICommonStateGetter::GetEventHandle, "GetEventHandle"}, | 557 | {0, &ICommonStateGetter::GetEventHandle, "GetEventHandle"}, |
| @@ -542,7 +584,7 @@ ICommonStateGetter::ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_q | |||
| 542 | {63, nullptr, "GetHdcpAuthenticationStateChangeEvent"}, | 584 | {63, nullptr, "GetHdcpAuthenticationStateChangeEvent"}, |
| 543 | {64, nullptr, "SetTvPowerStateMatchingMode"}, | 585 | {64, nullptr, "SetTvPowerStateMatchingMode"}, |
| 544 | {65, nullptr, "GetApplicationIdByContentActionName"}, | 586 | {65, nullptr, "GetApplicationIdByContentActionName"}, |
| 545 | {66, nullptr, "SetCpuBoostMode"}, | 587 | {66, &ICommonStateGetter::SetCpuBoostMode, "SetCpuBoostMode"}, |
| 546 | {80, nullptr, "PerformSystemButtonPressingIfInFocus"}, | 588 | {80, nullptr, "PerformSystemButtonPressingIfInFocus"}, |
| 547 | {90, nullptr, "SetPerformanceConfigurationChangedNotification"}, | 589 | {90, nullptr, "SetPerformanceConfigurationChangedNotification"}, |
| 548 | {91, nullptr, "GetCurrentPerformanceConfiguration"}, | 590 | {91, nullptr, "GetCurrentPerformanceConfiguration"}, |
| @@ -623,6 +665,16 @@ void ICommonStateGetter::GetDefaultDisplayResolution(Kernel::HLERequestContext& | |||
| 623 | } | 665 | } |
| 624 | } | 666 | } |
| 625 | 667 | ||
| 668 | void ICommonStateGetter::SetCpuBoostMode(Kernel::HLERequestContext& ctx) { | ||
| 669 | LOG_DEBUG(Service_AM, "called, forwarding to APM:SYS"); | ||
| 670 | |||
| 671 | const auto& sm = system.ServiceManager(); | ||
| 672 | const auto apm_sys = sm.GetService<APM::APM_Sys>("apm:sys"); | ||
| 673 | ASSERT(apm_sys != nullptr); | ||
| 674 | |||
| 675 | apm_sys->SetCpuBoostMode(ctx); | ||
| 676 | } | ||
| 677 | |||
| 626 | IStorage::IStorage(std::vector<u8> buffer) | 678 | IStorage::IStorage(std::vector<u8> buffer) |
| 627 | : ServiceFramework("IStorage"), buffer(std::move(buffer)) { | 679 | : ServiceFramework("IStorage"), buffer(std::move(buffer)) { |
| 628 | // clang-format off | 680 | // clang-format off |
| @@ -651,13 +703,11 @@ void ICommonStateGetter::GetOperationMode(Kernel::HLERequestContext& ctx) { | |||
| 651 | } | 703 | } |
| 652 | 704 | ||
| 653 | void ICommonStateGetter::GetPerformanceMode(Kernel::HLERequestContext& ctx) { | 705 | void ICommonStateGetter::GetPerformanceMode(Kernel::HLERequestContext& ctx) { |
| 654 | const bool use_docked_mode{Settings::values.use_docked_mode}; | 706 | LOG_DEBUG(Service_AM, "called"); |
| 655 | LOG_DEBUG(Service_AM, "called, use_docked_mode={}", use_docked_mode); | ||
| 656 | 707 | ||
| 657 | IPC::ResponseBuilder rb{ctx, 3}; | 708 | IPC::ResponseBuilder rb{ctx, 3}; |
| 658 | rb.Push(RESULT_SUCCESS); | 709 | rb.Push(RESULT_SUCCESS); |
| 659 | rb.Push(static_cast<u32>(use_docked_mode ? APM::PerformanceMode::Docked | 710 | rb.PushEnum(system.GetAPMController().GetCurrentPerformanceMode()); |
| 660 | : APM::PerformanceMode::Handheld)); | ||
| 661 | } | 711 | } |
| 662 | 712 | ||
| 663 | class ILibraryAppletAccessor final : public ServiceFramework<ILibraryAppletAccessor> { | 713 | class ILibraryAppletAccessor final : public ServiceFramework<ILibraryAppletAccessor> { |
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h index 4ea609d23..6cb582483 100644 --- a/src/core/hle/service/am/am.h +++ b/src/core/hle/service/am/am.h | |||
| @@ -133,6 +133,9 @@ private: | |||
| 133 | void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx); | 133 | void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx); |
| 134 | void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); | 134 | void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); |
| 135 | void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); | 135 | void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); |
| 136 | void SetAutoSleepDisabled(Kernel::HLERequestContext& ctx); | ||
| 137 | void IsAutoSleepDisabled(Kernel::HLERequestContext& ctx); | ||
| 138 | void GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx); | ||
| 136 | void GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx); | 139 | void GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx); |
| 137 | 140 | ||
| 138 | std::shared_ptr<NVFlinger::NVFlinger> nvflinger; | 141 | std::shared_ptr<NVFlinger::NVFlinger> nvflinger; |
| @@ -141,11 +144,13 @@ private: | |||
| 141 | 144 | ||
| 142 | u32 idle_time_detection_extension = 0; | 145 | u32 idle_time_detection_extension = 0; |
| 143 | u64 num_fatal_sections_entered = 0; | 146 | u64 num_fatal_sections_entered = 0; |
| 147 | bool is_auto_sleep_disabled = false; | ||
| 144 | }; | 148 | }; |
| 145 | 149 | ||
| 146 | class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> { | 150 | class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> { |
| 147 | public: | 151 | public: |
| 148 | explicit ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_queue); | 152 | explicit ICommonStateGetter(Core::System& system, |
| 153 | std::shared_ptr<AppletMessageQueue> msg_queue); | ||
| 149 | ~ICommonStateGetter() override; | 154 | ~ICommonStateGetter() override; |
| 150 | 155 | ||
| 151 | private: | 156 | private: |
| @@ -167,7 +172,9 @@ private: | |||
| 167 | void GetPerformanceMode(Kernel::HLERequestContext& ctx); | 172 | void GetPerformanceMode(Kernel::HLERequestContext& ctx); |
| 168 | void GetBootMode(Kernel::HLERequestContext& ctx); | 173 | void GetBootMode(Kernel::HLERequestContext& ctx); |
| 169 | void GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx); | 174 | void GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx); |
| 175 | void SetCpuBoostMode(Kernel::HLERequestContext& ctx); | ||
| 170 | 176 | ||
| 177 | Core::System& system; | ||
| 171 | std::shared_ptr<AppletMessageQueue> msg_queue; | 178 | std::shared_ptr<AppletMessageQueue> msg_queue; |
| 172 | }; | 179 | }; |
| 173 | 180 | ||
diff --git a/src/core/hle/service/am/applet_ae.cpp b/src/core/hle/service/am/applet_ae.cpp index fe5beb8f9..a34368c8b 100644 --- a/src/core/hle/service/am/applet_ae.cpp +++ b/src/core/hle/service/am/applet_ae.cpp | |||
| @@ -42,7 +42,7 @@ private: | |||
| 42 | 42 | ||
| 43 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 43 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 44 | rb.Push(RESULT_SUCCESS); | 44 | rb.Push(RESULT_SUCCESS); |
| 45 | rb.PushIpcInterface<ICommonStateGetter>(msg_queue); | 45 | rb.PushIpcInterface<ICommonStateGetter>(system, msg_queue); |
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | void GetSelfController(Kernel::HLERequestContext& ctx) { | 48 | void GetSelfController(Kernel::HLERequestContext& ctx) { |
| @@ -146,7 +146,7 @@ private: | |||
| 146 | 146 | ||
| 147 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 147 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 148 | rb.Push(RESULT_SUCCESS); | 148 | rb.Push(RESULT_SUCCESS); |
| 149 | rb.PushIpcInterface<ICommonStateGetter>(msg_queue); | 149 | rb.PushIpcInterface<ICommonStateGetter>(system, msg_queue); |
| 150 | } | 150 | } |
| 151 | 151 | ||
| 152 | void GetSelfController(Kernel::HLERequestContext& ctx) { | 152 | void GetSelfController(Kernel::HLERequestContext& ctx) { |
diff --git a/src/core/hle/service/am/applet_oe.cpp b/src/core/hle/service/am/applet_oe.cpp index 6e255fe95..5d53ef113 100644 --- a/src/core/hle/service/am/applet_oe.cpp +++ b/src/core/hle/service/am/applet_oe.cpp | |||
| @@ -80,7 +80,7 @@ private: | |||
| 80 | 80 | ||
| 81 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 81 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 82 | rb.Push(RESULT_SUCCESS); | 82 | rb.Push(RESULT_SUCCESS); |
| 83 | rb.PushIpcInterface<ICommonStateGetter>(msg_queue); | 83 | rb.PushIpcInterface<ICommonStateGetter>(system, msg_queue); |
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | void GetLibraryAppletCreator(Kernel::HLERequestContext& ctx) { | 86 | void GetLibraryAppletCreator(Kernel::HLERequestContext& ctx) { |
diff --git a/src/core/hle/service/apm/apm.cpp b/src/core/hle/service/apm/apm.cpp index f3c09bbb1..85bbf5988 100644 --- a/src/core/hle/service/apm/apm.cpp +++ b/src/core/hle/service/apm/apm.cpp | |||
| @@ -2,7 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/logging/log.h" | ||
| 6 | #include "core/hle/ipc_helpers.h" | 5 | #include "core/hle/ipc_helpers.h" |
| 7 | #include "core/hle/service/apm/apm.h" | 6 | #include "core/hle/service/apm/apm.h" |
| 8 | #include "core/hle/service/apm/interface.h" | 7 | #include "core/hle/service/apm/interface.h" |
| @@ -12,11 +11,15 @@ namespace Service::APM { | |||
| 12 | Module::Module() = default; | 11 | Module::Module() = default; |
| 13 | Module::~Module() = default; | 12 | Module::~Module() = default; |
| 14 | 13 | ||
| 15 | void InstallInterfaces(SM::ServiceManager& service_manager) { | 14 | void InstallInterfaces(Core::System& system) { |
| 16 | auto module_ = std::make_shared<Module>(); | 15 | auto module_ = std::make_shared<Module>(); |
| 17 | std::make_shared<APM>(module_, "apm")->InstallAsService(service_manager); | 16 | std::make_shared<APM>(module_, system.GetAPMController(), "apm") |
| 18 | std::make_shared<APM>(module_, "apm:p")->InstallAsService(service_manager); | 17 | ->InstallAsService(system.ServiceManager()); |
| 19 | std::make_shared<APM_Sys>()->InstallAsService(service_manager); | 18 | std::make_shared<APM>(module_, system.GetAPMController(), "apm:p") |
| 19 | ->InstallAsService(system.ServiceManager()); | ||
| 20 | std::make_shared<APM>(module_, system.GetAPMController(), "apm:am") | ||
| 21 | ->InstallAsService(system.ServiceManager()); | ||
| 22 | std::make_shared<APM_Sys>(system.GetAPMController())->InstallAsService(system.ServiceManager()); | ||
| 20 | } | 23 | } |
| 21 | 24 | ||
| 22 | } // namespace Service::APM | 25 | } // namespace Service::APM |
diff --git a/src/core/hle/service/apm/apm.h b/src/core/hle/service/apm/apm.h index 4d7d5bb7c..cf4c2bb11 100644 --- a/src/core/hle/service/apm/apm.h +++ b/src/core/hle/service/apm/apm.h | |||
| @@ -8,11 +8,6 @@ | |||
| 8 | 8 | ||
| 9 | namespace Service::APM { | 9 | namespace Service::APM { |
| 10 | 10 | ||
| 11 | enum class PerformanceMode : u8 { | ||
| 12 | Handheld = 0, | ||
| 13 | Docked = 1, | ||
| 14 | }; | ||
| 15 | |||
| 16 | class Module final { | 11 | class Module final { |
| 17 | public: | 12 | public: |
| 18 | Module(); | 13 | Module(); |
| @@ -20,6 +15,6 @@ public: | |||
| 20 | }; | 15 | }; |
| 21 | 16 | ||
| 22 | /// Registers all AM services with the specified service manager. | 17 | /// Registers all AM services with the specified service manager. |
| 23 | void InstallInterfaces(SM::ServiceManager& service_manager); | 18 | void InstallInterfaces(Core::System& system); |
| 24 | 19 | ||
| 25 | } // namespace Service::APM | 20 | } // namespace Service::APM |
diff --git a/src/core/hle/service/apm/controller.cpp b/src/core/hle/service/apm/controller.cpp new file mode 100644 index 000000000..4376612eb --- /dev/null +++ b/src/core/hle/service/apm/controller.cpp | |||
| @@ -0,0 +1,68 @@ | |||
| 1 | // Copyright 2019 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/logging/log.h" | ||
| 6 | #include "core/core_timing.h" | ||
| 7 | #include "core/hle/service/apm/controller.h" | ||
| 8 | #include "core/settings.h" | ||
| 9 | |||
| 10 | namespace Service::APM { | ||
| 11 | |||
| 12 | constexpr PerformanceConfiguration DEFAULT_PERFORMANCE_CONFIGURATION = | ||
| 13 | PerformanceConfiguration::Config7; | ||
| 14 | |||
| 15 | Controller::Controller(Core::Timing::CoreTiming& core_timing) | ||
| 16 | : core_timing(core_timing), configs{ | ||
| 17 | {PerformanceMode::Handheld, DEFAULT_PERFORMANCE_CONFIGURATION}, | ||
| 18 | {PerformanceMode::Docked, DEFAULT_PERFORMANCE_CONFIGURATION}, | ||
| 19 | } {} | ||
| 20 | |||
| 21 | Controller::~Controller() = default; | ||
| 22 | |||
| 23 | void Controller::SetPerformanceConfiguration(PerformanceMode mode, | ||
| 24 | PerformanceConfiguration config) { | ||
| 25 | static const std::map<PerformanceConfiguration, u32> PCONFIG_TO_SPEED_MAP{ | ||
| 26 | {PerformanceConfiguration::Config1, 1020}, {PerformanceConfiguration::Config2, 1020}, | ||
| 27 | {PerformanceConfiguration::Config3, 1224}, {PerformanceConfiguration::Config4, 1020}, | ||
| 28 | {PerformanceConfiguration::Config5, 1020}, {PerformanceConfiguration::Config6, 1224}, | ||
| 29 | {PerformanceConfiguration::Config7, 1020}, {PerformanceConfiguration::Config8, 1020}, | ||
| 30 | {PerformanceConfiguration::Config9, 1020}, {PerformanceConfiguration::Config10, 1020}, | ||
| 31 | {PerformanceConfiguration::Config11, 1020}, {PerformanceConfiguration::Config12, 1020}, | ||
| 32 | {PerformanceConfiguration::Config13, 1785}, {PerformanceConfiguration::Config14, 1785}, | ||
| 33 | {PerformanceConfiguration::Config15, 1020}, {PerformanceConfiguration::Config16, 1020}, | ||
| 34 | }; | ||
| 35 | |||
| 36 | SetClockSpeed(PCONFIG_TO_SPEED_MAP.find(config)->second); | ||
| 37 | configs.insert_or_assign(mode, config); | ||
| 38 | } | ||
| 39 | |||
| 40 | void Controller::SetFromCpuBoostMode(CpuBoostMode mode) { | ||
| 41 | constexpr std::array<PerformanceConfiguration, 3> BOOST_MODE_TO_CONFIG_MAP{{ | ||
| 42 | PerformanceConfiguration::Config7, | ||
| 43 | PerformanceConfiguration::Config13, | ||
| 44 | PerformanceConfiguration::Config15, | ||
| 45 | }}; | ||
| 46 | |||
| 47 | SetPerformanceConfiguration(PerformanceMode::Docked, | ||
| 48 | BOOST_MODE_TO_CONFIG_MAP.at(static_cast<u32>(mode))); | ||
| 49 | } | ||
| 50 | |||
| 51 | PerformanceMode Controller::GetCurrentPerformanceMode() { | ||
| 52 | return Settings::values.use_docked_mode ? PerformanceMode::Docked : PerformanceMode::Handheld; | ||
| 53 | } | ||
| 54 | |||
| 55 | PerformanceConfiguration Controller::GetCurrentPerformanceConfiguration(PerformanceMode mode) { | ||
| 56 | if (configs.find(mode) == configs.end()) { | ||
| 57 | configs.insert_or_assign(mode, DEFAULT_PERFORMANCE_CONFIGURATION); | ||
| 58 | } | ||
| 59 | |||
| 60 | return configs[mode]; | ||
| 61 | } | ||
| 62 | |||
| 63 | void Controller::SetClockSpeed(u32 mhz) { | ||
| 64 | LOG_INFO(Service_APM, "called, mhz={:08X}", mhz); | ||
| 65 | // TODO(DarkLordZach): Actually signal core_timing to change clock speed. | ||
| 66 | } | ||
| 67 | |||
| 68 | } // namespace Service::APM | ||
diff --git a/src/core/hle/service/apm/controller.h b/src/core/hle/service/apm/controller.h new file mode 100644 index 000000000..8ac80eaea --- /dev/null +++ b/src/core/hle/service/apm/controller.h | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | // Copyright 2019 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <map> | ||
| 8 | #include "common/common_types.h" | ||
| 9 | |||
| 10 | namespace Core::Timing { | ||
| 11 | class CoreTiming; | ||
| 12 | } | ||
| 13 | |||
| 14 | namespace Service::APM { | ||
| 15 | |||
| 16 | enum class PerformanceConfiguration : u32 { | ||
| 17 | Config1 = 0x00010000, | ||
| 18 | Config2 = 0x00010001, | ||
| 19 | Config3 = 0x00010002, | ||
| 20 | Config4 = 0x00020000, | ||
| 21 | Config5 = 0x00020001, | ||
| 22 | Config6 = 0x00020002, | ||
| 23 | Config7 = 0x00020003, | ||
| 24 | Config8 = 0x00020004, | ||
| 25 | Config9 = 0x00020005, | ||
| 26 | Config10 = 0x00020006, | ||
| 27 | Config11 = 0x92220007, | ||
| 28 | Config12 = 0x92220008, | ||
| 29 | Config13 = 0x92220009, | ||
| 30 | Config14 = 0x9222000A, | ||
| 31 | Config15 = 0x9222000B, | ||
| 32 | Config16 = 0x9222000C, | ||
| 33 | }; | ||
| 34 | |||
| 35 | enum class CpuBoostMode : u32 { | ||
| 36 | Disabled = 0, | ||
| 37 | Full = 1, // CPU + GPU -> Config 13, 14, 15, or 16 | ||
| 38 | Partial = 2, // GPU Only -> Config 15 or 16 | ||
| 39 | }; | ||
| 40 | |||
| 41 | enum class PerformanceMode : u8 { | ||
| 42 | Handheld = 0, | ||
| 43 | Docked = 1, | ||
| 44 | }; | ||
| 45 | |||
| 46 | // Class to manage the state and change of the emulated system performance. | ||
| 47 | // Specifically, this deals with PerformanceMode, which corresponds to the system being docked or | ||
| 48 | // undocked, and PerformanceConfig which specifies the exact CPU, GPU, and Memory clocks to operate | ||
| 49 | // at. Additionally, this manages 'Boost Mode', which allows games to temporarily overclock the | ||
| 50 | // system during times of high load -- this simply maps to different PerformanceConfigs to use. | ||
| 51 | class Controller { | ||
| 52 | public: | ||
| 53 | Controller(Core::Timing::CoreTiming& core_timing); | ||
| 54 | ~Controller(); | ||
| 55 | |||
| 56 | void SetPerformanceConfiguration(PerformanceMode mode, PerformanceConfiguration config); | ||
| 57 | void SetFromCpuBoostMode(CpuBoostMode mode); | ||
| 58 | |||
| 59 | PerformanceMode GetCurrentPerformanceMode(); | ||
| 60 | PerformanceConfiguration GetCurrentPerformanceConfiguration(PerformanceMode mode); | ||
| 61 | |||
| 62 | private: | ||
| 63 | void SetClockSpeed(u32 mhz); | ||
| 64 | |||
| 65 | std::map<PerformanceMode, PerformanceConfiguration> configs; | ||
| 66 | |||
| 67 | Core::Timing::CoreTiming& core_timing; | ||
| 68 | }; | ||
| 69 | |||
| 70 | } // namespace Service::APM | ||
diff --git a/src/core/hle/service/apm/interface.cpp b/src/core/hle/service/apm/interface.cpp index d058c0245..06f0f8edd 100644 --- a/src/core/hle/service/apm/interface.cpp +++ b/src/core/hle/service/apm/interface.cpp | |||
| @@ -5,43 +5,32 @@ | |||
| 5 | #include "common/logging/log.h" | 5 | #include "common/logging/log.h" |
| 6 | #include "core/hle/ipc_helpers.h" | 6 | #include "core/hle/ipc_helpers.h" |
| 7 | #include "core/hle/service/apm/apm.h" | 7 | #include "core/hle/service/apm/apm.h" |
| 8 | #include "core/hle/service/apm/controller.h" | ||
| 8 | #include "core/hle/service/apm/interface.h" | 9 | #include "core/hle/service/apm/interface.h" |
| 9 | 10 | ||
| 10 | namespace Service::APM { | 11 | namespace Service::APM { |
| 11 | 12 | ||
| 12 | class ISession final : public ServiceFramework<ISession> { | 13 | class ISession final : public ServiceFramework<ISession> { |
| 13 | public: | 14 | public: |
| 14 | ISession() : ServiceFramework("ISession") { | 15 | ISession(Controller& controller) : ServiceFramework("ISession"), controller(controller) { |
| 15 | static const FunctionInfo functions[] = { | 16 | static const FunctionInfo functions[] = { |
| 16 | {0, &ISession::SetPerformanceConfiguration, "SetPerformanceConfiguration"}, | 17 | {0, &ISession::SetPerformanceConfiguration, "SetPerformanceConfiguration"}, |
| 17 | {1, &ISession::GetPerformanceConfiguration, "GetPerformanceConfiguration"}, | 18 | {1, &ISession::GetPerformanceConfiguration, "GetPerformanceConfiguration"}, |
| 19 | {2, nullptr, "SetCpuOverclockEnabled"}, | ||
| 18 | }; | 20 | }; |
| 19 | RegisterHandlers(functions); | 21 | RegisterHandlers(functions); |
| 20 | } | 22 | } |
| 21 | 23 | ||
| 22 | private: | 24 | private: |
| 23 | enum class PerformanceConfiguration : u32 { | ||
| 24 | Config1 = 0x00010000, | ||
| 25 | Config2 = 0x00010001, | ||
| 26 | Config3 = 0x00010002, | ||
| 27 | Config4 = 0x00020000, | ||
| 28 | Config5 = 0x00020001, | ||
| 29 | Config6 = 0x00020002, | ||
| 30 | Config7 = 0x00020003, | ||
| 31 | Config8 = 0x00020004, | ||
| 32 | Config9 = 0x00020005, | ||
| 33 | Config10 = 0x00020006, | ||
| 34 | Config11 = 0x92220007, | ||
| 35 | Config12 = 0x92220008, | ||
| 36 | }; | ||
| 37 | |||
| 38 | void SetPerformanceConfiguration(Kernel::HLERequestContext& ctx) { | 25 | void SetPerformanceConfiguration(Kernel::HLERequestContext& ctx) { |
| 39 | IPC::RequestParser rp{ctx}; | 26 | IPC::RequestParser rp{ctx}; |
| 40 | 27 | ||
| 41 | auto mode = static_cast<PerformanceMode>(rp.Pop<u32>()); | 28 | const auto mode = rp.PopEnum<PerformanceMode>(); |
| 42 | u32 config = rp.Pop<u32>(); | 29 | const auto config = rp.PopEnum<PerformanceConfiguration>(); |
| 43 | LOG_WARNING(Service_APM, "(STUBBED) called mode={} config={}", static_cast<u32>(mode), | 30 | LOG_DEBUG(Service_APM, "called mode={} config={}", static_cast<u32>(mode), |
| 44 | config); | 31 | static_cast<u32>(config)); |
| 32 | |||
| 33 | controller.SetPerformanceConfiguration(mode, config); | ||
| 45 | 34 | ||
| 46 | IPC::ResponseBuilder rb{ctx, 2}; | 35 | IPC::ResponseBuilder rb{ctx, 2}; |
| 47 | rb.Push(RESULT_SUCCESS); | 36 | rb.Push(RESULT_SUCCESS); |
| @@ -50,20 +39,23 @@ private: | |||
| 50 | void GetPerformanceConfiguration(Kernel::HLERequestContext& ctx) { | 39 | void GetPerformanceConfiguration(Kernel::HLERequestContext& ctx) { |
| 51 | IPC::RequestParser rp{ctx}; | 40 | IPC::RequestParser rp{ctx}; |
| 52 | 41 | ||
| 53 | auto mode = static_cast<PerformanceMode>(rp.Pop<u32>()); | 42 | const auto mode = rp.PopEnum<PerformanceMode>(); |
| 54 | LOG_WARNING(Service_APM, "(STUBBED) called mode={}", static_cast<u32>(mode)); | 43 | LOG_DEBUG(Service_APM, "called mode={}", static_cast<u32>(mode)); |
| 55 | 44 | ||
| 56 | IPC::ResponseBuilder rb{ctx, 3}; | 45 | IPC::ResponseBuilder rb{ctx, 3}; |
| 57 | rb.Push(RESULT_SUCCESS); | 46 | rb.Push(RESULT_SUCCESS); |
| 58 | rb.Push<u32>(static_cast<u32>(PerformanceConfiguration::Config1)); | 47 | rb.PushEnum(controller.GetCurrentPerformanceConfiguration(mode)); |
| 59 | } | 48 | } |
| 49 | |||
| 50 | Controller& controller; | ||
| 60 | }; | 51 | }; |
| 61 | 52 | ||
| 62 | APM::APM(std::shared_ptr<Module> apm, const char* name) | 53 | APM::APM(std::shared_ptr<Module> apm, Controller& controller, const char* name) |
| 63 | : ServiceFramework(name), apm(std::move(apm)) { | 54 | : ServiceFramework(name), apm(std::move(apm)), controller(controller) { |
| 64 | static const FunctionInfo functions[] = { | 55 | static const FunctionInfo functions[] = { |
| 65 | {0, &APM::OpenSession, "OpenSession"}, | 56 | {0, &APM::OpenSession, "OpenSession"}, |
| 66 | {1, nullptr, "GetPerformanceMode"}, | 57 | {1, &APM::GetPerformanceMode, "GetPerformanceMode"}, |
| 58 | {6, nullptr, "IsCpuOverclockEnabled"}, | ||
| 67 | }; | 59 | }; |
| 68 | RegisterHandlers(functions); | 60 | RegisterHandlers(functions); |
| 69 | } | 61 | } |
| @@ -75,10 +67,17 @@ void APM::OpenSession(Kernel::HLERequestContext& ctx) { | |||
| 75 | 67 | ||
| 76 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 68 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 77 | rb.Push(RESULT_SUCCESS); | 69 | rb.Push(RESULT_SUCCESS); |
| 78 | rb.PushIpcInterface<ISession>(); | 70 | rb.PushIpcInterface<ISession>(controller); |
| 71 | } | ||
| 72 | |||
| 73 | void APM::GetPerformanceMode(Kernel::HLERequestContext& ctx) { | ||
| 74 | LOG_DEBUG(Service_APM, "called"); | ||
| 75 | |||
| 76 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 77 | rb.PushEnum(controller.GetCurrentPerformanceMode()); | ||
| 79 | } | 78 | } |
| 80 | 79 | ||
| 81 | APM_Sys::APM_Sys() : ServiceFramework{"apm:sys"} { | 80 | APM_Sys::APM_Sys(Controller& controller) : ServiceFramework{"apm:sys"}, controller(controller) { |
| 82 | // clang-format off | 81 | // clang-format off |
| 83 | static const FunctionInfo functions[] = { | 82 | static const FunctionInfo functions[] = { |
| 84 | {0, nullptr, "RequestPerformanceMode"}, | 83 | {0, nullptr, "RequestPerformanceMode"}, |
| @@ -87,8 +86,8 @@ APM_Sys::APM_Sys() : ServiceFramework{"apm:sys"} { | |||
| 87 | {3, nullptr, "GetLastThrottlingState"}, | 86 | {3, nullptr, "GetLastThrottlingState"}, |
| 88 | {4, nullptr, "ClearLastThrottlingState"}, | 87 | {4, nullptr, "ClearLastThrottlingState"}, |
| 89 | {5, nullptr, "LoadAndApplySettings"}, | 88 | {5, nullptr, "LoadAndApplySettings"}, |
| 90 | {6, nullptr, "SetCpuBoostMode"}, | 89 | {6, &APM_Sys::SetCpuBoostMode, "SetCpuBoostMode"}, |
| 91 | {7, nullptr, "GetCurrentPerformanceConfiguration"}, | 90 | {7, &APM_Sys::GetCurrentPerformanceConfiguration, "GetCurrentPerformanceConfiguration"}, |
| 92 | }; | 91 | }; |
| 93 | // clang-format on | 92 | // clang-format on |
| 94 | 93 | ||
| @@ -102,7 +101,28 @@ void APM_Sys::GetPerformanceEvent(Kernel::HLERequestContext& ctx) { | |||
| 102 | 101 | ||
| 103 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 102 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 104 | rb.Push(RESULT_SUCCESS); | 103 | rb.Push(RESULT_SUCCESS); |
| 105 | rb.PushIpcInterface<ISession>(); | 104 | rb.PushIpcInterface<ISession>(controller); |
| 105 | } | ||
| 106 | |||
| 107 | void APM_Sys::SetCpuBoostMode(Kernel::HLERequestContext& ctx) { | ||
| 108 | IPC::RequestParser rp{ctx}; | ||
| 109 | const auto mode = rp.PopEnum<CpuBoostMode>(); | ||
| 110 | |||
| 111 | LOG_DEBUG(Service_APM, "called, mode={:08X}", static_cast<u32>(mode)); | ||
| 112 | |||
| 113 | controller.SetFromCpuBoostMode(mode); | ||
| 114 | |||
| 115 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 116 | rb.Push(RESULT_SUCCESS); | ||
| 117 | } | ||
| 118 | |||
| 119 | void APM_Sys::GetCurrentPerformanceConfiguration(Kernel::HLERequestContext& ctx) { | ||
| 120 | LOG_DEBUG(Service_APM, "called"); | ||
| 121 | |||
| 122 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 123 | rb.Push(RESULT_SUCCESS); | ||
| 124 | rb.PushEnum( | ||
| 125 | controller.GetCurrentPerformanceConfiguration(controller.GetCurrentPerformanceMode())); | ||
| 106 | } | 126 | } |
| 107 | 127 | ||
| 108 | } // namespace Service::APM | 128 | } // namespace Service::APM |
diff --git a/src/core/hle/service/apm/interface.h b/src/core/hle/service/apm/interface.h index 773541aa4..de1b89437 100644 --- a/src/core/hle/service/apm/interface.h +++ b/src/core/hle/service/apm/interface.h | |||
| @@ -8,24 +8,34 @@ | |||
| 8 | 8 | ||
| 9 | namespace Service::APM { | 9 | namespace Service::APM { |
| 10 | 10 | ||
| 11 | class Controller; | ||
| 12 | class Module; | ||
| 13 | |||
| 11 | class APM final : public ServiceFramework<APM> { | 14 | class APM final : public ServiceFramework<APM> { |
| 12 | public: | 15 | public: |
| 13 | explicit APM(std::shared_ptr<Module> apm, const char* name); | 16 | explicit APM(std::shared_ptr<Module> apm, Controller& controller, const char* name); |
| 14 | ~APM() override; | 17 | ~APM() override; |
| 15 | 18 | ||
| 16 | private: | 19 | private: |
| 17 | void OpenSession(Kernel::HLERequestContext& ctx); | 20 | void OpenSession(Kernel::HLERequestContext& ctx); |
| 21 | void GetPerformanceMode(Kernel::HLERequestContext& ctx); | ||
| 18 | 22 | ||
| 19 | std::shared_ptr<Module> apm; | 23 | std::shared_ptr<Module> apm; |
| 24 | Controller& controller; | ||
| 20 | }; | 25 | }; |
| 21 | 26 | ||
| 22 | class APM_Sys final : public ServiceFramework<APM_Sys> { | 27 | class APM_Sys final : public ServiceFramework<APM_Sys> { |
| 23 | public: | 28 | public: |
| 24 | explicit APM_Sys(); | 29 | explicit APM_Sys(Controller& controller); |
| 25 | ~APM_Sys() override; | 30 | ~APM_Sys() override; |
| 26 | 31 | ||
| 32 | void SetCpuBoostMode(Kernel::HLERequestContext& ctx); | ||
| 33 | |||
| 27 | private: | 34 | private: |
| 28 | void GetPerformanceEvent(Kernel::HLERequestContext& ctx); | 35 | void GetPerformanceEvent(Kernel::HLERequestContext& ctx); |
| 36 | void GetCurrentPerformanceConfiguration(Kernel::HLERequestContext& ctx); | ||
| 37 | |||
| 38 | Controller& controller; | ||
| 29 | }; | 39 | }; |
| 30 | 40 | ||
| 31 | } // namespace Service::APM | 41 | } // namespace Service::APM |
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 3711e1ea1..679299f68 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp | |||
| @@ -25,7 +25,8 @@ namespace Service::Audio { | |||
| 25 | 25 | ||
| 26 | class IAudioRenderer final : public ServiceFramework<IAudioRenderer> { | 26 | class IAudioRenderer final : public ServiceFramework<IAudioRenderer> { |
| 27 | public: | 27 | public: |
| 28 | explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params) | 28 | explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params, |
| 29 | const std::size_t instance_number) | ||
| 29 | : ServiceFramework("IAudioRenderer") { | 30 | : ServiceFramework("IAudioRenderer") { |
| 30 | // clang-format off | 31 | // clang-format off |
| 31 | static const FunctionInfo functions[] = { | 32 | static const FunctionInfo functions[] = { |
| @@ -48,8 +49,8 @@ public: | |||
| 48 | auto& system = Core::System::GetInstance(); | 49 | auto& system = Core::System::GetInstance(); |
| 49 | system_event = Kernel::WritableEvent::CreateEventPair( | 50 | system_event = Kernel::WritableEvent::CreateEventPair( |
| 50 | system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent"); | 51 | system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent"); |
| 51 | renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params, | 52 | renderer = std::make_unique<AudioCore::AudioRenderer>( |
| 52 | system_event.writable); | 53 | system.CoreTiming(), audren_params, system_event.writable, instance_number); |
| 53 | } | 54 | } |
| 54 | 55 | ||
| 55 | private: | 56 | private: |
| @@ -607,7 +608,7 @@ void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) { | |||
| 607 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 608 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 608 | 609 | ||
| 609 | rb.Push(RESULT_SUCCESS); | 610 | rb.Push(RESULT_SUCCESS); |
| 610 | rb.PushIpcInterface<IAudioRenderer>(params); | 611 | rb.PushIpcInterface<IAudioRenderer>(params, audren_instance_count++); |
| 611 | } | 612 | } |
| 612 | 613 | ||
| 613 | bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { | 614 | bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { |
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h index 1d3c8df61..49f2733cf 100644 --- a/src/core/hle/service/audio/audren_u.h +++ b/src/core/hle/service/audio/audren_u.h | |||
| @@ -33,6 +33,7 @@ private: | |||
| 33 | }; | 33 | }; |
| 34 | 34 | ||
| 35 | bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const; | 35 | bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const; |
| 36 | std::size_t audren_instance_count = 0; | ||
| 36 | }; | 37 | }; |
| 37 | 38 | ||
| 38 | } // namespace Service::Audio | 39 | } // namespace Service::Audio |
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp index 1ebfeb4bf..8ce110dd1 100644 --- a/src/core/hle/service/filesystem/filesystem.cpp +++ b/src/core/hle/service/filesystem/filesystem.cpp | |||
| @@ -472,12 +472,12 @@ void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite) { | |||
| 472 | } | 472 | } |
| 473 | } | 473 | } |
| 474 | 474 | ||
| 475 | void InstallInterfaces(SM::ServiceManager& service_manager, FileSys::VfsFilesystem& vfs) { | 475 | void InstallInterfaces(Core::System& system) { |
| 476 | romfs_factory = nullptr; | 476 | romfs_factory = nullptr; |
| 477 | CreateFactories(vfs, false); | 477 | CreateFactories(*system.GetFilesystem(), false); |
| 478 | std::make_shared<FSP_LDR>()->InstallAsService(service_manager); | 478 | std::make_shared<FSP_LDR>()->InstallAsService(system.ServiceManager()); |
| 479 | std::make_shared<FSP_PR>()->InstallAsService(service_manager); | 479 | std::make_shared<FSP_PR>()->InstallAsService(system.ServiceManager()); |
| 480 | std::make_shared<FSP_SRV>()->InstallAsService(service_manager); | 480 | std::make_shared<FSP_SRV>(system.GetReporter())->InstallAsService(system.ServiceManager()); |
| 481 | } | 481 | } |
| 482 | 482 | ||
| 483 | } // namespace Service::FileSystem | 483 | } // namespace Service::FileSystem |
diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h index 6481f237c..3849dd89e 100644 --- a/src/core/hle/service/filesystem/filesystem.h +++ b/src/core/hle/service/filesystem/filesystem.h | |||
| @@ -65,7 +65,7 @@ FileSys::VirtualDir GetModificationDumpRoot(u64 title_id); | |||
| 65 | // above is called. | 65 | // above is called. |
| 66 | void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite = true); | 66 | void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite = true); |
| 67 | 67 | ||
| 68 | void InstallInterfaces(SM::ServiceManager& service_manager, FileSys::VfsFilesystem& vfs); | 68 | void InstallInterfaces(Core::System& system); |
| 69 | 69 | ||
| 70 | // A class that wraps a VfsDirectory with methods that return ResultVal and ResultCode instead of | 70 | // A class that wraps a VfsDirectory with methods that return ResultVal and ResultCode instead of |
| 71 | // pointers and booleans. This makes using a VfsDirectory with switch services much easier and | 71 | // pointers and booleans. This makes using a VfsDirectory with switch services much easier and |
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp index e7df8fd98..d3cd46a9b 100644 --- a/src/core/hle/service/filesystem/fsp_srv.cpp +++ b/src/core/hle/service/filesystem/fsp_srv.cpp | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include "core/hle/kernel/process.h" | 26 | #include "core/hle/kernel/process.h" |
| 27 | #include "core/hle/service/filesystem/filesystem.h" | 27 | #include "core/hle/service/filesystem/filesystem.h" |
| 28 | #include "core/hle/service/filesystem/fsp_srv.h" | 28 | #include "core/hle/service/filesystem/fsp_srv.h" |
| 29 | #include "core/reporter.h" | ||
| 29 | 30 | ||
| 30 | namespace Service::FileSystem { | 31 | namespace Service::FileSystem { |
| 31 | 32 | ||
| @@ -613,7 +614,7 @@ private: | |||
| 613 | u64 next_entry_index = 0; | 614 | u64 next_entry_index = 0; |
| 614 | }; | 615 | }; |
| 615 | 616 | ||
| 616 | FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { | 617 | FSP_SRV::FSP_SRV(const Core::Reporter& reporter) : ServiceFramework("fsp-srv"), reporter(reporter) { |
| 617 | // clang-format off | 618 | // clang-format off |
| 618 | static const FunctionInfo functions[] = { | 619 | static const FunctionInfo functions[] = { |
| 619 | {0, nullptr, "OpenFileSystem"}, | 620 | {0, nullptr, "OpenFileSystem"}, |
| @@ -710,14 +711,14 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { | |||
| 710 | {1001, nullptr, "SetSaveDataSize"}, | 711 | {1001, nullptr, "SetSaveDataSize"}, |
| 711 | {1002, nullptr, "SetSaveDataRootPath"}, | 712 | {1002, nullptr, "SetSaveDataRootPath"}, |
| 712 | {1003, nullptr, "DisableAutoSaveDataCreation"}, | 713 | {1003, nullptr, "DisableAutoSaveDataCreation"}, |
| 713 | {1004, nullptr, "SetGlobalAccessLogMode"}, | 714 | {1004, &FSP_SRV::SetGlobalAccessLogMode, "SetGlobalAccessLogMode"}, |
| 714 | {1005, &FSP_SRV::GetGlobalAccessLogMode, "GetGlobalAccessLogMode"}, | 715 | {1005, &FSP_SRV::GetGlobalAccessLogMode, "GetGlobalAccessLogMode"}, |
| 715 | {1006, nullptr, "OutputAccessLogToSdCard"}, | 716 | {1006, &FSP_SRV::OutputAccessLogToSdCard, "OutputAccessLogToSdCard"}, |
| 716 | {1007, nullptr, "RegisterUpdatePartition"}, | 717 | {1007, nullptr, "RegisterUpdatePartition"}, |
| 717 | {1008, nullptr, "OpenRegisteredUpdatePartition"}, | 718 | {1008, nullptr, "OpenRegisteredUpdatePartition"}, |
| 718 | {1009, nullptr, "GetAndClearMemoryReportInfo"}, | 719 | {1009, nullptr, "GetAndClearMemoryReportInfo"}, |
| 719 | {1010, nullptr, "SetDataStorageRedirectTarget"}, | 720 | {1010, nullptr, "SetDataStorageRedirectTarget"}, |
| 720 | {1011, nullptr, "OutputAccessLogToSdCard2"}, | 721 | {1011, &FSP_SRV::GetAccessLogVersionInfo, "GetAccessLogVersionInfo"}, |
| 721 | {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"}, | 722 | {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"}, |
| 722 | {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"}, | 723 | {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"}, |
| 723 | {1200, nullptr, "OpenMultiCommitManager"}, | 724 | {1200, nullptr, "OpenMultiCommitManager"}, |
| @@ -814,21 +815,22 @@ void FSP_SRV::OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& | |||
| 814 | rb.PushIpcInterface<ISaveDataInfoReader>(std::make_shared<ISaveDataInfoReader>(space)); | 815 | rb.PushIpcInterface<ISaveDataInfoReader>(std::make_shared<ISaveDataInfoReader>(space)); |
| 815 | } | 816 | } |
| 816 | 817 | ||
| 817 | void FSP_SRV::GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) { | 818 | void FSP_SRV::SetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) { |
| 818 | LOG_WARNING(Service_FS, "(STUBBED) called"); | 819 | IPC::RequestParser rp{ctx}; |
| 820 | log_mode = rp.PopEnum<LogMode>(); | ||
| 819 | 821 | ||
| 820 | enum class LogMode : u32 { | 822 | LOG_DEBUG(Service_FS, "called, log_mode={:08X}", static_cast<u32>(log_mode)); |
| 821 | Off, | 823 | |
| 822 | Log, | 824 | IPC::ResponseBuilder rb{ctx, 2}; |
| 823 | RedirectToSdCard, | 825 | rb.Push(RESULT_SUCCESS); |
| 824 | LogToSdCard = Log | RedirectToSdCard, | 826 | } |
| 825 | }; | 827 | |
| 828 | void FSP_SRV::GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) { | ||
| 829 | LOG_DEBUG(Service_FS, "called"); | ||
| 826 | 830 | ||
| 827 | // Given we always want to receive logging information, | ||
| 828 | // we always specify logging as enabled. | ||
| 829 | IPC::ResponseBuilder rb{ctx, 3}; | 831 | IPC::ResponseBuilder rb{ctx, 3}; |
| 830 | rb.Push(RESULT_SUCCESS); | 832 | rb.Push(RESULT_SUCCESS); |
| 831 | rb.PushEnum(LogMode::Log); | 833 | rb.PushEnum(log_mode); |
| 832 | } | 834 | } |
| 833 | 835 | ||
| 834 | void FSP_SRV::OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) { | 836 | void FSP_SRV::OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) { |
| @@ -902,4 +904,26 @@ void FSP_SRV::OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ct | |||
| 902 | rb.Push(FileSys::ERROR_ENTITY_NOT_FOUND); | 904 | rb.Push(FileSys::ERROR_ENTITY_NOT_FOUND); |
| 903 | } | 905 | } |
| 904 | 906 | ||
| 907 | void FSP_SRV::OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx) { | ||
| 908 | const auto raw = ctx.ReadBuffer(); | ||
| 909 | auto log = Common::StringFromFixedZeroTerminatedBuffer( | ||
| 910 | reinterpret_cast<const char*>(raw.data()), raw.size()); | ||
| 911 | |||
| 912 | LOG_DEBUG(Service_FS, "called, log='{}'", log); | ||
| 913 | |||
| 914 | reporter.SaveFilesystemAccessReport(log_mode, std::move(log)); | ||
| 915 | |||
| 916 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 917 | rb.Push(RESULT_SUCCESS); | ||
| 918 | } | ||
| 919 | |||
| 920 | void FSP_SRV::GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx) { | ||
| 921 | LOG_DEBUG(Service_FS, "called"); | ||
| 922 | |||
| 923 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 924 | rb.Push(RESULT_SUCCESS); | ||
| 925 | rb.PushEnum(AccessLogVersion::Latest); | ||
| 926 | rb.Push(access_log_program_index); | ||
| 927 | } | ||
| 928 | |||
| 905 | } // namespace Service::FileSystem | 929 | } // namespace Service::FileSystem |
diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h index d7572ba7a..b5486a193 100644 --- a/src/core/hle/service/filesystem/fsp_srv.h +++ b/src/core/hle/service/filesystem/fsp_srv.h | |||
| @@ -7,15 +7,32 @@ | |||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include "core/hle/service/service.h" | 8 | #include "core/hle/service/service.h" |
| 9 | 9 | ||
| 10 | namespace Core { | ||
| 11 | class Reporter; | ||
| 12 | } | ||
| 13 | |||
| 10 | namespace FileSys { | 14 | namespace FileSys { |
| 11 | class FileSystemBackend; | 15 | class FileSystemBackend; |
| 12 | } | 16 | } |
| 13 | 17 | ||
| 14 | namespace Service::FileSystem { | 18 | namespace Service::FileSystem { |
| 15 | 19 | ||
| 20 | enum class AccessLogVersion : u32 { | ||
| 21 | V7_0_0 = 2, | ||
| 22 | |||
| 23 | Latest = V7_0_0, | ||
| 24 | }; | ||
| 25 | |||
| 26 | enum class LogMode : u32 { | ||
| 27 | Off, | ||
| 28 | Log, | ||
| 29 | RedirectToSdCard, | ||
| 30 | LogToSdCard = Log | RedirectToSdCard, | ||
| 31 | }; | ||
| 32 | |||
| 16 | class FSP_SRV final : public ServiceFramework<FSP_SRV> { | 33 | class FSP_SRV final : public ServiceFramework<FSP_SRV> { |
| 17 | public: | 34 | public: |
| 18 | explicit FSP_SRV(); | 35 | explicit FSP_SRV(const Core::Reporter& reporter); |
| 19 | ~FSP_SRV() override; | 36 | ~FSP_SRV() override; |
| 20 | 37 | ||
| 21 | private: | 38 | private: |
| @@ -26,13 +43,20 @@ private: | |||
| 26 | void OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx); | 43 | void OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx); |
| 27 | void OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx); | 44 | void OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx); |
| 28 | void OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx); | 45 | void OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx); |
| 46 | void SetGlobalAccessLogMode(Kernel::HLERequestContext& ctx); | ||
| 29 | void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx); | 47 | void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx); |
| 30 | void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); | 48 | void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); |
| 31 | void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx); | 49 | void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx); |
| 32 | void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); | 50 | void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); |
| 51 | void OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx); | ||
| 52 | void GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx); | ||
| 33 | 53 | ||
| 34 | FileSys::VirtualFile romfs; | 54 | FileSys::VirtualFile romfs; |
| 35 | u64 current_process_id = 0; | 55 | u64 current_process_id = 0; |
| 56 | u32 access_log_program_index = 0; | ||
| 57 | LogMode log_mode = LogMode::LogToSdCard; | ||
| 58 | |||
| 59 | const Core::Reporter& reporter; | ||
| 36 | }; | 60 | }; |
| 37 | 61 | ||
| 38 | } // namespace Service::FileSystem | 62 | } // namespace Service::FileSystem |
diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp index dec541f2e..d1ec12ef9 100644 --- a/src/core/hle/service/friend/friend.cpp +++ b/src/core/hle/service/friend/friend.cpp | |||
| @@ -22,7 +22,7 @@ public: | |||
| 22 | {0, nullptr, "GetCompletionEvent"}, | 22 | {0, nullptr, "GetCompletionEvent"}, |
| 23 | {1, nullptr, "Cancel"}, | 23 | {1, nullptr, "Cancel"}, |
| 24 | {10100, nullptr, "GetFriendListIds"}, | 24 | {10100, nullptr, "GetFriendListIds"}, |
| 25 | {10101, nullptr, "GetFriendList"}, | 25 | {10101, &IFriendService::GetFriendList, "GetFriendList"}, |
| 26 | {10102, nullptr, "UpdateFriendInfo"}, | 26 | {10102, nullptr, "UpdateFriendInfo"}, |
| 27 | {10110, nullptr, "GetFriendProfileImage"}, | 27 | {10110, nullptr, "GetFriendProfileImage"}, |
| 28 | {10200, nullptr, "SendFriendRequestForApplication"}, | 28 | {10200, nullptr, "SendFriendRequestForApplication"}, |
| @@ -99,6 +99,23 @@ public: | |||
| 99 | } | 99 | } |
| 100 | 100 | ||
| 101 | private: | 101 | private: |
| 102 | enum class PresenceFilter : u32 { | ||
| 103 | None = 0, | ||
| 104 | Online = 1, | ||
| 105 | OnlinePlay = 2, | ||
| 106 | OnlineOrOnlinePlay = 3, | ||
| 107 | }; | ||
| 108 | |||
| 109 | struct SizedFriendFilter { | ||
| 110 | PresenceFilter presence; | ||
| 111 | u8 is_favorite; | ||
| 112 | u8 same_app; | ||
| 113 | u8 same_app_played; | ||
| 114 | u8 arbitary_app_played; | ||
| 115 | u64 group_id; | ||
| 116 | }; | ||
| 117 | static_assert(sizeof(SizedFriendFilter) == 0x10, "SizedFriendFilter is an invalid size"); | ||
| 118 | |||
| 102 | void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) { | 119 | void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) { |
| 103 | // Stub used by Splatoon 2 | 120 | // Stub used by Splatoon 2 |
| 104 | LOG_WARNING(Service_ACC, "(STUBBED) called"); | 121 | LOG_WARNING(Service_ACC, "(STUBBED) called"); |
| @@ -112,6 +129,22 @@ private: | |||
| 112 | IPC::ResponseBuilder rb{ctx, 2}; | 129 | IPC::ResponseBuilder rb{ctx, 2}; |
| 113 | rb.Push(RESULT_SUCCESS); | 130 | rb.Push(RESULT_SUCCESS); |
| 114 | } | 131 | } |
| 132 | |||
| 133 | void GetFriendList(Kernel::HLERequestContext& ctx) { | ||
| 134 | IPC::RequestParser rp{ctx}; | ||
| 135 | const auto friend_offset = rp.Pop<u32>(); | ||
| 136 | const auto uuid = rp.PopRaw<Common::UUID>(); | ||
| 137 | [[maybe_unused]] const auto filter = rp.PopRaw<SizedFriendFilter>(); | ||
| 138 | const auto pid = rp.Pop<u64>(); | ||
| 139 | LOG_WARNING(Service_ACC, "(STUBBED) called, offset={}, uuid={}, pid={}", friend_offset, | ||
| 140 | uuid.Format(), pid); | ||
| 141 | |||
| 142 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 143 | rb.Push(RESULT_SUCCESS); | ||
| 144 | |||
| 145 | rb.Push<u32>(0); // Friend count | ||
| 146 | // TODO(ogniK): Return a buffer of u64s which are the "NetworkServiceAccountId" | ||
| 147 | } | ||
| 115 | }; | 148 | }; |
| 116 | 149 | ||
| 117 | class INotificationService final : public ServiceFramework<INotificationService> { | 150 | class INotificationService final : public ServiceFramework<INotificationService> { |
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index fdd6d79a2..1e81f776f 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp | |||
| @@ -548,6 +548,37 @@ void Controller_NPad::DisconnectNPad(u32 npad_id) { | |||
| 548 | connected_controllers[NPadIdToIndex(npad_id)].is_connected = false; | 548 | connected_controllers[NPadIdToIndex(npad_id)].is_connected = false; |
| 549 | } | 549 | } |
| 550 | 550 | ||
| 551 | void Controller_NPad::StartLRAssignmentMode() { | ||
| 552 | // Nothing internally is used for lr assignment mode. Since we have the ability to set the | ||
| 553 | // controller types from boot, it doesn't really matter about showing a selection screen | ||
| 554 | is_in_lr_assignment_mode = true; | ||
| 555 | } | ||
| 556 | |||
| 557 | void Controller_NPad::StopLRAssignmentMode() { | ||
| 558 | is_in_lr_assignment_mode = false; | ||
| 559 | } | ||
| 560 | |||
| 561 | bool Controller_NPad::SwapNpadAssignment(u32 npad_id_1, u32 npad_id_2) { | ||
| 562 | if (npad_id_1 == NPAD_HANDHELD || npad_id_2 == NPAD_HANDHELD || npad_id_1 == NPAD_UNKNOWN || | ||
| 563 | npad_id_2 == NPAD_UNKNOWN) { | ||
| 564 | return true; | ||
| 565 | } | ||
| 566 | const auto npad_index_1 = NPadIdToIndex(npad_id_1); | ||
| 567 | const auto npad_index_2 = NPadIdToIndex(npad_id_2); | ||
| 568 | |||
| 569 | if (!IsControllerSupported(connected_controllers[npad_index_1].type) || | ||
| 570 | !IsControllerSupported(connected_controllers[npad_index_2].type)) { | ||
| 571 | return false; | ||
| 572 | } | ||
| 573 | |||
| 574 | std::swap(connected_controllers[npad_index_1].type, connected_controllers[npad_index_2].type); | ||
| 575 | |||
| 576 | InitNewlyAddedControler(npad_index_1); | ||
| 577 | InitNewlyAddedControler(npad_index_2); | ||
| 578 | |||
| 579 | return true; | ||
| 580 | } | ||
| 581 | |||
| 551 | bool Controller_NPad::IsControllerSupported(NPadControllerType controller) { | 582 | bool Controller_NPad::IsControllerSupported(NPadControllerType controller) { |
| 552 | if (controller == NPadControllerType::Handheld) { | 583 | if (controller == NPadControllerType::Handheld) { |
| 553 | // Handheld is not even a supported type, lets stop here | 584 | // Handheld is not even a supported type, lets stop here |
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h index 4ff50b3cd..4b6c1083f 100644 --- a/src/core/hle/service/hid/controllers/npad.h +++ b/src/core/hle/service/hid/controllers/npad.h | |||
| @@ -124,6 +124,10 @@ public: | |||
| 124 | void ConnectAllDisconnectedControllers(); | 124 | void ConnectAllDisconnectedControllers(); |
| 125 | void ClearAllControllers(); | 125 | void ClearAllControllers(); |
| 126 | 126 | ||
| 127 | void StartLRAssignmentMode(); | ||
| 128 | void StopLRAssignmentMode(); | ||
| 129 | bool SwapNpadAssignment(u32 npad_id_1, u32 npad_id_2); | ||
| 130 | |||
| 127 | // Logical OR for all buttons presses on all controllers | 131 | // Logical OR for all buttons presses on all controllers |
| 128 | // Specifically for cheat engine and other features. | 132 | // Specifically for cheat engine and other features. |
| 129 | u32 GetAndResetPressState(); | 133 | u32 GetAndResetPressState(); |
| @@ -321,5 +325,6 @@ private: | |||
| 321 | void RequestPadStateUpdate(u32 npad_id); | 325 | void RequestPadStateUpdate(u32 npad_id); |
| 322 | std::array<ControllerPad, 10> npad_pad_states{}; | 326 | std::array<ControllerPad, 10> npad_pad_states{}; |
| 323 | bool IsControllerSupported(NPadControllerType controller); | 327 | bool IsControllerSupported(NPadControllerType controller); |
| 328 | bool is_in_lr_assignment_mode{false}; | ||
| 324 | }; | 329 | }; |
| 325 | } // namespace Service::HID | 330 | } // namespace Service::HID |
diff --git a/src/core/hle/service/hid/errors.h b/src/core/hle/service/hid/errors.h new file mode 100644 index 000000000..3583642e7 --- /dev/null +++ b/src/core/hle/service/hid/errors.h | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | // Copyright 2019 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "core/hle/result.h" | ||
| 8 | |||
| 9 | namespace Service::HID { | ||
| 10 | |||
| 11 | constexpr ResultCode ERR_NPAD_NOT_CONNECTED{ErrorModule::HID, 710}; | ||
| 12 | |||
| 13 | } // namespace Service::HID | ||
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index a4ad95d96..0bd24b8eb 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include "core/hle/kernel/readable_event.h" | 16 | #include "core/hle/kernel/readable_event.h" |
| 17 | #include "core/hle/kernel/shared_memory.h" | 17 | #include "core/hle/kernel/shared_memory.h" |
| 18 | #include "core/hle/kernel/writable_event.h" | 18 | #include "core/hle/kernel/writable_event.h" |
| 19 | #include "core/hle/service/hid/errors.h" | ||
| 19 | #include "core/hle/service/hid/hid.h" | 20 | #include "core/hle/service/hid/hid.h" |
| 20 | #include "core/hle/service/hid/irs.h" | 21 | #include "core/hle/service/hid/irs.h" |
| 21 | #include "core/hle/service/hid/xcd.h" | 22 | #include "core/hle/service/hid/xcd.h" |
| @@ -202,11 +203,11 @@ Hid::Hid() : ServiceFramework("hid") { | |||
| 202 | {123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"}, | 203 | {123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"}, |
| 203 | {124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"}, | 204 | {124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"}, |
| 204 | {125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"}, | 205 | {125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"}, |
| 205 | {126, nullptr, "StartLrAssignmentMode"}, | 206 | {126, &Hid::StartLrAssignmentMode, "StartLrAssignmentMode"}, |
| 206 | {127, nullptr, "StopLrAssignmentMode"}, | 207 | {127, &Hid::StopLrAssignmentMode, "StopLrAssignmentMode"}, |
| 207 | {128, &Hid::SetNpadHandheldActivationMode, "SetNpadHandheldActivationMode"}, | 208 | {128, &Hid::SetNpadHandheldActivationMode, "SetNpadHandheldActivationMode"}, |
| 208 | {129, nullptr, "GetNpadHandheldActivationMode"}, | 209 | {129, nullptr, "GetNpadHandheldActivationMode"}, |
| 209 | {130, nullptr, "SwapNpadAssignment"}, | 210 | {130, &Hid::SwapNpadAssignment, "SwapNpadAssignment"}, |
| 210 | {131, nullptr, "IsUnintendedHomeButtonInputProtectionEnabled"}, | 211 | {131, nullptr, "IsUnintendedHomeButtonInputProtectionEnabled"}, |
| 211 | {132, nullptr, "EnableUnintendedHomeButtonInputProtection"}, | 212 | {132, nullptr, "EnableUnintendedHomeButtonInputProtection"}, |
| 212 | {133, nullptr, "SetNpadJoyAssignmentModeSingleWithDestination"}, | 213 | {133, nullptr, "SetNpadJoyAssignmentModeSingleWithDestination"}, |
| @@ -733,6 +734,49 @@ void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) { | |||
| 733 | rb.Push(RESULT_SUCCESS); | 734 | rb.Push(RESULT_SUCCESS); |
| 734 | } | 735 | } |
| 735 | 736 | ||
| 737 | void Hid::StartLrAssignmentMode(Kernel::HLERequestContext& ctx) { | ||
| 738 | IPC::RequestParser rp{ctx}; | ||
| 739 | const auto applet_resource_user_id{rp.Pop<u64>()}; | ||
| 740 | |||
| 741 | LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id); | ||
| 742 | auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad); | ||
| 743 | controller.StartLRAssignmentMode(); | ||
| 744 | |||
| 745 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 746 | rb.Push(RESULT_SUCCESS); | ||
| 747 | } | ||
| 748 | |||
| 749 | void Hid::StopLrAssignmentMode(Kernel::HLERequestContext& ctx) { | ||
| 750 | IPC::RequestParser rp{ctx}; | ||
| 751 | const auto applet_resource_user_id{rp.Pop<u64>()}; | ||
| 752 | |||
| 753 | LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id); | ||
| 754 | auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad); | ||
| 755 | controller.StopLRAssignmentMode(); | ||
| 756 | |||
| 757 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 758 | rb.Push(RESULT_SUCCESS); | ||
| 759 | } | ||
| 760 | |||
| 761 | void Hid::SwapNpadAssignment(Kernel::HLERequestContext& ctx) { | ||
| 762 | IPC::RequestParser rp{ctx}; | ||
| 763 | const auto npad_1{rp.Pop<u32>()}; | ||
| 764 | const auto npad_2{rp.Pop<u32>()}; | ||
| 765 | const auto applet_resource_user_id{rp.Pop<u64>()}; | ||
| 766 | |||
| 767 | LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}, npad_1={}, npad_2={}", | ||
| 768 | applet_resource_user_id, npad_1, npad_2); | ||
| 769 | |||
| 770 | auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad); | ||
| 771 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 772 | if (controller.SwapNpadAssignment(npad_1, npad_2)) { | ||
| 773 | rb.Push(RESULT_SUCCESS); | ||
| 774 | } else { | ||
| 775 | LOG_ERROR(Service_HID, "Npads are not connected!"); | ||
| 776 | rb.Push(ERR_NPAD_NOT_CONNECTED); | ||
| 777 | } | ||
| 778 | } | ||
| 779 | |||
| 736 | class HidDbg final : public ServiceFramework<HidDbg> { | 780 | class HidDbg final : public ServiceFramework<HidDbg> { |
| 737 | public: | 781 | public: |
| 738 | explicit HidDbg() : ServiceFramework{"hid:dbg"} { | 782 | explicit HidDbg() : ServiceFramework{"hid:dbg"} { |
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index d3660cad2..28260ef1b 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h | |||
| @@ -119,6 +119,9 @@ private: | |||
| 119 | void StopSixAxisSensor(Kernel::HLERequestContext& ctx); | 119 | void StopSixAxisSensor(Kernel::HLERequestContext& ctx); |
| 120 | void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx); | 120 | void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx); |
| 121 | void SetPalmaBoostMode(Kernel::HLERequestContext& ctx); | 121 | void SetPalmaBoostMode(Kernel::HLERequestContext& ctx); |
| 122 | void StartLrAssignmentMode(Kernel::HLERequestContext& ctx); | ||
| 123 | void StopLrAssignmentMode(Kernel::HLERequestContext& ctx); | ||
| 124 | void SwapNpadAssignment(Kernel::HLERequestContext& ctx); | ||
| 122 | 125 | ||
| 123 | std::shared_ptr<IAppletResource> applet_resource; | 126 | std::shared_ptr<IAppletResource> applet_resource; |
| 124 | }; | 127 | }; |
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp index b839303ac..8ddad8682 100644 --- a/src/core/hle/service/ldr/ldr.cpp +++ b/src/core/hle/service/ldr/ldr.cpp | |||
| @@ -345,14 +345,16 @@ public: | |||
| 345 | vm_manager | 345 | vm_manager |
| 346 | .MirrorMemory(*map_address, nro_address, nro_size, Kernel::MemoryState::ModuleCode) | 346 | .MirrorMemory(*map_address, nro_address, nro_size, Kernel::MemoryState::ModuleCode) |
| 347 | .IsSuccess()); | 347 | .IsSuccess()); |
| 348 | ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess()); | 348 | ASSERT(vm_manager.ReprotectRange(nro_address, nro_size, Kernel::VMAPermission::None) |
| 349 | .IsSuccess()); | ||
| 349 | 350 | ||
| 350 | if (bss_size > 0) { | 351 | if (bss_size > 0) { |
| 351 | ASSERT(vm_manager | 352 | ASSERT(vm_manager |
| 352 | .MirrorMemory(*map_address + nro_size, bss_address, bss_size, | 353 | .MirrorMemory(*map_address + nro_size, bss_address, bss_size, |
| 353 | Kernel::MemoryState::ModuleCode) | 354 | Kernel::MemoryState::ModuleCode) |
| 354 | .IsSuccess()); | 355 | .IsSuccess()); |
| 355 | ASSERT(vm_manager.UnmapRange(bss_address, bss_size).IsSuccess()); | 356 | ASSERT(vm_manager.ReprotectRange(bss_address, bss_size, Kernel::VMAPermission::None) |
| 357 | .IsSuccess()); | ||
| 356 | } | 358 | } |
| 357 | 359 | ||
| 358 | vm_manager.ReprotectRange(*map_address, header.text_size, | 360 | vm_manager.ReprotectRange(*map_address, header.text_size, |
| @@ -364,7 +366,8 @@ public: | |||
| 364 | 366 | ||
| 365 | Core::System::GetInstance().InvalidateCpuInstructionCaches(); | 367 | Core::System::GetInstance().InvalidateCpuInstructionCaches(); |
| 366 | 368 | ||
| 367 | nro.insert_or_assign(*map_address, NROInfo{hash, nro_size + bss_size}); | 369 | nro.insert_or_assign(*map_address, |
| 370 | NROInfo{hash, nro_address, nro_size, bss_address, bss_size}); | ||
| 368 | 371 | ||
| 369 | IPC::ResponseBuilder rb{ctx, 4}; | 372 | IPC::ResponseBuilder rb{ctx, 4}; |
| 370 | rb.Push(RESULT_SUCCESS); | 373 | rb.Push(RESULT_SUCCESS); |
| @@ -409,9 +412,23 @@ public: | |||
| 409 | } | 412 | } |
| 410 | 413 | ||
| 411 | auto& vm_manager = Core::CurrentProcess()->VMManager(); | 414 | auto& vm_manager = Core::CurrentProcess()->VMManager(); |
| 412 | const auto& nro_size = iter->second.size; | 415 | const auto& nro_info = iter->second; |
| 413 | 416 | ||
| 414 | ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess()); | 417 | // Unmap the mirrored memory |
| 418 | ASSERT( | ||
| 419 | vm_manager.UnmapRange(nro_address, nro_info.nro_size + nro_info.bss_size).IsSuccess()); | ||
| 420 | |||
| 421 | // Reprotect the source memory | ||
| 422 | ASSERT(vm_manager | ||
| 423 | .ReprotectRange(nro_info.nro_address, nro_info.nro_size, | ||
| 424 | Kernel::VMAPermission::ReadWrite) | ||
| 425 | .IsSuccess()); | ||
| 426 | if (nro_info.bss_size > 0) { | ||
| 427 | ASSERT(vm_manager | ||
| 428 | .ReprotectRange(nro_info.bss_address, nro_info.bss_size, | ||
| 429 | Kernel::VMAPermission::ReadWrite) | ||
| 430 | .IsSuccess()); | ||
| 431 | } | ||
| 415 | 432 | ||
| 416 | Core::System::GetInstance().InvalidateCpuInstructionCaches(); | 433 | Core::System::GetInstance().InvalidateCpuInstructionCaches(); |
| 417 | 434 | ||
| @@ -473,7 +490,10 @@ private: | |||
| 473 | 490 | ||
| 474 | struct NROInfo { | 491 | struct NROInfo { |
| 475 | SHA256Hash hash; | 492 | SHA256Hash hash; |
| 476 | u64 size; | 493 | VAddr nro_address; |
| 494 | u64 nro_size; | ||
| 495 | VAddr bss_address; | ||
| 496 | u64 bss_size; | ||
| 477 | }; | 497 | }; |
| 478 | 498 | ||
| 479 | bool initialized = false; | 499 | bool initialized = false; |
diff --git a/src/core/hle/service/mii/mii.cpp b/src/core/hle/service/mii/mii.cpp index ce84e25ed..0b3923ad9 100644 --- a/src/core/hle/service/mii/mii.cpp +++ b/src/core/hle/service/mii/mii.cpp | |||
| @@ -48,7 +48,7 @@ public: | |||
| 48 | {19, nullptr, "Export"}, | 48 | {19, nullptr, "Export"}, |
| 49 | {20, nullptr, "IsBrokenDatabaseWithClearFlag"}, | 49 | {20, nullptr, "IsBrokenDatabaseWithClearFlag"}, |
| 50 | {21, &IDatabaseService::GetIndex, "GetIndex"}, | 50 | {21, &IDatabaseService::GetIndex, "GetIndex"}, |
| 51 | {22, nullptr, "SetInterfaceVersion"}, | 51 | {22, &IDatabaseService::SetInterfaceVersion, "SetInterfaceVersion"}, |
| 52 | {23, nullptr, "Convert"}, | 52 | {23, nullptr, "Convert"}, |
| 53 | }; | 53 | }; |
| 54 | // clang-format on | 54 | // clang-format on |
| @@ -350,8 +350,22 @@ private: | |||
| 350 | rb.Push(index); | 350 | rb.Push(index); |
| 351 | } | 351 | } |
| 352 | 352 | ||
| 353 | void SetInterfaceVersion(Kernel::HLERequestContext& ctx) { | ||
| 354 | IPC::RequestParser rp{ctx}; | ||
| 355 | current_interface_version = rp.PopRaw<u32>(); | ||
| 356 | |||
| 357 | LOG_DEBUG(Service_Mii, "called, interface_version={:08X}", current_interface_version); | ||
| 358 | |||
| 359 | UNIMPLEMENTED_IF(current_interface_version != 1); | ||
| 360 | |||
| 361 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 362 | rb.Push(RESULT_SUCCESS); | ||
| 363 | } | ||
| 364 | |||
| 353 | MiiManager db; | 365 | MiiManager db; |
| 354 | 366 | ||
| 367 | u32 current_interface_version = 0; | ||
| 368 | |||
| 355 | // Last read offsets of Get functions | 369 | // Last read offsets of Get functions |
| 356 | std::array<u32, 4> offsets{}; | 370 | std::array<u32, 4> offsets{}; |
| 357 | }; | 371 | }; |
diff --git a/src/core/hle/service/pm/pm.cpp b/src/core/hle/service/pm/pm.cpp index ebcc41a43..fe6b5f798 100644 --- a/src/core/hle/service/pm/pm.cpp +++ b/src/core/hle/service/pm/pm.cpp | |||
| @@ -3,11 +3,44 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/hle/ipc_helpers.h" | 5 | #include "core/hle/ipc_helpers.h" |
| 6 | #include "core/hle/kernel/kernel.h" | ||
| 7 | #include "core/hle/kernel/process.h" | ||
| 6 | #include "core/hle/service/pm/pm.h" | 8 | #include "core/hle/service/pm/pm.h" |
| 7 | #include "core/hle/service/service.h" | 9 | #include "core/hle/service/service.h" |
| 8 | 10 | ||
| 9 | namespace Service::PM { | 11 | namespace Service::PM { |
| 10 | 12 | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | constexpr ResultCode ERROR_PROCESS_NOT_FOUND{ErrorModule::PM, 1}; | ||
| 16 | |||
| 17 | constexpr u64 NO_PROCESS_FOUND_PID{0}; | ||
| 18 | |||
| 19 | std::optional<Kernel::SharedPtr<Kernel::Process>> SearchProcessList( | ||
| 20 | const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list, | ||
| 21 | std::function<bool(const Kernel::SharedPtr<Kernel::Process>&)> predicate) { | ||
| 22 | const auto iter = std::find_if(process_list.begin(), process_list.end(), predicate); | ||
| 23 | |||
| 24 | if (iter == process_list.end()) { | ||
| 25 | return std::nullopt; | ||
| 26 | } | ||
| 27 | |||
| 28 | return *iter; | ||
| 29 | } | ||
| 30 | |||
| 31 | void GetApplicationPidGeneric(Kernel::HLERequestContext& ctx, | ||
| 32 | const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list) { | ||
| 33 | const auto process = SearchProcessList(process_list, [](const auto& process) { | ||
| 34 | return process->GetProcessID() == Kernel::Process::ProcessIDMin; | ||
| 35 | }); | ||
| 36 | |||
| 37 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 38 | rb.Push(RESULT_SUCCESS); | ||
| 39 | rb.Push(process.has_value() ? (*process)->GetProcessID() : NO_PROCESS_FOUND_PID); | ||
| 40 | } | ||
| 41 | |||
| 42 | } // Anonymous namespace | ||
| 43 | |||
| 11 | class BootMode final : public ServiceFramework<BootMode> { | 44 | class BootMode final : public ServiceFramework<BootMode> { |
| 12 | public: | 45 | public: |
| 13 | explicit BootMode() : ServiceFramework{"pm:bm"} { | 46 | explicit BootMode() : ServiceFramework{"pm:bm"} { |
| @@ -41,14 +74,15 @@ private: | |||
| 41 | 74 | ||
| 42 | class DebugMonitor final : public ServiceFramework<DebugMonitor> { | 75 | class DebugMonitor final : public ServiceFramework<DebugMonitor> { |
| 43 | public: | 76 | public: |
| 44 | explicit DebugMonitor() : ServiceFramework{"pm:dmnt"} { | 77 | explicit DebugMonitor(const Kernel::KernelCore& kernel) |
| 78 | : ServiceFramework{"pm:dmnt"}, kernel(kernel) { | ||
| 45 | // clang-format off | 79 | // clang-format off |
| 46 | static const FunctionInfo functions[] = { | 80 | static const FunctionInfo functions[] = { |
| 47 | {0, nullptr, "GetDebugProcesses"}, | 81 | {0, nullptr, "GetDebugProcesses"}, |
| 48 | {1, nullptr, "StartDebugProcess"}, | 82 | {1, nullptr, "StartDebugProcess"}, |
| 49 | {2, nullptr, "GetTitlePid"}, | 83 | {2, &DebugMonitor::GetTitlePid, "GetTitlePid"}, |
| 50 | {3, nullptr, "EnableDebugForTitleId"}, | 84 | {3, nullptr, "EnableDebugForTitleId"}, |
| 51 | {4, nullptr, "GetApplicationPid"}, | 85 | {4, &DebugMonitor::GetApplicationPid, "GetApplicationPid"}, |
| 52 | {5, nullptr, "EnableDebugForApplication"}, | 86 | {5, nullptr, "EnableDebugForApplication"}, |
| 53 | {6, nullptr, "DisableDebug"}, | 87 | {6, nullptr, "DisableDebug"}, |
| 54 | }; | 88 | }; |
| @@ -56,21 +90,77 @@ public: | |||
| 56 | 90 | ||
| 57 | RegisterHandlers(functions); | 91 | RegisterHandlers(functions); |
| 58 | } | 92 | } |
| 93 | |||
| 94 | private: | ||
| 95 | void GetTitlePid(Kernel::HLERequestContext& ctx) { | ||
| 96 | IPC::RequestParser rp{ctx}; | ||
| 97 | const auto title_id = rp.PopRaw<u64>(); | ||
| 98 | |||
| 99 | LOG_DEBUG(Service_PM, "called, title_id={:016X}", title_id); | ||
| 100 | |||
| 101 | const auto process = | ||
| 102 | SearchProcessList(kernel.GetProcessList(), [title_id](const auto& process) { | ||
| 103 | return process->GetTitleID() == title_id; | ||
| 104 | }); | ||
| 105 | |||
| 106 | if (!process.has_value()) { | ||
| 107 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 108 | rb.Push(ERROR_PROCESS_NOT_FOUND); | ||
| 109 | return; | ||
| 110 | } | ||
| 111 | |||
| 112 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 113 | rb.Push(RESULT_SUCCESS); | ||
| 114 | rb.Push((*process)->GetProcessID()); | ||
| 115 | } | ||
| 116 | |||
| 117 | void GetApplicationPid(Kernel::HLERequestContext& ctx) { | ||
| 118 | LOG_DEBUG(Service_PM, "called"); | ||
| 119 | GetApplicationPidGeneric(ctx, kernel.GetProcessList()); | ||
| 120 | } | ||
| 121 | |||
| 122 | const Kernel::KernelCore& kernel; | ||
| 59 | }; | 123 | }; |
| 60 | 124 | ||
| 61 | class Info final : public ServiceFramework<Info> { | 125 | class Info final : public ServiceFramework<Info> { |
| 62 | public: | 126 | public: |
| 63 | explicit Info() : ServiceFramework{"pm:info"} { | 127 | explicit Info(const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list) |
| 128 | : ServiceFramework{"pm:info"}, process_list(process_list) { | ||
| 64 | static const FunctionInfo functions[] = { | 129 | static const FunctionInfo functions[] = { |
| 65 | {0, nullptr, "GetTitleId"}, | 130 | {0, &Info::GetTitleId, "GetTitleId"}, |
| 66 | }; | 131 | }; |
| 67 | RegisterHandlers(functions); | 132 | RegisterHandlers(functions); |
| 68 | } | 133 | } |
| 134 | |||
| 135 | private: | ||
| 136 | void GetTitleId(Kernel::HLERequestContext& ctx) { | ||
| 137 | IPC::RequestParser rp{ctx}; | ||
| 138 | const auto process_id = rp.PopRaw<u64>(); | ||
| 139 | |||
| 140 | LOG_DEBUG(Service_PM, "called, process_id={:016X}", process_id); | ||
| 141 | |||
| 142 | const auto process = SearchProcessList(process_list, [process_id](const auto& process) { | ||
| 143 | return process->GetProcessID() == process_id; | ||
| 144 | }); | ||
| 145 | |||
| 146 | if (!process.has_value()) { | ||
| 147 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 148 | rb.Push(ERROR_PROCESS_NOT_FOUND); | ||
| 149 | return; | ||
| 150 | } | ||
| 151 | |||
| 152 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 153 | rb.Push(RESULT_SUCCESS); | ||
| 154 | rb.Push((*process)->GetTitleID()); | ||
| 155 | } | ||
| 156 | |||
| 157 | const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list; | ||
| 69 | }; | 158 | }; |
| 70 | 159 | ||
| 71 | class Shell final : public ServiceFramework<Shell> { | 160 | class Shell final : public ServiceFramework<Shell> { |
| 72 | public: | 161 | public: |
| 73 | explicit Shell() : ServiceFramework{"pm:shell"} { | 162 | explicit Shell(const Kernel::KernelCore& kernel) |
| 163 | : ServiceFramework{"pm:shell"}, kernel(kernel) { | ||
| 74 | // clang-format off | 164 | // clang-format off |
| 75 | static const FunctionInfo functions[] = { | 165 | static const FunctionInfo functions[] = { |
| 76 | {0, nullptr, "LaunchProcess"}, | 166 | {0, nullptr, "LaunchProcess"}, |
| @@ -79,21 +169,31 @@ public: | |||
| 79 | {3, nullptr, "GetProcessEventWaiter"}, | 169 | {3, nullptr, "GetProcessEventWaiter"}, |
| 80 | {4, nullptr, "GetProcessEventType"}, | 170 | {4, nullptr, "GetProcessEventType"}, |
| 81 | {5, nullptr, "NotifyBootFinished"}, | 171 | {5, nullptr, "NotifyBootFinished"}, |
| 82 | {6, nullptr, "GetApplicationPid"}, | 172 | {6, &Shell::GetApplicationPid, "GetApplicationPid"}, |
| 83 | {7, nullptr, "BoostSystemMemoryResourceLimit"}, | 173 | {7, nullptr, "BoostSystemMemoryResourceLimit"}, |
| 84 | {8, nullptr, "EnableAdditionalSystemThreads"}, | 174 | {8, nullptr, "EnableAdditionalSystemThreads"}, |
| 175 | {9, nullptr, "GetUnimplementedEventHandle"}, | ||
| 85 | }; | 176 | }; |
| 86 | // clang-format on | 177 | // clang-format on |
| 87 | 178 | ||
| 88 | RegisterHandlers(functions); | 179 | RegisterHandlers(functions); |
| 89 | } | 180 | } |
| 181 | |||
| 182 | private: | ||
| 183 | void GetApplicationPid(Kernel::HLERequestContext& ctx) { | ||
| 184 | LOG_DEBUG(Service_PM, "called"); | ||
| 185 | GetApplicationPidGeneric(ctx, kernel.GetProcessList()); | ||
| 186 | } | ||
| 187 | |||
| 188 | const Kernel::KernelCore& kernel; | ||
| 90 | }; | 189 | }; |
| 91 | 190 | ||
| 92 | void InstallInterfaces(SM::ServiceManager& sm) { | 191 | void InstallInterfaces(Core::System& system) { |
| 93 | std::make_shared<BootMode>()->InstallAsService(sm); | 192 | std::make_shared<BootMode>()->InstallAsService(system.ServiceManager()); |
| 94 | std::make_shared<DebugMonitor>()->InstallAsService(sm); | 193 | std::make_shared<DebugMonitor>(system.Kernel())->InstallAsService(system.ServiceManager()); |
| 95 | std::make_shared<Info>()->InstallAsService(sm); | 194 | std::make_shared<Info>(system.Kernel().GetProcessList()) |
| 96 | std::make_shared<Shell>()->InstallAsService(sm); | 195 | ->InstallAsService(system.ServiceManager()); |
| 196 | std::make_shared<Shell>(system.Kernel())->InstallAsService(system.ServiceManager()); | ||
| 97 | } | 197 | } |
| 98 | 198 | ||
| 99 | } // namespace Service::PM | 199 | } // namespace Service::PM |
diff --git a/src/core/hle/service/pm/pm.h b/src/core/hle/service/pm/pm.h index cc8d3f215..852e7050c 100644 --- a/src/core/hle/service/pm/pm.h +++ b/src/core/hle/service/pm/pm.h | |||
| @@ -4,8 +4,8 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | namespace Service::SM { | 7 | namespace Core { |
| 8 | class ServiceManager; | 8 | class System; |
| 9 | } | 9 | } |
| 10 | 10 | ||
| 11 | namespace Service::PM { | 11 | namespace Service::PM { |
| @@ -16,6 +16,6 @@ enum class SystemBootMode { | |||
| 16 | }; | 16 | }; |
| 17 | 17 | ||
| 18 | /// Registers all PM services with the specified service manager. | 18 | /// Registers all PM services with the specified service manager. |
| 19 | void InstallInterfaces(SM::ServiceManager& service_manager); | 19 | void InstallInterfaces(Core::System& system); |
| 20 | 20 | ||
| 21 | } // namespace Service::PM | 21 | } // namespace Service::PM |
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp index 07b1f4d43..2daa1ae49 100644 --- a/src/core/hle/service/service.cpp +++ b/src/core/hle/service/service.cpp | |||
| @@ -195,8 +195,7 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co | |||
| 195 | // Module interface | 195 | // Module interface |
| 196 | 196 | ||
| 197 | /// Initialize ServiceManager | 197 | /// Initialize ServiceManager |
| 198 | void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system, | 198 | void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system) { |
| 199 | FileSys::VfsFilesystem& vfs) { | ||
| 200 | // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it | 199 | // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it |
| 201 | // here and pass it into the respective InstallInterfaces functions. | 200 | // here and pass it into the respective InstallInterfaces functions. |
| 202 | auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system.CoreTiming()); | 201 | auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system.CoreTiming()); |
| @@ -206,7 +205,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system, | |||
| 206 | Account::InstallInterfaces(system); | 205 | Account::InstallInterfaces(system); |
| 207 | AM::InstallInterfaces(*sm, nv_flinger, system); | 206 | AM::InstallInterfaces(*sm, nv_flinger, system); |
| 208 | AOC::InstallInterfaces(*sm); | 207 | AOC::InstallInterfaces(*sm); |
| 209 | APM::InstallInterfaces(*sm); | 208 | APM::InstallInterfaces(system); |
| 210 | Audio::InstallInterfaces(*sm); | 209 | Audio::InstallInterfaces(*sm); |
| 211 | BCAT::InstallInterfaces(*sm); | 210 | BCAT::InstallInterfaces(*sm); |
| 212 | BPC::InstallInterfaces(*sm); | 211 | BPC::InstallInterfaces(*sm); |
| @@ -218,7 +217,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system, | |||
| 218 | EUPLD::InstallInterfaces(*sm); | 217 | EUPLD::InstallInterfaces(*sm); |
| 219 | Fatal::InstallInterfaces(*sm); | 218 | Fatal::InstallInterfaces(*sm); |
| 220 | FGM::InstallInterfaces(*sm); | 219 | FGM::InstallInterfaces(*sm); |
| 221 | FileSystem::InstallInterfaces(*sm, vfs); | 220 | FileSystem::InstallInterfaces(system); |
| 222 | Friend::InstallInterfaces(*sm); | 221 | Friend::InstallInterfaces(*sm); |
| 223 | Glue::InstallInterfaces(system); | 222 | Glue::InstallInterfaces(system); |
| 224 | GRC::InstallInterfaces(*sm); | 223 | GRC::InstallInterfaces(*sm); |
| @@ -242,7 +241,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system, | |||
| 242 | PCTL::InstallInterfaces(*sm); | 241 | PCTL::InstallInterfaces(*sm); |
| 243 | PCV::InstallInterfaces(*sm); | 242 | PCV::InstallInterfaces(*sm); |
| 244 | PlayReport::InstallInterfaces(*sm); | 243 | PlayReport::InstallInterfaces(*sm); |
| 245 | PM::InstallInterfaces(*sm); | 244 | PM::InstallInterfaces(system); |
| 246 | PSC::InstallInterfaces(*sm); | 245 | PSC::InstallInterfaces(*sm); |
| 247 | PSM::InstallInterfaces(*sm); | 246 | PSM::InstallInterfaces(*sm); |
| 248 | Set::InstallInterfaces(*sm); | 247 | Set::InstallInterfaces(*sm); |
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h index abbfe5524..c6c4bdae5 100644 --- a/src/core/hle/service/service.h +++ b/src/core/hle/service/service.h | |||
| @@ -182,8 +182,7 @@ private: | |||
| 182 | }; | 182 | }; |
| 183 | 183 | ||
| 184 | /// Initialize ServiceManager | 184 | /// Initialize ServiceManager |
| 185 | void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system, | 185 | void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system); |
| 186 | FileSys::VfsFilesystem& vfs); | ||
| 187 | 186 | ||
| 188 | /// Shutdown ServiceManager | 187 | /// Shutdown ServiceManager |
| 189 | void Shutdown(); | 188 | void Shutdown(); |
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index f18f6226b..8555691c0 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -16,11 +16,9 @@ | |||
| 16 | #include "core/core.h" | 16 | #include "core/core.h" |
| 17 | #include "core/hle/kernel/process.h" | 17 | #include "core/hle/kernel/process.h" |
| 18 | #include "core/hle/kernel/vm_manager.h" | 18 | #include "core/hle/kernel/vm_manager.h" |
| 19 | #include "core/hle/lock.h" | ||
| 20 | #include "core/memory.h" | 19 | #include "core/memory.h" |
| 21 | #include "core/memory_setup.h" | 20 | #include "core/memory_setup.h" |
| 22 | #include "video_core/gpu.h" | 21 | #include "video_core/gpu.h" |
| 23 | #include "video_core/renderer_base.h" | ||
| 24 | 22 | ||
| 25 | namespace Memory { | 23 | namespace Memory { |
| 26 | 24 | ||
diff --git a/src/core/memory.h b/src/core/memory.h index 04e2c5f1d..09008e1dd 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -8,10 +8,6 @@ | |||
| 8 | #include <string> | 8 | #include <string> |
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | 10 | ||
| 11 | namespace Common { | ||
| 12 | struct PageTable; | ||
| 13 | } | ||
| 14 | |||
| 15 | namespace Kernel { | 11 | namespace Kernel { |
| 16 | class Process; | 12 | class Process; |
| 17 | } | 13 | } |
diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index 774022569..5d4c3e6ea 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp | |||
| @@ -2,8 +2,13 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <ctime> | ||
| 5 | #include <fstream> | 6 | #include <fstream> |
| 7 | |||
| 8 | #include <fmt/format.h> | ||
| 9 | #include <fmt/time.h> | ||
| 6 | #include <json.hpp> | 10 | #include <json.hpp> |
| 11 | |||
| 7 | #include "common/file_util.h" | 12 | #include "common/file_util.h" |
| 8 | #include "common/hex_util.h" | 13 | #include "common/hex_util.h" |
| 9 | #include "common/scm_rev.h" | 14 | #include "common/scm_rev.h" |
| @@ -14,7 +19,6 @@ | |||
| 14 | #include "core/hle/result.h" | 19 | #include "core/hle/result.h" |
| 15 | #include "core/reporter.h" | 20 | #include "core/reporter.h" |
| 16 | #include "core/settings.h" | 21 | #include "core/settings.h" |
| 17 | #include "fmt/time.h" | ||
| 18 | 22 | ||
| 19 | namespace { | 23 | namespace { |
| 20 | 24 | ||
| @@ -30,9 +34,11 @@ std::string GetTimestamp() { | |||
| 30 | 34 | ||
| 31 | using namespace nlohmann; | 35 | using namespace nlohmann; |
| 32 | 36 | ||
| 33 | void SaveToFile(const json& json, const std::string& filename) { | 37 | void SaveToFile(json json, const std::string& filename) { |
| 34 | if (!FileUtil::CreateFullPath(filename)) | 38 | if (!FileUtil::CreateFullPath(filename)) { |
| 35 | LOG_ERROR(Core, "Failed to create path for '{}' to save report!", filename); | 39 | LOG_ERROR(Core, "Failed to create path for '{}' to save report!", filename); |
| 40 | return; | ||
| 41 | } | ||
| 36 | 42 | ||
| 37 | std::ofstream file( | 43 | std::ofstream file( |
| 38 | FileUtil::SanitizePath(filename, FileUtil::DirectorySeparator::PlatformDefault)); | 44 | FileUtil::SanitizePath(filename, FileUtil::DirectorySeparator::PlatformDefault)); |
| @@ -61,8 +67,11 @@ json GetReportCommonData(u64 title_id, ResultCode result, const std::string& tim | |||
| 61 | {"result_description", fmt::format("{:08X}", result.description.Value())}, | 67 | {"result_description", fmt::format("{:08X}", result.description.Value())}, |
| 62 | {"timestamp", timestamp}, | 68 | {"timestamp", timestamp}, |
| 63 | }; | 69 | }; |
| 64 | if (user_id.has_value()) | 70 | |
| 71 | if (user_id.has_value()) { | ||
| 65 | out["user_id"] = fmt::format("{:016X}{:016X}", (*user_id)[1], (*user_id)[0]); | 72 | out["user_id"] = fmt::format("{:016X}{:016X}", (*user_id)[1], (*user_id)[0]); |
| 73 | } | ||
| 74 | |||
| 66 | return out; | 75 | return out; |
| 67 | } | 76 | } |
| 68 | 77 | ||
| @@ -171,14 +180,14 @@ json GetHLERequestContextData(Kernel::HLERequestContext& ctx) { | |||
| 171 | out["buffer_descriptor_c"] = GetHLEBufferDescriptorData<false>(ctx.BufferDescriptorC()); | 180 | out["buffer_descriptor_c"] = GetHLEBufferDescriptorData<false>(ctx.BufferDescriptorC()); |
| 172 | out["buffer_descriptor_x"] = GetHLEBufferDescriptorData<true>(ctx.BufferDescriptorX()); | 181 | out["buffer_descriptor_x"] = GetHLEBufferDescriptorData<true>(ctx.BufferDescriptorX()); |
| 173 | 182 | ||
| 174 | return std::move(out); | 183 | return out; |
| 175 | } | 184 | } |
| 176 | 185 | ||
| 177 | } // Anonymous namespace | 186 | } // Anonymous namespace |
| 178 | 187 | ||
| 179 | namespace Core { | 188 | namespace Core { |
| 180 | 189 | ||
| 181 | Reporter::Reporter(Core::System& system) : system(system) {} | 190 | Reporter::Reporter(System& system) : system(system) {} |
| 182 | 191 | ||
| 183 | Reporter::~Reporter() = default; | 192 | Reporter::~Reporter() = default; |
| 184 | 193 | ||
| @@ -187,8 +196,9 @@ void Reporter::SaveCrashReport(u64 title_id, ResultCode result, u64 set_flags, u | |||
| 187 | const std::array<u64, 31>& registers, | 196 | const std::array<u64, 31>& registers, |
| 188 | const std::array<u64, 32>& backtrace, u32 backtrace_size, | 197 | const std::array<u64, 32>& backtrace, u32 backtrace_size, |
| 189 | const std::string& arch, u32 unk10) const { | 198 | const std::string& arch, u32 unk10) const { |
| 190 | if (!IsReportingEnabled()) | 199 | if (!IsReportingEnabled()) { |
| 191 | return; | 200 | return; |
| 201 | } | ||
| 192 | 202 | ||
| 193 | const auto timestamp = GetTimestamp(); | 203 | const auto timestamp = GetTimestamp(); |
| 194 | json out; | 204 | json out; |
| @@ -212,8 +222,9 @@ void Reporter::SaveCrashReport(u64 title_id, ResultCode result, u64 set_flags, u | |||
| 212 | 222 | ||
| 213 | void Reporter::SaveSvcBreakReport(u32 type, bool signal_debugger, u64 info1, u64 info2, | 223 | void Reporter::SaveSvcBreakReport(u32 type, bool signal_debugger, u64 info1, u64 info2, |
| 214 | std::optional<std::vector<u8>> resolved_buffer) const { | 224 | std::optional<std::vector<u8>> resolved_buffer) const { |
| 215 | if (!IsReportingEnabled()) | 225 | if (!IsReportingEnabled()) { |
| 216 | return; | 226 | return; |
| 227 | } | ||
| 217 | 228 | ||
| 218 | const auto timestamp = GetTimestamp(); | 229 | const auto timestamp = GetTimestamp(); |
| 219 | const auto title_id = system.CurrentProcess()->GetTitleID(); | 230 | const auto title_id = system.CurrentProcess()->GetTitleID(); |
| @@ -238,8 +249,9 @@ void Reporter::SaveSvcBreakReport(u32 type, bool signal_debugger, u64 info1, u64 | |||
| 238 | void Reporter::SaveUnimplementedFunctionReport(Kernel::HLERequestContext& ctx, u32 command_id, | 249 | void Reporter::SaveUnimplementedFunctionReport(Kernel::HLERequestContext& ctx, u32 command_id, |
| 239 | const std::string& name, | 250 | const std::string& name, |
| 240 | const std::string& service_name) const { | 251 | const std::string& service_name) const { |
| 241 | if (!IsReportingEnabled()) | 252 | if (!IsReportingEnabled()) { |
| 242 | return; | 253 | return; |
| 254 | } | ||
| 243 | 255 | ||
| 244 | const auto timestamp = GetTimestamp(); | 256 | const auto timestamp = GetTimestamp(); |
| 245 | const auto title_id = system.CurrentProcess()->GetTitleID(); | 257 | const auto title_id = system.CurrentProcess()->GetTitleID(); |
| @@ -259,8 +271,9 @@ void Reporter::SaveUnimplementedAppletReport( | |||
| 259 | u32 applet_id, u32 common_args_version, u32 library_version, u32 theme_color, | 271 | u32 applet_id, u32 common_args_version, u32 library_version, u32 theme_color, |
| 260 | bool startup_sound, u64 system_tick, std::vector<std::vector<u8>> normal_channel, | 272 | bool startup_sound, u64 system_tick, std::vector<std::vector<u8>> normal_channel, |
| 261 | std::vector<std::vector<u8>> interactive_channel) const { | 273 | std::vector<std::vector<u8>> interactive_channel) const { |
| 262 | if (!IsReportingEnabled()) | 274 | if (!IsReportingEnabled()) { |
| 263 | return; | 275 | return; |
| 276 | } | ||
| 264 | 277 | ||
| 265 | const auto timestamp = GetTimestamp(); | 278 | const auto timestamp = GetTimestamp(); |
| 266 | const auto title_id = system.CurrentProcess()->GetTitleID(); | 279 | const auto title_id = system.CurrentProcess()->GetTitleID(); |
| @@ -293,8 +306,9 @@ void Reporter::SaveUnimplementedAppletReport( | |||
| 293 | 306 | ||
| 294 | void Reporter::SavePlayReport(u64 title_id, u64 process_id, std::vector<std::vector<u8>> data, | 307 | void Reporter::SavePlayReport(u64 title_id, u64 process_id, std::vector<std::vector<u8>> data, |
| 295 | std::optional<u128> user_id) const { | 308 | std::optional<u128> user_id) const { |
| 296 | if (!IsReportingEnabled()) | 309 | if (!IsReportingEnabled()) { |
| 297 | return; | 310 | return; |
| 311 | } | ||
| 298 | 312 | ||
| 299 | const auto timestamp = GetTimestamp(); | 313 | const auto timestamp = GetTimestamp(); |
| 300 | json out; | 314 | json out; |
| @@ -316,8 +330,9 @@ void Reporter::SavePlayReport(u64 title_id, u64 process_id, std::vector<std::vec | |||
| 316 | void Reporter::SaveErrorReport(u64 title_id, ResultCode result, | 330 | void Reporter::SaveErrorReport(u64 title_id, ResultCode result, |
| 317 | std::optional<std::string> custom_text_main, | 331 | std::optional<std::string> custom_text_main, |
| 318 | std::optional<std::string> custom_text_detail) const { | 332 | std::optional<std::string> custom_text_detail) const { |
| 319 | if (!IsReportingEnabled()) | 333 | if (!IsReportingEnabled()) { |
| 320 | return; | 334 | return; |
| 335 | } | ||
| 321 | 336 | ||
| 322 | const auto timestamp = GetTimestamp(); | 337 | const auto timestamp = GetTimestamp(); |
| 323 | json out; | 338 | json out; |
| @@ -335,12 +350,31 @@ void Reporter::SaveErrorReport(u64 title_id, ResultCode result, | |||
| 335 | SaveToFile(std::move(out), GetPath("error_report", title_id, timestamp)); | 350 | SaveToFile(std::move(out), GetPath("error_report", title_id, timestamp)); |
| 336 | } | 351 | } |
| 337 | 352 | ||
| 338 | void Reporter::SaveUserReport() const { | 353 | void Reporter::SaveFilesystemAccessReport(Service::FileSystem::LogMode log_mode, |
| 354 | std::string log_message) const { | ||
| 339 | if (!IsReportingEnabled()) | 355 | if (!IsReportingEnabled()) |
| 340 | return; | 356 | return; |
| 341 | 357 | ||
| 342 | const auto timestamp = GetTimestamp(); | 358 | const auto timestamp = GetTimestamp(); |
| 343 | const auto title_id = system.CurrentProcess()->GetTitleID(); | 359 | const auto title_id = system.CurrentProcess()->GetTitleID(); |
| 360 | json out; | ||
| 361 | |||
| 362 | out["yuzu_version"] = GetYuzuVersionData(); | ||
| 363 | out["report_common"] = GetReportCommonData(title_id, RESULT_SUCCESS, timestamp); | ||
| 364 | |||
| 365 | out["log_mode"] = fmt::format("{:08X}", static_cast<u32>(log_mode)); | ||
| 366 | out["log_message"] = std::move(log_message); | ||
| 367 | |||
| 368 | SaveToFile(std::move(out), GetPath("filesystem_access_report", title_id, timestamp)); | ||
| 369 | } | ||
| 370 | |||
| 371 | void Reporter::SaveUserReport() const { | ||
| 372 | if (!IsReportingEnabled()) { | ||
| 373 | return; | ||
| 374 | } | ||
| 375 | |||
| 376 | const auto timestamp = GetTimestamp(); | ||
| 377 | const auto title_id = system.CurrentProcess()->GetTitleID(); | ||
| 344 | 378 | ||
| 345 | SaveToFile(GetFullDataAuto(timestamp, title_id, system), | 379 | SaveToFile(GetFullDataAuto(timestamp, title_id, system), |
| 346 | GetPath("user_report", title_id, timestamp)); | 380 | GetPath("user_report", title_id, timestamp)); |
diff --git a/src/core/reporter.h b/src/core/reporter.h index 3de19c0f7..44256de50 100644 --- a/src/core/reporter.h +++ b/src/core/reporter.h | |||
| @@ -4,7 +4,9 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 7 | #include <optional> | 8 | #include <optional> |
| 9 | #include <string> | ||
| 8 | #include <vector> | 10 | #include <vector> |
| 9 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 10 | 12 | ||
| @@ -14,11 +16,17 @@ namespace Kernel { | |||
| 14 | class HLERequestContext; | 16 | class HLERequestContext; |
| 15 | } // namespace Kernel | 17 | } // namespace Kernel |
| 16 | 18 | ||
| 19 | namespace Service::FileSystem { | ||
| 20 | enum class LogMode : u32; | ||
| 21 | } | ||
| 22 | |||
| 17 | namespace Core { | 23 | namespace Core { |
| 18 | 24 | ||
| 25 | class System; | ||
| 26 | |||
| 19 | class Reporter { | 27 | class Reporter { |
| 20 | public: | 28 | public: |
| 21 | explicit Reporter(Core::System& system); | 29 | explicit Reporter(System& system); |
| 22 | ~Reporter(); | 30 | ~Reporter(); |
| 23 | 31 | ||
| 24 | void SaveCrashReport(u64 title_id, ResultCode result, u64 set_flags, u64 entry_point, u64 sp, | 32 | void SaveCrashReport(u64 title_id, ResultCode result, u64 set_flags, u64 entry_point, u64 sp, |
| @@ -45,12 +53,15 @@ public: | |||
| 45 | std::optional<std::string> custom_text_main = {}, | 53 | std::optional<std::string> custom_text_main = {}, |
| 46 | std::optional<std::string> custom_text_detail = {}) const; | 54 | std::optional<std::string> custom_text_detail = {}) const; |
| 47 | 55 | ||
| 56 | void SaveFilesystemAccessReport(Service::FileSystem::LogMode log_mode, | ||
| 57 | std::string log_message) const; | ||
| 58 | |||
| 48 | void SaveUserReport() const; | 59 | void SaveUserReport() const; |
| 49 | 60 | ||
| 50 | private: | 61 | private: |
| 51 | bool IsReportingEnabled() const; | 62 | bool IsReportingEnabled() const; |
| 52 | 63 | ||
| 53 | Core::System& system; | 64 | System& system; |
| 54 | }; | 65 | }; |
| 55 | 66 | ||
| 56 | } // namespace Core | 67 | } // namespace Core |
diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 63aa59690..0dd1632ac 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp | |||
| @@ -85,7 +85,6 @@ void LogSettings() { | |||
| 85 | LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0)); | 85 | LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0)); |
| 86 | LogSetting("System_CurrentUser", Settings::values.current_user); | 86 | LogSetting("System_CurrentUser", Settings::values.current_user); |
| 87 | LogSetting("System_LanguageIndex", Settings::values.language_index); | 87 | LogSetting("System_LanguageIndex", Settings::values.language_index); |
| 88 | LogSetting("Core_CpuJitEnabled", Settings::values.cpu_jit_enabled); | ||
| 89 | LogSetting("Core_UseMultiCore", Settings::values.use_multi_core); | 88 | LogSetting("Core_UseMultiCore", Settings::values.use_multi_core); |
| 90 | LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); | 89 | LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); |
| 91 | LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); | 90 | LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); |
diff --git a/src/core/settings.h b/src/core/settings.h index acf18d653..6638ce8f9 100644 --- a/src/core/settings.h +++ b/src/core/settings.h | |||
| @@ -378,7 +378,6 @@ struct Values { | |||
| 378 | std::atomic_bool is_device_reload_pending{true}; | 378 | std::atomic_bool is_device_reload_pending{true}; |
| 379 | 379 | ||
| 380 | // Core | 380 | // Core |
| 381 | bool cpu_jit_enabled; | ||
| 382 | bool use_multi_core; | 381 | bool use_multi_core; |
| 383 | 382 | ||
| 384 | // Data Storage | 383 | // Data Storage |
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 98f49042a..793d102d3 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp | |||
| @@ -168,7 +168,6 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { | |||
| 168 | AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id); | 168 | AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id); |
| 169 | AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching", | 169 | AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching", |
| 170 | Settings::values.enable_audio_stretching); | 170 | Settings::values.enable_audio_stretching); |
| 171 | AddField(Telemetry::FieldType::UserConfig, "Core_UseCpuJit", Settings::values.cpu_jit_enabled); | ||
| 172 | AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore", | 171 | AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore", |
| 173 | Settings::values.use_multi_core); | 172 | Settings::values.use_multi_core); |
| 174 | AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor", | 173 | AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor", |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6839abe71..7c18c27b3 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -1,4 +1,5 @@ | |||
| 1 | add_library(video_core STATIC | 1 | add_library(video_core STATIC |
| 2 | buffer_cache.h | ||
| 2 | dma_pusher.cpp | 3 | dma_pusher.cpp |
| 3 | dma_pusher.h | 4 | dma_pusher.h |
| 4 | debug_utils/debug_utils.cpp | 5 | debug_utils/debug_utils.cpp |
| @@ -43,8 +44,6 @@ add_library(video_core STATIC | |||
| 43 | renderer_opengl/gl_device.h | 44 | renderer_opengl/gl_device.h |
| 44 | renderer_opengl/gl_framebuffer_cache.cpp | 45 | renderer_opengl/gl_framebuffer_cache.cpp |
| 45 | renderer_opengl/gl_framebuffer_cache.h | 46 | renderer_opengl/gl_framebuffer_cache.h |
| 46 | renderer_opengl/gl_global_cache.cpp | ||
| 47 | renderer_opengl/gl_global_cache.h | ||
| 48 | renderer_opengl/gl_rasterizer.cpp | 47 | renderer_opengl/gl_rasterizer.cpp |
| 49 | renderer_opengl/gl_rasterizer.h | 48 | renderer_opengl/gl_rasterizer.h |
| 50 | renderer_opengl/gl_resource_manager.cpp | 49 | renderer_opengl/gl_resource_manager.cpp |
| @@ -103,6 +102,8 @@ add_library(video_core STATIC | |||
| 103 | shader/decode/video.cpp | 102 | shader/decode/video.cpp |
| 104 | shader/decode/xmad.cpp | 103 | shader/decode/xmad.cpp |
| 105 | shader/decode/other.cpp | 104 | shader/decode/other.cpp |
| 105 | shader/control_flow.cpp | ||
| 106 | shader/control_flow.h | ||
| 106 | shader/decode.cpp | 107 | shader/decode.cpp |
| 107 | shader/node_helper.cpp | 108 | shader/node_helper.cpp |
| 108 | shader/node_helper.h | 109 | shader/node_helper.h |
diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h new file mode 100644 index 000000000..6f868b8b4 --- /dev/null +++ b/src/video_core/buffer_cache.h | |||
| @@ -0,0 +1,299 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <memory> | ||
| 9 | #include <mutex> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <unordered_set> | ||
| 12 | #include <utility> | ||
| 13 | #include <vector> | ||
| 14 | |||
| 15 | #include "common/alignment.h" | ||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "core/core.h" | ||
| 18 | #include "video_core/memory_manager.h" | ||
| 19 | #include "video_core/rasterizer_cache.h" | ||
| 20 | |||
| 21 | namespace VideoCore { | ||
| 22 | class RasterizerInterface; | ||
| 23 | } | ||
| 24 | |||
| 25 | namespace VideoCommon { | ||
| 26 | |||
| 27 | template <typename BufferStorageType> | ||
| 28 | class CachedBuffer final : public RasterizerCacheObject { | ||
| 29 | public: | ||
| 30 | explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr) | ||
| 31 | : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {} | ||
| 32 | ~CachedBuffer() override = default; | ||
| 33 | |||
| 34 | VAddr GetCpuAddr() const override { | ||
| 35 | return cpu_addr; | ||
| 36 | } | ||
| 37 | |||
| 38 | std::size_t GetSizeInBytes() const override { | ||
| 39 | return size; | ||
| 40 | } | ||
| 41 | |||
| 42 | u8* GetWritableHostPtr() const { | ||
| 43 | return host_ptr; | ||
| 44 | } | ||
| 45 | |||
| 46 | std::size_t GetSize() const { | ||
| 47 | return size; | ||
| 48 | } | ||
| 49 | |||
| 50 | std::size_t GetCapacity() const { | ||
| 51 | return capacity; | ||
| 52 | } | ||
| 53 | |||
| 54 | bool IsInternalized() const { | ||
| 55 | return is_internal; | ||
| 56 | } | ||
| 57 | |||
| 58 | const BufferStorageType& GetBuffer() const { | ||
| 59 | return buffer; | ||
| 60 | } | ||
| 61 | |||
| 62 | void SetSize(std::size_t new_size) { | ||
| 63 | size = new_size; | ||
| 64 | } | ||
| 65 | |||
| 66 | void SetInternalState(bool is_internal_) { | ||
| 67 | is_internal = is_internal_; | ||
| 68 | } | ||
| 69 | |||
| 70 | BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) { | ||
| 71 | capacity = new_capacity; | ||
| 72 | std::swap(buffer, buffer_); | ||
| 73 | return buffer_; | ||
| 74 | } | ||
| 75 | |||
| 76 | private: | ||
| 77 | u8* host_ptr{}; | ||
| 78 | VAddr cpu_addr{}; | ||
| 79 | std::size_t size{}; | ||
| 80 | std::size_t capacity{}; | ||
| 81 | bool is_internal{}; | ||
| 82 | BufferStorageType buffer; | ||
| 83 | }; | ||
| 84 | |||
| 85 | template <typename BufferStorageType, typename BufferType, typename StreamBuffer> | ||
| 86 | class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> { | ||
| 87 | public: | ||
| 88 | using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>; | ||
| 89 | using BufferInfo = std::pair<const BufferType*, u64>; | ||
| 90 | |||
| 91 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | ||
| 92 | std::unique_ptr<StreamBuffer> stream_buffer) | ||
| 93 | : RasterizerCache<Buffer>{rasterizer}, system{system}, | ||
| 94 | stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{ | ||
| 95 | this->stream_buffer->GetHandle()} {} | ||
| 96 | ~BufferCache() = default; | ||
| 97 | |||
| 98 | void Unregister(const Buffer& entry) override { | ||
| 99 | std::lock_guard lock{RasterizerCache<Buffer>::mutex}; | ||
| 100 | if (entry->IsInternalized()) { | ||
| 101 | internalized_entries.erase(entry->GetCacheAddr()); | ||
| 102 | } | ||
| 103 | ReserveBuffer(entry); | ||
| 104 | RasterizerCache<Buffer>::Unregister(entry); | ||
| 105 | } | ||
| 106 | |||
| 107 | void TickFrame() { | ||
| 108 | marked_for_destruction_index = | ||
| 109 | (marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size(); | ||
| 110 | MarkedForDestruction().clear(); | ||
| 111 | } | ||
| 112 | |||
| 113 | BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | ||
| 114 | bool internalize = false, bool is_written = false) { | ||
| 115 | std::lock_guard lock{RasterizerCache<Buffer>::mutex}; | ||
| 116 | |||
| 117 | auto& memory_manager = system.GPU().MemoryManager(); | ||
| 118 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 119 | if (!host_ptr) { | ||
| 120 | return {GetEmptyBuffer(size), 0}; | ||
| 121 | } | ||
| 122 | const auto cache_addr = ToCacheAddr(host_ptr); | ||
| 123 | |||
| 124 | // Cache management is a big overhead, so only cache entries with a given size. | ||
| 125 | // TODO: Figure out which size is the best for given games. | ||
| 126 | constexpr std::size_t max_stream_size = 0x800; | ||
| 127 | if (!internalize && size < max_stream_size && | ||
| 128 | internalized_entries.find(cache_addr) == internalized_entries.end()) { | ||
| 129 | return StreamBufferUpload(host_ptr, size, alignment); | ||
| 130 | } | ||
| 131 | |||
| 132 | auto entry = RasterizerCache<Buffer>::TryGet(cache_addr); | ||
| 133 | if (!entry) { | ||
| 134 | return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written); | ||
| 135 | } | ||
| 136 | |||
| 137 | if (entry->GetSize() < size) { | ||
| 138 | IncreaseBufferSize(entry, size); | ||
| 139 | } | ||
| 140 | if (is_written) { | ||
| 141 | entry->MarkAsModified(true, *this); | ||
| 142 | } | ||
| 143 | return {ToHandle(entry->GetBuffer()), 0}; | ||
| 144 | } | ||
| 145 | |||
| 146 | /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. | ||
| 147 | BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, | ||
| 148 | std::size_t alignment = 4) { | ||
| 149 | std::lock_guard lock{RasterizerCache<Buffer>::mutex}; | ||
| 150 | return StreamBufferUpload(raw_pointer, size, alignment); | ||
| 151 | } | ||
| 152 | |||
| 153 | void Map(std::size_t max_size) { | ||
| 154 | std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); | ||
| 155 | buffer_offset = buffer_offset_base; | ||
| 156 | } | ||
| 157 | |||
| 158 | /// Finishes the upload stream, returns true on bindings invalidation. | ||
| 159 | bool Unmap() { | ||
| 160 | stream_buffer->Unmap(buffer_offset - buffer_offset_base); | ||
| 161 | return std::exchange(invalidated, false); | ||
| 162 | } | ||
| 163 | |||
| 164 | virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0; | ||
| 165 | |||
| 166 | protected: | ||
| 167 | void FlushObjectInner(const Buffer& entry) override { | ||
| 168 | DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr()); | ||
| 169 | } | ||
| 170 | |||
| 171 | virtual BufferStorageType CreateBuffer(std::size_t size) = 0; | ||
| 172 | |||
| 173 | virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0; | ||
| 174 | |||
| 175 | virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset, | ||
| 176 | std::size_t size, const u8* data) = 0; | ||
| 177 | |||
| 178 | virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset, | ||
| 179 | std::size_t size, u8* data) = 0; | ||
| 180 | |||
| 181 | virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst, | ||
| 182 | std::size_t src_offset, std::size_t dst_offset, | ||
| 183 | std::size_t size) = 0; | ||
| 184 | |||
| 185 | private: | ||
| 186 | BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, | ||
| 187 | std::size_t alignment) { | ||
| 188 | AlignBuffer(alignment); | ||
| 189 | const std::size_t uploaded_offset = buffer_offset; | ||
| 190 | std::memcpy(buffer_ptr, raw_pointer, size); | ||
| 191 | |||
| 192 | buffer_ptr += size; | ||
| 193 | buffer_offset += size; | ||
| 194 | return {&stream_buffer_handle, uploaded_offset}; | ||
| 195 | } | ||
| 196 | |||
| 197 | BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, | ||
| 198 | bool internalize, bool is_written) { | ||
| 199 | auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | ||
| 200 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | ||
| 201 | ASSERT(cpu_addr); | ||
| 202 | |||
| 203 | auto entry = GetUncachedBuffer(*cpu_addr, host_ptr); | ||
| 204 | entry->SetSize(size); | ||
| 205 | entry->SetInternalState(internalize); | ||
| 206 | RasterizerCache<Buffer>::Register(entry); | ||
| 207 | |||
| 208 | if (internalize) { | ||
| 209 | internalized_entries.emplace(ToCacheAddr(host_ptr)); | ||
| 210 | } | ||
| 211 | if (is_written) { | ||
| 212 | entry->MarkAsModified(true, *this); | ||
| 213 | } | ||
| 214 | |||
| 215 | if (entry->GetCapacity() < size) { | ||
| 216 | MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size)); | ||
| 217 | } | ||
| 218 | |||
| 219 | UploadBufferData(entry->GetBuffer(), 0, size, host_ptr); | ||
| 220 | return {ToHandle(entry->GetBuffer()), 0}; | ||
| 221 | } | ||
| 222 | |||
| 223 | void IncreaseBufferSize(Buffer& entry, std::size_t new_size) { | ||
| 224 | const std::size_t old_size = entry->GetSize(); | ||
| 225 | if (entry->GetCapacity() < new_size) { | ||
| 226 | const auto& old_buffer = entry->GetBuffer(); | ||
| 227 | auto new_buffer = CreateBuffer(new_size); | ||
| 228 | |||
| 229 | // Copy bits from the old buffer to the new buffer. | ||
| 230 | CopyBufferData(old_buffer, new_buffer, 0, 0, old_size); | ||
| 231 | MarkedForDestruction().push_back( | ||
| 232 | entry->ExchangeBuffer(std::move(new_buffer), new_size)); | ||
| 233 | |||
| 234 | // This buffer could have been used | ||
| 235 | invalidated = true; | ||
| 236 | } | ||
| 237 | // Upload the new bits. | ||
| 238 | const std::size_t size_diff = new_size - old_size; | ||
| 239 | UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size); | ||
| 240 | |||
| 241 | // Update entry's size in the object and in the cache. | ||
| 242 | Unregister(entry); | ||
| 243 | |||
| 244 | entry->SetSize(new_size); | ||
| 245 | RasterizerCache<Buffer>::Register(entry); | ||
| 246 | } | ||
| 247 | |||
| 248 | Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) { | ||
| 249 | if (auto entry = TryGetReservedBuffer(host_ptr)) { | ||
| 250 | return entry; | ||
| 251 | } | ||
| 252 | return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr); | ||
| 253 | } | ||
| 254 | |||
| 255 | Buffer TryGetReservedBuffer(u8* host_ptr) { | ||
| 256 | const auto it = buffer_reserve.find(ToCacheAddr(host_ptr)); | ||
| 257 | if (it == buffer_reserve.end()) { | ||
| 258 | return {}; | ||
| 259 | } | ||
| 260 | auto& reserve = it->second; | ||
| 261 | auto entry = reserve.back(); | ||
| 262 | reserve.pop_back(); | ||
| 263 | return entry; | ||
| 264 | } | ||
| 265 | |||
| 266 | void ReserveBuffer(Buffer entry) { | ||
| 267 | buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry)); | ||
| 268 | } | ||
| 269 | |||
| 270 | void AlignBuffer(std::size_t alignment) { | ||
| 271 | // Align the offset, not the mapped pointer | ||
| 272 | const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); | ||
| 273 | buffer_ptr += offset_aligned - buffer_offset; | ||
| 274 | buffer_offset = offset_aligned; | ||
| 275 | } | ||
| 276 | |||
| 277 | std::vector<BufferStorageType>& MarkedForDestruction() { | ||
| 278 | return marked_for_destruction_ring_buffer[marked_for_destruction_index]; | ||
| 279 | } | ||
| 280 | |||
| 281 | Core::System& system; | ||
| 282 | |||
| 283 | std::unique_ptr<StreamBuffer> stream_buffer; | ||
| 284 | BufferType stream_buffer_handle{}; | ||
| 285 | |||
| 286 | bool invalidated = false; | ||
| 287 | |||
| 288 | u8* buffer_ptr = nullptr; | ||
| 289 | u64 buffer_offset = 0; | ||
| 290 | u64 buffer_offset_base = 0; | ||
| 291 | |||
| 292 | std::size_t marked_for_destruction_index = 0; | ||
| 293 | std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer; | ||
| 294 | |||
| 295 | std::unordered_set<CacheAddr> internalized_entries; | ||
| 296 | std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve; | ||
| 297 | }; | ||
| 298 | |||
| 299 | } // namespace VideoCommon | ||
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 3175579cc..bd036cbe8 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() { | |||
| 22 | MICROPROFILE_SCOPE(DispatchCalls); | 22 | MICROPROFILE_SCOPE(DispatchCalls); |
| 23 | 23 | ||
| 24 | // On entering GPU code, assume all memory may be touched by the ARM core. | 24 | // On entering GPU code, assume all memory may be touched by the ARM core. |
| 25 | gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); | 25 | gpu.Maxwell3D().dirty.OnMemoryWrite(); |
| 26 | 26 | ||
| 27 | dma_pushbuffer_subindex = 0; | 27 | dma_pushbuffer_subindex = 0; |
| 28 | 28 | ||
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 7404a8163..08586d33c 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 37 | const bool is_last_call = method_call.IsLastCall(); | 37 | const bool is_last_call = method_call.IsLastCall(); |
| 38 | upload_state.ProcessData(method_call.argument, is_last_call); | 38 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 39 | if (is_last_call) { | 39 | if (is_last_call) { |
| 40 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 40 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 41 | } | 41 | } |
| 42 | break; | 42 | break; |
| 43 | } | 43 | } |
| @@ -50,13 +50,14 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | void KeplerCompute::ProcessLaunch() { | 52 | void KeplerCompute::ProcessLaunch() { |
| 53 | |||
| 54 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | 53 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); |
| 55 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | 54 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, |
| 56 | LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); | 55 | LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); |
| 57 | 56 | ||
| 58 | const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; | 57 | const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; |
| 59 | LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); | 58 | LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); |
| 59 | |||
| 60 | rasterizer.DispatchCompute(code_addr); | ||
| 60 | } | 61 | } |
| 61 | 62 | ||
| 62 | } // namespace Tegra::Engines | 63 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 0561f676c..44279de00 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 34 | const bool is_last_call = method_call.IsLastCall(); | 34 | const bool is_last_call = method_call.IsLastCall(); |
| 35 | upload_state.ProcessData(method_call.argument, is_last_call); | 35 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 36 | if (is_last_call) { | 36 | if (is_last_call) { |
| 37 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 37 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 38 | } | 38 | } |
| 39 | break; | 39 | break; |
| 40 | } | 40 | } |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 224c27bd2..125c53360 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste | |||
| 22 | MemoryManager& memory_manager) | 22 | MemoryManager& memory_manager) |
| 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, | 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, |
| 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { | 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { |
| 25 | InitDirtySettings(); | ||
| 25 | InitializeRegisterDefaults(); | 26 | InitializeRegisterDefaults(); |
| 26 | } | 27 | } |
| 27 | 28 | ||
| @@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 69 | regs.stencil_back_func_mask = 0xFFFFFFFF; | 70 | regs.stencil_back_func_mask = 0xFFFFFFFF; |
| 70 | regs.stencil_back_mask = 0xFFFFFFFF; | 71 | regs.stencil_back_mask = 0xFFFFFFFF; |
| 71 | 72 | ||
| 73 | regs.depth_test_func = Regs::ComparisonOp::Always; | ||
| 74 | regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise; | ||
| 75 | regs.cull.cull_face = Regs::Cull::CullFace::Back; | ||
| 76 | |||
| 72 | // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a | 77 | // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a |
| 73 | // register carrying a default value. Assume it's OpenGL's default (1). | 78 | // register carrying a default value. Assume it's OpenGL's default (1). |
| 74 | regs.point_size = 1.0f; | 79 | regs.point_size = 1.0f; |
| @@ -86,6 +91,159 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 86 | regs.rt_separate_frag_data = 1; | 91 | regs.rt_separate_frag_data = 1; |
| 87 | } | 92 | } |
| 88 | 93 | ||
| 94 | #define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) | ||
| 95 | |||
| 96 | void Maxwell3D::InitDirtySettings() { | ||
| 97 | const auto set_block = [this](const u32 start, const u32 range, const u8 position) { | ||
| 98 | const auto start_itr = dirty_pointers.begin() + start; | ||
| 99 | const auto end_itr = start_itr + range; | ||
| 100 | std::fill(start_itr, end_itr, position); | ||
| 101 | }; | ||
| 102 | dirty.regs.fill(true); | ||
| 103 | |||
| 104 | // Init Render Targets | ||
| 105 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | ||
| 106 | constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); | ||
| 107 | constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; | ||
| 108 | u32 rt_dirty_reg = DIRTY_REGS_POS(render_target); | ||
| 109 | for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { | ||
| 110 | set_block(rt_reg, registers_per_rt, rt_dirty_reg); | ||
| 111 | rt_dirty_reg++; | ||
| 112 | } | ||
| 113 | constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); | ||
| 114 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; | ||
| 115 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag; | ||
| 116 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag; | ||
| 117 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | ||
| 118 | constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta); | ||
| 119 | set_block(zeta_reg, registers_in_zeta, depth_buffer_flag); | ||
| 120 | |||
| 121 | // Init Vertex Arrays | ||
| 122 | constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); | ||
| 123 | constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); | ||
| 124 | constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; | ||
| 125 | u32 va_reg = DIRTY_REGS_POS(vertex_array); | ||
| 126 | u32 vi_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 127 | for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; | ||
| 128 | vertex_reg += vertex_array_size) { | ||
| 129 | set_block(vertex_reg, 3, va_reg); | ||
| 130 | // The divisor concerns vertex array instances | ||
| 131 | dirty_pointers[vertex_reg + 3] = vi_reg; | ||
| 132 | va_reg++; | ||
| 133 | vi_reg++; | ||
| 134 | } | ||
| 135 | constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); | ||
| 136 | constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); | ||
| 137 | constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; | ||
| 138 | va_reg = DIRTY_REGS_POS(vertex_array); | ||
| 139 | for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; | ||
| 140 | vertex_reg += vertex_limit_size) { | ||
| 141 | set_block(vertex_reg, vertex_limit_size, va_reg); | ||
| 142 | va_reg++; | ||
| 143 | } | ||
| 144 | constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); | ||
| 145 | constexpr u32 vertex_instance_size = | ||
| 146 | sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); | ||
| 147 | constexpr u32 vertex_instance_end = | ||
| 148 | vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; | ||
| 149 | vi_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 150 | for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; | ||
| 151 | vertex_reg += vertex_instance_size) { | ||
| 152 | set_block(vertex_reg, vertex_instance_size, vi_reg); | ||
| 153 | vi_reg++; | ||
| 154 | } | ||
| 155 | set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), | ||
| 156 | DIRTY_REGS_POS(vertex_attrib_format)); | ||
| 157 | |||
| 158 | // Init Shaders | ||
| 159 | constexpr u32 shader_registers_count = | ||
| 160 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 161 | set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count, | ||
| 162 | DIRTY_REGS_POS(shaders)); | ||
| 163 | |||
| 164 | // State | ||
| 165 | |||
| 166 | // Viewport | ||
| 167 | constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport); | ||
| 168 | constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); | ||
| 169 | constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); | ||
| 170 | set_block(viewport_start, viewport_size, viewport_dirty_reg); | ||
| 171 | constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control); | ||
| 172 | constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32); | ||
| 173 | set_block(view_volume_start, view_volume_size, viewport_dirty_reg); | ||
| 174 | |||
| 175 | // Viewport transformation | ||
| 176 | constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform); | ||
| 177 | constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32); | ||
| 178 | set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform)); | ||
| 179 | |||
| 180 | // Cullmode | ||
| 181 | constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull); | ||
| 182 | constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32); | ||
| 183 | set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode)); | ||
| 184 | |||
| 185 | // Screen y control | ||
| 186 | dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control); | ||
| 187 | |||
| 188 | // Primitive Restart | ||
| 189 | constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart); | ||
| 190 | constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32); | ||
| 191 | set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); | ||
| 192 | |||
| 193 | // Depth Test | ||
| 194 | constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); | ||
| 195 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; | ||
| 196 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; | ||
| 197 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; | ||
| 198 | |||
| 199 | // Stencil Test | ||
| 200 | constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test); | ||
| 201 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg; | ||
| 202 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg; | ||
| 203 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg; | ||
| 204 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg; | ||
| 205 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg; | ||
| 206 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg; | ||
| 207 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg; | ||
| 208 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg; | ||
| 209 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg; | ||
| 210 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg; | ||
| 211 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg; | ||
| 212 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg; | ||
| 213 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg; | ||
| 214 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg; | ||
| 215 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg; | ||
| 216 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg; | ||
| 217 | |||
| 218 | // Color Mask | ||
| 219 | constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); | ||
| 220 | dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; | ||
| 221 | set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), | ||
| 222 | color_mask_dirty_reg); | ||
| 223 | // Blend State | ||
| 224 | constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); | ||
| 225 | set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), | ||
| 226 | blend_state_dirty_reg); | ||
| 227 | dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; | ||
| 228 | set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg); | ||
| 229 | set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32), | ||
| 230 | blend_state_dirty_reg); | ||
| 231 | |||
| 232 | // Scissor State | ||
| 233 | constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); | ||
| 234 | set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), | ||
| 235 | scissor_test_dirty_reg); | ||
| 236 | |||
| 237 | // Polygon Offset | ||
| 238 | constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); | ||
| 239 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; | ||
| 240 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; | ||
| 241 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; | ||
| 242 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg; | ||
| 243 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg; | ||
| 244 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; | ||
| 245 | } | ||
| 246 | |||
| 89 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { | 247 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { |
| 90 | // Reset the current macro. | 248 | // Reset the current macro. |
| 91 | executing_macro = 0; | 249 | executing_macro = 0; |
| @@ -108,6 +266,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 108 | 266 | ||
| 109 | const u32 method = method_call.method; | 267 | const u32 method = method_call.method; |
| 110 | 268 | ||
| 269 | if (method == cb_data_state.current) { | ||
| 270 | regs.reg_array[method] = method_call.argument; | ||
| 271 | ProcessCBData(method_call.argument); | ||
| 272 | return; | ||
| 273 | } else if (cb_data_state.current != null_cb_data) { | ||
| 274 | FinishCBData(); | ||
| 275 | } | ||
| 276 | |||
| 111 | // It is an error to write to a register other than the current macro's ARG register before it | 277 | // It is an error to write to a register other than the current macro's ARG register before it |
| 112 | // has finished execution. | 278 | // has finished execution. |
| 113 | if (executing_macro != 0) { | 279 | if (executing_macro != 0) { |
| @@ -143,49 +309,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 143 | 309 | ||
| 144 | if (regs.reg_array[method] != method_call.argument) { | 310 | if (regs.reg_array[method] != method_call.argument) { |
| 145 | regs.reg_array[method] = method_call.argument; | 311 | regs.reg_array[method] = method_call.argument; |
| 146 | // Color buffers | 312 | const std::size_t dirty_reg = dirty_pointers[method]; |
| 147 | constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); | 313 | if (dirty_reg) { |
| 148 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | 314 | dirty.regs[dirty_reg] = true; |
| 149 | if (method >= first_rt_reg && | 315 | if (dirty_reg >= DIRTY_REGS_POS(vertex_array) && |
| 150 | method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { | 316 | dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) { |
| 151 | const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; | 317 | dirty.vertex_array_buffers = true; |
| 152 | dirty_flags.color_buffer.set(rt_index); | 318 | } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) && |
| 153 | } | 319 | dirty_reg < DIRTY_REGS_POS(vertex_instances)) { |
| 154 | 320 | dirty.vertex_instances = true; | |
| 155 | // Zeta buffer | 321 | } else if (dirty_reg >= DIRTY_REGS_POS(render_target) && |
| 156 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | 322 | dirty_reg < DIRTY_REGS_POS(render_settings)) { |
| 157 | if (method == MAXWELL3D_REG_INDEX(zeta_enable) || | 323 | dirty.render_settings = true; |
| 158 | method == MAXWELL3D_REG_INDEX(zeta_width) || | 324 | } |
| 159 | method == MAXWELL3D_REG_INDEX(zeta_height) || | ||
| 160 | (method >= MAXWELL3D_REG_INDEX(zeta) && | ||
| 161 | method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { | ||
| 162 | dirty_flags.zeta_buffer = true; | ||
| 163 | } | ||
| 164 | |||
| 165 | // Shader | ||
| 166 | constexpr u32 shader_registers_count = | ||
| 167 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 168 | if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) && | ||
| 169 | method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { | ||
| 170 | dirty_flags.shaders = true; | ||
| 171 | } | ||
| 172 | |||
| 173 | // Vertex format | ||
| 174 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && | ||
| 175 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { | ||
| 176 | dirty_flags.vertex_attrib_format = true; | ||
| 177 | } | ||
| 178 | |||
| 179 | // Vertex buffer | ||
| 180 | if (method >= MAXWELL3D_REG_INDEX(vertex_array) && | ||
| 181 | method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) { | ||
| 182 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); | ||
| 183 | } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && | ||
| 184 | method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) { | ||
| 185 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); | ||
| 186 | } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && | ||
| 187 | method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) { | ||
| 188 | dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays)); | ||
| 189 | } | 325 | } |
| 190 | } | 326 | } |
| 191 | 327 | ||
| @@ -214,7 +350,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 214 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): | 350 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): |
| 215 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): | 351 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): |
| 216 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { | 352 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { |
| 217 | ProcessCBData(method_call.argument); | 353 | StartCBData(method); |
| 218 | break; | 354 | break; |
| 219 | } | 355 | } |
| 220 | case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { | 356 | case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { |
| @@ -249,6 +385,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 249 | ProcessQueryGet(); | 385 | ProcessQueryGet(); |
| 250 | break; | 386 | break; |
| 251 | } | 387 | } |
| 388 | case MAXWELL3D_REG_INDEX(condition.mode): { | ||
| 389 | ProcessQueryCondition(); | ||
| 390 | break; | ||
| 391 | } | ||
| 252 | case MAXWELL3D_REG_INDEX(sync_info): { | 392 | case MAXWELL3D_REG_INDEX(sync_info): { |
| 253 | ProcessSyncPoint(); | 393 | ProcessSyncPoint(); |
| 254 | break; | 394 | break; |
| @@ -261,7 +401,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 261 | const bool is_last_call = method_call.IsLastCall(); | 401 | const bool is_last_call = method_call.IsLastCall(); |
| 262 | upload_state.ProcessData(method_call.argument, is_last_call); | 402 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 263 | if (is_last_call) { | 403 | if (is_last_call) { |
| 264 | dirty_flags.OnMemoryWrite(); | 404 | dirty.OnMemoryWrite(); |
| 265 | } | 405 | } |
| 266 | break; | 406 | break; |
| 267 | } | 407 | } |
| @@ -302,6 +442,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 302 | result = regs.query.query_sequence; | 442 | result = regs.query.query_sequence; |
| 303 | break; | 443 | break; |
| 304 | default: | 444 | default: |
| 445 | result = 1; | ||
| 305 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", | 446 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", |
| 306 | static_cast<u32>(regs.query.query_get.select.Value())); | 447 | static_cast<u32>(regs.query.query_get.select.Value())); |
| 307 | } | 448 | } |
| @@ -333,7 +474,6 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 333 | query_result.timestamp = system.CoreTiming().GetTicks(); | 474 | query_result.timestamp = system.CoreTiming().GetTicks(); |
| 334 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | 475 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); |
| 335 | } | 476 | } |
| 336 | dirty_flags.OnMemoryWrite(); | ||
| 337 | break; | 477 | break; |
| 338 | } | 478 | } |
| 339 | default: | 479 | default: |
| @@ -342,6 +482,45 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 342 | } | 482 | } |
| 343 | } | 483 | } |
| 344 | 484 | ||
| 485 | void Maxwell3D::ProcessQueryCondition() { | ||
| 486 | const GPUVAddr condition_address{regs.condition.Address()}; | ||
| 487 | switch (regs.condition.mode) { | ||
| 488 | case Regs::ConditionMode::Always: { | ||
| 489 | execute_on = true; | ||
| 490 | break; | ||
| 491 | } | ||
| 492 | case Regs::ConditionMode::Never: { | ||
| 493 | execute_on = false; | ||
| 494 | break; | ||
| 495 | } | ||
| 496 | case Regs::ConditionMode::ResNonZero: { | ||
| 497 | Regs::QueryCompare cmp; | ||
| 498 | memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); | ||
| 499 | execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U; | ||
| 500 | break; | ||
| 501 | } | ||
| 502 | case Regs::ConditionMode::Equal: { | ||
| 503 | Regs::QueryCompare cmp; | ||
| 504 | memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); | ||
| 505 | execute_on = | ||
| 506 | cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode; | ||
| 507 | break; | ||
| 508 | } | ||
| 509 | case Regs::ConditionMode::NotEqual: { | ||
| 510 | Regs::QueryCompare cmp; | ||
| 511 | memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); | ||
| 512 | execute_on = | ||
| 513 | cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode; | ||
| 514 | break; | ||
| 515 | } | ||
| 516 | default: { | ||
| 517 | UNIMPLEMENTED_MSG("Uninplemented Condition Mode!"); | ||
| 518 | execute_on = true; | ||
| 519 | break; | ||
| 520 | } | ||
| 521 | } | ||
| 522 | } | ||
| 523 | |||
| 345 | void Maxwell3D::ProcessSyncPoint() { | 524 | void Maxwell3D::ProcessSyncPoint() { |
| 346 | const u32 sync_point = regs.sync_info.sync_point.Value(); | 525 | const u32 sync_point = regs.sync_info.sync_point.Value(); |
| 347 | const u32 increment = regs.sync_info.increment.Value(); | 526 | const u32 increment = regs.sync_info.increment.Value(); |
| @@ -406,23 +585,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { | |||
| 406 | } | 585 | } |
| 407 | 586 | ||
| 408 | void Maxwell3D::ProcessCBData(u32 value) { | 587 | void Maxwell3D::ProcessCBData(u32 value) { |
| 588 | const u32 id = cb_data_state.id; | ||
| 589 | cb_data_state.buffer[id][cb_data_state.counter] = value; | ||
| 590 | // Increment the current buffer position. | ||
| 591 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | ||
| 592 | cb_data_state.counter++; | ||
| 593 | } | ||
| 594 | |||
| 595 | void Maxwell3D::StartCBData(u32 method) { | ||
| 596 | constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]); | ||
| 597 | cb_data_state.start_pos = regs.const_buffer.cb_pos; | ||
| 598 | cb_data_state.id = method - first_cb_data; | ||
| 599 | cb_data_state.current = method; | ||
| 600 | cb_data_state.counter = 0; | ||
| 601 | ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]); | ||
| 602 | } | ||
| 603 | |||
| 604 | void Maxwell3D::FinishCBData() { | ||
| 409 | // Write the input value to the current const buffer at the current position. | 605 | // Write the input value to the current const buffer at the current position. |
| 410 | const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); | 606 | const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); |
| 411 | ASSERT(buffer_address != 0); | 607 | ASSERT(buffer_address != 0); |
| 412 | 608 | ||
| 413 | // Don't allow writing past the end of the buffer. | 609 | // Don't allow writing past the end of the buffer. |
| 414 | ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); | 610 | ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size); |
| 415 | |||
| 416 | const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos}; | ||
| 417 | 611 | ||
| 418 | u8* ptr{memory_manager.GetPointer(address)}; | 612 | const GPUVAddr address{buffer_address + cb_data_state.start_pos}; |
| 419 | rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); | 613 | const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos; |
| 420 | memory_manager.Write<u32>(address, value); | ||
| 421 | 614 | ||
| 422 | dirty_flags.OnMemoryWrite(); | 615 | const u32 id = cb_data_state.id; |
| 616 | memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); | ||
| 617 | dirty.OnMemoryWrite(); | ||
| 423 | 618 | ||
| 424 | // Increment the current buffer position. | 619 | cb_data_state.id = null_cb_data; |
| 425 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | 620 | cb_data_state.current = null_cb_data; |
| 426 | } | 621 | } |
| 427 | 622 | ||
| 428 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | 623 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 13e314944..1ee982b76 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -67,6 +67,7 @@ public: | |||
| 67 | static constexpr std::size_t MaxShaderStage = 5; | 67 | static constexpr std::size_t MaxShaderStage = 5; |
| 68 | // Maximum number of const buffers per shader stage. | 68 | // Maximum number of const buffers per shader stage. |
| 69 | static constexpr std::size_t MaxConstBuffers = 18; | 69 | static constexpr std::size_t MaxConstBuffers = 18; |
| 70 | static constexpr std::size_t MaxConstBufferSize = 0x10000; | ||
| 70 | 71 | ||
| 71 | enum class QueryMode : u32 { | 72 | enum class QueryMode : u32 { |
| 72 | Write = 0, | 73 | Write = 0, |
| @@ -89,6 +90,20 @@ public: | |||
| 89 | 90 | ||
| 90 | enum class QuerySelect : u32 { | 91 | enum class QuerySelect : u32 { |
| 91 | Zero = 0, | 92 | Zero = 0, |
| 93 | TimeElapsed = 2, | ||
| 94 | TransformFeedbackPrimitivesGenerated = 11, | ||
| 95 | PrimitivesGenerated = 18, | ||
| 96 | SamplesPassed = 21, | ||
| 97 | TransformFeedbackUnknown = 26, | ||
| 98 | }; | ||
| 99 | |||
| 100 | struct QueryCompare { | ||
| 101 | u32 initial_sequence; | ||
| 102 | u32 initial_mode; | ||
| 103 | u32 unknown1; | ||
| 104 | u32 unknown2; | ||
| 105 | u32 current_sequence; | ||
| 106 | u32 current_mode; | ||
| 92 | }; | 107 | }; |
| 93 | 108 | ||
| 94 | enum class QuerySyncCondition : u32 { | 109 | enum class QuerySyncCondition : u32 { |
| @@ -96,6 +111,14 @@ public: | |||
| 96 | GreaterThan = 1, | 111 | GreaterThan = 1, |
| 97 | }; | 112 | }; |
| 98 | 113 | ||
| 114 | enum class ConditionMode : u32 { | ||
| 115 | Never = 0, | ||
| 116 | Always = 1, | ||
| 117 | ResNonZero = 2, | ||
| 118 | Equal = 3, | ||
| 119 | NotEqual = 4, | ||
| 120 | }; | ||
| 121 | |||
| 99 | enum class ShaderProgram : u32 { | 122 | enum class ShaderProgram : u32 { |
| 100 | VertexA = 0, | 123 | VertexA = 0, |
| 101 | VertexB = 1, | 124 | VertexB = 1, |
| @@ -814,7 +837,18 @@ public: | |||
| 814 | BitField<4, 1, u32> alpha_to_one; | 837 | BitField<4, 1, u32> alpha_to_one; |
| 815 | } multisample_control; | 838 | } multisample_control; |
| 816 | 839 | ||
| 817 | INSERT_PADDING_WORDS(0x7); | 840 | INSERT_PADDING_WORDS(0x4); |
| 841 | |||
| 842 | struct { | ||
| 843 | u32 address_high; | ||
| 844 | u32 address_low; | ||
| 845 | ConditionMode mode; | ||
| 846 | |||
| 847 | GPUVAddr Address() const { | ||
| 848 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 849 | address_low); | ||
| 850 | } | ||
| 851 | } condition; | ||
| 818 | 852 | ||
| 819 | struct { | 853 | struct { |
| 820 | u32 tsc_address_high; | 854 | u32 tsc_address_high; |
| @@ -1123,23 +1157,77 @@ public: | |||
| 1123 | 1157 | ||
| 1124 | State state{}; | 1158 | State state{}; |
| 1125 | 1159 | ||
| 1126 | struct DirtyFlags { | 1160 | struct DirtyRegs { |
| 1127 | std::bitset<8> color_buffer{0xFF}; | 1161 | static constexpr std::size_t NUM_REGS = 256; |
| 1128 | std::bitset<32> vertex_array{0xFFFFFFFF}; | 1162 | union { |
| 1163 | struct { | ||
| 1164 | bool null_dirty; | ||
| 1165 | |||
| 1166 | // Vertex Attributes | ||
| 1167 | bool vertex_attrib_format; | ||
| 1168 | |||
| 1169 | // Vertex Arrays | ||
| 1170 | std::array<bool, 32> vertex_array; | ||
| 1171 | |||
| 1172 | bool vertex_array_buffers; | ||
| 1173 | |||
| 1174 | // Vertex Instances | ||
| 1175 | std::array<bool, 32> vertex_instance; | ||
| 1129 | 1176 | ||
| 1130 | bool vertex_attrib_format = true; | 1177 | bool vertex_instances; |
| 1131 | bool zeta_buffer = true; | 1178 | |
| 1132 | bool shaders = true; | 1179 | // Render Targets |
| 1180 | std::array<bool, 8> render_target; | ||
| 1181 | bool depth_buffer; | ||
| 1182 | |||
| 1183 | bool render_settings; | ||
| 1184 | |||
| 1185 | // Shaders | ||
| 1186 | bool shaders; | ||
| 1187 | |||
| 1188 | // Rasterizer State | ||
| 1189 | bool viewport; | ||
| 1190 | bool clip_coefficient; | ||
| 1191 | bool cull_mode; | ||
| 1192 | bool primitive_restart; | ||
| 1193 | bool depth_test; | ||
| 1194 | bool stencil_test; | ||
| 1195 | bool blend_state; | ||
| 1196 | bool scissor_test; | ||
| 1197 | bool transform_feedback; | ||
| 1198 | bool color_mask; | ||
| 1199 | bool polygon_offset; | ||
| 1200 | |||
| 1201 | // Complementary | ||
| 1202 | bool viewport_transform; | ||
| 1203 | bool screen_y_control; | ||
| 1204 | |||
| 1205 | bool memory_general; | ||
| 1206 | }; | ||
| 1207 | std::array<bool, NUM_REGS> regs; | ||
| 1208 | }; | ||
| 1209 | |||
| 1210 | void ResetVertexArrays() { | ||
| 1211 | vertex_array.fill(true); | ||
| 1212 | vertex_array_buffers = true; | ||
| 1213 | } | ||
| 1214 | |||
| 1215 | void ResetRenderTargets() { | ||
| 1216 | depth_buffer = true; | ||
| 1217 | render_target.fill(true); | ||
| 1218 | render_settings = true; | ||
| 1219 | } | ||
| 1133 | 1220 | ||
| 1134 | void OnMemoryWrite() { | 1221 | void OnMemoryWrite() { |
| 1135 | zeta_buffer = true; | ||
| 1136 | shaders = true; | 1222 | shaders = true; |
| 1137 | color_buffer.set(); | 1223 | memory_general = true; |
| 1138 | vertex_array.set(); | 1224 | ResetRenderTargets(); |
| 1225 | ResetVertexArrays(); | ||
| 1139 | } | 1226 | } |
| 1140 | }; | ||
| 1141 | 1227 | ||
| 1142 | DirtyFlags dirty_flags; | 1228 | } dirty{}; |
| 1229 | |||
| 1230 | std::array<u8, Regs::NUM_REGS> dirty_pointers{}; | ||
| 1143 | 1231 | ||
| 1144 | /// Reads a register value located at the input method address | 1232 | /// Reads a register value located at the input method address |
| 1145 | u32 GetRegisterValue(u32 method) const; | 1233 | u32 GetRegisterValue(u32 method) const; |
| @@ -1168,6 +1256,10 @@ public: | |||
| 1168 | return macro_memory; | 1256 | return macro_memory; |
| 1169 | } | 1257 | } |
| 1170 | 1258 | ||
| 1259 | bool ShouldExecute() const { | ||
| 1260 | return execute_on; | ||
| 1261 | } | ||
| 1262 | |||
| 1171 | private: | 1263 | private: |
| 1172 | void InitializeRegisterDefaults(); | 1264 | void InitializeRegisterDefaults(); |
| 1173 | 1265 | ||
| @@ -1191,14 +1283,27 @@ private: | |||
| 1191 | /// Interpreter for the macro codes uploaded to the GPU. | 1283 | /// Interpreter for the macro codes uploaded to the GPU. |
| 1192 | MacroInterpreter macro_interpreter; | 1284 | MacroInterpreter macro_interpreter; |
| 1193 | 1285 | ||
| 1286 | static constexpr u32 null_cb_data = 0xFFFFFFFF; | ||
| 1287 | struct { | ||
| 1288 | std::array<std::array<u32, 0x4000>, 16> buffer; | ||
| 1289 | u32 current{null_cb_data}; | ||
| 1290 | u32 id{null_cb_data}; | ||
| 1291 | u32 start_pos{}; | ||
| 1292 | u32 counter{}; | ||
| 1293 | } cb_data_state; | ||
| 1294 | |||
| 1194 | Upload::State upload_state; | 1295 | Upload::State upload_state; |
| 1195 | 1296 | ||
| 1297 | bool execute_on{true}; | ||
| 1298 | |||
| 1196 | /// Retrieves information about a specific TIC entry from the TIC buffer. | 1299 | /// Retrieves information about a specific TIC entry from the TIC buffer. |
| 1197 | Texture::TICEntry GetTICEntry(u32 tic_index) const; | 1300 | Texture::TICEntry GetTICEntry(u32 tic_index) const; |
| 1198 | 1301 | ||
| 1199 | /// Retrieves information about a specific TSC entry from the TSC buffer. | 1302 | /// Retrieves information about a specific TSC entry from the TSC buffer. |
| 1200 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; | 1303 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; |
| 1201 | 1304 | ||
| 1305 | void InitDirtySettings(); | ||
| 1306 | |||
| 1202 | /** | 1307 | /** |
| 1203 | * Call a macro on this engine. | 1308 | * Call a macro on this engine. |
| 1204 | * @param method Method to call | 1309 | * @param method Method to call |
| @@ -1218,11 +1323,16 @@ private: | |||
| 1218 | /// Handles a write to the QUERY_GET register. | 1323 | /// Handles a write to the QUERY_GET register. |
| 1219 | void ProcessQueryGet(); | 1324 | void ProcessQueryGet(); |
| 1220 | 1325 | ||
| 1326 | // Handles Conditional Rendering | ||
| 1327 | void ProcessQueryCondition(); | ||
| 1328 | |||
| 1221 | /// Handles writes to syncing register. | 1329 | /// Handles writes to syncing register. |
| 1222 | void ProcessSyncPoint(); | 1330 | void ProcessSyncPoint(); |
| 1223 | 1331 | ||
| 1224 | /// Handles a write to the CB_DATA[i] register. | 1332 | /// Handles a write to the CB_DATA[i] register. |
| 1333 | void StartCBData(u32 method); | ||
| 1225 | void ProcessCBData(u32 value); | 1334 | void ProcessCBData(u32 value); |
| 1335 | void FinishCBData(); | ||
| 1226 | 1336 | ||
| 1227 | /// Handles a write to the CB_BIND register. | 1337 | /// Handles a write to the CB_BIND register. |
| 1228 | void ProcessCBBind(Regs::ShaderStage stage); | 1338 | void ProcessCBBind(Regs::ShaderStage stage); |
| @@ -1289,6 +1399,7 @@ ASSERT_REG_POSITION(clip_distance_enabled, 0x544); | |||
| 1289 | ASSERT_REG_POSITION(point_size, 0x546); | 1399 | ASSERT_REG_POSITION(point_size, 0x546); |
| 1290 | ASSERT_REG_POSITION(zeta_enable, 0x54E); | 1400 | ASSERT_REG_POSITION(zeta_enable, 0x54E); |
| 1291 | ASSERT_REG_POSITION(multisample_control, 0x54F); | 1401 | ASSERT_REG_POSITION(multisample_control, 0x54F); |
| 1402 | ASSERT_REG_POSITION(condition, 0x554); | ||
| 1292 | ASSERT_REG_POSITION(tsc, 0x557); | 1403 | ASSERT_REG_POSITION(tsc, 0x557); |
| 1293 | ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); | 1404 | ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); |
| 1294 | ASSERT_REG_POSITION(tic, 0x55D); | 1405 | ASSERT_REG_POSITION(tic, 0x55D); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index afb9578d0..a28c04473 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -38,7 +38,7 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { | |||
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | void MaxwellDMA::HandleCopy() { | 40 | void MaxwellDMA::HandleCopy() { |
| 41 | LOG_WARNING(HW_GPU, "Requested a DMA copy"); | 41 | LOG_TRACE(HW_GPU, "Requested a DMA copy"); |
| 42 | 42 | ||
| 43 | const GPUVAddr source = regs.src_address.Address(); | 43 | const GPUVAddr source = regs.src_address.Address(); |
| 44 | const GPUVAddr dest = regs.dst_address.Address(); | 44 | const GPUVAddr dest = regs.dst_address.Address(); |
| @@ -58,7 +58,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | // All copies here update the main memory, so mark all rasterizer states as invalid. | 60 | // All copies here update the main memory, so mark all rasterizer states as invalid. |
| 61 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 61 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 62 | 62 | ||
| 63 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { | 63 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { |
| 64 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D | 64 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 404d4f5aa..8520a0143 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -78,7 +78,7 @@ union Attribute { | |||
| 78 | constexpr explicit Attribute(u64 value) : value(value) {} | 78 | constexpr explicit Attribute(u64 value) : value(value) {} |
| 79 | 79 | ||
| 80 | enum class Index : u64 { | 80 | enum class Index : u64 { |
| 81 | PointSize = 6, | 81 | LayerViewportPointSize = 6, |
| 82 | Position = 7, | 82 | Position = 7, |
| 83 | Attribute_0 = 8, | 83 | Attribute_0 = 8, |
| 84 | Attribute_31 = 39, | 84 | Attribute_31 = 39, |
| @@ -931,8 +931,6 @@ union Instruction { | |||
| 931 | } csetp; | 931 | } csetp; |
| 932 | 932 | ||
| 933 | union { | 933 | union { |
| 934 | BitField<35, 4, PredCondition> cond; | ||
| 935 | BitField<49, 1, u64> h_and; | ||
| 936 | BitField<6, 1, u64> ftz; | 934 | BitField<6, 1, u64> ftz; |
| 937 | BitField<45, 2, PredOperation> op; | 935 | BitField<45, 2, PredOperation> op; |
| 938 | BitField<3, 3, u64> pred3; | 936 | BitField<3, 3, u64> pred3; |
| @@ -940,9 +938,21 @@ union Instruction { | |||
| 940 | BitField<43, 1, u64> negate_a; | 938 | BitField<43, 1, u64> negate_a; |
| 941 | BitField<44, 1, u64> abs_a; | 939 | BitField<44, 1, u64> abs_a; |
| 942 | BitField<47, 2, HalfType> type_a; | 940 | BitField<47, 2, HalfType> type_a; |
| 943 | BitField<31, 1, u64> negate_b; | 941 | union { |
| 944 | BitField<30, 1, u64> abs_b; | 942 | BitField<35, 4, PredCondition> cond; |
| 945 | BitField<28, 2, HalfType> type_b; | 943 | BitField<49, 1, u64> h_and; |
| 944 | BitField<31, 1, u64> negate_b; | ||
| 945 | BitField<30, 1, u64> abs_b; | ||
| 946 | BitField<28, 2, HalfType> type_b; | ||
| 947 | } reg; | ||
| 948 | union { | ||
| 949 | BitField<56, 1, u64> negate_b; | ||
| 950 | BitField<54, 1, u64> abs_b; | ||
| 951 | } cbuf; | ||
| 952 | union { | ||
| 953 | BitField<49, 4, PredCondition> cond; | ||
| 954 | BitField<53, 1, u64> h_and; | ||
| 955 | } cbuf_and_imm; | ||
| 946 | BitField<42, 1, u64> neg_pred; | 956 | BitField<42, 1, u64> neg_pred; |
| 947 | BitField<39, 3, u64> pred39; | 957 | BitField<39, 3, u64> pred39; |
| 948 | } hsetp2; | 958 | } hsetp2; |
| @@ -1278,6 +1288,7 @@ union Instruction { | |||
| 1278 | union { | 1288 | union { |
| 1279 | BitField<49, 1, u64> nodep_flag; | 1289 | BitField<49, 1, u64> nodep_flag; |
| 1280 | BitField<53, 4, u64> texture_info; | 1290 | BitField<53, 4, u64> texture_info; |
| 1291 | BitField<59, 1, u64> fp32_flag; | ||
| 1281 | 1292 | ||
| 1282 | TextureType GetTextureType() const { | 1293 | TextureType GetTextureType() const { |
| 1283 | // The TLDS instruction has a weird encoding for the texture type. | 1294 | // The TLDS instruction has a weird encoding for the texture type. |
| @@ -1368,6 +1379,20 @@ union Instruction { | |||
| 1368 | } bra; | 1379 | } bra; |
| 1369 | 1380 | ||
| 1370 | union { | 1381 | union { |
| 1382 | BitField<20, 24, u64> target; | ||
| 1383 | BitField<5, 1, u64> constant_buffer; | ||
| 1384 | |||
| 1385 | s32 GetBranchExtend() const { | ||
| 1386 | // Sign extend the branch target offset | ||
| 1387 | u32 mask = 1U << (24 - 1); | ||
| 1388 | u32 value = static_cast<u32>(target); | ||
| 1389 | // The branch offset is relative to the next instruction and is stored in bytes, so | ||
| 1390 | // divide it by the size of an instruction and add 1 to it. | ||
| 1391 | return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1; | ||
| 1392 | } | ||
| 1393 | } brx; | ||
| 1394 | |||
| 1395 | union { | ||
| 1371 | BitField<39, 1, u64> emit; // EmitVertex | 1396 | BitField<39, 1, u64> emit; // EmitVertex |
| 1372 | BitField<40, 1, u64> cut; // EndPrimitive | 1397 | BitField<40, 1, u64> cut; // EndPrimitive |
| 1373 | } out; | 1398 | } out; |
| @@ -1464,6 +1489,7 @@ public: | |||
| 1464 | BFE_IMM, | 1489 | BFE_IMM, |
| 1465 | BFI_IMM_R, | 1490 | BFI_IMM_R, |
| 1466 | BRA, | 1491 | BRA, |
| 1492 | BRX, | ||
| 1467 | PBK, | 1493 | PBK, |
| 1468 | LD_A, | 1494 | LD_A, |
| 1469 | LD_L, | 1495 | LD_L, |
| @@ -1532,7 +1558,9 @@ public: | |||
| 1532 | HFMA2_RC, | 1558 | HFMA2_RC, |
| 1533 | HFMA2_RR, | 1559 | HFMA2_RR, |
| 1534 | HFMA2_IMM_R, | 1560 | HFMA2_IMM_R, |
| 1561 | HSETP2_C, | ||
| 1535 | HSETP2_R, | 1562 | HSETP2_R, |
| 1563 | HSETP2_IMM, | ||
| 1536 | HSET2_R, | 1564 | HSET2_R, |
| 1537 | POPC_C, | 1565 | POPC_C, |
| 1538 | POPC_R, | 1566 | POPC_R, |
| @@ -1738,6 +1766,7 @@ private: | |||
| 1738 | INST("111000101001----", Id::SSY, Type::Flow, "SSY"), | 1766 | INST("111000101001----", Id::SSY, Type::Flow, "SSY"), |
| 1739 | INST("111000101010----", Id::PBK, Type::Flow, "PBK"), | 1767 | INST("111000101010----", Id::PBK, Type::Flow, "PBK"), |
| 1740 | INST("111000100100----", Id::BRA, Type::Flow, "BRA"), | 1768 | INST("111000100100----", Id::BRA, Type::Flow, "BRA"), |
| 1769 | INST("111000100101----", Id::BRX, Type::Flow, "BRX"), | ||
| 1741 | INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), | 1770 | INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), |
| 1742 | INST("111000110100---", Id::BRK, Type::Flow, "BRK"), | 1771 | INST("111000110100---", Id::BRK, Type::Flow, "BRK"), |
| 1743 | INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), | 1772 | INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), |
| @@ -1760,7 +1789,7 @@ private: | |||
| 1760 | INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), | 1789 | INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), |
| 1761 | INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), | 1790 | INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), |
| 1762 | INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), | 1791 | INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), |
| 1763 | INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), | 1792 | INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), |
| 1764 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), | 1793 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), |
| 1765 | INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), | 1794 | INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), |
| 1766 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), | 1795 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), |
| @@ -1814,7 +1843,9 @@ private: | |||
| 1814 | INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), | 1843 | INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), |
| 1815 | INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), | 1844 | INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), |
| 1816 | INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), | 1845 | INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), |
| 1817 | INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"), | 1846 | INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), |
| 1847 | INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), | ||
| 1848 | INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), | ||
| 1818 | INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), | 1849 | INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), |
| 1819 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), | 1850 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), |
| 1820 | INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), | 1851 | INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index da8c715b6..1622332a4 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -32,7 +32,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { | |||
| 32 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) | 32 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) |
| 33 | : system{system}, renderer{renderer}, is_async{is_async} { | 33 | : system{system}, renderer{renderer}, is_async{is_async} { |
| 34 | auto& rasterizer{renderer.Rasterizer()}; | 34 | auto& rasterizer{renderer.Rasterizer()}; |
| 35 | memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer); | 35 | memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); |
| 36 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); | 36 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); |
| 37 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); | 37 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); |
| 38 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); | 38 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); |
| @@ -51,6 +51,14 @@ const Engines::Maxwell3D& GPU::Maxwell3D() const { | |||
| 51 | return *maxwell_3d; | 51 | return *maxwell_3d; |
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | Engines::KeplerCompute& GPU::KeplerCompute() { | ||
| 55 | return *kepler_compute; | ||
| 56 | } | ||
| 57 | |||
| 58 | const Engines::KeplerCompute& GPU::KeplerCompute() const { | ||
| 59 | return *kepler_compute; | ||
| 60 | } | ||
| 61 | |||
| 54 | MemoryManager& GPU::MemoryManager() { | 62 | MemoryManager& GPU::MemoryManager() { |
| 55 | return *memory_manager; | 63 | return *memory_manager; |
| 56 | } | 64 | } |
| @@ -189,12 +197,12 @@ enum class BufferMethods { | |||
| 189 | NotifyIntr = 0x8, | 197 | NotifyIntr = 0x8, |
| 190 | WrcacheFlush = 0x9, | 198 | WrcacheFlush = 0x9, |
| 191 | Unk28 = 0xA, | 199 | Unk28 = 0xA, |
| 192 | Unk2c = 0xB, | 200 | UnkCacheFlush = 0xB, |
| 193 | RefCnt = 0x14, | 201 | RefCnt = 0x14, |
| 194 | SemaphoreAcquire = 0x1A, | 202 | SemaphoreAcquire = 0x1A, |
| 195 | SemaphoreRelease = 0x1B, | 203 | SemaphoreRelease = 0x1B, |
| 196 | Unk70 = 0x1C, | 204 | FenceValue = 0x1C, |
| 197 | Unk74 = 0x1D, | 205 | FenceAction = 0x1D, |
| 198 | Unk78 = 0x1E, | 206 | Unk78 = 0x1E, |
| 199 | Unk7c = 0x1F, | 207 | Unk7c = 0x1F, |
| 200 | Yield = 0x20, | 208 | Yield = 0x20, |
| @@ -240,6 +248,10 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { | |||
| 240 | case BufferMethods::SemaphoreAddressLow: | 248 | case BufferMethods::SemaphoreAddressLow: |
| 241 | case BufferMethods::SemaphoreSequence: | 249 | case BufferMethods::SemaphoreSequence: |
| 242 | case BufferMethods::RefCnt: | 250 | case BufferMethods::RefCnt: |
| 251 | case BufferMethods::UnkCacheFlush: | ||
| 252 | case BufferMethods::WrcacheFlush: | ||
| 253 | case BufferMethods::FenceValue: | ||
| 254 | case BufferMethods::FenceAction: | ||
| 243 | break; | 255 | break; |
| 244 | case BufferMethods::SemaphoreTrigger: { | 256 | case BufferMethods::SemaphoreTrigger: { |
| 245 | ProcessSemaphoreTriggerMethod(); | 257 | ProcessSemaphoreTriggerMethod(); |
| @@ -250,21 +262,11 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { | |||
| 250 | LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); | 262 | LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); |
| 251 | break; | 263 | break; |
| 252 | } | 264 | } |
| 253 | case BufferMethods::WrcacheFlush: { | ||
| 254 | // TODO(Kmather73): Research and implement this method. | ||
| 255 | LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented"); | ||
| 256 | break; | ||
| 257 | } | ||
| 258 | case BufferMethods::Unk28: { | 265 | case BufferMethods::Unk28: { |
| 259 | // TODO(Kmather73): Research and implement this method. | 266 | // TODO(Kmather73): Research and implement this method. |
| 260 | LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); | 267 | LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); |
| 261 | break; | 268 | break; |
| 262 | } | 269 | } |
| 263 | case BufferMethods::Unk2c: { | ||
| 264 | // TODO(Kmather73): Research and implement this method. | ||
| 265 | LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented"); | ||
| 266 | break; | ||
| 267 | } | ||
| 268 | case BufferMethods::SemaphoreAcquire: { | 270 | case BufferMethods::SemaphoreAcquire: { |
| 269 | ProcessSemaphoreAcquire(); | 271 | ProcessSemaphoreAcquire(); |
| 270 | break; | 272 | break; |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 334dec48c..87c96f46b 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -159,6 +159,12 @@ public: | |||
| 159 | /// Returns a const reference to the Maxwell3D GPU engine. | 159 | /// Returns a const reference to the Maxwell3D GPU engine. |
| 160 | const Engines::Maxwell3D& Maxwell3D() const; | 160 | const Engines::Maxwell3D& Maxwell3D() const; |
| 161 | 161 | ||
| 162 | /// Returns a reference to the KeplerCompute GPU engine. | ||
| 163 | Engines::KeplerCompute& KeplerCompute(); | ||
| 164 | |||
| 165 | /// Returns a reference to the KeplerCompute GPU engine. | ||
| 166 | const Engines::KeplerCompute& KeplerCompute() const; | ||
| 167 | |||
| 162 | /// Returns a reference to the GPU memory manager. | 168 | /// Returns a reference to the GPU memory manager. |
| 163 | Tegra::MemoryManager& MemoryManager(); | 169 | Tegra::MemoryManager& MemoryManager(); |
| 164 | 170 | ||
| @@ -214,7 +220,12 @@ public: | |||
| 214 | 220 | ||
| 215 | u32 semaphore_acquire; | 221 | u32 semaphore_acquire; |
| 216 | u32 semaphore_release; | 222 | u32 semaphore_release; |
| 217 | INSERT_PADDING_WORDS(0xE4); | 223 | u32 fence_value; |
| 224 | union { | ||
| 225 | BitField<4, 4, u32> operation; | ||
| 226 | BitField<8, 8, u32> id; | ||
| 227 | } fence_action; | ||
| 228 | INSERT_PADDING_WORDS(0xE2); | ||
| 218 | 229 | ||
| 219 | // Puller state | 230 | // Puller state |
| 220 | u32 acquire_mode; | 231 | u32 acquire_mode; |
| @@ -306,6 +317,8 @@ ASSERT_REG_POSITION(semaphore_trigger, 0x7); | |||
| 306 | ASSERT_REG_POSITION(reference_count, 0x14); | 317 | ASSERT_REG_POSITION(reference_count, 0x14); |
| 307 | ASSERT_REG_POSITION(semaphore_acquire, 0x1A); | 318 | ASSERT_REG_POSITION(semaphore_acquire, 0x1A); |
| 308 | ASSERT_REG_POSITION(semaphore_release, 0x1B); | 319 | ASSERT_REG_POSITION(semaphore_release, 0x1B); |
| 320 | ASSERT_REG_POSITION(fence_value, 0x1C); | ||
| 321 | ASSERT_REG_POSITION(fence_action, 0x1D); | ||
| 309 | 322 | ||
| 310 | ASSERT_REG_POSITION(acquire_mode, 0x100); | 323 | ASSERT_REG_POSITION(acquire_mode, 0x100); |
| 311 | ASSERT_REG_POSITION(acquire_source, 0x101); | 324 | ASSERT_REG_POSITION(acquire_source, 0x101); |
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index c766ed692..9f59a2dc1 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp | |||
| @@ -4,14 +4,18 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "common/microprofile.h" | ||
| 7 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 8 | #include "video_core/macro_interpreter.h" | 9 | #include "video_core/macro_interpreter.h" |
| 9 | 10 | ||
| 11 | MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); | ||
| 12 | |||
| 10 | namespace Tegra { | 13 | namespace Tegra { |
| 11 | 14 | ||
| 12 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} | 15 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} |
| 13 | 16 | ||
| 14 | void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { | 17 | void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { |
| 18 | MICROPROFILE_SCOPE(MacroInterp); | ||
| 15 | Reset(); | 19 | Reset(); |
| 16 | registers[1] = parameters[0]; | 20 | registers[1] = parameters[0]; |
| 17 | this->parameters = std::move(parameters); | 21 | this->parameters = std::move(parameters); |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 322453116..bffae940c 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -5,13 +5,17 @@ | |||
| 5 | #include "common/alignment.h" | 5 | #include "common/alignment.h" |
| 6 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 7 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 8 | #include "core/core.h" | ||
| 9 | #include "core/hle/kernel/process.h" | ||
| 10 | #include "core/hle/kernel/vm_manager.h" | ||
| 8 | #include "core/memory.h" | 11 | #include "core/memory.h" |
| 9 | #include "video_core/memory_manager.h" | 12 | #include "video_core/memory_manager.h" |
| 10 | #include "video_core/rasterizer_interface.h" | 13 | #include "video_core/rasterizer_interface.h" |
| 11 | 14 | ||
| 12 | namespace Tegra { | 15 | namespace Tegra { |
| 13 | 16 | ||
| 14 | MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} { | 17 | MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) |
| 18 | : rasterizer{rasterizer}, system{system} { | ||
| 15 | std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); | 19 | std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); |
| 16 | std::fill(page_table.attributes.begin(), page_table.attributes.end(), | 20 | std::fill(page_table.attributes.begin(), page_table.attributes.end(), |
| 17 | Common::PageType::Unmapped); | 21 | Common::PageType::Unmapped); |
| @@ -49,6 +53,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { | |||
| 49 | const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; | 53 | const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; |
| 50 | 54 | ||
| 51 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); | 55 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); |
| 56 | ASSERT(system.CurrentProcess() | ||
| 57 | ->VMManager() | ||
| 58 | .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, | ||
| 59 | Kernel::MemoryAttribute::DeviceMapped) | ||
| 60 | .IsSuccess()); | ||
| 52 | 61 | ||
| 53 | return gpu_addr; | 62 | return gpu_addr; |
| 54 | } | 63 | } |
| @@ -59,7 +68,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) | |||
| 59 | const u64 aligned_size{Common::AlignUp(size, page_size)}; | 68 | const u64 aligned_size{Common::AlignUp(size, page_size)}; |
| 60 | 69 | ||
| 61 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); | 70 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); |
| 62 | 71 | ASSERT(system.CurrentProcess() | |
| 72 | ->VMManager() | ||
| 73 | .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, | ||
| 74 | Kernel::MemoryAttribute::DeviceMapped) | ||
| 75 | .IsSuccess()); | ||
| 63 | return gpu_addr; | 76 | return gpu_addr; |
| 64 | } | 77 | } |
| 65 | 78 | ||
| @@ -68,9 +81,16 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { | |||
| 68 | 81 | ||
| 69 | const u64 aligned_size{Common::AlignUp(size, page_size)}; | 82 | const u64 aligned_size{Common::AlignUp(size, page_size)}; |
| 70 | const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; | 83 | const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; |
| 84 | const auto cpu_addr = GpuToCpuAddress(gpu_addr); | ||
| 85 | ASSERT(cpu_addr); | ||
| 71 | 86 | ||
| 72 | rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); | 87 | rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); |
| 73 | UnmapRange(gpu_addr, aligned_size); | 88 | UnmapRange(gpu_addr, aligned_size); |
| 89 | ASSERT(system.CurrentProcess() | ||
| 90 | ->VMManager() | ||
| 91 | .SetMemoryAttribute(cpu_addr.value(), size, Kernel::MemoryAttribute::DeviceMapped, | ||
| 92 | Kernel::MemoryAttribute::None) | ||
| 93 | .IsSuccess()); | ||
| 74 | 94 | ||
| 75 | return gpu_addr; | 95 | return gpu_addr; |
| 76 | } | 96 | } |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 43a84bd52..aea010087 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -14,6 +14,10 @@ namespace VideoCore { | |||
| 14 | class RasterizerInterface; | 14 | class RasterizerInterface; |
| 15 | } | 15 | } |
| 16 | 16 | ||
| 17 | namespace Core { | ||
| 18 | class System; | ||
| 19 | } | ||
| 20 | |||
| 17 | namespace Tegra { | 21 | namespace Tegra { |
| 18 | 22 | ||
| 19 | /** | 23 | /** |
| @@ -47,7 +51,7 @@ struct VirtualMemoryArea { | |||
| 47 | 51 | ||
| 48 | class MemoryManager final { | 52 | class MemoryManager final { |
| 49 | public: | 53 | public: |
| 50 | explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer); | 54 | explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer); |
| 51 | ~MemoryManager(); | 55 | ~MemoryManager(); |
| 52 | 56 | ||
| 53 | GPUVAddr AllocateSpace(u64 size, u64 align); | 57 | GPUVAddr AllocateSpace(u64 size, u64 align); |
| @@ -173,6 +177,8 @@ private: | |||
| 173 | Common::PageTable page_table{page_bits}; | 177 | Common::PageTable page_table{page_bits}; |
| 174 | VMAMap vma_map; | 178 | VMAMap vma_map; |
| 175 | VideoCore::RasterizerInterface& rasterizer; | 179 | VideoCore::RasterizerInterface& rasterizer; |
| 180 | |||
| 181 | Core::System& system; | ||
| 176 | }; | 182 | }; |
| 177 | 183 | ||
| 178 | } // namespace Tegra | 184 | } // namespace Tegra |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 5ee4f8e8e..9881df0d5 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -34,6 +34,9 @@ public: | |||
| 34 | /// Clear the current framebuffer | 34 | /// Clear the current framebuffer |
| 35 | virtual void Clear() = 0; | 35 | virtual void Clear() = 0; |
| 36 | 36 | ||
| 37 | /// Dispatches a compute shader invocation | ||
| 38 | virtual void DispatchCompute(GPUVAddr code_addr) = 0; | ||
| 39 | |||
| 37 | /// Notify rasterizer that all caches should be flushed to Switch memory | 40 | /// Notify rasterizer that all caches should be flushed to Switch memory |
| 38 | virtual void FlushAll() = 0; | 41 | virtual void FlushAll() = 0; |
| 39 | 42 | ||
| @@ -47,6 +50,9 @@ public: | |||
| 47 | /// and invalidated | 50 | /// and invalidated |
| 48 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; | 51 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; |
| 49 | 52 | ||
| 53 | /// Notify rasterizer that a frame is about to finish | ||
| 54 | virtual void TickFrame() = 0; | ||
| 55 | |||
| 50 | /// Attempt to use a faster method to perform a surface copy | 56 | /// Attempt to use a faster method to perform a surface copy |
| 51 | virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 57 | virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 52 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 58 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2b9bd142e..2a9b523f5 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -2,103 +2,57 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | ||
| 6 | #include <memory> | 5 | #include <memory> |
| 7 | 6 | ||
| 8 | #include "common/alignment.h" | 7 | #include <glad/glad.h> |
| 9 | #include "core/core.h" | 8 | |
| 10 | #include "video_core/memory_manager.h" | 9 | #include "common/assert.h" |
| 11 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 10 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 12 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 13 | 13 | ||
| 14 | namespace OpenGL { | 14 | namespace OpenGL { |
| 15 | 15 | ||
| 16 | CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, | 16 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 17 | std::size_t alignment, u8* host_ptr) | 17 | std::size_t stream_size) |
| 18 | : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, | 18 | : VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer>{ |
| 19 | alignment{alignment} {} | 19 | rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {} |
| 20 | |||
| 21 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) | ||
| 22 | : RasterizerCache{rasterizer}, stream_buffer(size, true) {} | ||
| 23 | |||
| 24 | GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment, | ||
| 25 | bool cache) { | ||
| 26 | std::lock_guard lock{mutex}; | ||
| 27 | auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | ||
| 28 | |||
| 29 | // Cache management is a big overhead, so only cache entries with a given size. | ||
| 30 | // TODO: Figure out which size is the best for given games. | ||
| 31 | cache &= size >= 2048; | ||
| 32 | |||
| 33 | const auto& host_ptr{memory_manager.GetPointer(gpu_addr)}; | ||
| 34 | if (cache) { | ||
| 35 | auto entry = TryGet(host_ptr); | ||
| 36 | if (entry) { | ||
| 37 | if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { | ||
| 38 | return entry->GetOffset(); | ||
| 39 | } | ||
| 40 | Unregister(entry); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | 20 | ||
| 44 | AlignBuffer(alignment); | 21 | OGLBufferCache::~OGLBufferCache() = default; |
| 45 | const GLintptr uploaded_offset = buffer_offset; | ||
| 46 | 22 | ||
| 47 | if (!host_ptr) { | 23 | OGLBuffer OGLBufferCache::CreateBuffer(std::size_t size) { |
| 48 | return uploaded_offset; | 24 | OGLBuffer buffer; |
| 49 | } | 25 | buffer.Create(); |
| 50 | 26 | glNamedBufferData(buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); | |
| 51 | std::memcpy(buffer_ptr, host_ptr, size); | 27 | return buffer; |
| 52 | buffer_ptr += size; | ||
| 53 | buffer_offset += size; | ||
| 54 | |||
| 55 | if (cache) { | ||
| 56 | auto entry = std::make_shared<CachedBufferEntry>( | ||
| 57 | *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr); | ||
| 58 | Register(entry); | ||
| 59 | } | ||
| 60 | |||
| 61 | return uploaded_offset; | ||
| 62 | } | 28 | } |
| 63 | 29 | ||
| 64 | GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, | 30 | const GLuint* OGLBufferCache::ToHandle(const OGLBuffer& buffer) { |
| 65 | std::size_t alignment) { | 31 | return &buffer.handle; |
| 66 | std::lock_guard lock{mutex}; | ||
| 67 | AlignBuffer(alignment); | ||
| 68 | std::memcpy(buffer_ptr, raw_pointer, size); | ||
| 69 | const GLintptr uploaded_offset = buffer_offset; | ||
| 70 | |||
| 71 | buffer_ptr += size; | ||
| 72 | buffer_offset += size; | ||
| 73 | return uploaded_offset; | ||
| 74 | } | 32 | } |
| 75 | 33 | ||
| 76 | bool OGLBufferCache::Map(std::size_t max_size) { | 34 | const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { |
| 77 | bool invalidate; | 35 | static const GLuint null_buffer = 0; |
| 78 | std::tie(buffer_ptr, buffer_offset_base, invalidate) = | 36 | return &null_buffer; |
| 79 | stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); | ||
| 80 | buffer_offset = buffer_offset_base; | ||
| 81 | |||
| 82 | if (invalidate) { | ||
| 83 | InvalidateAll(); | ||
| 84 | } | ||
| 85 | return invalidate; | ||
| 86 | } | 37 | } |
| 87 | 38 | ||
| 88 | void OGLBufferCache::Unmap() { | 39 | void OGLBufferCache::UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, |
| 89 | stream_buffer.Unmap(buffer_offset - buffer_offset_base); | 40 | const u8* data) { |
| 41 | glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), | ||
| 42 | static_cast<GLsizeiptr>(size), data); | ||
| 90 | } | 43 | } |
| 91 | 44 | ||
| 92 | GLuint OGLBufferCache::GetHandle() const { | 45 | void OGLBufferCache::DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, |
| 93 | return stream_buffer.GetHandle(); | 46 | std::size_t size, u8* data) { |
| 47 | glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), | ||
| 48 | static_cast<GLsizeiptr>(size), data); | ||
| 94 | } | 49 | } |
| 95 | 50 | ||
| 96 | void OGLBufferCache::AlignBuffer(std::size_t alignment) { | 51 | void OGLBufferCache::CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, |
| 97 | // Align the offset, not the mapped pointer | 52 | std::size_t src_offset, std::size_t dst_offset, |
| 98 | const GLintptr offset_aligned = | 53 | std::size_t size) { |
| 99 | static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); | 54 | glCopyNamedBufferSubData(src.handle, dst.handle, static_cast<GLintptr>(src_offset), |
| 100 | buffer_ptr += offset_aligned - buffer_offset; | 55 | static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); |
| 101 | buffer_offset = offset_aligned; | ||
| 102 | } | 56 | } |
| 103 | 57 | ||
| 104 | } // namespace OpenGL | 58 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index f2347581b..8c8ac4038 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -4,80 +4,44 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstddef> | ||
| 8 | #include <memory> | 7 | #include <memory> |
| 9 | #include <tuple> | ||
| 10 | 8 | ||
| 11 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/buffer_cache.h" | ||
| 12 | #include "video_core/rasterizer_cache.h" | 11 | #include "video_core/rasterizer_cache.h" |
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 14 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 13 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 15 | 14 | ||
| 15 | namespace Core { | ||
| 16 | class System; | ||
| 17 | } | ||
| 18 | |||
| 16 | namespace OpenGL { | 19 | namespace OpenGL { |
| 17 | 20 | ||
| 21 | class OGLStreamBuffer; | ||
| 18 | class RasterizerOpenGL; | 22 | class RasterizerOpenGL; |
| 19 | 23 | ||
| 20 | class CachedBufferEntry final : public RasterizerCacheObject { | 24 | class OGLBufferCache final : public VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer> { |
| 21 | public: | ||
| 22 | explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, | ||
| 23 | std::size_t alignment, u8* host_ptr); | ||
| 24 | |||
| 25 | VAddr GetCpuAddr() const override { | ||
| 26 | return cpu_addr; | ||
| 27 | } | ||
| 28 | |||
| 29 | std::size_t GetSizeInBytes() const override { | ||
| 30 | return size; | ||
| 31 | } | ||
| 32 | |||
| 33 | std::size_t GetSize() const { | ||
| 34 | return size; | ||
| 35 | } | ||
| 36 | |||
| 37 | GLintptr GetOffset() const { | ||
| 38 | return offset; | ||
| 39 | } | ||
| 40 | |||
| 41 | std::size_t GetAlignment() const { | ||
| 42 | return alignment; | ||
| 43 | } | ||
| 44 | |||
| 45 | private: | ||
| 46 | VAddr cpu_addr{}; | ||
| 47 | std::size_t size{}; | ||
| 48 | GLintptr offset{}; | ||
| 49 | std::size_t alignment{}; | ||
| 50 | }; | ||
| 51 | |||
| 52 | class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | ||
| 53 | public: | 25 | public: |
| 54 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); | 26 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 55 | 27 | std::size_t stream_size); | |
| 56 | /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been | 28 | ~OGLBufferCache(); |
| 57 | /// allocated. | ||
| 58 | GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | ||
| 59 | bool cache = true); | ||
| 60 | 29 | ||
| 61 | /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. | 30 | const GLuint* GetEmptyBuffer(std::size_t) override; |
| 62 | GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); | ||
| 63 | |||
| 64 | bool Map(std::size_t max_size); | ||
| 65 | void Unmap(); | ||
| 66 | |||
| 67 | GLuint GetHandle() const; | ||
| 68 | 31 | ||
| 69 | protected: | 32 | protected: |
| 70 | void AlignBuffer(std::size_t alignment); | 33 | OGLBuffer CreateBuffer(std::size_t size) override; |
| 34 | |||
| 35 | const GLuint* ToHandle(const OGLBuffer& buffer) override; | ||
| 71 | 36 | ||
| 72 | // We do not have to flush this cache as things in it are never modified by us. | 37 | void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, |
| 73 | void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} | 38 | const u8* data) override; |
| 74 | 39 | ||
| 75 | private: | 40 | void DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, |
| 76 | OGLStreamBuffer stream_buffer; | 41 | u8* data) override; |
| 77 | 42 | ||
| 78 | u8* buffer_ptr = nullptr; | 43 | void CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, std::size_t src_offset, |
| 79 | GLintptr buffer_offset = 0; | 44 | std::size_t dst_offset, std::size_t size) override; |
| 80 | GLintptr buffer_offset_base = 0; | ||
| 81 | }; | 45 | }; |
| 82 | 46 | ||
| 83 | } // namespace OpenGL | 47 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a48e14d2e..85424a4c9 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -24,8 +24,10 @@ T GetInteger(GLenum pname) { | |||
| 24 | 24 | ||
| 25 | Device::Device() { | 25 | Device::Device() { |
| 26 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 26 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 27 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | ||
| 27 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 28 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
| 28 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); | 29 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); |
| 30 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; | ||
| 29 | has_variable_aoffi = TestVariableAoffi(); | 31 | has_variable_aoffi = TestVariableAoffi(); |
| 30 | has_component_indexing_bug = TestComponentIndexingBug(); | 32 | has_component_indexing_bug = TestComponentIndexingBug(); |
| 31 | } | 33 | } |
| @@ -34,6 +36,7 @@ Device::Device(std::nullptr_t) { | |||
| 34 | uniform_buffer_alignment = 0; | 36 | uniform_buffer_alignment = 0; |
| 35 | max_vertex_attributes = 16; | 37 | max_vertex_attributes = 16; |
| 36 | max_varyings = 15; | 38 | max_varyings = 15; |
| 39 | has_vertex_viewport_layer = true; | ||
| 37 | has_variable_aoffi = true; | 40 | has_variable_aoffi = true; |
| 38 | has_component_indexing_bug = false; | 41 | has_component_indexing_bug = false; |
| 39 | } | 42 | } |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8c8c93760..dc883722d 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -18,6 +18,10 @@ public: | |||
| 18 | return uniform_buffer_alignment; | 18 | return uniform_buffer_alignment; |
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | std::size_t GetShaderStorageBufferAlignment() const { | ||
| 22 | return shader_storage_alignment; | ||
| 23 | } | ||
| 24 | |||
| 21 | u32 GetMaxVertexAttributes() const { | 25 | u32 GetMaxVertexAttributes() const { |
| 22 | return max_vertex_attributes; | 26 | return max_vertex_attributes; |
| 23 | } | 27 | } |
| @@ -26,6 +30,10 @@ public: | |||
| 26 | return max_varyings; | 30 | return max_varyings; |
| 27 | } | 31 | } |
| 28 | 32 | ||
| 33 | bool HasVertexViewportLayer() const { | ||
| 34 | return has_vertex_viewport_layer; | ||
| 35 | } | ||
| 36 | |||
| 29 | bool HasVariableAoffi() const { | 37 | bool HasVariableAoffi() const { |
| 30 | return has_variable_aoffi; | 38 | return has_variable_aoffi; |
| 31 | } | 39 | } |
| @@ -39,8 +47,10 @@ private: | |||
| 39 | static bool TestComponentIndexingBug(); | 47 | static bool TestComponentIndexingBug(); |
| 40 | 48 | ||
| 41 | std::size_t uniform_buffer_alignment{}; | 49 | std::size_t uniform_buffer_alignment{}; |
| 50 | std::size_t shader_storage_alignment{}; | ||
| 42 | u32 max_vertex_attributes{}; | 51 | u32 max_vertex_attributes{}; |
| 43 | u32 max_varyings{}; | 52 | u32 max_varyings{}; |
| 53 | bool has_vertex_viewport_layer{}; | ||
| 44 | bool has_variable_aoffi{}; | 54 | bool has_variable_aoffi{}; |
| 45 | bool has_component_indexing_bug{}; | 55 | bool has_component_indexing_bug{}; |
| 46 | }; | 56 | }; |
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp deleted file mode 100644 index d5e385151..000000000 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ /dev/null | |||
| @@ -1,102 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <glad/glad.h> | ||
| 6 | |||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "core/core.h" | ||
| 9 | #include "video_core/memory_manager.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_global_cache.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 13 | #include "video_core/renderer_opengl/utils.h" | ||
| 14 | |||
| 15 | namespace OpenGL { | ||
| 16 | |||
| 17 | CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size) | ||
| 18 | : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size}, | ||
| 19 | max_size{max_size} { | ||
| 20 | buffer.Create(); | ||
| 21 | LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); | ||
| 22 | } | ||
| 23 | |||
| 24 | CachedGlobalRegion::~CachedGlobalRegion() = default; | ||
| 25 | |||
| 26 | void CachedGlobalRegion::Reload(u32 size_) { | ||
| 27 | size = size_; | ||
| 28 | if (size > max_size) { | ||
| 29 | size = max_size; | ||
| 30 | LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_, | ||
| 31 | max_size); | ||
| 32 | } | ||
| 33 | glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW); | ||
| 34 | } | ||
| 35 | |||
| 36 | void CachedGlobalRegion::Flush() { | ||
| 37 | LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr); | ||
| 38 | glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr); | ||
| 39 | } | ||
| 40 | |||
| 41 | GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { | ||
| 42 | const auto search{reserve.find(addr)}; | ||
| 43 | if (search == reserve.end()) { | ||
| 44 | return {}; | ||
| 45 | } | ||
| 46 | return search->second; | ||
| 47 | } | ||
| 48 | |||
| 49 | GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, | ||
| 50 | u32 size) { | ||
| 51 | GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; | ||
| 52 | if (!region) { | ||
| 53 | // No reserved surface available, create a new one and reserve it | ||
| 54 | auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; | ||
| 55 | const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)}; | ||
| 56 | ASSERT(cpu_addr); | ||
| 57 | |||
| 58 | region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size); | ||
| 59 | ReserveGlobalRegion(region); | ||
| 60 | } | ||
| 61 | region->Reload(size); | ||
| 62 | return region; | ||
| 63 | } | ||
| 64 | |||
| 65 | void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { | ||
| 66 | reserve.insert_or_assign(region->GetCacheAddr(), std::move(region)); | ||
| 67 | } | ||
| 68 | |||
| 69 | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) | ||
| 70 | : RasterizerCache{rasterizer} { | ||
| 71 | GLint max_ssbo_size_; | ||
| 72 | glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_); | ||
| 73 | max_ssbo_size = static_cast<u32>(max_ssbo_size_); | ||
| 74 | } | ||
| 75 | |||
| 76 | GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( | ||
| 77 | const GLShader::GlobalMemoryEntry& global_region, | ||
| 78 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { | ||
| 79 | std::lock_guard lock{mutex}; | ||
| 80 | |||
| 81 | auto& gpu{Core::System::GetInstance().GPU()}; | ||
| 82 | auto& memory_manager{gpu.MemoryManager()}; | ||
| 83 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; | ||
| 84 | const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + | ||
| 85 | global_region.GetCbufOffset()}; | ||
| 86 | const auto actual_addr{memory_manager.Read<u64>(addr)}; | ||
| 87 | const auto size{memory_manager.Read<u32>(addr + 8)}; | ||
| 88 | |||
| 89 | // Look up global region in the cache based on address | ||
| 90 | const auto& host_ptr{memory_manager.GetPointer(actual_addr)}; | ||
| 91 | GlobalRegion region{TryGet(host_ptr)}; | ||
| 92 | |||
| 93 | if (!region) { | ||
| 94 | // No global region found - create a new one | ||
| 95 | region = GetUncachedGlobalRegion(actual_addr, host_ptr, size); | ||
| 96 | Register(region); | ||
| 97 | } | ||
| 98 | |||
| 99 | return region; | ||
| 100 | } | ||
| 101 | |||
| 102 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h deleted file mode 100644 index 2d467a240..000000000 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ /dev/null | |||
| @@ -1,82 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <unordered_map> | ||
| 9 | |||
| 10 | #include <glad/glad.h> | ||
| 11 | |||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/rasterizer_cache.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 17 | |||
| 18 | namespace OpenGL { | ||
| 19 | |||
| 20 | namespace GLShader { | ||
| 21 | class GlobalMemoryEntry; | ||
| 22 | } | ||
| 23 | |||
| 24 | class RasterizerOpenGL; | ||
| 25 | class CachedGlobalRegion; | ||
| 26 | using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; | ||
| 27 | |||
| 28 | class CachedGlobalRegion final : public RasterizerCacheObject { | ||
| 29 | public: | ||
| 30 | explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size); | ||
| 31 | ~CachedGlobalRegion(); | ||
| 32 | |||
| 33 | VAddr GetCpuAddr() const override { | ||
| 34 | return cpu_addr; | ||
| 35 | } | ||
| 36 | |||
| 37 | std::size_t GetSizeInBytes() const override { | ||
| 38 | return size; | ||
| 39 | } | ||
| 40 | |||
| 41 | /// Gets the GL program handle for the buffer | ||
| 42 | GLuint GetBufferHandle() const { | ||
| 43 | return buffer.handle; | ||
| 44 | } | ||
| 45 | |||
| 46 | /// Reloads the global region from guest memory | ||
| 47 | void Reload(u32 size_); | ||
| 48 | |||
| 49 | void Flush(); | ||
| 50 | |||
| 51 | private: | ||
| 52 | VAddr cpu_addr{}; | ||
| 53 | u8* host_ptr{}; | ||
| 54 | u32 size{}; | ||
| 55 | u32 max_size{}; | ||
| 56 | |||
| 57 | OGLBuffer buffer; | ||
| 58 | }; | ||
| 59 | |||
| 60 | class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { | ||
| 61 | public: | ||
| 62 | explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); | ||
| 63 | |||
| 64 | /// Gets the current specified shader stage program | ||
| 65 | GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor, | ||
| 66 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); | ||
| 67 | |||
| 68 | protected: | ||
| 69 | void FlushObjectInner(const GlobalRegion& object) override { | ||
| 70 | object->Flush(); | ||
| 71 | } | ||
| 72 | |||
| 73 | private: | ||
| 74 | GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; | ||
| 75 | GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size); | ||
| 76 | void ReserveGlobalRegion(GlobalRegion region); | ||
| 77 | |||
| 78 | std::unordered_map<CacheAddr, GlobalRegion> reserve; | ||
| 79 | u32 max_ssbo_size{}; | ||
| 80 | }; | ||
| 81 | |||
| 82 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f45a3c5ef..c28ae795c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <bitset> | ||
| 7 | #include <memory> | 8 | #include <memory> |
| 8 | #include <string> | 9 | #include <string> |
| 9 | #include <string_view> | 10 | #include <string_view> |
| @@ -19,7 +20,9 @@ | |||
| 19 | #include "core/core.h" | 20 | #include "core/core.h" |
| 20 | #include "core/hle/kernel/process.h" | 21 | #include "core/hle/kernel/process.h" |
| 21 | #include "core/settings.h" | 22 | #include "core/settings.h" |
| 23 | #include "video_core/engines/kepler_compute.h" | ||
| 22 | #include "video_core/engines/maxwell_3d.h" | 24 | #include "video_core/engines/maxwell_3d.h" |
| 25 | #include "video_core/memory_manager.h" | ||
| 23 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 26 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 24 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 27 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 28 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| @@ -80,16 +83,31 @@ struct DrawParameters { | |||
| 80 | } | 83 | } |
| 81 | }; | 84 | }; |
| 82 | 85 | ||
| 86 | static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | ||
| 87 | const GLShader::ConstBufferEntry& entry) { | ||
| 88 | if (!entry.IsIndirect()) { | ||
| 89 | return entry.GetSize(); | ||
| 90 | } | ||
| 91 | |||
| 92 | if (buffer.size > Maxwell::MaxConstBufferSize) { | ||
| 93 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, | ||
| 94 | Maxwell::MaxConstBufferSize); | ||
| 95 | return Maxwell::MaxConstBufferSize; | ||
| 96 | } | ||
| 97 | |||
| 98 | return buffer.size; | ||
| 99 | } | ||
| 100 | |||
| 83 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 101 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| 84 | ScreenInfo& info) | 102 | ScreenInfo& info) |
| 85 | : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, | 103 | : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, |
| 86 | global_cache{*this}, system{system}, screen_info{info}, | 104 | system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} { |
| 87 | buffer_cache(*this, STREAM_BUFFER_SIZE) { | ||
| 88 | OpenGLState::ApplyDefaultState(); | 105 | OpenGLState::ApplyDefaultState(); |
| 89 | 106 | ||
| 90 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); | 107 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); |
| 91 | state.draw.shader_program = 0; | 108 | state.draw.shader_program = 0; |
| 92 | state.Apply(); | 109 | state.Apply(); |
| 110 | clear_framebuffer.Create(); | ||
| 93 | 111 | ||
| 94 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); | 112 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); |
| 95 | CheckExtensions(); | 113 | CheckExtensions(); |
| @@ -109,10 +127,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 109 | auto& gpu = system.GPU().Maxwell3D(); | 127 | auto& gpu = system.GPU().Maxwell3D(); |
| 110 | const auto& regs = gpu.regs; | 128 | const auto& regs = gpu.regs; |
| 111 | 129 | ||
| 112 | if (!gpu.dirty_flags.vertex_attrib_format) { | 130 | if (!gpu.dirty.vertex_attrib_format) { |
| 113 | return state.draw.vertex_array; | 131 | return state.draw.vertex_array; |
| 114 | } | 132 | } |
| 115 | gpu.dirty_flags.vertex_attrib_format = false; | 133 | gpu.dirty.vertex_attrib_format = false; |
| 116 | 134 | ||
| 117 | MICROPROFILE_SCOPE(OpenGL_VAO); | 135 | MICROPROFILE_SCOPE(OpenGL_VAO); |
| 118 | 136 | ||
| @@ -129,8 +147,6 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 129 | state.draw.vertex_array = vao; | 147 | state.draw.vertex_array = vao; |
| 130 | state.ApplyVertexArrayState(); | 148 | state.ApplyVertexArrayState(); |
| 131 | 149 | ||
| 132 | glVertexArrayElementBuffer(vao, buffer_cache.GetHandle()); | ||
| 133 | |||
| 134 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. | 150 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. |
| 135 | // Enables the first 16 vertex attributes always, as we don't know which ones are actually | 151 | // Enables the first 16 vertex attributes always, as we don't know which ones are actually |
| 136 | // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 | 152 | // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 |
| @@ -168,7 +184,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 168 | } | 184 | } |
| 169 | 185 | ||
| 170 | // Rebinding the VAO invalidates the vertex buffer bindings. | 186 | // Rebinding the VAO invalidates the vertex buffer bindings. |
| 171 | gpu.dirty_flags.vertex_array.set(); | 187 | gpu.dirty.ResetVertexArrays(); |
| 172 | 188 | ||
| 173 | state.draw.vertex_array = vao_entry.handle; | 189 | state.draw.vertex_array = vao_entry.handle; |
| 174 | return vao_entry.handle; | 190 | return vao_entry.handle; |
| @@ -176,17 +192,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 176 | 192 | ||
| 177 | void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | 193 | void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { |
| 178 | auto& gpu = system.GPU().Maxwell3D(); | 194 | auto& gpu = system.GPU().Maxwell3D(); |
| 179 | const auto& regs = gpu.regs; | 195 | if (!gpu.dirty.vertex_array_buffers) |
| 180 | |||
| 181 | if (gpu.dirty_flags.vertex_array.none()) | ||
| 182 | return; | 196 | return; |
| 197 | gpu.dirty.vertex_array_buffers = false; | ||
| 198 | |||
| 199 | const auto& regs = gpu.regs; | ||
| 183 | 200 | ||
| 184 | MICROPROFILE_SCOPE(OpenGL_VB); | 201 | MICROPROFILE_SCOPE(OpenGL_VB); |
| 185 | 202 | ||
| 186 | // Upload all guest vertex arrays sequentially to our buffer | 203 | // Upload all guest vertex arrays sequentially to our buffer |
| 187 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 204 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 188 | if (!gpu.dirty_flags.vertex_array[index]) | 205 | if (!gpu.dirty.vertex_array[index]) |
| 189 | continue; | 206 | continue; |
| 207 | gpu.dirty.vertex_array[index] = false; | ||
| 208 | gpu.dirty.vertex_instance[index] = false; | ||
| 190 | 209 | ||
| 191 | const auto& vertex_array = regs.vertex_array[index]; | 210 | const auto& vertex_array = regs.vertex_array[index]; |
| 192 | if (!vertex_array.IsEnabled()) | 211 | if (!vertex_array.IsEnabled()) |
| @@ -197,11 +216,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 197 | 216 | ||
| 198 | ASSERT(end > start); | 217 | ASSERT(end > start); |
| 199 | const u64 size = end - start + 1; | 218 | const u64 size = end - start + 1; |
| 200 | const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size); | 219 | const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); |
| 201 | 220 | ||
| 202 | // Bind the vertex array to the buffer at the current offset. | 221 | // Bind the vertex array to the buffer at the current offset. |
| 203 | glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset, | 222 | vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset, |
| 204 | vertex_array.stride); | 223 | vertex_array.stride); |
| 205 | 224 | ||
| 206 | if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { | 225 | if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { |
| 207 | // Enable vertex buffer instancing with the specified divisor. | 226 | // Enable vertex buffer instancing with the specified divisor. |
| @@ -211,11 +230,47 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 211 | glVertexArrayBindingDivisor(vao, index, 0); | 230 | glVertexArrayBindingDivisor(vao, index, 0); |
| 212 | } | 231 | } |
| 213 | } | 232 | } |
| 233 | } | ||
| 234 | |||
| 235 | void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { | ||
| 236 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 237 | |||
| 238 | if (!gpu.dirty.vertex_instances) | ||
| 239 | return; | ||
| 240 | gpu.dirty.vertex_instances = false; | ||
| 241 | |||
| 242 | const auto& regs = gpu.regs; | ||
| 243 | // Upload all guest vertex arrays sequentially to our buffer | ||
| 244 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 245 | if (!gpu.dirty.vertex_instance[index]) | ||
| 246 | continue; | ||
| 247 | |||
| 248 | gpu.dirty.vertex_instance[index] = false; | ||
| 249 | |||
| 250 | if (regs.instanced_arrays.IsInstancingEnabled(index) && | ||
| 251 | regs.vertex_array[index].divisor != 0) { | ||
| 252 | // Enable vertex buffer instancing with the specified divisor. | ||
| 253 | glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor); | ||
| 254 | } else { | ||
| 255 | // Disable the vertex buffer instancing. | ||
| 256 | glVertexArrayBindingDivisor(vao, index, 0); | ||
| 257 | } | ||
| 258 | } | ||
| 259 | } | ||
| 214 | 260 | ||
| 215 | gpu.dirty_flags.vertex_array.reset(); | 261 | GLintptr RasterizerOpenGL::SetupIndexBuffer() { |
| 262 | if (accelerate_draw != AccelDraw::Indexed) { | ||
| 263 | return 0; | ||
| 264 | } | ||
| 265 | MICROPROFILE_SCOPE(OpenGL_Index); | ||
| 266 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 267 | const std::size_t size = CalculateIndexBufferSize(); | ||
| 268 | const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); | ||
| 269 | vertex_array_pushbuffer.SetIndexBuffer(buffer); | ||
| 270 | return offset; | ||
| 216 | } | 271 | } |
| 217 | 272 | ||
| 218 | DrawParameters RasterizerOpenGL::SetupDraw() { | 273 | DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) { |
| 219 | const auto& gpu = system.GPU().Maxwell3D(); | 274 | const auto& gpu = system.GPU().Maxwell3D(); |
| 220 | const auto& regs = gpu.regs; | 275 | const auto& regs = gpu.regs; |
| 221 | const bool is_indexed = accelerate_draw == AccelDraw::Indexed; | 276 | const bool is_indexed = accelerate_draw == AccelDraw::Indexed; |
| @@ -227,11 +282,9 @@ DrawParameters RasterizerOpenGL::SetupDraw() { | |||
| 227 | params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); | 282 | params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); |
| 228 | 283 | ||
| 229 | if (is_indexed) { | 284 | if (is_indexed) { |
| 230 | MICROPROFILE_SCOPE(OpenGL_Index); | ||
| 231 | params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); | 285 | params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); |
| 232 | params.count = regs.index_array.count; | 286 | params.count = regs.index_array.count; |
| 233 | params.index_buffer_offset = | 287 | params.index_buffer_offset = index_buffer_offset; |
| 234 | buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize()); | ||
| 235 | params.base_vertex = static_cast<GLint>(regs.vb_element_base); | 288 | params.base_vertex = static_cast<GLint>(regs.vb_element_base); |
| 236 | } else { | 289 | } else { |
| 237 | params.count = regs.vertex_buffer.count; | 290 | params.count = regs.vertex_buffer.count; |
| @@ -247,10 +300,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 247 | BaseBindings base_bindings; | 300 | BaseBindings base_bindings; |
| 248 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 301 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 249 | 302 | ||
| 250 | // Prepare packed bindings | ||
| 251 | bind_ubo_pushbuffer.Setup(base_bindings.cbuf); | ||
| 252 | bind_ssbo_pushbuffer.Setup(base_bindings.gmem); | ||
| 253 | |||
| 254 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 303 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 255 | const auto& shader_config = gpu.regs.shader_config[index]; | 304 | const auto& shader_config = gpu.regs.shader_config[index]; |
| 256 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; | 305 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; |
| @@ -271,18 +320,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 271 | 320 | ||
| 272 | GLShader::MaxwellUniformData ubo{}; | 321 | GLShader::MaxwellUniformData ubo{}; |
| 273 | ubo.SetFromRegs(gpu, stage); | 322 | ubo.SetFromRegs(gpu, stage); |
| 274 | const GLintptr offset = | 323 | const auto [buffer, offset] = |
| 275 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); | 324 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); |
| 276 | 325 | ||
| 277 | // Bind the emulation info buffer | 326 | // Bind the emulation info buffer |
| 278 | bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, | 327 | bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo))); |
| 279 | static_cast<GLsizeiptr>(sizeof(ubo))); | ||
| 280 | 328 | ||
| 281 | Shader shader{shader_cache.GetStageProgram(program)}; | 329 | Shader shader{shader_cache.GetStageProgram(program)}; |
| 282 | 330 | ||
| 283 | const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)}; | 331 | const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); |
| 284 | SetupDrawConstBuffers(stage_enum, shader); | 332 | SetupDrawConstBuffers(stage_enum, shader); |
| 285 | SetupGlobalRegions(stage_enum, shader); | 333 | SetupDrawGlobalMemory(stage_enum, shader); |
| 286 | const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; | 334 | const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; |
| 287 | 335 | ||
| 288 | const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; | 336 | const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; |
| @@ -321,12 +369,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 321 | base_bindings = next_bindings; | 369 | base_bindings = next_bindings; |
| 322 | } | 370 | } |
| 323 | 371 | ||
| 324 | bind_ubo_pushbuffer.Bind(); | ||
| 325 | bind_ssbo_pushbuffer.Bind(); | ||
| 326 | |||
| 327 | SyncClipEnabled(clip_distances); | 372 | SyncClipEnabled(clip_distances); |
| 328 | 373 | ||
| 329 | gpu.dirty_flags.shaders = false; | 374 | gpu.dirty.shaders = false; |
| 330 | } | 375 | } |
| 331 | 376 | ||
| 332 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | 377 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { |
| @@ -409,13 +454,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 409 | 454 | ||
| 410 | const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, | 455 | const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, |
| 411 | single_color_target}; | 456 | single_color_target}; |
| 412 | if (fb_config_state == current_framebuffer_config_state && | 457 | if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) { |
| 413 | gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { | ||
| 414 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or | 458 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or |
| 415 | // single color targets). This is done because the guest registers may not change but the | 459 | // single color targets). This is done because the guest registers may not change but the |
| 416 | // host framebuffer may contain different attachments | 460 | // host framebuffer may contain different attachments |
| 417 | return current_depth_stencil_usage; | 461 | return current_depth_stencil_usage; |
| 418 | } | 462 | } |
| 463 | gpu.dirty.render_settings = false; | ||
| 419 | current_framebuffer_config_state = fb_config_state; | 464 | current_framebuffer_config_state = fb_config_state; |
| 420 | 465 | ||
| 421 | texture_cache.GuardRenderTargets(true); | 466 | texture_cache.GuardRenderTargets(true); |
| @@ -504,13 +549,71 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 504 | return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; | 549 | return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; |
| 505 | } | 550 | } |
| 506 | 551 | ||
| 552 | void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, | ||
| 553 | bool using_depth_fb, bool using_stencil_fb) { | ||
| 554 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 555 | const auto& regs = gpu.regs; | ||
| 556 | |||
| 557 | texture_cache.GuardRenderTargets(true); | ||
| 558 | View color_surface{}; | ||
| 559 | if (using_color_fb) { | ||
| 560 | color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false); | ||
| 561 | } | ||
| 562 | View depth_surface{}; | ||
| 563 | if (using_depth_fb || using_stencil_fb) { | ||
| 564 | depth_surface = texture_cache.GetDepthBufferSurface(false); | ||
| 565 | } | ||
| 566 | texture_cache.GuardRenderTargets(false); | ||
| 567 | |||
| 568 | current_state.draw.draw_framebuffer = clear_framebuffer.handle; | ||
| 569 | current_state.ApplyFramebufferState(); | ||
| 570 | |||
| 571 | if (color_surface) { | ||
| 572 | color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); | ||
| 573 | } else { | ||
| 574 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 575 | } | ||
| 576 | |||
| 577 | if (depth_surface) { | ||
| 578 | const auto& params = depth_surface->GetSurfaceParams(); | ||
| 579 | switch (params.type) { | ||
| 580 | case VideoCore::Surface::SurfaceType::Depth: { | ||
| 581 | depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | ||
| 582 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 583 | break; | ||
| 584 | } | ||
| 585 | case VideoCore::Surface::SurfaceType::DepthStencil: { | ||
| 586 | depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | ||
| 587 | break; | ||
| 588 | } | ||
| 589 | default: { UNIMPLEMENTED(); } | ||
| 590 | } | ||
| 591 | } else { | ||
| 592 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 593 | 0); | ||
| 594 | } | ||
| 595 | } | ||
| 596 | |||
| 507 | void RasterizerOpenGL::Clear() { | 597 | void RasterizerOpenGL::Clear() { |
| 508 | const auto& regs = system.GPU().Maxwell3D().regs; | 598 | const auto& maxwell3d = system.GPU().Maxwell3D(); |
| 599 | |||
| 600 | if (!maxwell3d.ShouldExecute()) { | ||
| 601 | return; | ||
| 602 | } | ||
| 603 | |||
| 604 | const auto& regs = maxwell3d.regs; | ||
| 509 | bool use_color{}; | 605 | bool use_color{}; |
| 510 | bool use_depth{}; | 606 | bool use_depth{}; |
| 511 | bool use_stencil{}; | 607 | bool use_stencil{}; |
| 512 | 608 | ||
| 513 | OpenGLState clear_state; | 609 | OpenGLState prev_state{OpenGLState::GetCurState()}; |
| 610 | SCOPE_EXIT({ | ||
| 611 | prev_state.AllDirty(); | ||
| 612 | prev_state.Apply(); | ||
| 613 | }); | ||
| 614 | |||
| 615 | OpenGLState clear_state{OpenGLState::GetCurState()}; | ||
| 616 | clear_state.SetDefaultViewports(); | ||
| 514 | if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || | 617 | if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || |
| 515 | regs.clear_buffers.A) { | 618 | regs.clear_buffers.A) { |
| 516 | use_color = true; | 619 | use_color = true; |
| @@ -530,6 +633,7 @@ void RasterizerOpenGL::Clear() { | |||
| 530 | // true. | 633 | // true. |
| 531 | clear_state.depth.test_enabled = true; | 634 | clear_state.depth.test_enabled = true; |
| 532 | clear_state.depth.test_func = GL_ALWAYS; | 635 | clear_state.depth.test_func = GL_ALWAYS; |
| 636 | clear_state.depth.write_mask = GL_TRUE; | ||
| 533 | } | 637 | } |
| 534 | if (regs.clear_buffers.S) { | 638 | if (regs.clear_buffers.S) { |
| 535 | ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); | 639 | ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); |
| @@ -566,8 +670,9 @@ void RasterizerOpenGL::Clear() { | |||
| 566 | return; | 670 | return; |
| 567 | } | 671 | } |
| 568 | 672 | ||
| 569 | const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( | 673 | ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil); |
| 570 | clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); | 674 | |
| 675 | SyncViewport(clear_state); | ||
| 571 | if (regs.clear_flags.scissor) { | 676 | if (regs.clear_flags.scissor) { |
| 572 | SyncScissorTest(clear_state); | 677 | SyncScissorTest(clear_state); |
| 573 | } | 678 | } |
| @@ -576,21 +681,18 @@ void RasterizerOpenGL::Clear() { | |||
| 576 | clear_state.EmulateViewportWithScissor(); | 681 | clear_state.EmulateViewportWithScissor(); |
| 577 | } | 682 | } |
| 578 | 683 | ||
| 579 | clear_state.ApplyColorMask(); | 684 | clear_state.AllDirty(); |
| 580 | clear_state.ApplyDepth(); | 685 | clear_state.Apply(); |
| 581 | clear_state.ApplyStencilTest(); | ||
| 582 | clear_state.ApplyViewport(); | ||
| 583 | clear_state.ApplyFramebufferState(); | ||
| 584 | 686 | ||
| 585 | if (use_color) { | 687 | if (use_color) { |
| 586 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); | 688 | glClearBufferfv(GL_COLOR, 0, regs.clear_color); |
| 587 | } | 689 | } |
| 588 | 690 | ||
| 589 | if (clear_depth && clear_stencil) { | 691 | if (use_depth && use_stencil) { |
| 590 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); | 692 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); |
| 591 | } else if (clear_depth) { | 693 | } else if (use_depth) { |
| 592 | glClearBufferfv(GL_DEPTH, 0, ®s.clear_depth); | 694 | glClearBufferfv(GL_DEPTH, 0, ®s.clear_depth); |
| 593 | } else if (clear_stencil) { | 695 | } else if (use_stencil) { |
| 594 | glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); | 696 | glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); |
| 595 | } | 697 | } |
| 596 | } | 698 | } |
| @@ -601,6 +703,11 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 601 | 703 | ||
| 602 | MICROPROFILE_SCOPE(OpenGL_Drawing); | 704 | MICROPROFILE_SCOPE(OpenGL_Drawing); |
| 603 | auto& gpu = system.GPU().Maxwell3D(); | 705 | auto& gpu = system.GPU().Maxwell3D(); |
| 706 | |||
| 707 | if (!gpu.ShouldExecute()) { | ||
| 708 | return; | ||
| 709 | } | ||
| 710 | |||
| 604 | const auto& regs = gpu.regs; | 711 | const auto& regs = gpu.regs; |
| 605 | 712 | ||
| 606 | SyncColorMask(); | 713 | SyncColorMask(); |
| @@ -634,26 +741,47 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 634 | Maxwell::MaxShaderStage; | 741 | Maxwell::MaxShaderStage; |
| 635 | 742 | ||
| 636 | // Add space for at least 18 constant buffers | 743 | // Add space for at least 18 constant buffers |
| 637 | buffer_size += | 744 | buffer_size += Maxwell::MaxConstBuffers * |
| 638 | Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); | 745 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); |
| 639 | 746 | ||
| 640 | const bool invalidate = buffer_cache.Map(buffer_size); | 747 | // Prepare the vertex array. |
| 641 | if (invalidate) { | 748 | buffer_cache.Map(buffer_size); |
| 642 | // As all cached buffers are invalidated, we need to recheck their state. | ||
| 643 | gpu.dirty_flags.vertex_array.set(); | ||
| 644 | } | ||
| 645 | 749 | ||
| 750 | // Prepare vertex array format. | ||
| 646 | const GLuint vao = SetupVertexFormat(); | 751 | const GLuint vao = SetupVertexFormat(); |
| 752 | vertex_array_pushbuffer.Setup(vao); | ||
| 753 | |||
| 754 | // Upload vertex and index data. | ||
| 647 | SetupVertexBuffer(vao); | 755 | SetupVertexBuffer(vao); |
| 756 | SetupVertexInstances(vao); | ||
| 757 | const GLintptr index_buffer_offset = SetupIndexBuffer(); | ||
| 758 | |||
| 759 | // Setup draw parameters. It will automatically choose what glDraw* method to use. | ||
| 760 | const DrawParameters params = SetupDraw(index_buffer_offset); | ||
| 761 | |||
| 762 | // Prepare packed bindings. | ||
| 763 | bind_ubo_pushbuffer.Setup(0); | ||
| 764 | bind_ssbo_pushbuffer.Setup(0); | ||
| 648 | 765 | ||
| 649 | DrawParameters params = SetupDraw(); | 766 | // Setup shaders and their used resources. |
| 650 | texture_cache.GuardSamplers(true); | 767 | texture_cache.GuardSamplers(true); |
| 651 | SetupShaders(params.primitive_mode); | 768 | SetupShaders(params.primitive_mode); |
| 652 | texture_cache.GuardSamplers(false); | 769 | texture_cache.GuardSamplers(false); |
| 653 | 770 | ||
| 654 | ConfigureFramebuffers(state); | 771 | ConfigureFramebuffers(state); |
| 655 | 772 | ||
| 656 | buffer_cache.Unmap(); | 773 | // Signal the buffer cache that we are not going to upload more things. |
| 774 | const bool invalidate = buffer_cache.Unmap(); | ||
| 775 | |||
| 776 | // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. | ||
| 777 | vertex_array_pushbuffer.Bind(); | ||
| 778 | bind_ubo_pushbuffer.Bind(); | ||
| 779 | bind_ssbo_pushbuffer.Bind(); | ||
| 780 | |||
| 781 | if (invalidate) { | ||
| 782 | // As all cached buffers are invalidated, we need to recheck their state. | ||
| 783 | gpu.dirty.ResetVertexArrays(); | ||
| 784 | } | ||
| 657 | 785 | ||
| 658 | shader_program_manager->ApplyTo(state); | 786 | shader_program_manager->ApplyTo(state); |
| 659 | state.Apply(); | 787 | state.Apply(); |
| @@ -665,6 +793,46 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 665 | params.DispatchDraw(); | 793 | params.DispatchDraw(); |
| 666 | 794 | ||
| 667 | accelerate_draw = AccelDraw::Disabled; | 795 | accelerate_draw = AccelDraw::Disabled; |
| 796 | gpu.dirty.memory_general = false; | ||
| 797 | } | ||
| 798 | |||
| 799 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | ||
| 800 | if (!GLAD_GL_ARB_compute_variable_group_size) { | ||
| 801 | LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the " | ||
| 802 | "lack of GL_ARB_compute_variable_group_size"); | ||
| 803 | return; | ||
| 804 | } | ||
| 805 | |||
| 806 | auto kernel = shader_cache.GetComputeKernel(code_addr); | ||
| 807 | const auto [program, next_bindings] = kernel->GetProgramHandle({}); | ||
| 808 | state.draw.shader_program = program; | ||
| 809 | state.draw.program_pipeline = 0; | ||
| 810 | |||
| 811 | const std::size_t buffer_size = | ||
| 812 | Tegra::Engines::KeplerCompute::NumConstBuffers * | ||
| 813 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||
| 814 | buffer_cache.Map(buffer_size); | ||
| 815 | |||
| 816 | bind_ubo_pushbuffer.Setup(0); | ||
| 817 | bind_ssbo_pushbuffer.Setup(0); | ||
| 818 | |||
| 819 | SetupComputeConstBuffers(kernel); | ||
| 820 | SetupComputeGlobalMemory(kernel); | ||
| 821 | |||
| 822 | // TODO(Rodrigo): Bind images and samplers | ||
| 823 | |||
| 824 | buffer_cache.Unmap(); | ||
| 825 | |||
| 826 | bind_ubo_pushbuffer.Bind(); | ||
| 827 | bind_ssbo_pushbuffer.Bind(); | ||
| 828 | |||
| 829 | state.ApplyShaderProgram(); | ||
| 830 | state.ApplyProgramPipeline(); | ||
| 831 | |||
| 832 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 833 | glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y, | ||
| 834 | launch_desc.grid_dim_z, launch_desc.block_dim_x, | ||
| 835 | launch_desc.block_dim_y, launch_desc.block_dim_z); | ||
| 668 | } | 836 | } |
| 669 | 837 | ||
| 670 | void RasterizerOpenGL::FlushAll() {} | 838 | void RasterizerOpenGL::FlushAll() {} |
| @@ -675,7 +843,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | |||
| 675 | return; | 843 | return; |
| 676 | } | 844 | } |
| 677 | texture_cache.FlushRegion(addr, size); | 845 | texture_cache.FlushRegion(addr, size); |
| 678 | global_cache.FlushRegion(addr, size); | 846 | buffer_cache.FlushRegion(addr, size); |
| 679 | } | 847 | } |
| 680 | 848 | ||
| 681 | void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | 849 | void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { |
| @@ -685,7 +853,6 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | |||
| 685 | } | 853 | } |
| 686 | texture_cache.InvalidateRegion(addr, size); | 854 | texture_cache.InvalidateRegion(addr, size); |
| 687 | shader_cache.InvalidateRegion(addr, size); | 855 | shader_cache.InvalidateRegion(addr, size); |
| 688 | global_cache.InvalidateRegion(addr, size); | ||
| 689 | buffer_cache.InvalidateRegion(addr, size); | 856 | buffer_cache.InvalidateRegion(addr, size); |
| 690 | } | 857 | } |
| 691 | 858 | ||
| @@ -696,6 +863,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | |||
| 696 | InvalidateRegion(addr, size); | 863 | InvalidateRegion(addr, size); |
| 697 | } | 864 | } |
| 698 | 865 | ||
| 866 | void RasterizerOpenGL::TickFrame() { | ||
| 867 | buffer_cache.TickFrame(); | ||
| 868 | } | ||
| 869 | |||
| 699 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 870 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 700 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 871 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
| 701 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 872 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| @@ -737,14 +908,25 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 737 | void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 908 | void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 738 | const Shader& shader) { | 909 | const Shader& shader) { |
| 739 | MICROPROFILE_SCOPE(OpenGL_UBO); | 910 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 740 | const auto stage_index = static_cast<std::size_t>(stage); | 911 | const auto& stages = system.GPU().Maxwell3D().state.shader_stages; |
| 741 | const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; | 912 | const auto& shader_stage = stages[static_cast<std::size_t>(stage)]; |
| 742 | const auto& entries = shader->GetShaderEntries().const_buffers; | 913 | for (const auto& entry : shader->GetShaderEntries().const_buffers) { |
| 914 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; | ||
| 915 | SetupConstBuffer(buffer, entry); | ||
| 916 | } | ||
| 917 | } | ||
| 743 | 918 | ||
| 744 | // Upload only the enabled buffers from the 16 constbuffers of each shader stage | 919 | void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { |
| 745 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 920 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 746 | const auto& entry = entries[bindpoint]; | 921 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 747 | SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); | 922 | for (const auto& entry : kernel->GetShaderEntries().const_buffers) { |
| 923 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||
| 924 | const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value(); | ||
| 925 | Tegra::Engines::ConstBufferInfo buffer; | ||
| 926 | buffer.address = config.Address(); | ||
| 927 | buffer.size = config.size; | ||
| 928 | buffer.enabled = mask[entry.GetIndex()]; | ||
| 929 | SetupConstBuffer(buffer, entry); | ||
| 748 | } | 930 | } |
| 749 | } | 931 | } |
| 750 | 932 | ||
| @@ -752,49 +934,52 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b | |||
| 752 | const GLShader::ConstBufferEntry& entry) { | 934 | const GLShader::ConstBufferEntry& entry) { |
| 753 | if (!buffer.enabled) { | 935 | if (!buffer.enabled) { |
| 754 | // Set values to zero to unbind buffers | 936 | // Set values to zero to unbind buffers |
| 755 | bind_ubo_pushbuffer.Push(0, 0, 0); | 937 | bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); |
| 756 | return; | 938 | return; |
| 757 | } | 939 | } |
| 758 | 940 | ||
| 759 | std::size_t size; | ||
| 760 | if (entry.IsIndirect()) { | ||
| 761 | // Buffer is accessed indirectly, so upload the entire thing | ||
| 762 | size = buffer.size; | ||
| 763 | |||
| 764 | if (size > MaxConstbufferSize) { | ||
| 765 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, | ||
| 766 | MaxConstbufferSize); | ||
| 767 | size = MaxConstbufferSize; | ||
| 768 | } | ||
| 769 | } else { | ||
| 770 | // Buffer is accessed directly, upload just what we use | ||
| 771 | size = entry.GetSize(); | ||
| 772 | } | ||
| 773 | |||
| 774 | // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 | 941 | // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 |
| 775 | // UBO alignment requirements. | 942 | // UBO alignment requirements. |
| 776 | size = Common::AlignUp(size, sizeof(GLvec4)); | 943 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); |
| 777 | ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big"); | ||
| 778 | 944 | ||
| 779 | const std::size_t alignment = device.GetUniformBufferAlignment(); | 945 | const auto alignment = device.GetUniformBufferAlignment(); |
| 780 | const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment); | 946 | const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment); |
| 781 | bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size); | 947 | bind_ubo_pushbuffer.Push(cbuf, offset, size); |
| 782 | } | 948 | } |
| 783 | 949 | ||
| 784 | void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 950 | void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 785 | const Shader& shader) { | 951 | const Shader& shader) { |
| 786 | const auto& entries = shader->GetShaderEntries().global_memory_entries; | 952 | auto& gpu{system.GPU()}; |
| 787 | for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 953 | auto& memory_manager{gpu.MemoryManager()}; |
| 788 | const auto& entry{entries[bindpoint]}; | 954 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; |
| 789 | const auto& region{global_cache.GetGlobalRegion(entry, stage)}; | 955 | for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { |
| 790 | if (entry.IsWritten()) { | 956 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; |
| 791 | region->MarkAsModified(true, global_cache); | 957 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; |
| 792 | } | 958 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| 793 | bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, | 959 | SetupGlobalMemory(entry, gpu_addr, size); |
| 794 | static_cast<GLsizeiptr>(region->GetSizeInBytes())); | 960 | } |
| 961 | } | ||
| 962 | |||
| 963 | void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { | ||
| 964 | auto& gpu{system.GPU()}; | ||
| 965 | auto& memory_manager{gpu.MemoryManager()}; | ||
| 966 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; | ||
| 967 | for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { | ||
| 968 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | ||
| 969 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; | ||
| 970 | const auto size{memory_manager.Read<u32>(addr + 8)}; | ||
| 971 | SetupGlobalMemory(entry, gpu_addr, size); | ||
| 795 | } | 972 | } |
| 796 | } | 973 | } |
| 797 | 974 | ||
| 975 | void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, | ||
| 976 | GPUVAddr gpu_addr, std::size_t size) { | ||
| 977 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | ||
| 978 | const auto [ssbo, buffer_offset] = | ||
| 979 | buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten()); | ||
| 980 | bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); | ||
| 981 | } | ||
| 982 | |||
| 798 | TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, | 983 | TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, |
| 799 | BaseBindings base_bindings) { | 984 | BaseBindings base_bindings) { |
| 800 | MICROPROFILE_SCOPE(OpenGL_Texture); | 985 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| @@ -883,10 +1068,11 @@ void RasterizerOpenGL::SyncClipCoef() { | |||
| 883 | } | 1068 | } |
| 884 | 1069 | ||
| 885 | void RasterizerOpenGL::SyncCullMode() { | 1070 | void RasterizerOpenGL::SyncCullMode() { |
| 886 | const auto& regs = system.GPU().Maxwell3D().regs; | 1071 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 887 | 1072 | ||
| 888 | state.cull.enabled = regs.cull.enabled != 0; | 1073 | const auto& regs = maxwell3d.regs; |
| 889 | 1074 | ||
| 1075 | state.cull.enabled = regs.cull.enabled != 0; | ||
| 890 | if (state.cull.enabled) { | 1076 | if (state.cull.enabled) { |
| 891 | state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); | 1077 | state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); |
| 892 | state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); | 1078 | state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); |
| @@ -919,16 +1105,21 @@ void RasterizerOpenGL::SyncDepthTestState() { | |||
| 919 | state.depth.test_enabled = regs.depth_test_enable != 0; | 1105 | state.depth.test_enabled = regs.depth_test_enable != 0; |
| 920 | state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; | 1106 | state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; |
| 921 | 1107 | ||
| 922 | if (!state.depth.test_enabled) | 1108 | if (!state.depth.test_enabled) { |
| 923 | return; | 1109 | return; |
| 1110 | } | ||
| 924 | 1111 | ||
| 925 | state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); | 1112 | state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); |
| 926 | } | 1113 | } |
| 927 | 1114 | ||
| 928 | void RasterizerOpenGL::SyncStencilTestState() { | 1115 | void RasterizerOpenGL::SyncStencilTestState() { |
| 929 | const auto& regs = system.GPU().Maxwell3D().regs; | 1116 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 930 | state.stencil.test_enabled = regs.stencil_enable != 0; | 1117 | if (!maxwell3d.dirty.stencil_test) { |
| 1118 | return; | ||
| 1119 | } | ||
| 1120 | const auto& regs = maxwell3d.regs; | ||
| 931 | 1121 | ||
| 1122 | state.stencil.test_enabled = regs.stencil_enable != 0; | ||
| 932 | if (!regs.stencil_enable) { | 1123 | if (!regs.stencil_enable) { |
| 933 | return; | 1124 | return; |
| 934 | } | 1125 | } |
| @@ -957,10 +1148,17 @@ void RasterizerOpenGL::SyncStencilTestState() { | |||
| 957 | state.stencil.back.action_depth_fail = GL_KEEP; | 1148 | state.stencil.back.action_depth_fail = GL_KEEP; |
| 958 | state.stencil.back.action_depth_pass = GL_KEEP; | 1149 | state.stencil.back.action_depth_pass = GL_KEEP; |
| 959 | } | 1150 | } |
| 1151 | state.MarkDirtyStencilState(); | ||
| 1152 | maxwell3d.dirty.stencil_test = false; | ||
| 960 | } | 1153 | } |
| 961 | 1154 | ||
| 962 | void RasterizerOpenGL::SyncColorMask() { | 1155 | void RasterizerOpenGL::SyncColorMask() { |
| 963 | const auto& regs = system.GPU().Maxwell3D().regs; | 1156 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1157 | if (!maxwell3d.dirty.color_mask) { | ||
| 1158 | return; | ||
| 1159 | } | ||
| 1160 | const auto& regs = maxwell3d.regs; | ||
| 1161 | |||
| 964 | const std::size_t count = | 1162 | const std::size_t count = |
| 965 | regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; | 1163 | regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; |
| 966 | for (std::size_t i = 0; i < count; i++) { | 1164 | for (std::size_t i = 0; i < count; i++) { |
| @@ -971,6 +1169,9 @@ void RasterizerOpenGL::SyncColorMask() { | |||
| 971 | dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; | 1169 | dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; |
| 972 | dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; | 1170 | dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; |
| 973 | } | 1171 | } |
| 1172 | |||
| 1173 | state.MarkDirtyColorMask(); | ||
| 1174 | maxwell3d.dirty.color_mask = false; | ||
| 974 | } | 1175 | } |
| 975 | 1176 | ||
| 976 | void RasterizerOpenGL::SyncMultiSampleState() { | 1177 | void RasterizerOpenGL::SyncMultiSampleState() { |
| @@ -985,7 +1186,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() { | |||
| 985 | } | 1186 | } |
| 986 | 1187 | ||
| 987 | void RasterizerOpenGL::SyncBlendState() { | 1188 | void RasterizerOpenGL::SyncBlendState() { |
| 988 | const auto& regs = system.GPU().Maxwell3D().regs; | 1189 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1190 | if (!maxwell3d.dirty.blend_state) { | ||
| 1191 | return; | ||
| 1192 | } | ||
| 1193 | const auto& regs = maxwell3d.regs; | ||
| 989 | 1194 | ||
| 990 | state.blend_color.red = regs.blend_color.r; | 1195 | state.blend_color.red = regs.blend_color.r; |
| 991 | state.blend_color.green = regs.blend_color.g; | 1196 | state.blend_color.green = regs.blend_color.g; |
| @@ -1008,6 +1213,8 @@ void RasterizerOpenGL::SyncBlendState() { | |||
| 1008 | for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { | 1213 | for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { |
| 1009 | state.blend[i].enabled = false; | 1214 | state.blend[i].enabled = false; |
| 1010 | } | 1215 | } |
| 1216 | maxwell3d.dirty.blend_state = false; | ||
| 1217 | state.MarkDirtyBlendState(); | ||
| 1011 | return; | 1218 | return; |
| 1012 | } | 1219 | } |
| 1013 | 1220 | ||
| @@ -1024,6 +1231,9 @@ void RasterizerOpenGL::SyncBlendState() { | |||
| 1024 | blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); | 1231 | blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); |
| 1025 | blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); | 1232 | blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); |
| 1026 | } | 1233 | } |
| 1234 | |||
| 1235 | state.MarkDirtyBlendState(); | ||
| 1236 | maxwell3d.dirty.blend_state = false; | ||
| 1027 | } | 1237 | } |
| 1028 | 1238 | ||
| 1029 | void RasterizerOpenGL::SyncLogicOpState() { | 1239 | void RasterizerOpenGL::SyncLogicOpState() { |
| @@ -1075,13 +1285,21 @@ void RasterizerOpenGL::SyncPointState() { | |||
| 1075 | } | 1285 | } |
| 1076 | 1286 | ||
| 1077 | void RasterizerOpenGL::SyncPolygonOffset() { | 1287 | void RasterizerOpenGL::SyncPolygonOffset() { |
| 1078 | const auto& regs = system.GPU().Maxwell3D().regs; | 1288 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1289 | if (!maxwell3d.dirty.polygon_offset) { | ||
| 1290 | return; | ||
| 1291 | } | ||
| 1292 | const auto& regs = maxwell3d.regs; | ||
| 1293 | |||
| 1079 | state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; | 1294 | state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; |
| 1080 | state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; | 1295 | state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; |
| 1081 | state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; | 1296 | state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; |
| 1082 | state.polygon_offset.units = regs.polygon_offset_units; | 1297 | state.polygon_offset.units = regs.polygon_offset_units; |
| 1083 | state.polygon_offset.factor = regs.polygon_offset_factor; | 1298 | state.polygon_offset.factor = regs.polygon_offset_factor; |
| 1084 | state.polygon_offset.clamp = regs.polygon_offset_clamp; | 1299 | state.polygon_offset.clamp = regs.polygon_offset_clamp; |
| 1300 | |||
| 1301 | state.MarkDirtyPolygonOffset(); | ||
| 1302 | maxwell3d.dirty.polygon_offset = false; | ||
| 1085 | } | 1303 | } |
| 1086 | 1304 | ||
| 1087 | void RasterizerOpenGL::SyncAlphaTest() { | 1305 | void RasterizerOpenGL::SyncAlphaTest() { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index bf67e3a70..8b123c48d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -24,7 +24,6 @@ | |||
| 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_device.h" | 25 | #include "video_core/renderer_opengl/gl_device.h" |
| 26 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" | 26 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" |
| 27 | #include "video_core/renderer_opengl/gl_global_cache.h" | ||
| 28 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 27 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 29 | #include "video_core/renderer_opengl/gl_sampler_cache.h" | 28 | #include "video_core/renderer_opengl/gl_sampler_cache.h" |
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 29 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| @@ -59,10 +58,12 @@ public: | |||
| 59 | 58 | ||
| 60 | void DrawArrays() override; | 59 | void DrawArrays() override; |
| 61 | void Clear() override; | 60 | void Clear() override; |
| 61 | void DispatchCompute(GPUVAddr code_addr) override; | ||
| 62 | void FlushAll() override; | 62 | void FlushAll() override; |
| 63 | void FlushRegion(CacheAddr addr, u64 size) override; | 63 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 64 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 64 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 65 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 65 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 66 | void TickFrame() override; | ||
| 66 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 67 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 67 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 68 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
| 68 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 69 | const Tegra::Engines::Fermi2D::Config& copy_config) override; |
| @@ -73,11 +74,6 @@ public: | |||
| 73 | void LoadDiskResources(const std::atomic_bool& stop_loading, | 74 | void LoadDiskResources(const std::atomic_bool& stop_loading, |
| 74 | const VideoCore::DiskResourceLoadCallback& callback) override; | 75 | const VideoCore::DiskResourceLoadCallback& callback) override; |
| 75 | 76 | ||
| 76 | /// Maximum supported size that a constbuffer can have in bytes. | ||
| 77 | static constexpr std::size_t MaxConstbufferSize = 0x10000; | ||
| 78 | static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, | ||
| 79 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | ||
| 80 | |||
| 81 | private: | 77 | private: |
| 82 | struct FramebufferConfigState { | 78 | struct FramebufferConfigState { |
| 83 | bool using_color_fb{}; | 79 | bool using_color_fb{}; |
| @@ -98,30 +94,45 @@ private: | |||
| 98 | 94 | ||
| 99 | /** | 95 | /** |
| 100 | * Configures the color and depth framebuffer states. | 96 | * Configures the color and depth framebuffer states. |
| 101 | * @param must_reconfigure If true, tells the framebuffer to skip the cache and reconfigure | 97 | * |
| 102 | * again. Used by the texture cache to solve texception conflicts | 98 | * @param current_state The current OpenGL state. |
| 103 | * @param use_color_fb If true, configure color framebuffers. | 99 | * @param using_color_fb If true, configure color framebuffers. |
| 104 | * @param using_depth_fb If true, configure the depth/stencil framebuffer. | 100 | * @param using_depth_fb If true, configure the depth/stencil framebuffer. |
| 105 | * @param preserve_contents If true, tries to preserve data from a previously used framebuffer. | 101 | * @param preserve_contents If true, tries to preserve data from a previously used |
| 102 | * framebuffer. | ||
| 106 | * @param single_color_target Specifies if a single color buffer target should be used. | 103 | * @param single_color_target Specifies if a single color buffer target should be used. |
| 104 | * | ||
| 107 | * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture | 105 | * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture |
| 108 | * (requires using_depth_fb to be true) | 106 | * (requires using_depth_fb to be true) |
| 109 | */ | 107 | */ |
| 110 | std::pair<bool, bool> ConfigureFramebuffers( | 108 | std::pair<bool, bool> ConfigureFramebuffers( |
| 111 | OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true, | 109 | OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true, |
| 112 | bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); | 110 | bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); |
| 113 | 111 | ||
| 112 | void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, | ||
| 113 | bool using_depth_fb, bool using_stencil_fb); | ||
| 114 | |||
| 114 | /// Configures the current constbuffers to use for the draw command. | 115 | /// Configures the current constbuffers to use for the draw command. |
| 115 | void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 116 | void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 116 | const Shader& shader); | 117 | const Shader& shader); |
| 117 | 118 | ||
| 119 | /// Configures the current constbuffers to use for the kernel invocation. | ||
| 120 | void SetupComputeConstBuffers(const Shader& kernel); | ||
| 121 | |||
| 118 | /// Configures a constant buffer. | 122 | /// Configures a constant buffer. |
| 119 | void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, | 123 | void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, |
| 120 | const GLShader::ConstBufferEntry& entry); | 124 | const GLShader::ConstBufferEntry& entry); |
| 121 | 125 | ||
| 122 | /// Configures the current global memory entries to use for the draw command. | 126 | /// Configures the current global memory entries to use for the draw command. |
| 123 | void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 127 | void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 124 | const Shader& shader); | 128 | const Shader& shader); |
| 129 | |||
| 130 | /// Configures the current global memory entries to use for the kernel invocation. | ||
| 131 | void SetupComputeGlobalMemory(const Shader& kernel); | ||
| 132 | |||
| 133 | /// Configures a constant buffer. | ||
| 134 | void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | ||
| 135 | std::size_t size); | ||
| 125 | 136 | ||
| 126 | /// Configures the current textures to use for the draw command. Returns shaders texture buffer | 137 | /// Configures the current textures to use for the draw command. Returns shaders texture buffer |
| 127 | /// usage. | 138 | /// usage. |
| @@ -189,7 +200,6 @@ private: | |||
| 189 | 200 | ||
| 190 | TextureCacheOpenGL texture_cache; | 201 | TextureCacheOpenGL texture_cache; |
| 191 | ShaderCacheOpenGL shader_cache; | 202 | ShaderCacheOpenGL shader_cache; |
| 192 | GlobalRegionCacheOpenGL global_cache; | ||
| 193 | SamplerCacheOpenGL sampler_cache; | 203 | SamplerCacheOpenGL sampler_cache; |
| 194 | FramebufferCacheOpenGL framebuffer_cache; | 204 | FramebufferCacheOpenGL framebuffer_cache; |
| 195 | 205 | ||
| @@ -208,6 +218,7 @@ private: | |||
| 208 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 218 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; |
| 209 | OGLBufferCache buffer_cache; | 219 | OGLBufferCache buffer_cache; |
| 210 | 220 | ||
| 221 | VertexArrayPushBuffer vertex_array_pushbuffer; | ||
| 211 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; | 222 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; |
| 212 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; | 223 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; |
| 213 | 224 | ||
| @@ -219,14 +230,19 @@ private: | |||
| 219 | GLuint SetupVertexFormat(); | 230 | GLuint SetupVertexFormat(); |
| 220 | 231 | ||
| 221 | void SetupVertexBuffer(GLuint vao); | 232 | void SetupVertexBuffer(GLuint vao); |
| 233 | void SetupVertexInstances(GLuint vao); | ||
| 222 | 234 | ||
| 223 | DrawParameters SetupDraw(); | 235 | GLintptr SetupIndexBuffer(); |
| 236 | |||
| 237 | DrawParameters SetupDraw(GLintptr index_buffer_offset); | ||
| 224 | 238 | ||
| 225 | void SetupShaders(GLenum primitive_mode); | 239 | void SetupShaders(GLenum primitive_mode); |
| 226 | 240 | ||
| 227 | enum class AccelDraw { Disabled, Arrays, Indexed }; | 241 | enum class AccelDraw { Disabled, Arrays, Indexed }; |
| 228 | AccelDraw accelerate_draw = AccelDraw::Disabled; | 242 | AccelDraw accelerate_draw = AccelDraw::Disabled; |
| 229 | 243 | ||
| 244 | OGLFramebuffer clear_framebuffer; | ||
| 245 | |||
| 230 | using CachedPageMap = boost::icl::interval_map<u64, int>; | 246 | using CachedPageMap = boost::icl::interval_map<u64, int>; |
| 231 | CachedPageMap cached_pages; | 247 | CachedPageMap cached_pages; |
| 232 | }; | 248 | }; |
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h index defbc2d81..34ee37f00 100644 --- a/src/video_core/renderer_opengl/gl_sampler_cache.h +++ b/src/video_core/renderer_opengl/gl_sampler_cache.h | |||
| @@ -17,9 +17,9 @@ public: | |||
| 17 | ~SamplerCacheOpenGL(); | 17 | ~SamplerCacheOpenGL(); |
| 18 | 18 | ||
| 19 | protected: | 19 | protected: |
| 20 | OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; | 20 | OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; |
| 21 | 21 | ||
| 22 | GLuint ToSamplerType(const OGLSampler& sampler) const; | 22 | GLuint ToSamplerType(const OGLSampler& sampler) const override; |
| 23 | }; | 23 | }; |
| 24 | 24 | ||
| 25 | } // namespace OpenGL | 25 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f9b2b03a0..1c90facc3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -23,13 +23,13 @@ namespace OpenGL { | |||
| 23 | 23 | ||
| 24 | using VideoCommon::Shader::ProgramCode; | 24 | using VideoCommon::Shader::ProgramCode; |
| 25 | 25 | ||
| 26 | // One UBO is always reserved for emulation values | 26 | // One UBO is always reserved for emulation values on staged shaders |
| 27 | constexpr u32 RESERVED_UBOS = 1; | 27 | constexpr u32 STAGE_RESERVED_UBOS = 1; |
| 28 | 28 | ||
| 29 | struct UnspecializedShader { | 29 | struct UnspecializedShader { |
| 30 | std::string code; | 30 | std::string code; |
| 31 | GLShader::ShaderEntries entries; | 31 | GLShader::ShaderEntries entries; |
| 32 | Maxwell::ShaderProgram program_type; | 32 | ProgramType program_type; |
| 33 | }; | 33 | }; |
| 34 | 34 | ||
| 35 | namespace { | 35 | namespace { |
| @@ -55,15 +55,17 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g | |||
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | /// Gets the shader type from a Maxwell program type | 57 | /// Gets the shader type from a Maxwell program type |
| 58 | constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) { | 58 | constexpr GLenum GetShaderType(ProgramType program_type) { |
| 59 | switch (program_type) { | 59 | switch (program_type) { |
| 60 | case Maxwell::ShaderProgram::VertexA: | 60 | case ProgramType::VertexA: |
| 61 | case Maxwell::ShaderProgram::VertexB: | 61 | case ProgramType::VertexB: |
| 62 | return GL_VERTEX_SHADER; | 62 | return GL_VERTEX_SHADER; |
| 63 | case Maxwell::ShaderProgram::Geometry: | 63 | case ProgramType::Geometry: |
| 64 | return GL_GEOMETRY_SHADER; | 64 | return GL_GEOMETRY_SHADER; |
| 65 | case Maxwell::ShaderProgram::Fragment: | 65 | case ProgramType::Fragment: |
| 66 | return GL_FRAGMENT_SHADER; | 66 | return GL_FRAGMENT_SHADER; |
| 67 | case ProgramType::Compute: | ||
| 68 | return GL_COMPUTE_SHADER; | ||
| 67 | default: | 69 | default: |
| 68 | return GL_NONE; | 70 | return GL_NONE; |
| 69 | } | 71 | } |
| @@ -100,6 +102,25 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen | |||
| 100 | } | 102 | } |
| 101 | } | 103 | } |
| 102 | 104 | ||
| 105 | ProgramType GetProgramType(Maxwell::ShaderProgram program) { | ||
| 106 | switch (program) { | ||
| 107 | case Maxwell::ShaderProgram::VertexA: | ||
| 108 | return ProgramType::VertexA; | ||
| 109 | case Maxwell::ShaderProgram::VertexB: | ||
| 110 | return ProgramType::VertexB; | ||
| 111 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 112 | return ProgramType::TessellationControl; | ||
| 113 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 114 | return ProgramType::TessellationEval; | ||
| 115 | case Maxwell::ShaderProgram::Geometry: | ||
| 116 | return ProgramType::Geometry; | ||
| 117 | case Maxwell::ShaderProgram::Fragment: | ||
| 118 | return ProgramType::Fragment; | ||
| 119 | } | ||
| 120 | UNREACHABLE(); | ||
| 121 | return {}; | ||
| 122 | } | ||
| 123 | |||
| 103 | /// Calculates the size of a program stream | 124 | /// Calculates the size of a program stream |
| 104 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | 125 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { |
| 105 | constexpr std::size_t start_offset = 10; | 126 | constexpr std::size_t start_offset = 10; |
| @@ -128,11 +149,13 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | |||
| 128 | } | 149 | } |
| 129 | 150 | ||
| 130 | /// Hashes one (or two) program streams | 151 | /// Hashes one (or two) program streams |
| 131 | u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, | 152 | u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, |
| 132 | const ProgramCode& code_b) { | 153 | const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { |
| 133 | u64 unique_identifier = | 154 | if (size_a == 0) { |
| 134 | Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code)); | 155 | size_a = CalculateProgramSize(code); |
| 135 | if (program_type != Maxwell::ShaderProgram::VertexA) { | 156 | } |
| 157 | u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); | ||
| 158 | if (program_type != ProgramType::VertexA) { | ||
| 136 | return unique_identifier; | 159 | return unique_identifier; |
| 137 | } | 160 | } |
| 138 | // VertexA programs include two programs | 161 | // VertexA programs include two programs |
| @@ -140,50 +163,67 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& | |||
| 140 | std::size_t seed = 0; | 163 | std::size_t seed = 0; |
| 141 | boost::hash_combine(seed, unique_identifier); | 164 | boost::hash_combine(seed, unique_identifier); |
| 142 | 165 | ||
| 143 | const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), | 166 | if (size_b == 0) { |
| 144 | CalculateProgramSize(code_b)); | 167 | size_b = CalculateProgramSize(code_b); |
| 168 | } | ||
| 169 | const u64 identifier_b = | ||
| 170 | Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b); | ||
| 145 | boost::hash_combine(seed, identifier_b); | 171 | boost::hash_combine(seed, identifier_b); |
| 146 | return static_cast<u64>(seed); | 172 | return static_cast<u64>(seed); |
| 147 | } | 173 | } |
| 148 | 174 | ||
| 149 | /// Creates an unspecialized program from code streams | 175 | /// Creates an unspecialized program from code streams |
| 150 | GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, | 176 | GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type, |
| 151 | ProgramCode program_code, ProgramCode program_code_b) { | 177 | ProgramCode program_code, ProgramCode program_code_b) { |
| 152 | GLShader::ShaderSetup setup(program_code); | 178 | GLShader::ShaderSetup setup(program_code); |
| 153 | if (program_type == Maxwell::ShaderProgram::VertexA) { | 179 | setup.program.size_a = CalculateProgramSize(program_code); |
| 180 | setup.program.size_b = 0; | ||
| 181 | if (program_type == ProgramType::VertexA) { | ||
| 154 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. | 182 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. |
| 155 | // Conventional HW does not support this, so we combine VertexA and VertexB into one | 183 | // Conventional HW does not support this, so we combine VertexA and VertexB into one |
| 156 | // stage here. | 184 | // stage here. |
| 157 | setup.SetProgramB(program_code_b); | 185 | setup.SetProgramB(program_code_b); |
| 186 | setup.program.size_b = CalculateProgramSize(program_code_b); | ||
| 158 | } | 187 | } |
| 159 | setup.program.unique_identifier = | 188 | setup.program.unique_identifier = GetUniqueIdentifier( |
| 160 | GetUniqueIdentifier(program_type, program_code, program_code_b); | 189 | program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); |
| 161 | 190 | ||
| 162 | switch (program_type) { | 191 | switch (program_type) { |
| 163 | case Maxwell::ShaderProgram::VertexA: | 192 | case ProgramType::VertexA: |
| 164 | case Maxwell::ShaderProgram::VertexB: | 193 | case ProgramType::VertexB: |
| 165 | return GLShader::GenerateVertexShader(device, setup); | 194 | return GLShader::GenerateVertexShader(device, setup); |
| 166 | case Maxwell::ShaderProgram::Geometry: | 195 | case ProgramType::Geometry: |
| 167 | return GLShader::GenerateGeometryShader(device, setup); | 196 | return GLShader::GenerateGeometryShader(device, setup); |
| 168 | case Maxwell::ShaderProgram::Fragment: | 197 | case ProgramType::Fragment: |
| 169 | return GLShader::GenerateFragmentShader(device, setup); | 198 | return GLShader::GenerateFragmentShader(device, setup); |
| 199 | case ProgramType::Compute: | ||
| 200 | return GLShader::GenerateComputeShader(device, setup); | ||
| 170 | default: | 201 | default: |
| 171 | LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); | 202 | UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); |
| 172 | UNREACHABLE(); | ||
| 173 | return {}; | 203 | return {}; |
| 174 | } | 204 | } |
| 175 | } | 205 | } |
| 176 | 206 | ||
| 177 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, | 207 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, |
| 178 | Maxwell::ShaderProgram program_type, const ProgramVariant& variant, | 208 | ProgramType program_type, const ProgramVariant& variant, |
| 179 | bool hint_retrievable = false) { | 209 | bool hint_retrievable = false) { |
| 180 | auto base_bindings{variant.base_bindings}; | 210 | auto base_bindings{variant.base_bindings}; |
| 181 | const auto primitive_mode{variant.primitive_mode}; | 211 | const auto primitive_mode{variant.primitive_mode}; |
| 182 | const auto texture_buffer_usage{variant.texture_buffer_usage}; | 212 | const auto texture_buffer_usage{variant.texture_buffer_usage}; |
| 183 | 213 | ||
| 184 | std::string source = "#version 430 core\n" | 214 | std::string source = "#version 430 core\n" |
| 185 | "#extension GL_ARB_separate_shader_objects : enable\n\n"; | 215 | "#extension GL_ARB_separate_shader_objects : enable\n"; |
| 186 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | 216 | if (entries.shader_viewport_layer_array) { |
| 217 | source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; | ||
| 218 | } | ||
| 219 | if (program_type == ProgramType::Compute) { | ||
| 220 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; | ||
| 221 | } | ||
| 222 | source += '\n'; | ||
| 223 | |||
| 224 | if (program_type != ProgramType::Compute) { | ||
| 225 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | ||
| 226 | } | ||
| 187 | 227 | ||
| 188 | for (const auto& cbuf : entries.const_buffers) { | 228 | for (const auto& cbuf : entries.const_buffers) { |
| 189 | source += | 229 | source += |
| @@ -210,13 +250,16 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 210 | source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i); | 250 | source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i); |
| 211 | } | 251 | } |
| 212 | 252 | ||
| 213 | if (program_type == Maxwell::ShaderProgram::Geometry) { | 253 | if (program_type == ProgramType::Geometry) { |
| 214 | const auto [glsl_topology, debug_name, max_vertices] = | 254 | const auto [glsl_topology, debug_name, max_vertices] = |
| 215 | GetPrimitiveDescription(primitive_mode); | 255 | GetPrimitiveDescription(primitive_mode); |
| 216 | 256 | ||
| 217 | source += "layout (" + std::string(glsl_topology) + ") in;\n"; | 257 | source += "layout (" + std::string(glsl_topology) + ") in;\n"; |
| 218 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; | 258 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; |
| 219 | } | 259 | } |
| 260 | if (program_type == ProgramType::Compute) { | ||
| 261 | source += "layout (local_size_variable) in;\n"; | ||
| 262 | } | ||
| 220 | 263 | ||
| 221 | source += code; | 264 | source += code; |
| 222 | 265 | ||
| @@ -244,7 +287,7 @@ std::set<GLenum> GetSupportedFormats() { | |||
| 244 | 287 | ||
| 245 | } // Anonymous namespace | 288 | } // Anonymous namespace |
| 246 | 289 | ||
| 247 | CachedShader::CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, | 290 | CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 248 | GLShader::ProgramResult result) | 291 | GLShader::ProgramResult result) |
| 249 | : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr}, | 292 | : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr}, |
| 250 | unique_identifier{params.unique_identifier}, program_type{program_type}, | 293 | unique_identifier{params.unique_identifier}, program_type{program_type}, |
| @@ -257,29 +300,50 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | |||
| 257 | ProgramCode&& program_code_b) { | 300 | ProgramCode&& program_code_b) { |
| 258 | const auto code_size{CalculateProgramSize(program_code)}; | 301 | const auto code_size{CalculateProgramSize(program_code)}; |
| 259 | const auto code_size_b{CalculateProgramSize(program_code_b)}; | 302 | const auto code_size_b{CalculateProgramSize(program_code_b)}; |
| 260 | auto result{CreateProgram(params.device, program_type, program_code, program_code_b)}; | 303 | auto result{ |
| 304 | CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)}; | ||
| 261 | if (result.first.empty()) { | 305 | if (result.first.empty()) { |
| 262 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now | 306 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now |
| 263 | return {}; | 307 | return {}; |
| 264 | } | 308 | } |
| 265 | 309 | ||
| 266 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw( | 310 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw( |
| 267 | params.unique_identifier, program_type, static_cast<u32>(code_size / sizeof(u64)), | 311 | params.unique_identifier, GetProgramType(program_type), |
| 268 | static_cast<u32>(code_size_b / sizeof(u64)), std::move(program_code), | 312 | static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), |
| 269 | std::move(program_code_b))); | 313 | std::move(program_code), std::move(program_code_b))); |
| 270 | 314 | ||
| 271 | return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); | 315 | return std::shared_ptr<CachedShader>( |
| 316 | new CachedShader(params, GetProgramType(program_type), std::move(result))); | ||
| 272 | } | 317 | } |
| 273 | 318 | ||
| 274 | Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, | 319 | Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, |
| 275 | Maxwell::ShaderProgram program_type, | 320 | Maxwell::ShaderProgram program_type, |
| 276 | GLShader::ProgramResult result) { | 321 | GLShader::ProgramResult result) { |
| 277 | return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); | 322 | return std::shared_ptr<CachedShader>( |
| 323 | new CachedShader(params, GetProgramType(program_type), std::move(result))); | ||
| 324 | } | ||
| 325 | |||
| 326 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { | ||
| 327 | auto result{CreateProgram(params.device, ProgramType::Compute, code, {})}; | ||
| 328 | |||
| 329 | const auto code_size{CalculateProgramSize(code)}; | ||
| 330 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, | ||
| 331 | static_cast<u32>(code_size / sizeof(u64)), 0, | ||
| 332 | std::move(code), {})); | ||
| 333 | |||
| 334 | return std::shared_ptr<CachedShader>( | ||
| 335 | new CachedShader(params, ProgramType::Compute, std::move(result))); | ||
| 336 | } | ||
| 337 | |||
| 338 | Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, | ||
| 339 | GLShader::ProgramResult result) { | ||
| 340 | return std::shared_ptr<CachedShader>( | ||
| 341 | new CachedShader(params, ProgramType::Compute, std::move(result))); | ||
| 278 | } | 342 | } |
| 279 | 343 | ||
| 280 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { | 344 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { |
| 281 | GLuint handle{}; | 345 | GLuint handle{}; |
| 282 | if (program_type == Maxwell::ShaderProgram::Geometry) { | 346 | if (program_type == ProgramType::Geometry) { |
| 283 | handle = GetGeometryShader(variant); | 347 | handle = GetGeometryShader(variant); |
| 284 | } else { | 348 | } else { |
| 285 | const auto [entry, is_cache_miss] = programs.try_emplace(variant); | 349 | const auto [entry, is_cache_miss] = programs.try_emplace(variant); |
| @@ -297,8 +361,11 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar | |||
| 297 | handle = program->handle; | 361 | handle = program->handle; |
| 298 | } | 362 | } |
| 299 | 363 | ||
| 300 | auto base_bindings{variant.base_bindings}; | 364 | auto base_bindings = variant.base_bindings; |
| 301 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS; | 365 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()); |
| 366 | if (program_type != ProgramType::Compute) { | ||
| 367 | base_bindings.cbuf += STAGE_RESERVED_UBOS; | ||
| 368 | } | ||
| 302 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); | 369 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); |
| 303 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); | 370 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); |
| 304 | 371 | ||
| @@ -561,7 +628,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia | |||
| 561 | } | 628 | } |
| 562 | 629 | ||
| 563 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 630 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| 564 | if (!system.GPU().Maxwell3D().dirty_flags.shaders) { | 631 | if (!system.GPU().Maxwell3D().dirty.shaders) { |
| 565 | return last_shaders[static_cast<std::size_t>(program)]; | 632 | return last_shaders[static_cast<std::size_t>(program)]; |
| 566 | } | 633 | } |
| 567 | 634 | ||
| @@ -578,13 +645,15 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 578 | // No shader found - create a new one | 645 | // No shader found - create a new one |
| 579 | ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; | 646 | ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; |
| 580 | ProgramCode program_code_b; | 647 | ProgramCode program_code_b; |
| 581 | if (program == Maxwell::ShaderProgram::VertexA) { | 648 | const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; |
| 649 | if (is_program_a) { | ||
| 582 | const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; | 650 | const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; |
| 583 | program_code_b = GetShaderCode(memory_manager, program_addr_b, | 651 | program_code_b = GetShaderCode(memory_manager, program_addr_b, |
| 584 | memory_manager.GetPointer(program_addr_b)); | 652 | memory_manager.GetPointer(program_addr_b)); |
| 585 | } | 653 | } |
| 586 | 654 | ||
| 587 | const auto unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); | 655 | const auto unique_identifier = |
| 656 | GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); | ||
| 588 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; | 657 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; |
| 589 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, | 658 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, |
| 590 | host_ptr, unique_identifier}; | 659 | host_ptr, unique_identifier}; |
| @@ -601,4 +670,30 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 601 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 670 | return last_shaders[static_cast<std::size_t>(program)] = shader; |
| 602 | } | 671 | } |
| 603 | 672 | ||
| 673 | Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | ||
| 674 | auto& memory_manager{system.GPU().MemoryManager()}; | ||
| 675 | const auto host_ptr{memory_manager.GetPointer(code_addr)}; | ||
| 676 | auto kernel = TryGet(host_ptr); | ||
| 677 | if (kernel) { | ||
| 678 | return kernel; | ||
| 679 | } | ||
| 680 | |||
| 681 | // No kernel found - create a new one | ||
| 682 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | ||
| 683 | const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; | ||
| 684 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | ||
| 685 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, | ||
| 686 | host_ptr, unique_identifier}; | ||
| 687 | |||
| 688 | const auto found = precompiled_shaders.find(unique_identifier); | ||
| 689 | if (found == precompiled_shaders.end()) { | ||
| 690 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); | ||
| 691 | } else { | ||
| 692 | kernel = CachedShader::CreateKernelFromCache(params, found->second); | ||
| 693 | } | ||
| 694 | |||
| 695 | Register(kernel); | ||
| 696 | return kernel; | ||
| 697 | } | ||
| 698 | |||
| 604 | } // namespace OpenGL | 699 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index bbb53cdf4..a3106a0ff 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -61,6 +61,11 @@ public: | |||
| 61 | Maxwell::ShaderProgram program_type, | 61 | Maxwell::ShaderProgram program_type, |
| 62 | GLShader::ProgramResult result); | 62 | GLShader::ProgramResult result); |
| 63 | 63 | ||
| 64 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code); | ||
| 65 | |||
| 66 | static Shader CreateKernelFromCache(const ShaderParameters& params, | ||
| 67 | GLShader::ProgramResult result); | ||
| 68 | |||
| 64 | VAddr GetCpuAddr() const override { | 69 | VAddr GetCpuAddr() const override { |
| 65 | return cpu_addr; | 70 | return cpu_addr; |
| 66 | } | 71 | } |
| @@ -78,7 +83,7 @@ public: | |||
| 78 | std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); | 83 | std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); |
| 79 | 84 | ||
| 80 | private: | 85 | private: |
| 81 | explicit CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, | 86 | explicit CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 82 | GLShader::ProgramResult result); | 87 | GLShader::ProgramResult result); |
| 83 | 88 | ||
| 84 | // Geometry programs. These are needed because GLSL needs an input topology but it's not | 89 | // Geometry programs. These are needed because GLSL needs an input topology but it's not |
| @@ -104,7 +109,7 @@ private: | |||
| 104 | u8* host_ptr{}; | 109 | u8* host_ptr{}; |
| 105 | VAddr cpu_addr{}; | 110 | VAddr cpu_addr{}; |
| 106 | u64 unique_identifier{}; | 111 | u64 unique_identifier{}; |
| 107 | Maxwell::ShaderProgram program_type{}; | 112 | ProgramType program_type{}; |
| 108 | ShaderDiskCacheOpenGL& disk_cache; | 113 | ShaderDiskCacheOpenGL& disk_cache; |
| 109 | const PrecompiledPrograms& precompiled_programs; | 114 | const PrecompiledPrograms& precompiled_programs; |
| 110 | 115 | ||
| @@ -132,6 +137,9 @@ public: | |||
| 132 | /// Gets the current specified shader stage program | 137 | /// Gets the current specified shader stage program |
| 133 | Shader GetStageProgram(Maxwell::ShaderProgram program); | 138 | Shader GetStageProgram(Maxwell::ShaderProgram program); |
| 134 | 139 | ||
| 140 | /// Gets a compute kernel in the passed address | ||
| 141 | Shader GetComputeKernel(GPUVAddr code_addr); | ||
| 142 | |||
| 135 | protected: | 143 | protected: |
| 136 | // We do not have to flush this cache as things in it are never modified by us. | 144 | // We do not have to flush this cache as things in it are never modified by us. |
| 137 | void FlushObjectInner(const Shader& object) override {} | 145 | void FlushObjectInner(const Shader& object) override {} |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 5f2f1510c..ffe26b241 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "common/alignment.h" | 14 | #include "common/alignment.h" |
| 15 | #include "common/assert.h" | 15 | #include "common/assert.h" |
| 16 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 17 | #include "common/logging/log.h" | ||
| 17 | #include "video_core/engines/maxwell_3d.h" | 18 | #include "video_core/engines/maxwell_3d.h" |
| 18 | #include "video_core/renderer_opengl/gl_device.h" | 19 | #include "video_core/renderer_opengl/gl_device.h" |
| 19 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 20 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| @@ -36,7 +37,6 @@ using namespace std::string_literals; | |||
| 36 | using namespace VideoCommon::Shader; | 37 | using namespace VideoCommon::Shader; |
| 37 | 38 | ||
| 38 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 39 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 39 | using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | ||
| 40 | using Operation = const OperationNode&; | 40 | using Operation = const OperationNode&; |
| 41 | 41 | ||
| 42 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | 42 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| @@ -46,7 +46,7 @@ using TextureArgument = std::pair<Type, Node>; | |||
| 46 | using TextureIR = std::variant<TextureAoffi, TextureArgument>; | 46 | using TextureIR = std::variant<TextureAoffi, TextureArgument>; |
| 47 | 47 | ||
| 48 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | 48 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = |
| 49 | static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); | 49 | static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); |
| 50 | 50 | ||
| 51 | class ShaderWriter { | 51 | class ShaderWriter { |
| 52 | public: | 52 | public: |
| @@ -161,9 +161,13 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 161 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | 161 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); |
| 162 | } | 162 | } |
| 163 | 163 | ||
| 164 | constexpr bool IsVertexShader(ProgramType stage) { | ||
| 165 | return stage == ProgramType::VertexA || stage == ProgramType::VertexB; | ||
| 166 | } | ||
| 167 | |||
| 164 | class GLSLDecompiler final { | 168 | class GLSLDecompiler final { |
| 165 | public: | 169 | public: |
| 166 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, | 170 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage, |
| 167 | std::string suffix) | 171 | std::string suffix) |
| 168 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} | 172 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} |
| 169 | 173 | ||
| @@ -191,10 +195,12 @@ public: | |||
| 191 | 195 | ||
| 192 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems | 196 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems |
| 193 | // unlikely that shaders will use 20 nested SSYs and PBKs. | 197 | // unlikely that shaders will use 20 nested SSYs and PBKs. |
| 194 | constexpr u32 FLOW_STACK_SIZE = 20; | 198 | if (!ir.IsFlowStackDisabled()) { |
| 195 | for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { | 199 | constexpr u32 FLOW_STACK_SIZE = 20; |
| 196 | code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); | 200 | for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { |
| 197 | code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); | 201 | code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); |
| 202 | code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); | ||
| 203 | } | ||
| 198 | } | 204 | } |
| 199 | 205 | ||
| 200 | code.AddLine("while (true) {{"); | 206 | code.AddLine("while (true) {{"); |
| @@ -244,24 +250,22 @@ public: | |||
| 244 | usage.is_read, usage.is_written); | 250 | usage.is_read, usage.is_written); |
| 245 | } | 251 | } |
| 246 | entries.clip_distances = ir.GetClipDistances(); | 252 | entries.clip_distances = ir.GetClipDistances(); |
| 253 | entries.shader_viewport_layer_array = | ||
| 254 | IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex()); | ||
| 247 | entries.shader_length = ir.GetLength(); | 255 | entries.shader_length = ir.GetLength(); |
| 248 | return entries; | 256 | return entries; |
| 249 | } | 257 | } |
| 250 | 258 | ||
| 251 | private: | 259 | private: |
| 252 | using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation); | ||
| 253 | using OperationDecompilersArray = | ||
| 254 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; | ||
| 255 | |||
| 256 | void DeclareVertex() { | 260 | void DeclareVertex() { |
| 257 | if (stage != ShaderStage::Vertex) | 261 | if (!IsVertexShader(stage)) |
| 258 | return; | 262 | return; |
| 259 | 263 | ||
| 260 | DeclareVertexRedeclarations(); | 264 | DeclareVertexRedeclarations(); |
| 261 | } | 265 | } |
| 262 | 266 | ||
| 263 | void DeclareGeometry() { | 267 | void DeclareGeometry() { |
| 264 | if (stage != ShaderStage::Geometry) { | 268 | if (stage != ProgramType::Geometry) { |
| 265 | return; | 269 | return; |
| 266 | } | 270 | } |
| 267 | 271 | ||
| @@ -280,22 +284,35 @@ private: | |||
| 280 | } | 284 | } |
| 281 | 285 | ||
| 282 | void DeclareVertexRedeclarations() { | 286 | void DeclareVertexRedeclarations() { |
| 283 | bool clip_distances_declared = false; | ||
| 284 | |||
| 285 | code.AddLine("out gl_PerVertex {{"); | 287 | code.AddLine("out gl_PerVertex {{"); |
| 286 | ++code.scope; | 288 | ++code.scope; |
| 287 | 289 | ||
| 288 | code.AddLine("vec4 gl_Position;"); | 290 | code.AddLine("vec4 gl_Position;"); |
| 289 | 291 | ||
| 290 | for (const auto o : ir.GetOutputAttributes()) { | 292 | for (const auto attribute : ir.GetOutputAttributes()) { |
| 291 | if (o == Attribute::Index::PointSize) | 293 | if (attribute == Attribute::Index::ClipDistances0123 || |
| 292 | code.AddLine("float gl_PointSize;"); | 294 | attribute == Attribute::Index::ClipDistances4567) { |
| 293 | if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 || | ||
| 294 | o == Attribute::Index::ClipDistances4567)) { | ||
| 295 | code.AddLine("float gl_ClipDistance[];"); | 295 | code.AddLine("float gl_ClipDistance[];"); |
| 296 | clip_distances_declared = true; | 296 | break; |
| 297 | } | 297 | } |
| 298 | } | 298 | } |
| 299 | if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) { | ||
| 300 | if (ir.UsesLayer()) { | ||
| 301 | code.AddLine("int gl_Layer;"); | ||
| 302 | } | ||
| 303 | if (ir.UsesViewportIndex()) { | ||
| 304 | code.AddLine("int gl_ViewportIndex;"); | ||
| 305 | } | ||
| 306 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) && | ||
| 307 | !device.HasVertexViewportLayer()) { | ||
| 308 | LOG_ERROR( | ||
| 309 | Render_OpenGL, | ||
| 310 | "GL_ARB_shader_viewport_layer_array is not available and its required by a shader"); | ||
| 311 | } | ||
| 312 | |||
| 313 | if (ir.UsesPointSize()) { | ||
| 314 | code.AddLine("float gl_PointSize;"); | ||
| 315 | } | ||
| 299 | 316 | ||
| 300 | --code.scope; | 317 | --code.scope; |
| 301 | code.AddLine("}};"); | 318 | code.AddLine("}};"); |
| @@ -323,11 +340,16 @@ private: | |||
| 323 | } | 340 | } |
| 324 | 341 | ||
| 325 | void DeclareLocalMemory() { | 342 | void DeclareLocalMemory() { |
| 326 | if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { | 343 | // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at |
| 327 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; | 344 | // specialization time. |
| 328 | code.AddLine("float {}[{}];", GetLocalMemory(), element_count); | 345 | const u64 local_memory_size = |
| 329 | code.AddNewLine(); | 346 | stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize(); |
| 347 | if (local_memory_size == 0) { | ||
| 348 | return; | ||
| 330 | } | 349 | } |
| 350 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; | ||
| 351 | code.AddLine("float {}[{}];", GetLocalMemory(), element_count); | ||
| 352 | code.AddNewLine(); | ||
| 331 | } | 353 | } |
| 332 | 354 | ||
| 333 | void DeclareInternalFlags() { | 355 | void DeclareInternalFlags() { |
| @@ -381,12 +403,12 @@ private: | |||
| 381 | const u32 location{GetGenericAttributeIndex(index)}; | 403 | const u32 location{GetGenericAttributeIndex(index)}; |
| 382 | 404 | ||
| 383 | std::string name{GetInputAttribute(index)}; | 405 | std::string name{GetInputAttribute(index)}; |
| 384 | if (stage == ShaderStage::Geometry) { | 406 | if (stage == ProgramType::Geometry) { |
| 385 | name = "gs_" + name + "[]"; | 407 | name = "gs_" + name + "[]"; |
| 386 | } | 408 | } |
| 387 | 409 | ||
| 388 | std::string suffix; | 410 | std::string suffix; |
| 389 | if (stage == ShaderStage::Fragment) { | 411 | if (stage == ProgramType::Fragment) { |
| 390 | const auto input_mode{header.ps.GetAttributeUse(location)}; | 412 | const auto input_mode{header.ps.GetAttributeUse(location)}; |
| 391 | if (skip_unused && input_mode == AttributeUse::Unused) { | 413 | if (skip_unused && input_mode == AttributeUse::Unused) { |
| 392 | return; | 414 | return; |
| @@ -398,7 +420,7 @@ private: | |||
| 398 | } | 420 | } |
| 399 | 421 | ||
| 400 | void DeclareOutputAttributes() { | 422 | void DeclareOutputAttributes() { |
| 401 | if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) { | 423 | if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) { |
| 402 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { | 424 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { |
| 403 | DeclareOutputAttribute(ToGenericAttribute(i)); | 425 | DeclareOutputAttribute(ToGenericAttribute(i)); |
| 404 | } | 426 | } |
| @@ -520,7 +542,7 @@ private: | |||
| 520 | constexpr u32 element_stride{4}; | 542 | constexpr u32 element_stride{4}; |
| 521 | const u32 address{generic_base + index * generic_stride + element * element_stride}; | 543 | const u32 address{generic_base + index * generic_stride + element * element_stride}; |
| 522 | 544 | ||
| 523 | const bool declared{stage != ShaderStage::Fragment || | 545 | const bool declared{stage != ProgramType::Fragment || |
| 524 | header.ps.GetAttributeUse(index) != AttributeUse::Unused}; | 546 | header.ps.GetAttributeUse(index) != AttributeUse::Unused}; |
| 525 | const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; | 547 | const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; |
| 526 | code.AddLine("case 0x{:x}: return {};", address, value); | 548 | code.AddLine("case 0x{:x}: return {};", address, value); |
| @@ -624,7 +646,7 @@ private: | |||
| 624 | } | 646 | } |
| 625 | 647 | ||
| 626 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | 648 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { |
| 627 | UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, | 649 | UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry, |
| 628 | "Physical attributes in geometry shaders are not implemented"); | 650 | "Physical attributes in geometry shaders are not implemented"); |
| 629 | if (abuf->IsPhysicalBuffer()) { | 651 | if (abuf->IsPhysicalBuffer()) { |
| 630 | return fmt::format("readPhysicalAttribute(ftou({}))", | 652 | return fmt::format("readPhysicalAttribute(ftou({}))", |
| @@ -679,6 +701,9 @@ private: | |||
| 679 | } | 701 | } |
| 680 | 702 | ||
| 681 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | 703 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { |
| 704 | if (stage == ProgramType::Compute) { | ||
| 705 | LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); | ||
| 706 | } | ||
| 682 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | 707 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); |
| 683 | } | 708 | } |
| 684 | 709 | ||
| @@ -708,7 +733,7 @@ private: | |||
| 708 | 733 | ||
| 709 | std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { | 734 | std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { |
| 710 | const auto GeometryPass = [&](std::string_view name) { | 735 | const auto GeometryPass = [&](std::string_view name) { |
| 711 | if (stage == ShaderStage::Geometry && buffer) { | 736 | if (stage == ProgramType::Geometry && buffer) { |
| 712 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games | 737 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games |
| 713 | // set an 0x80000000 index for those and the shader fails to build. Find out why | 738 | // set an 0x80000000 index for those and the shader fails to build. Find out why |
| 714 | // this happens and what's its intent. | 739 | // this happens and what's its intent. |
| @@ -720,10 +745,10 @@ private: | |||
| 720 | switch (attribute) { | 745 | switch (attribute) { |
| 721 | case Attribute::Index::Position: | 746 | case Attribute::Index::Position: |
| 722 | switch (stage) { | 747 | switch (stage) { |
| 723 | case ShaderStage::Geometry: | 748 | case ProgramType::Geometry: |
| 724 | return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), | 749 | return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), |
| 725 | GetSwizzle(element)); | 750 | GetSwizzle(element)); |
| 726 | case ShaderStage::Fragment: | 751 | case ProgramType::Fragment: |
| 727 | return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); | 752 | return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); |
| 728 | default: | 753 | default: |
| 729 | UNREACHABLE(); | 754 | UNREACHABLE(); |
| @@ -744,7 +769,7 @@ private: | |||
| 744 | // TODO(Subv): Find out what the values are for the first two elements when inside a | 769 | // TODO(Subv): Find out what the values are for the first two elements when inside a |
| 745 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | 770 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval |
| 746 | // shader. | 771 | // shader. |
| 747 | ASSERT(stage == ShaderStage::Vertex); | 772 | ASSERT(IsVertexShader(stage)); |
| 748 | switch (element) { | 773 | switch (element) { |
| 749 | case 2: | 774 | case 2: |
| 750 | // Config pack's first value is instance_id. | 775 | // Config pack's first value is instance_id. |
| @@ -756,7 +781,7 @@ private: | |||
| 756 | return "0"; | 781 | return "0"; |
| 757 | case Attribute::Index::FrontFacing: | 782 | case Attribute::Index::FrontFacing: |
| 758 | // TODO(Subv): Find out what the values are for the other elements. | 783 | // TODO(Subv): Find out what the values are for the other elements. |
| 759 | ASSERT(stage == ShaderStage::Fragment); | 784 | ASSERT(stage == ProgramType::Fragment); |
| 760 | switch (element) { | 785 | switch (element) { |
| 761 | case 3: | 786 | case 3: |
| 762 | return "itof(gl_FrontFacing ? -1 : 0)"; | 787 | return "itof(gl_FrontFacing ? -1 : 0)"; |
| @@ -778,7 +803,7 @@ private: | |||
| 778 | return value; | 803 | return value; |
| 779 | } | 804 | } |
| 780 | // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders | 805 | // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders |
| 781 | const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; | 806 | const std::string precise = stage != ProgramType::Fragment ? "precise " : ""; |
| 782 | 807 | ||
| 783 | const std::string temporary = code.GenerateTemporary(); | 808 | const std::string temporary = code.GenerateTemporary(); |
| 784 | code.AddLine("{}float {} = {};", precise, temporary, value); | 809 | code.AddLine("{}float {} = {};", precise, temporary, value); |
| @@ -803,6 +828,45 @@ private: | |||
| 803 | return CastOperand(VisitOperand(operation, operand_index), type); | 828 | return CastOperand(VisitOperand(operation, operand_index), type); |
| 804 | } | 829 | } |
| 805 | 830 | ||
| 831 | std::optional<std::pair<std::string, bool>> GetOutputAttribute(const AbufNode* abuf) { | ||
| 832 | switch (const auto attribute = abuf->GetIndex()) { | ||
| 833 | case Attribute::Index::Position: | ||
| 834 | return std::make_pair("gl_Position"s + GetSwizzle(abuf->GetElement()), false); | ||
| 835 | case Attribute::Index::LayerViewportPointSize: | ||
| 836 | switch (abuf->GetElement()) { | ||
| 837 | case 0: | ||
| 838 | UNIMPLEMENTED(); | ||
| 839 | return {}; | ||
| 840 | case 1: | ||
| 841 | if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { | ||
| 842 | return {}; | ||
| 843 | } | ||
| 844 | return std::make_pair("gl_Layer", true); | ||
| 845 | case 2: | ||
| 846 | if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { | ||
| 847 | return {}; | ||
| 848 | } | ||
| 849 | return std::make_pair("gl_ViewportIndex", true); | ||
| 850 | case 3: | ||
| 851 | UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader"); | ||
| 852 | return std::make_pair("gl_PointSize", false); | ||
| 853 | } | ||
| 854 | return {}; | ||
| 855 | case Attribute::Index::ClipDistances0123: | ||
| 856 | return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), false); | ||
| 857 | case Attribute::Index::ClipDistances4567: | ||
| 858 | return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), | ||
| 859 | false); | ||
| 860 | default: | ||
| 861 | if (IsGenericAttribute(attribute)) { | ||
| 862 | return std::make_pair( | ||
| 863 | GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), false); | ||
| 864 | } | ||
| 865 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); | ||
| 866 | return {}; | ||
| 867 | } | ||
| 868 | } | ||
| 869 | |||
| 806 | std::string CastOperand(const std::string& value, Type type) const { | 870 | std::string CastOperand(const std::string& value, Type type) const { |
| 807 | switch (type) { | 871 | switch (type) { |
| 808 | case Type::Bool: | 872 | case Type::Bool: |
| @@ -999,6 +1063,8 @@ private: | |||
| 999 | const Node& src = operation[1]; | 1063 | const Node& src = operation[1]; |
| 1000 | 1064 | ||
| 1001 | std::string target; | 1065 | std::string target; |
| 1066 | bool is_integer = false; | ||
| 1067 | |||
| 1002 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | 1068 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { |
| 1003 | if (gpr->GetIndex() == Register::ZeroIndex) { | 1069 | if (gpr->GetIndex() == Register::ZeroIndex) { |
| 1004 | // Writing to Register::ZeroIndex is a no op | 1070 | // Writing to Register::ZeroIndex is a no op |
| @@ -1007,27 +1073,16 @@ private: | |||
| 1007 | target = GetRegister(gpr->GetIndex()); | 1073 | target = GetRegister(gpr->GetIndex()); |
| 1008 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { | 1074 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { |
| 1009 | UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); | 1075 | UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); |
| 1010 | 1076 | const auto result = GetOutputAttribute(abuf); | |
| 1011 | target = [&]() -> std::string { | 1077 | if (!result) { |
| 1012 | switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { | 1078 | return {}; |
| 1013 | case Attribute::Index::Position: | 1079 | } |
| 1014 | return "gl_Position"s + GetSwizzle(abuf->GetElement()); | 1080 | target = result->first; |
| 1015 | case Attribute::Index::PointSize: | 1081 | is_integer = result->second; |
| 1016 | return "gl_PointSize"; | ||
| 1017 | case Attribute::Index::ClipDistances0123: | ||
| 1018 | return fmt::format("gl_ClipDistance[{}]", abuf->GetElement()); | ||
| 1019 | case Attribute::Index::ClipDistances4567: | ||
| 1020 | return fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4); | ||
| 1021 | default: | ||
| 1022 | if (IsGenericAttribute(attribute)) { | ||
| 1023 | return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()); | ||
| 1024 | } | ||
| 1025 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", | ||
| 1026 | static_cast<u32>(attribute)); | ||
| 1027 | return "0"; | ||
| 1028 | } | ||
| 1029 | }(); | ||
| 1030 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | 1082 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { |
| 1083 | if (stage == ProgramType::Compute) { | ||
| 1084 | LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); | ||
| 1085 | } | ||
| 1031 | target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | 1086 | target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); |
| 1032 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | 1087 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |
| 1033 | const std::string real = Visit(gmem->GetRealAddress()); | 1088 | const std::string real = Visit(gmem->GetRealAddress()); |
| @@ -1038,7 +1093,11 @@ private: | |||
| 1038 | UNREACHABLE_MSG("Assign called without a proper target"); | 1093 | UNREACHABLE_MSG("Assign called without a proper target"); |
| 1039 | } | 1094 | } |
| 1040 | 1095 | ||
| 1041 | code.AddLine("{} = {};", target, Visit(src)); | 1096 | if (is_integer) { |
| 1097 | code.AddLine("{} = ftoi({});", target, Visit(src)); | ||
| 1098 | } else { | ||
| 1099 | code.AddLine("{} = {};", target, Visit(src)); | ||
| 1100 | } | ||
| 1042 | return {}; | 1101 | return {}; |
| 1043 | } | 1102 | } |
| 1044 | 1103 | ||
| @@ -1351,14 +1410,10 @@ private: | |||
| 1351 | return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); | 1410 | return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); |
| 1352 | } | 1411 | } |
| 1353 | 1412 | ||
| 1354 | std::string LogicalAll2(Operation operation) { | 1413 | std::string LogicalAnd2(Operation operation) { |
| 1355 | return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); | 1414 | return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); |
| 1356 | } | 1415 | } |
| 1357 | 1416 | ||
| 1358 | std::string LogicalAny2(Operation operation) { | ||
| 1359 | return GenerateUnary(operation, "any", Type::Bool, Type::Bool2); | ||
| 1360 | } | ||
| 1361 | |||
| 1362 | template <bool with_nan> | 1417 | template <bool with_nan> |
| 1363 | std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { | 1418 | std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { |
| 1364 | const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, | 1419 | const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, |
| @@ -1555,6 +1610,14 @@ private: | |||
| 1555 | return {}; | 1610 | return {}; |
| 1556 | } | 1611 | } |
| 1557 | 1612 | ||
| 1613 | std::string BranchIndirect(Operation operation) { | ||
| 1614 | const std::string op_a = VisitOperand(operation, 0, Type::Uint); | ||
| 1615 | |||
| 1616 | code.AddLine("jmp_to = {};", op_a); | ||
| 1617 | code.AddLine("break;"); | ||
| 1618 | return {}; | ||
| 1619 | } | ||
| 1620 | |||
| 1558 | std::string PushFlowStack(Operation operation) { | 1621 | std::string PushFlowStack(Operation operation) { |
| 1559 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | 1622 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); |
| 1560 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | 1623 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); |
| @@ -1573,7 +1636,7 @@ private: | |||
| 1573 | } | 1636 | } |
| 1574 | 1637 | ||
| 1575 | std::string Exit(Operation operation) { | 1638 | std::string Exit(Operation operation) { |
| 1576 | if (stage != ShaderStage::Fragment) { | 1639 | if (stage != ProgramType::Fragment) { |
| 1577 | code.AddLine("return;"); | 1640 | code.AddLine("return;"); |
| 1578 | return {}; | 1641 | return {}; |
| 1579 | } | 1642 | } |
| @@ -1624,7 +1687,7 @@ private: | |||
| 1624 | } | 1687 | } |
| 1625 | 1688 | ||
| 1626 | std::string EmitVertex(Operation operation) { | 1689 | std::string EmitVertex(Operation operation) { |
| 1627 | ASSERT_MSG(stage == ShaderStage::Geometry, | 1690 | ASSERT_MSG(stage == ProgramType::Geometry, |
| 1628 | "EmitVertex is expected to be used in a geometry shader."); | 1691 | "EmitVertex is expected to be used in a geometry shader."); |
| 1629 | 1692 | ||
| 1630 | // If a geometry shader is attached, it will always flip (it's the last stage before | 1693 | // If a geometry shader is attached, it will always flip (it's the last stage before |
| @@ -1635,7 +1698,7 @@ private: | |||
| 1635 | } | 1698 | } |
| 1636 | 1699 | ||
| 1637 | std::string EndPrimitive(Operation operation) { | 1700 | std::string EndPrimitive(Operation operation) { |
| 1638 | ASSERT_MSG(stage == ShaderStage::Geometry, | 1701 | ASSERT_MSG(stage == ProgramType::Geometry, |
| 1639 | "EndPrimitive is expected to be used in a geometry shader."); | 1702 | "EndPrimitive is expected to be used in a geometry shader."); |
| 1640 | 1703 | ||
| 1641 | code.AddLine("EndPrimitive();"); | 1704 | code.AddLine("EndPrimitive();"); |
| @@ -1657,7 +1720,7 @@ private: | |||
| 1657 | return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; | 1720 | return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; |
| 1658 | } | 1721 | } |
| 1659 | 1722 | ||
| 1660 | static constexpr OperationDecompilersArray operation_decompilers = { | 1723 | static constexpr std::array operation_decompilers = { |
| 1661 | &GLSLDecompiler::Assign, | 1724 | &GLSLDecompiler::Assign, |
| 1662 | 1725 | ||
| 1663 | &GLSLDecompiler::Select, | 1726 | &GLSLDecompiler::Select, |
| @@ -1741,8 +1804,7 @@ private: | |||
| 1741 | &GLSLDecompiler::LogicalXor, | 1804 | &GLSLDecompiler::LogicalXor, |
| 1742 | &GLSLDecompiler::LogicalNegate, | 1805 | &GLSLDecompiler::LogicalNegate, |
| 1743 | &GLSLDecompiler::LogicalPick2, | 1806 | &GLSLDecompiler::LogicalPick2, |
| 1744 | &GLSLDecompiler::LogicalAll2, | 1807 | &GLSLDecompiler::LogicalAnd2, |
| 1745 | &GLSLDecompiler::LogicalAny2, | ||
| 1746 | 1808 | ||
| 1747 | &GLSLDecompiler::LogicalLessThan<Type::Float>, | 1809 | &GLSLDecompiler::LogicalLessThan<Type::Float>, |
| 1748 | &GLSLDecompiler::LogicalEqual<Type::Float>, | 1810 | &GLSLDecompiler::LogicalEqual<Type::Float>, |
| @@ -1789,6 +1851,7 @@ private: | |||
| 1789 | &GLSLDecompiler::ImageStore, | 1851 | &GLSLDecompiler::ImageStore, |
| 1790 | 1852 | ||
| 1791 | &GLSLDecompiler::Branch, | 1853 | &GLSLDecompiler::Branch, |
| 1854 | &GLSLDecompiler::BranchIndirect, | ||
| 1792 | &GLSLDecompiler::PushFlowStack, | 1855 | &GLSLDecompiler::PushFlowStack, |
| 1793 | &GLSLDecompiler::PopFlowStack, | 1856 | &GLSLDecompiler::PopFlowStack, |
| 1794 | &GLSLDecompiler::Exit, | 1857 | &GLSLDecompiler::Exit, |
| @@ -1805,6 +1868,7 @@ private: | |||
| 1805 | &GLSLDecompiler::WorkGroupId<1>, | 1868 | &GLSLDecompiler::WorkGroupId<1>, |
| 1806 | &GLSLDecompiler::WorkGroupId<2>, | 1869 | &GLSLDecompiler::WorkGroupId<2>, |
| 1807 | }; | 1870 | }; |
| 1871 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | ||
| 1808 | 1872 | ||
| 1809 | std::string GetRegister(u32 index) const { | 1873 | std::string GetRegister(u32 index) const { |
| 1810 | return GetDeclarationWithSuffix(index, "gpr"); | 1874 | return GetDeclarationWithSuffix(index, "gpr"); |
| @@ -1869,7 +1933,7 @@ private: | |||
| 1869 | } | 1933 | } |
| 1870 | 1934 | ||
| 1871 | u32 GetNumPhysicalInputAttributes() const { | 1935 | u32 GetNumPhysicalInputAttributes() const { |
| 1872 | return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); | 1936 | return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); |
| 1873 | } | 1937 | } |
| 1874 | 1938 | ||
| 1875 | u32 GetNumPhysicalAttributes() const { | 1939 | u32 GetNumPhysicalAttributes() const { |
| @@ -1882,7 +1946,7 @@ private: | |||
| 1882 | 1946 | ||
| 1883 | const Device& device; | 1947 | const Device& device; |
| 1884 | const ShaderIR& ir; | 1948 | const ShaderIR& ir; |
| 1885 | const ShaderStage stage; | 1949 | const ProgramType stage; |
| 1886 | const std::string suffix; | 1950 | const std::string suffix; |
| 1887 | const Header header; | 1951 | const Header header; |
| 1888 | 1952 | ||
| @@ -1913,7 +1977,7 @@ std::string GetCommonDeclarations() { | |||
| 1913 | MAX_CONSTBUFFER_ELEMENTS); | 1977 | MAX_CONSTBUFFER_ELEMENTS); |
| 1914 | } | 1978 | } |
| 1915 | 1979 | ||
| 1916 | ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage, | 1980 | ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, |
| 1917 | const std::string& suffix) { | 1981 | const std::string& suffix) { |
| 1918 | GLSLDecompiler decompiler(device, ir, stage, suffix); | 1982 | GLSLDecompiler decompiler(device, ir, stage, suffix); |
| 1919 | decompiler.Decompile(); | 1983 | decompiler.Decompile(); |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 14d11c7fc..2ea02f5bf 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -12,14 +12,26 @@ | |||
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/shader/shader_ir.h" | 13 | #include "video_core/shader/shader_ir.h" |
| 14 | 14 | ||
| 15 | namespace OpenGL { | ||
| 16 | class Device; | ||
| 17 | } | ||
| 18 | |||
| 19 | namespace VideoCommon::Shader { | 15 | namespace VideoCommon::Shader { |
| 20 | class ShaderIR; | 16 | class ShaderIR; |
| 21 | } | 17 | } |
| 22 | 18 | ||
| 19 | namespace OpenGL { | ||
| 20 | |||
| 21 | class Device; | ||
| 22 | |||
| 23 | enum class ProgramType : u32 { | ||
| 24 | VertexA = 0, | ||
| 25 | VertexB = 1, | ||
| 26 | TessellationControl = 2, | ||
| 27 | TessellationEval = 3, | ||
| 28 | Geometry = 4, | ||
| 29 | Fragment = 5, | ||
| 30 | Compute = 6 | ||
| 31 | }; | ||
| 32 | |||
| 33 | } // namespace OpenGL | ||
| 34 | |||
| 23 | namespace OpenGL::GLShader { | 35 | namespace OpenGL::GLShader { |
| 24 | 36 | ||
| 25 | struct ShaderEntries; | 37 | struct ShaderEntries; |
| @@ -78,12 +90,13 @@ struct ShaderEntries { | |||
| 78 | std::vector<ImageEntry> images; | 90 | std::vector<ImageEntry> images; |
| 79 | std::vector<GlobalMemoryEntry> global_memory_entries; | 91 | std::vector<GlobalMemoryEntry> global_memory_entries; |
| 80 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 92 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 93 | bool shader_viewport_layer_array{}; | ||
| 81 | std::size_t shader_length{}; | 94 | std::size_t shader_length{}; |
| 82 | }; | 95 | }; |
| 83 | 96 | ||
| 84 | std::string GetCommonDeclarations(); | 97 | std::string GetCommonDeclarations(); |
| 85 | 98 | ||
| 86 | ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 99 | ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 87 | Maxwell::ShaderStage stage, const std::string& suffix); | 100 | ProgramType stage, const std::string& suffix); |
| 88 | 101 | ||
| 89 | } // namespace OpenGL::GLShader | 102 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 10688397b..969fe9ced 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -51,7 +51,7 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { | |||
| 51 | 51 | ||
| 52 | } // namespace | 52 | } // namespace |
| 53 | 53 | ||
| 54 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, | 54 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |
| 55 | u32 program_code_size, u32 program_code_size_b, | 55 | u32 program_code_size, u32 program_code_size_b, |
| 56 | ProgramCode program_code, ProgramCode program_code_b) | 56 | ProgramCode program_code, ProgramCode program_code_b) |
| 57 | : unique_identifier{unique_identifier}, program_type{program_type}, | 57 | : unique_identifier{unique_identifier}, program_type{program_type}, |
| @@ -373,6 +373,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn | |||
| 373 | } | 373 | } |
| 374 | } | 374 | } |
| 375 | 375 | ||
| 376 | bool shader_viewport_layer_array{}; | ||
| 377 | if (!LoadObjectFromPrecompiled(shader_viewport_layer_array)) { | ||
| 378 | return {}; | ||
| 379 | } | ||
| 380 | entry.entries.shader_viewport_layer_array = shader_viewport_layer_array; | ||
| 381 | |||
| 376 | u64 shader_length{}; | 382 | u64 shader_length{}; |
| 377 | if (!LoadObjectFromPrecompiled(shader_length)) { | 383 | if (!LoadObjectFromPrecompiled(shader_length)) { |
| 378 | return {}; | 384 | return {}; |
| @@ -445,6 +451,10 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: | |||
| 445 | } | 451 | } |
| 446 | } | 452 | } |
| 447 | 453 | ||
| 454 | if (!SaveObjectToPrecompiled(entries.shader_viewport_layer_array)) { | ||
| 455 | return false; | ||
| 456 | } | ||
| 457 | |||
| 448 | if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { | 458 | if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { |
| 449 | return false; | 459 | return false; |
| 450 | } | 460 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 4f296dda6..cc8bbd61e 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #include "common/assert.h" | 18 | #include "common/assert.h" |
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "core/file_sys/vfs_vector.h" | 20 | #include "core/file_sys/vfs_vector.h" |
| 21 | #include "video_core/engines/maxwell_3d.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 21 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 23 | 22 | ||
| 24 | namespace Core { | 23 | namespace Core { |
| @@ -34,14 +33,11 @@ namespace OpenGL { | |||
| 34 | struct ShaderDiskCacheUsage; | 33 | struct ShaderDiskCacheUsage; |
| 35 | struct ShaderDiskCacheDump; | 34 | struct ShaderDiskCacheDump; |
| 36 | 35 | ||
| 37 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; | ||
| 38 | |||
| 39 | using ProgramCode = std::vector<u64>; | 36 | using ProgramCode = std::vector<u64>; |
| 40 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 37 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; |
| 41 | |||
| 42 | using TextureBufferUsage = std::bitset<64>; | 38 | using TextureBufferUsage = std::bitset<64>; |
| 43 | 39 | ||
| 44 | /// Allocated bindings used by an OpenGL shader program. | 40 | /// Allocated bindings used by an OpenGL shader program |
| 45 | struct BaseBindings { | 41 | struct BaseBindings { |
| 46 | u32 cbuf{}; | 42 | u32 cbuf{}; |
| 47 | u32 gmem{}; | 43 | u32 gmem{}; |
| @@ -126,7 +122,7 @@ namespace OpenGL { | |||
| 126 | /// Describes a shader how it's used by the guest GPU | 122 | /// Describes a shader how it's used by the guest GPU |
| 127 | class ShaderDiskCacheRaw { | 123 | class ShaderDiskCacheRaw { |
| 128 | public: | 124 | public: |
| 129 | explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, | 125 | explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |
| 130 | u32 program_code_size, u32 program_code_size_b, | 126 | u32 program_code_size, u32 program_code_size_b, |
| 131 | ProgramCode program_code, ProgramCode program_code_b); | 127 | ProgramCode program_code, ProgramCode program_code_b); |
| 132 | ShaderDiskCacheRaw(); | 128 | ShaderDiskCacheRaw(); |
| @@ -141,30 +137,13 @@ public: | |||
| 141 | } | 137 | } |
| 142 | 138 | ||
| 143 | bool HasProgramA() const { | 139 | bool HasProgramA() const { |
| 144 | return program_type == Maxwell::ShaderProgram::VertexA; | 140 | return program_type == ProgramType::VertexA; |
| 145 | } | 141 | } |
| 146 | 142 | ||
| 147 | Maxwell::ShaderProgram GetProgramType() const { | 143 | ProgramType GetProgramType() const { |
| 148 | return program_type; | 144 | return program_type; |
| 149 | } | 145 | } |
| 150 | 146 | ||
| 151 | Maxwell::ShaderStage GetProgramStage() const { | ||
| 152 | switch (program_type) { | ||
| 153 | case Maxwell::ShaderProgram::VertexA: | ||
| 154 | case Maxwell::ShaderProgram::VertexB: | ||
| 155 | return Maxwell::ShaderStage::Vertex; | ||
| 156 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 157 | return Maxwell::ShaderStage::TesselationControl; | ||
| 158 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 159 | return Maxwell::ShaderStage::TesselationEval; | ||
| 160 | case Maxwell::ShaderProgram::Geometry: | ||
| 161 | return Maxwell::ShaderStage::Geometry; | ||
| 162 | case Maxwell::ShaderProgram::Fragment: | ||
| 163 | return Maxwell::ShaderStage::Fragment; | ||
| 164 | } | ||
| 165 | UNREACHABLE(); | ||
| 166 | } | ||
| 167 | |||
| 168 | const ProgramCode& GetProgramCode() const { | 147 | const ProgramCode& GetProgramCode() const { |
| 169 | return program_code; | 148 | return program_code; |
| 170 | } | 149 | } |
| @@ -175,7 +154,7 @@ public: | |||
| 175 | 154 | ||
| 176 | private: | 155 | private: |
| 177 | u64 unique_identifier{}; | 156 | u64 unique_identifier{}; |
| 178 | Maxwell::ShaderProgram program_type{}; | 157 | ProgramType program_type{}; |
| 179 | u32 program_code_size{}; | 158 | u32 program_code_size{}; |
| 180 | u32 program_code_size_b{}; | 159 | u32 program_code_size_b{}; |
| 181 | 160 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 9148629ec..3a8d9e1da 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -14,7 +14,8 @@ using Tegra::Engines::Maxwell3D; | |||
| 14 | using VideoCommon::Shader::ProgramCode; | 14 | using VideoCommon::Shader::ProgramCode; |
| 15 | using VideoCommon::Shader::ShaderIR; | 15 | using VideoCommon::Shader::ShaderIR; |
| 16 | 16 | ||
| 17 | static constexpr u32 PROGRAM_OFFSET{10}; | 17 | static constexpr u32 PROGRAM_OFFSET = 10; |
| 18 | static constexpr u32 COMPUTE_OFFSET = 0; | ||
| 18 | 19 | ||
| 19 | ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { | 20 | ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { |
| 20 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 21 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| @@ -29,17 +30,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | |||
| 29 | }; | 30 | }; |
| 30 | 31 | ||
| 31 | )"; | 32 | )"; |
| 32 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | ||
| 33 | ProgramResult program = | ||
| 34 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); | ||
| 35 | 33 | ||
| 34 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | ||
| 35 | const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; | ||
| 36 | ProgramResult program = Decompile(device, program_ir, stage, "vertex"); | ||
| 36 | out += program.first; | 37 | out += program.first; |
| 37 | 38 | ||
| 38 | if (setup.IsDualProgram()) { | 39 | if (setup.IsDualProgram()) { |
| 39 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); | 40 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); |
| 40 | ProgramResult program_b = | 41 | ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); |
| 41 | Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); | ||
| 42 | |||
| 43 | out += program_b.first; | 42 | out += program_b.first; |
| 44 | } | 43 | } |
| 45 | 44 | ||
| @@ -80,9 +79,9 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | |||
| 80 | }; | 79 | }; |
| 81 | 80 | ||
| 82 | )"; | 81 | )"; |
| 83 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | 82 | |
| 84 | ProgramResult program = | 83 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); |
| 85 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); | 84 | ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); |
| 86 | out += program.first; | 85 | out += program.first; |
| 87 | 86 | ||
| 88 | out += R"( | 87 | out += R"( |
| @@ -115,10 +114,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | |||
| 115 | }; | 114 | }; |
| 116 | 115 | ||
| 117 | )"; | 116 | )"; |
| 118 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | 117 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); |
| 119 | ProgramResult program = | 118 | ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); |
| 120 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); | ||
| 121 | |||
| 122 | out += program.first; | 119 | out += program.first; |
| 123 | 120 | ||
| 124 | out += R"( | 121 | out += R"( |
| @@ -130,4 +127,22 @@ void main() { | |||
| 130 | return {std::move(out), std::move(program.second)}; | 127 | return {std::move(out), std::move(program.second)}; |
| 131 | } | 128 | } |
| 132 | 129 | ||
| 130 | ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) { | ||
| 131 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||
| 132 | |||
| 133 | std::string out = "// Shader Unique Id: CS" + id + "\n\n"; | ||
| 134 | out += GetCommonDeclarations(); | ||
| 135 | |||
| 136 | const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a); | ||
| 137 | ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); | ||
| 138 | out += program.first; | ||
| 139 | |||
| 140 | out += R"( | ||
| 141 | void main() { | ||
| 142 | execute_compute(); | ||
| 143 | } | ||
| 144 | )"; | ||
| 145 | return {std::move(out), std::move(program.second)}; | ||
| 146 | } | ||
| 147 | |||
| 133 | } // namespace OpenGL::GLShader | 148 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 0536c8a03..3833e88ab 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -27,6 +27,8 @@ struct ShaderSetup { | |||
| 27 | ProgramCode code; | 27 | ProgramCode code; |
| 28 | ProgramCode code_b; // Used for dual vertex shaders | 28 | ProgramCode code_b; // Used for dual vertex shaders |
| 29 | u64 unique_identifier; | 29 | u64 unique_identifier; |
| 30 | std::size_t size_a; | ||
| 31 | std::size_t size_b; | ||
| 30 | } program; | 32 | } program; |
| 31 | 33 | ||
| 32 | /// Used in scenarios where we have a dual vertex shaders | 34 | /// Used in scenarios where we have a dual vertex shaders |
| @@ -52,4 +54,7 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se | |||
| 52 | /// Generates the GLSL fragment shader program source code for the given FS program | 54 | /// Generates the GLSL fragment shader program source code for the given FS program |
| 53 | ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); | 55 | ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); |
| 54 | 56 | ||
| 57 | /// Generates the GLSL compute shader program source code for the given CS program | ||
| 58 | ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup); | ||
| 59 | |||
| 55 | } // namespace OpenGL::GLShader | 60 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 5f3fe067e..9e74eda0d 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp | |||
| @@ -10,21 +10,25 @@ | |||
| 10 | 10 | ||
| 11 | namespace OpenGL::GLShader { | 11 | namespace OpenGL::GLShader { |
| 12 | 12 | ||
| 13 | GLuint LoadShader(const char* source, GLenum type) { | 13 | namespace { |
| 14 | const char* debug_type; | 14 | const char* GetStageDebugName(GLenum type) { |
| 15 | switch (type) { | 15 | switch (type) { |
| 16 | case GL_VERTEX_SHADER: | 16 | case GL_VERTEX_SHADER: |
| 17 | debug_type = "vertex"; | 17 | return "vertex"; |
| 18 | break; | ||
| 19 | case GL_GEOMETRY_SHADER: | 18 | case GL_GEOMETRY_SHADER: |
| 20 | debug_type = "geometry"; | 19 | return "geometry"; |
| 21 | break; | ||
| 22 | case GL_FRAGMENT_SHADER: | 20 | case GL_FRAGMENT_SHADER: |
| 23 | debug_type = "fragment"; | 21 | return "fragment"; |
| 24 | break; | 22 | case GL_COMPUTE_SHADER: |
| 25 | default: | 23 | return "compute"; |
| 26 | UNREACHABLE(); | ||
| 27 | } | 24 | } |
| 25 | UNIMPLEMENTED(); | ||
| 26 | return "unknown"; | ||
| 27 | } | ||
| 28 | } // Anonymous namespace | ||
| 29 | |||
| 30 | GLuint LoadShader(const char* source, GLenum type) { | ||
| 31 | const char* debug_type = GetStageDebugName(type); | ||
| 28 | const GLuint shader_id = glCreateShader(type); | 32 | const GLuint shader_id = glCreateShader(type); |
| 29 | glShaderSource(shader_id, 1, &source, nullptr); | 33 | glShaderSource(shader_id, 1, &source, nullptr); |
| 30 | LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); | 34 | LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); |
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index d86e137ac..f4777d0b0 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -6,8 +6,11 @@ | |||
| 6 | #include <glad/glad.h> | 6 | #include <glad/glad.h> |
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 9 | #include "common/microprofile.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_state.h" | 10 | #include "video_core/renderer_opengl/gl_state.h" |
| 10 | 11 | ||
| 12 | MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128)); | ||
| 13 | |||
| 11 | namespace OpenGL { | 14 | namespace OpenGL { |
| 12 | 15 | ||
| 13 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 16 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| @@ -162,6 +165,25 @@ OpenGLState::OpenGLState() { | |||
| 162 | alpha_test.ref = 0.0f; | 165 | alpha_test.ref = 0.0f; |
| 163 | } | 166 | } |
| 164 | 167 | ||
| 168 | void OpenGLState::SetDefaultViewports() { | ||
| 169 | for (auto& item : viewports) { | ||
| 170 | item.x = 0; | ||
| 171 | item.y = 0; | ||
| 172 | item.width = 0; | ||
| 173 | item.height = 0; | ||
| 174 | item.depth_range_near = 0.0f; | ||
| 175 | item.depth_range_far = 1.0f; | ||
| 176 | item.scissor.enabled = false; | ||
| 177 | item.scissor.x = 0; | ||
| 178 | item.scissor.y = 0; | ||
| 179 | item.scissor.width = 0; | ||
| 180 | item.scissor.height = 0; | ||
| 181 | } | ||
| 182 | |||
| 183 | depth_clamp.far_plane = false; | ||
| 184 | depth_clamp.near_plane = false; | ||
| 185 | } | ||
| 186 | |||
| 165 | void OpenGLState::ApplyDefaultState() { | 187 | void OpenGLState::ApplyDefaultState() { |
| 166 | glEnable(GL_BLEND); | 188 | glEnable(GL_BLEND); |
| 167 | glDisable(GL_FRAMEBUFFER_SRGB); | 189 | glDisable(GL_FRAMEBUFFER_SRGB); |
| @@ -523,7 +545,8 @@ void OpenGLState::ApplySamplers() const { | |||
| 523 | } | 545 | } |
| 524 | } | 546 | } |
| 525 | 547 | ||
| 526 | void OpenGLState::Apply() const { | 548 | void OpenGLState::Apply() { |
| 549 | MICROPROFILE_SCOPE(OpenGL_State); | ||
| 527 | ApplyFramebufferState(); | 550 | ApplyFramebufferState(); |
| 528 | ApplyVertexArrayState(); | 551 | ApplyVertexArrayState(); |
| 529 | ApplyShaderProgram(); | 552 | ApplyShaderProgram(); |
| @@ -532,19 +555,31 @@ void OpenGLState::Apply() const { | |||
| 532 | ApplyPointSize(); | 555 | ApplyPointSize(); |
| 533 | ApplyFragmentColorClamp(); | 556 | ApplyFragmentColorClamp(); |
| 534 | ApplyMultisample(); | 557 | ApplyMultisample(); |
| 558 | if (dirty.color_mask) { | ||
| 559 | ApplyColorMask(); | ||
| 560 | dirty.color_mask = false; | ||
| 561 | } | ||
| 535 | ApplyDepthClamp(); | 562 | ApplyDepthClamp(); |
| 536 | ApplyColorMask(); | ||
| 537 | ApplyViewport(); | 563 | ApplyViewport(); |
| 538 | ApplyStencilTest(); | 564 | if (dirty.stencil_state) { |
| 565 | ApplyStencilTest(); | ||
| 566 | dirty.stencil_state = false; | ||
| 567 | } | ||
| 539 | ApplySRgb(); | 568 | ApplySRgb(); |
| 540 | ApplyCulling(); | 569 | ApplyCulling(); |
| 541 | ApplyDepth(); | 570 | ApplyDepth(); |
| 542 | ApplyPrimitiveRestart(); | 571 | ApplyPrimitiveRestart(); |
| 543 | ApplyBlending(); | 572 | if (dirty.blend_state) { |
| 573 | ApplyBlending(); | ||
| 574 | dirty.blend_state = false; | ||
| 575 | } | ||
| 544 | ApplyLogicOp(); | 576 | ApplyLogicOp(); |
| 545 | ApplyTextures(); | 577 | ApplyTextures(); |
| 546 | ApplySamplers(); | 578 | ApplySamplers(); |
| 547 | ApplyPolygonOffset(); | 579 | if (dirty.polygon_offset) { |
| 580 | ApplyPolygonOffset(); | ||
| 581 | dirty.polygon_offset = false; | ||
| 582 | } | ||
| 548 | ApplyAlphaTest(); | 583 | ApplyAlphaTest(); |
| 549 | } | 584 | } |
| 550 | 585 | ||
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index b0140495d..fdf9a8a12 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -195,8 +195,9 @@ public: | |||
| 195 | s_rgb_used = false; | 195 | s_rgb_used = false; |
| 196 | } | 196 | } |
| 197 | 197 | ||
| 198 | void SetDefaultViewports(); | ||
| 198 | /// Apply this state as the current OpenGL state | 199 | /// Apply this state as the current OpenGL state |
| 199 | void Apply() const; | 200 | void Apply(); |
| 200 | 201 | ||
| 201 | void ApplyFramebufferState() const; | 202 | void ApplyFramebufferState() const; |
| 202 | void ApplyVertexArrayState() const; | 203 | void ApplyVertexArrayState() const; |
| @@ -237,11 +238,41 @@ public: | |||
| 237 | /// Viewport does not affects glClearBuffer so emulate viewport using scissor test | 238 | /// Viewport does not affects glClearBuffer so emulate viewport using scissor test |
| 238 | void EmulateViewportWithScissor(); | 239 | void EmulateViewportWithScissor(); |
| 239 | 240 | ||
| 241 | void MarkDirtyBlendState() { | ||
| 242 | dirty.blend_state = true; | ||
| 243 | } | ||
| 244 | |||
| 245 | void MarkDirtyStencilState() { | ||
| 246 | dirty.stencil_state = true; | ||
| 247 | } | ||
| 248 | |||
| 249 | void MarkDirtyPolygonOffset() { | ||
| 250 | dirty.polygon_offset = true; | ||
| 251 | } | ||
| 252 | |||
| 253 | void MarkDirtyColorMask() { | ||
| 254 | dirty.color_mask = true; | ||
| 255 | } | ||
| 256 | |||
| 257 | void AllDirty() { | ||
| 258 | dirty.blend_state = true; | ||
| 259 | dirty.stencil_state = true; | ||
| 260 | dirty.polygon_offset = true; | ||
| 261 | dirty.color_mask = true; | ||
| 262 | } | ||
| 263 | |||
| 240 | private: | 264 | private: |
| 241 | static OpenGLState cur_state; | 265 | static OpenGLState cur_state; |
| 242 | 266 | ||
| 243 | // Workaround for sRGB problems caused by QT not supporting srgb output | 267 | // Workaround for sRGB problems caused by QT not supporting srgb output |
| 244 | static bool s_rgb_used; | 268 | static bool s_rgb_used; |
| 269 | struct { | ||
| 270 | bool blend_state; | ||
| 271 | bool stencil_state; | ||
| 272 | bool viewport_state; | ||
| 273 | bool polygon_offset; | ||
| 274 | bool color_mask; | ||
| 275 | } dirty{}; | ||
| 245 | }; | 276 | }; |
| 246 | 277 | ||
| 247 | } // namespace OpenGL | 278 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 08ae1a429..408332f90 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -31,6 +31,8 @@ using VideoCore::Surface::SurfaceType; | |||
| 31 | 31 | ||
| 32 | MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); | 32 | MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); |
| 33 | MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); | 33 | MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); |
| 34 | MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", | ||
| 35 | MP_RGB(128, 192, 128)); | ||
| 34 | 36 | ||
| 35 | namespace { | 37 | namespace { |
| 36 | 38 | ||
| @@ -135,7 +137,6 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format | |||
| 135 | const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { | 137 | const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { |
| 136 | ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); | 138 | ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); |
| 137 | const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]}; | 139 | const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]}; |
| 138 | ASSERT(component_type == format.component_type); | ||
| 139 | return format; | 140 | return format; |
| 140 | } | 141 | } |
| 141 | 142 | ||
| @@ -483,11 +484,15 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, | |||
| 483 | const auto& dst_params{dst_view->GetSurfaceParams()}; | 484 | const auto& dst_params{dst_view->GetSurfaceParams()}; |
| 484 | 485 | ||
| 485 | OpenGLState prev_state{OpenGLState::GetCurState()}; | 486 | OpenGLState prev_state{OpenGLState::GetCurState()}; |
| 486 | SCOPE_EXIT({ prev_state.Apply(); }); | 487 | SCOPE_EXIT({ |
| 488 | prev_state.AllDirty(); | ||
| 489 | prev_state.Apply(); | ||
| 490 | }); | ||
| 487 | 491 | ||
| 488 | OpenGLState state; | 492 | OpenGLState state; |
| 489 | state.draw.read_framebuffer = src_framebuffer.handle; | 493 | state.draw.read_framebuffer = src_framebuffer.handle; |
| 490 | state.draw.draw_framebuffer = dst_framebuffer.handle; | 494 | state.draw.draw_framebuffer = dst_framebuffer.handle; |
| 495 | state.AllDirty(); | ||
| 491 | state.Apply(); | 496 | state.Apply(); |
| 492 | 497 | ||
| 493 | u32 buffers{}; | 498 | u32 buffers{}; |
| @@ -535,6 +540,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, | |||
| 535 | } | 540 | } |
| 536 | 541 | ||
| 537 | void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { | 542 | void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { |
| 543 | MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy); | ||
| 538 | const auto& src_params = src_surface->GetSurfaceParams(); | 544 | const auto& src_params = src_surface->GetSurfaceParams(); |
| 539 | const auto& dst_params = dst_surface->GetSurfaceParams(); | 545 | const auto& dst_params = dst_surface->GetSurfaceParams(); |
| 540 | UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); | 546 | UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b142521ec..a05cef3b9 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -101,7 +101,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst | |||
| 101 | 101 | ||
| 102 | RendererOpenGL::~RendererOpenGL() = default; | 102 | RendererOpenGL::~RendererOpenGL() = default; |
| 103 | 103 | ||
| 104 | /// Swap buffers (render frame) | ||
| 105 | void RendererOpenGL::SwapBuffers( | 104 | void RendererOpenGL::SwapBuffers( |
| 106 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | 105 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { |
| 107 | 106 | ||
| @@ -109,6 +108,7 @@ void RendererOpenGL::SwapBuffers( | |||
| 109 | 108 | ||
| 110 | // Maintain the rasterizer's state as a priority | 109 | // Maintain the rasterizer's state as a priority |
| 111 | OpenGLState prev_state = OpenGLState::GetCurState(); | 110 | OpenGLState prev_state = OpenGLState::GetCurState(); |
| 111 | state.AllDirty(); | ||
| 112 | state.Apply(); | 112 | state.Apply(); |
| 113 | 113 | ||
| 114 | if (framebuffer) { | 114 | if (framebuffer) { |
| @@ -130,6 +130,8 @@ void RendererOpenGL::SwapBuffers( | |||
| 130 | 130 | ||
| 131 | DrawScreen(render_window.GetFramebufferLayout()); | 131 | DrawScreen(render_window.GetFramebufferLayout()); |
| 132 | 132 | ||
| 133 | rasterizer->TickFrame(); | ||
| 134 | |||
| 133 | render_window.SwapBuffers(); | 135 | render_window.SwapBuffers(); |
| 134 | } | 136 | } |
| 135 | 137 | ||
| @@ -139,6 +141,7 @@ void RendererOpenGL::SwapBuffers( | |||
| 139 | system.GetPerfStats().BeginSystemFrame(); | 141 | system.GetPerfStats().BeginSystemFrame(); |
| 140 | 142 | ||
| 141 | // Restore the rasterizer state | 143 | // Restore the rasterizer state |
| 144 | prev_state.AllDirty(); | ||
| 142 | prev_state.Apply(); | 145 | prev_state.Apply(); |
| 143 | } | 146 | } |
| 144 | 147 | ||
| @@ -205,6 +208,7 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 205 | // Link shaders and get variable locations | 208 | // Link shaders and get variable locations |
| 206 | shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); | 209 | shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); |
| 207 | state.draw.shader_program = shader.handle; | 210 | state.draw.shader_program = shader.handle; |
| 211 | state.AllDirty(); | ||
| 208 | state.Apply(); | 212 | state.Apply(); |
| 209 | uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); | 213 | uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); |
| 210 | uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); | 214 | uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); |
| @@ -262,7 +266,6 @@ void RendererOpenGL::CreateRasterizer() { | |||
| 262 | if (rasterizer) { | 266 | if (rasterizer) { |
| 263 | return; | 267 | return; |
| 264 | } | 268 | } |
| 265 | // Initialize sRGB Usage | ||
| 266 | OpenGLState::ClearsRGBUsed(); | 269 | OpenGLState::ClearsRGBUsed(); |
| 267 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); | 270 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); |
| 268 | } | 271 | } |
| @@ -338,12 +341,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, | |||
| 338 | // Workaround brigthness problems in SMO by enabling sRGB in the final output | 341 | // Workaround brigthness problems in SMO by enabling sRGB in the final output |
| 339 | // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 | 342 | // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 |
| 340 | state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); | 343 | state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); |
| 344 | state.AllDirty(); | ||
| 341 | state.Apply(); | 345 | state.Apply(); |
| 342 | glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); | 346 | glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); |
| 343 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | 347 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); |
| 344 | // Restore default state | 348 | // Restore default state |
| 345 | state.framebuffer_srgb.enabled = false; | 349 | state.framebuffer_srgb.enabled = false; |
| 346 | state.texture_units[0].texture = 0; | 350 | state.texture_units[0].texture = 0; |
| 351 | state.AllDirty(); | ||
| 347 | state.Apply(); | 352 | state.Apply(); |
| 348 | // Clear sRGB state for the next frame | 353 | // Clear sRGB state for the next frame |
| 349 | OpenGLState::ClearsRGBUsed(); | 354 | OpenGLState::ClearsRGBUsed(); |
| @@ -388,6 +393,7 @@ void RendererOpenGL::CaptureScreenshot() { | |||
| 388 | GLuint old_read_fb = state.draw.read_framebuffer; | 393 | GLuint old_read_fb = state.draw.read_framebuffer; |
| 389 | GLuint old_draw_fb = state.draw.draw_framebuffer; | 394 | GLuint old_draw_fb = state.draw.draw_framebuffer; |
| 390 | state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; | 395 | state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; |
| 396 | state.AllDirty(); | ||
| 391 | state.Apply(); | 397 | state.Apply(); |
| 392 | 398 | ||
| 393 | Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; | 399 | Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; |
| @@ -407,6 +413,7 @@ void RendererOpenGL::CaptureScreenshot() { | |||
| 407 | screenshot_framebuffer.Release(); | 413 | screenshot_framebuffer.Release(); |
| 408 | state.draw.read_framebuffer = old_read_fb; | 414 | state.draw.read_framebuffer = old_read_fb; |
| 409 | state.draw.draw_framebuffer = old_draw_fb; | 415 | state.draw.draw_framebuffer = old_draw_fb; |
| 416 | state.AllDirty(); | ||
| 410 | state.Apply(); | 417 | state.Apply(); |
| 411 | glDeleteRenderbuffers(1, &renderbuffer); | 418 | glDeleteRenderbuffers(1, &renderbuffer); |
| 412 | 419 | ||
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index 68c36988d..c504a2c1a 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp | |||
| @@ -13,29 +13,67 @@ | |||
| 13 | 13 | ||
| 14 | namespace OpenGL { | 14 | namespace OpenGL { |
| 15 | 15 | ||
| 16 | VertexArrayPushBuffer::VertexArrayPushBuffer() = default; | ||
| 17 | |||
| 18 | VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; | ||
| 19 | |||
| 20 | void VertexArrayPushBuffer::Setup(GLuint vao_) { | ||
| 21 | vao = vao_; | ||
| 22 | index_buffer = nullptr; | ||
| 23 | vertex_buffers.clear(); | ||
| 24 | } | ||
| 25 | |||
| 26 | void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) { | ||
| 27 | index_buffer = buffer; | ||
| 28 | } | ||
| 29 | |||
| 30 | void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer, | ||
| 31 | GLintptr offset, GLsizei stride) { | ||
| 32 | vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride}); | ||
| 33 | } | ||
| 34 | |||
| 35 | void VertexArrayPushBuffer::Bind() { | ||
| 36 | if (index_buffer) { | ||
| 37 | glVertexArrayElementBuffer(vao, *index_buffer); | ||
| 38 | } | ||
| 39 | |||
| 40 | // TODO(Rodrigo): Find a way to ARB_multi_bind this | ||
| 41 | for (const auto& entry : vertex_buffers) { | ||
| 42 | glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset, | ||
| 43 | entry.stride); | ||
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 16 | BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} | 47 | BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} |
| 17 | 48 | ||
| 18 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; | 49 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; |
| 19 | 50 | ||
| 20 | void BindBuffersRangePushBuffer::Setup(GLuint first_) { | 51 | void BindBuffersRangePushBuffer::Setup(GLuint first_) { |
| 21 | first = first_; | 52 | first = first_; |
| 22 | buffers.clear(); | 53 | buffer_pointers.clear(); |
| 23 | offsets.clear(); | 54 | offsets.clear(); |
| 24 | sizes.clear(); | 55 | sizes.clear(); |
| 25 | } | 56 | } |
| 26 | 57 | ||
| 27 | void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) { | 58 | void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) { |
| 28 | buffers.push_back(buffer); | 59 | buffer_pointers.push_back(buffer); |
| 29 | offsets.push_back(offset); | 60 | offsets.push_back(offset); |
| 30 | sizes.push_back(size); | 61 | sizes.push_back(size); |
| 31 | } | 62 | } |
| 32 | 63 | ||
| 33 | void BindBuffersRangePushBuffer::Bind() const { | 64 | void BindBuffersRangePushBuffer::Bind() { |
| 34 | const std::size_t count{buffers.size()}; | 65 | // Ensure sizes are valid. |
| 66 | const std::size_t count{buffer_pointers.size()}; | ||
| 35 | DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); | 67 | DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); |
| 36 | if (count == 0) { | 68 | if (count == 0) { |
| 37 | return; | 69 | return; |
| 38 | } | 70 | } |
| 71 | |||
| 72 | // Dereference buffers. | ||
| 73 | buffers.resize(count); | ||
| 74 | std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(), | ||
| 75 | [](const GLuint* pointer) { return *pointer; }); | ||
| 76 | |||
| 39 | glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), | 77 | glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), |
| 40 | sizes.data()); | 78 | sizes.data()); |
| 41 | } | 79 | } |
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 4a752f3b4..6c2b45546 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h | |||
| @@ -11,20 +11,49 @@ | |||
| 11 | 11 | ||
| 12 | namespace OpenGL { | 12 | namespace OpenGL { |
| 13 | 13 | ||
| 14 | class BindBuffersRangePushBuffer { | 14 | class VertexArrayPushBuffer final { |
| 15 | public: | 15 | public: |
| 16 | BindBuffersRangePushBuffer(GLenum target); | 16 | explicit VertexArrayPushBuffer(); |
| 17 | ~VertexArrayPushBuffer(); | ||
| 18 | |||
| 19 | void Setup(GLuint vao_); | ||
| 20 | |||
| 21 | void SetIndexBuffer(const GLuint* buffer); | ||
| 22 | |||
| 23 | void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset, | ||
| 24 | GLsizei stride); | ||
| 25 | |||
| 26 | void Bind(); | ||
| 27 | |||
| 28 | private: | ||
| 29 | struct Entry { | ||
| 30 | GLuint binding_index{}; | ||
| 31 | const GLuint* buffer{}; | ||
| 32 | GLintptr offset{}; | ||
| 33 | GLsizei stride{}; | ||
| 34 | }; | ||
| 35 | |||
| 36 | GLuint vao{}; | ||
| 37 | const GLuint* index_buffer{}; | ||
| 38 | std::vector<Entry> vertex_buffers; | ||
| 39 | }; | ||
| 40 | |||
| 41 | class BindBuffersRangePushBuffer final { | ||
| 42 | public: | ||
| 43 | explicit BindBuffersRangePushBuffer(GLenum target); | ||
| 17 | ~BindBuffersRangePushBuffer(); | 44 | ~BindBuffersRangePushBuffer(); |
| 18 | 45 | ||
| 19 | void Setup(GLuint first_); | 46 | void Setup(GLuint first_); |
| 20 | 47 | ||
| 21 | void Push(GLuint buffer, GLintptr offset, GLsizeiptr size); | 48 | void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size); |
| 22 | 49 | ||
| 23 | void Bind() const; | 50 | void Bind(); |
| 24 | 51 | ||
| 25 | private: | 52 | private: |
| 26 | GLenum target; | 53 | GLenum target{}; |
| 27 | GLuint first; | 54 | GLuint first{}; |
| 55 | std::vector<const GLuint*> buffer_pointers; | ||
| 56 | |||
| 28 | std::vector<GLuint> buffers; | 57 | std::vector<GLuint> buffers; |
| 29 | std::vector<GLintptr> offsets; | 58 | std::vector<GLintptr> offsets; |
| 30 | std::vector<GLsizeiptr> sizes; | 59 | std::vector<GLsizeiptr> sizes; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 02a9f5ecb..d2e9f4031 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -109,8 +109,8 @@ void VKBufferCache::Reserve(std::size_t max_size) { | |||
| 109 | } | 109 | } |
| 110 | } | 110 | } |
| 111 | 111 | ||
| 112 | VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) { | 112 | void VKBufferCache::Send() { |
| 113 | return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base); | 113 | stream_buffer->Send(buffer_offset - buffer_offset_base); |
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | void VKBufferCache::AlignBuffer(std::size_t alignment) { | 116 | void VKBufferCache::AlignBuffer(std::size_t alignment) { |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3edf460df..49f13bcdc 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -77,7 +77,7 @@ public: | |||
| 77 | void Reserve(std::size_t max_size); | 77 | void Reserve(std::size_t max_size); |
| 78 | 78 | ||
| 79 | /// Ensures that the set data is sent to the device. | 79 | /// Ensures that the set data is sent to the device. |
| 80 | [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx); | 80 | void Send(); |
| 81 | 81 | ||
| 82 | /// Returns the buffer cache handle. | 82 | /// Returns the buffer cache handle. |
| 83 | vk::Buffer GetBuffer() const { | 83 | vk::Buffer GetBuffer() const { |
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h index 771b05c73..1f73b716b 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.h +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h | |||
| @@ -4,9 +4,6 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/renderer_vulkan/declarations.h" | 7 | #include "video_core/renderer_vulkan/declarations.h" |
| 11 | #include "video_core/sampler_cache.h" | 8 | #include "video_core/sampler_cache.h" |
| 12 | #include "video_core/textures/texture.h" | 9 | #include "video_core/textures/texture.h" |
| @@ -21,9 +18,9 @@ public: | |||
| 21 | ~VKSamplerCache(); | 18 | ~VKSamplerCache(); |
| 22 | 19 | ||
| 23 | protected: | 20 | protected: |
| 24 | UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; | 21 | UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; |
| 25 | 22 | ||
| 26 | vk::Sampler ToSamplerType(const UniqueSampler& sampler) const; | 23 | vk::Sampler ToSamplerType(const UniqueSampler& sampler) const override; |
| 27 | 24 | ||
| 28 | private: | 25 | private: |
| 29 | const VKDevice& device; | 26 | const VKDevice& device; |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index f1fea1871..0f8116458 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -19,23 +19,19 @@ VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_man | |||
| 19 | 19 | ||
| 20 | VKScheduler::~VKScheduler() = default; | 20 | VKScheduler::~VKScheduler() = default; |
| 21 | 21 | ||
| 22 | VKExecutionContext VKScheduler::GetExecutionContext() const { | 22 | void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) { |
| 23 | return VKExecutionContext(current_fence, current_cmdbuf); | ||
| 24 | } | ||
| 25 | |||
| 26 | VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) { | ||
| 27 | SubmitExecution(semaphore); | 23 | SubmitExecution(semaphore); |
| 28 | current_fence->Release(); | 24 | if (release_fence) |
| 25 | current_fence->Release(); | ||
| 29 | AllocateNewContext(); | 26 | AllocateNewContext(); |
| 30 | return GetExecutionContext(); | ||
| 31 | } | 27 | } |
| 32 | 28 | ||
| 33 | VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) { | 29 | void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) { |
| 34 | SubmitExecution(semaphore); | 30 | SubmitExecution(semaphore); |
| 35 | current_fence->Wait(); | 31 | current_fence->Wait(); |
| 36 | current_fence->Release(); | 32 | if (release_fence) |
| 33 | current_fence->Release(); | ||
| 37 | AllocateNewContext(); | 34 | AllocateNewContext(); |
| 38 | return GetExecutionContext(); | ||
| 39 | } | 35 | } |
| 40 | 36 | ||
| 41 | void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { | 37 | void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index cfaf5376f..0e5b49c7f 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -10,10 +10,43 @@ | |||
| 10 | namespace Vulkan { | 10 | namespace Vulkan { |
| 11 | 11 | ||
| 12 | class VKDevice; | 12 | class VKDevice; |
| 13 | class VKExecutionContext; | ||
| 14 | class VKFence; | 13 | class VKFence; |
| 15 | class VKResourceManager; | 14 | class VKResourceManager; |
| 16 | 15 | ||
| 16 | class VKFenceView { | ||
| 17 | public: | ||
| 18 | VKFenceView() = default; | ||
| 19 | VKFenceView(VKFence* const& fence) : fence{fence} {} | ||
| 20 | |||
| 21 | VKFence* operator->() const noexcept { | ||
| 22 | return fence; | ||
| 23 | } | ||
| 24 | |||
| 25 | operator VKFence&() const noexcept { | ||
| 26 | return *fence; | ||
| 27 | } | ||
| 28 | |||
| 29 | private: | ||
| 30 | VKFence* const& fence; | ||
| 31 | }; | ||
| 32 | |||
| 33 | class VKCommandBufferView { | ||
| 34 | public: | ||
| 35 | VKCommandBufferView() = default; | ||
| 36 | VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {} | ||
| 37 | |||
| 38 | const vk::CommandBuffer* operator->() const noexcept { | ||
| 39 | return &cmdbuf; | ||
| 40 | } | ||
| 41 | |||
| 42 | operator vk::CommandBuffer() const noexcept { | ||
| 43 | return cmdbuf; | ||
| 44 | } | ||
| 45 | |||
| 46 | private: | ||
| 47 | const vk::CommandBuffer& cmdbuf; | ||
| 48 | }; | ||
| 49 | |||
| 17 | /// The scheduler abstracts command buffer and fence management with an interface that's able to do | 50 | /// The scheduler abstracts command buffer and fence management with an interface that's able to do |
| 18 | /// OpenGL-like operations on Vulkan command buffers. | 51 | /// OpenGL-like operations on Vulkan command buffers. |
| 19 | class VKScheduler { | 52 | class VKScheduler { |
| @@ -21,16 +54,21 @@ public: | |||
| 21 | explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); | 54 | explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); |
| 22 | ~VKScheduler(); | 55 | ~VKScheduler(); |
| 23 | 56 | ||
| 24 | /// Gets the current execution context. | 57 | /// Gets a reference to the current fence. |
| 25 | [[nodiscard]] VKExecutionContext GetExecutionContext() const; | 58 | VKFenceView GetFence() const { |
| 59 | return current_fence; | ||
| 60 | } | ||
| 61 | |||
| 62 | /// Gets a reference to the current command buffer. | ||
| 63 | VKCommandBufferView GetCommandBuffer() const { | ||
| 64 | return current_cmdbuf; | ||
| 65 | } | ||
| 26 | 66 | ||
| 27 | /// Sends the current execution context to the GPU. It invalidates the current execution context | 67 | /// Sends the current execution context to the GPU. |
| 28 | /// and returns a new one. | 68 | void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr); |
| 29 | VKExecutionContext Flush(vk::Semaphore semaphore = nullptr); | ||
| 30 | 69 | ||
| 31 | /// Sends the current execution context to the GPU and waits for it to complete. It invalidates | 70 | /// Sends the current execution context to the GPU and waits for it to complete. |
| 32 | /// the current execution context and returns a new one. | 71 | void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr); |
| 33 | VKExecutionContext Finish(vk::Semaphore semaphore = nullptr); | ||
| 34 | 72 | ||
| 35 | private: | 73 | private: |
| 36 | void SubmitExecution(vk::Semaphore semaphore); | 74 | void SubmitExecution(vk::Semaphore semaphore); |
| @@ -44,26 +82,4 @@ private: | |||
| 44 | VKFence* next_fence = nullptr; | 82 | VKFence* next_fence = nullptr; |
| 45 | }; | 83 | }; |
| 46 | 84 | ||
| 47 | class VKExecutionContext { | ||
| 48 | friend class VKScheduler; | ||
| 49 | |||
| 50 | public: | ||
| 51 | VKExecutionContext() = default; | ||
| 52 | |||
| 53 | VKFence& GetFence() const { | ||
| 54 | return *fence; | ||
| 55 | } | ||
| 56 | |||
| 57 | vk::CommandBuffer GetCommandBuffer() const { | ||
| 58 | return cmdbuf; | ||
| 59 | } | ||
| 60 | |||
| 61 | private: | ||
| 62 | explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf) | ||
| 63 | : fence{fence}, cmdbuf{cmdbuf} {} | ||
| 64 | |||
| 65 | VKFence* fence{}; | ||
| 66 | vk::CommandBuffer cmdbuf; | ||
| 67 | }; | ||
| 68 | |||
| 69 | } // namespace Vulkan | 85 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 97ce214b1..d267712c9 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -205,10 +205,6 @@ public: | |||
| 205 | } | 205 | } |
| 206 | 206 | ||
| 207 | private: | 207 | private: |
| 208 | using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation); | ||
| 209 | using OperationDecompilersArray = | ||
| 210 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; | ||
| 211 | |||
| 212 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); | 208 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); |
| 213 | 209 | ||
| 214 | void AllocateBindings() { | 210 | void AllocateBindings() { |
| @@ -430,20 +426,17 @@ private: | |||
| 430 | instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input, | 426 | instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input, |
| 431 | t_in_uint, "instance_index"); | 427 | t_in_uint, "instance_index"); |
| 432 | 428 | ||
| 433 | bool is_point_size_declared = false; | ||
| 434 | bool is_clip_distances_declared = false; | 429 | bool is_clip_distances_declared = false; |
| 435 | for (const auto index : ir.GetOutputAttributes()) { | 430 | for (const auto index : ir.GetOutputAttributes()) { |
| 436 | if (index == Attribute::Index::PointSize) { | 431 | if (index == Attribute::Index::ClipDistances0123 || |
| 437 | is_point_size_declared = true; | 432 | index == Attribute::Index::ClipDistances4567) { |
| 438 | } else if (index == Attribute::Index::ClipDistances0123 || | ||
| 439 | index == Attribute::Index::ClipDistances4567) { | ||
| 440 | is_clip_distances_declared = true; | 433 | is_clip_distances_declared = true; |
| 441 | } | 434 | } |
| 442 | } | 435 | } |
| 443 | 436 | ||
| 444 | std::vector<Id> members; | 437 | std::vector<Id> members; |
| 445 | members.push_back(t_float4); | 438 | members.push_back(t_float4); |
| 446 | if (is_point_size_declared) { | 439 | if (ir.UsesPointSize()) { |
| 447 | members.push_back(t_float); | 440 | members.push_back(t_float); |
| 448 | } | 441 | } |
| 449 | if (is_clip_distances_declared) { | 442 | if (is_clip_distances_declared) { |
| @@ -466,7 +459,7 @@ private: | |||
| 466 | 459 | ||
| 467 | position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true); | 460 | position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true); |
| 468 | point_size_index = | 461 | point_size_index = |
| 469 | MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", is_point_size_declared); | 462 | MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", ir.UsesPointSize()); |
| 470 | clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances", | 463 | clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances", |
| 471 | is_clip_distances_declared); | 464 | is_clip_distances_declared); |
| 472 | 465 | ||
| @@ -712,7 +705,8 @@ private: | |||
| 712 | case Attribute::Index::Position: | 705 | case Attribute::Index::Position: |
| 713 | return AccessElement(t_out_float, per_vertex, position_index, | 706 | return AccessElement(t_out_float, per_vertex, position_index, |
| 714 | abuf->GetElement()); | 707 | abuf->GetElement()); |
| 715 | case Attribute::Index::PointSize: | 708 | case Attribute::Index::LayerViewportPointSize: |
| 709 | UNIMPLEMENTED_IF(abuf->GetElement() != 3); | ||
| 716 | return AccessElement(t_out_float, per_vertex, point_size_index); | 710 | return AccessElement(t_out_float, per_vertex, point_size_index); |
| 717 | case Attribute::Index::ClipDistances0123: | 711 | case Attribute::Index::ClipDistances0123: |
| 718 | return AccessElement(t_out_float, per_vertex, clip_distances_index, | 712 | return AccessElement(t_out_float, per_vertex, clip_distances_index, |
| @@ -806,12 +800,7 @@ private: | |||
| 806 | return {}; | 800 | return {}; |
| 807 | } | 801 | } |
| 808 | 802 | ||
| 809 | Id LogicalAll2(Operation operation) { | 803 | Id LogicalAnd2(Operation operation) { |
| 810 | UNIMPLEMENTED(); | ||
| 811 | return {}; | ||
| 812 | } | ||
| 813 | |||
| 814 | Id LogicalAny2(Operation operation) { | ||
| 815 | UNIMPLEMENTED(); | 804 | UNIMPLEMENTED(); |
| 816 | return {}; | 805 | return {}; |
| 817 | } | 806 | } |
| @@ -949,6 +938,14 @@ private: | |||
| 949 | return {}; | 938 | return {}; |
| 950 | } | 939 | } |
| 951 | 940 | ||
| 941 | Id BranchIndirect(Operation operation) { | ||
| 942 | const Id op_a = VisitOperand<Type::Uint>(operation, 0); | ||
| 943 | |||
| 944 | Emit(OpStore(jmp_to, op_a)); | ||
| 945 | BranchingOp([&]() { Emit(OpBranch(continue_label)); }); | ||
| 946 | return {}; | ||
| 947 | } | ||
| 948 | |||
| 952 | Id PushFlowStack(Operation operation) { | 949 | Id PushFlowStack(Operation operation) { |
| 953 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | 950 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); |
| 954 | ASSERT(target); | 951 | ASSERT(target); |
| @@ -1200,7 +1197,7 @@ private: | |||
| 1200 | return {}; | 1197 | return {}; |
| 1201 | } | 1198 | } |
| 1202 | 1199 | ||
| 1203 | static constexpr OperationDecompilersArray operation_decompilers = { | 1200 | static constexpr std::array operation_decompilers = { |
| 1204 | &SPIRVDecompiler::Assign, | 1201 | &SPIRVDecompiler::Assign, |
| 1205 | 1202 | ||
| 1206 | &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, | 1203 | &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, |
| @@ -1285,8 +1282,7 @@ private: | |||
| 1285 | &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, | 1282 | &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, |
| 1286 | &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, | 1283 | &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, |
| 1287 | &SPIRVDecompiler::LogicalPick2, | 1284 | &SPIRVDecompiler::LogicalPick2, |
| 1288 | &SPIRVDecompiler::LogicalAll2, | 1285 | &SPIRVDecompiler::LogicalAnd2, |
| 1289 | &SPIRVDecompiler::LogicalAny2, | ||
| 1290 | 1286 | ||
| 1291 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, | 1287 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, |
| 1292 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, | 1288 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, |
| @@ -1334,6 +1330,7 @@ private: | |||
| 1334 | &SPIRVDecompiler::ImageStore, | 1330 | &SPIRVDecompiler::ImageStore, |
| 1335 | 1331 | ||
| 1336 | &SPIRVDecompiler::Branch, | 1332 | &SPIRVDecompiler::Branch, |
| 1333 | &SPIRVDecompiler::BranchIndirect, | ||
| 1337 | &SPIRVDecompiler::PushFlowStack, | 1334 | &SPIRVDecompiler::PushFlowStack, |
| 1338 | &SPIRVDecompiler::PopFlowStack, | 1335 | &SPIRVDecompiler::PopFlowStack, |
| 1339 | &SPIRVDecompiler::Exit, | 1336 | &SPIRVDecompiler::Exit, |
| @@ -1350,6 +1347,7 @@ private: | |||
| 1350 | &SPIRVDecompiler::WorkGroupId<1>, | 1347 | &SPIRVDecompiler::WorkGroupId<1>, |
| 1351 | &SPIRVDecompiler::WorkGroupId<2>, | 1348 | &SPIRVDecompiler::WorkGroupId<2>, |
| 1352 | }; | 1349 | }; |
| 1350 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | ||
| 1353 | 1351 | ||
| 1354 | const VKDevice& device; | 1352 | const VKDevice& device; |
| 1355 | const ShaderIR& ir; | 1353 | const ShaderIR& ir; |
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 58ffa42f2..62f1427f5 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp | |||
| @@ -46,12 +46,12 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) { | |||
| 46 | return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; | 46 | return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) { | 49 | void VKStreamBuffer::Send(u64 size) { |
| 50 | ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); | 50 | ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); |
| 51 | 51 | ||
| 52 | if (invalidation_mark) { | 52 | if (invalidation_mark) { |
| 53 | // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. | 53 | // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. |
| 54 | exctx = scheduler.Flush(); | 54 | scheduler.Flush(); |
| 55 | std::for_each(watches.begin(), watches.begin() + *invalidation_mark, | 55 | std::for_each(watches.begin(), watches.begin() + *invalidation_mark, |
| 56 | [&](auto& resource) { resource->Wait(); }); | 56 | [&](auto& resource) { resource->Wait(); }); |
| 57 | invalidation_mark = std::nullopt; | 57 | invalidation_mark = std::nullopt; |
| @@ -62,11 +62,9 @@ VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) { | |||
| 62 | ReserveWatches(WATCHES_RESERVE_CHUNK); | 62 | ReserveWatches(WATCHES_RESERVE_CHUNK); |
| 63 | } | 63 | } |
| 64 | // Add a watch for this allocation. | 64 | // Add a watch for this allocation. |
| 65 | watches[used_watches++]->Watch(exctx.GetFence()); | 65 | watches[used_watches++]->Watch(scheduler.GetFence()); |
| 66 | 66 | ||
| 67 | offset += size; | 67 | offset += size; |
| 68 | |||
| 69 | return exctx; | ||
| 70 | } | 68 | } |
| 71 | 69 | ||
| 72 | void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { | 70 | void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { |
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 69d036ccd..842e54162 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h | |||
| @@ -37,7 +37,7 @@ public: | |||
| 37 | std::tuple<u8*, u64, bool> Reserve(u64 size); | 37 | std::tuple<u8*, u64, bool> Reserve(u64 size); |
| 38 | 38 | ||
| 39 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. | 39 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. |
| 40 | [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size); | 40 | void Send(u64 size); |
| 41 | 41 | ||
| 42 | vk::Buffer GetBuffer() const { | 42 | vk::Buffer GetBuffer() const { |
| 43 | return *buffer; | 43 | return *buffer; |
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp new file mode 100644 index 000000000..ec3a76690 --- /dev/null +++ b/src/video_core/shader/control_flow.cpp | |||
| @@ -0,0 +1,481 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
#include <algorithm>
#include <list>
#include <map>
#include <stack>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/shader/control_flow.h"
#include "video_core/shader/shader_ir.h"
| 16 | |||
| 17 | namespace VideoCommon::Shader { | ||
| 18 | namespace { | ||
| 19 | using Tegra::Shader::Instruction; | ||
| 20 | using Tegra::Shader::OpCode; | ||
| 21 | |||
| 22 | constexpr s32 unassigned_branch = -2; | ||
| 23 | |||
// A pending control-flow question: "starting at `address`, with these SSY/PBK
// stacks, is the stack usage consistent?" Consumed by TryQuery.
struct Query {
    u32 address{};
    std::stack<u32> ssy_stack{}; // Pending SSY (sync) targets gathered so far
    std::stack<u32> pbk_stack{}; // Pending PBK (break) targets gathered so far
};
| 29 | |||
// Snapshot of the SSY/PBK stacks recorded the first time a block is visited,
// used to verify that later visits arrive with compatible stacks.
struct BlockStack {
    BlockStack() = default;
    explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
    std::stack<u32> ssy_stack{};
    std::stack<u32> pbk_stack{};
};
| 36 | |||
// Describes how a block ends: the guarding condition, the branch target, and
// which kind of control-flow instruction produced the branch.
struct BlockBranchInfo {
    Condition condition{};    // Guard under which the branch is taken
    s32 address{exit_branch}; // Branch target; exit_branch (-1) means shader exit
    bool kill{};              // Branch comes from a KIL (discard) instruction
    bool is_sync{};           // Branch comes from a SYNC (target popped from the SSY stack)
    bool is_brk{};            // Branch comes from a BRK (target popped from the PBK stack)
    bool ignore{};            // Plain fallthrough into the next block; emit no branch node
};
| 45 | |||
// A discovered basic block covering the inclusive instruction range [start, end].
struct BlockInfo {
    u32 start{};
    u32 end{};
    bool visited{}; // Set once the stack-consistency pass (TryQuery) has processed it
    BlockBranchInfo branch{};

    /// Returns whether `address` falls inside this block's range (inclusive on both ends).
    bool IsInside(const u32 address) const {
        return start <= address && address <= end;
    }
};
| 56 | |||
// Mutable state shared by every pass of the CFG reconstruction.
struct CFGRebuildState {
    explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
                             const u32 start)
        : start{start}, program_code{program_code}, program_size{program_size} {}

    u32 start{};                                  // Offset of the program entry point
    std::vector<BlockInfo> block_info{};          // Every block discovered so far
    std::list<u32> inspect_queries{};             // Addresses pending block discovery
    std::list<Query> queries{};                   // Pending stack-consistency queries
    std::unordered_map<u32, u32> registered{};    // Block start address -> block_info index
    std::unordered_set<u32> labels{};             // Addresses that start a labelled block
    std::map<u32, u32> ssy_labels{};              // SSY instruction offset -> sync target
    std::map<u32, u32> pbk_labels{};              // PBK instruction offset -> break target
    std::unordered_map<u32, BlockStack> stacks{}; // Stack snapshot at each visited block
    const ProgramCode& program_code;              // Shader bytecode being analyzed
    const std::size_t program_size;               // Size of the bytecode in bytes
};
| 74 | |||
// Result of looking up an address among already-discovered blocks.
enum class BlockCollision : u32 { None, Found, Inside };
| 76 | |||
| 77 | std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) { | ||
| 78 | const auto& blocks = state.block_info; | ||
| 79 | for (u32 index = 0; index < blocks.size(); index++) { | ||
| 80 | if (blocks[index].start == address) { | ||
| 81 | return {BlockCollision::Found, index}; | ||
| 82 | } | ||
| 83 | if (blocks[index].IsInside(address)) { | ||
| 84 | return {BlockCollision::Inside, index}; | ||
| 85 | } | ||
| 86 | } | ||
| 87 | return {BlockCollision::None, 0xFFFFFFFF}; | ||
| 88 | } | ||
| 89 | |||
// Result of linearly parsing a block: how it branches and where it ends.
struct ParseInfo {
    BlockBranchInfo branch_info{};
    u32 end_address{}; // Offset of the last instruction belonging to the block
};
| 94 | |||
| 95 | BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { | ||
| 96 | auto& it = state.block_info.emplace_back(); | ||
| 97 | it.start = start; | ||
| 98 | it.end = end; | ||
| 99 | const u32 index = static_cast<u32>(state.block_info.size() - 1); | ||
| 100 | state.registered.insert({start, index}); | ||
| 101 | return it; | ||
| 102 | } | ||
| 103 | |||
| 104 | Pred GetPredicate(u32 index, bool negated) { | ||
| 105 | return static_cast<Pred>(index + (negated ? 8 : 0)); | ||
| 106 | } | ||
| 107 | |||
| 108 | /** | ||
| 109 | * Returns whether the instruction at the specified offset is a 'sched' instruction. | ||
| 110 | * Sched instructions always appear before a sequence of 3 instructions. | ||
| 111 | */ | ||
| 112 | constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | ||
| 113 | constexpr u32 SchedPeriod = 4; | ||
| 114 | u32 absolute_offset = offset - main_offset; | ||
| 115 | |||
| 116 | return (absolute_offset % SchedPeriod) == 0; | ||
| 117 | } | ||
| 118 | |||
// Outcome of linearly parsing code starting at a given address.
enum class ParseResult : u32 {
    ControlCaught, // Parsing stopped at a control-flow instruction
    BlockEnd,      // Parsing ran into a registered block or the program limit
    AbnormalFlow,  // Statically unanalyzable flow (BRX, CB-relative BRA); abort CFG
};
| 124 | |||
| 125 | std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { | ||
| 126 | u32 offset = static_cast<u32>(address); | ||
| 127 | const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction)); | ||
| 128 | ParseInfo parse_info{}; | ||
| 129 | |||
| 130 | const auto insert_label = [](CFGRebuildState& state, u32 address) { | ||
| 131 | const auto pair = state.labels.emplace(address); | ||
| 132 | if (pair.second) { | ||
| 133 | state.inspect_queries.push_back(address); | ||
| 134 | } | ||
| 135 | }; | ||
| 136 | |||
| 137 | while (true) { | ||
| 138 | if (offset >= end_address) { | ||
| 139 | // ASSERT_OR_EXECUTE can't be used, as it ignores the break | ||
| 140 | ASSERT_MSG(false, "Shader passed the current limit!"); | ||
| 141 | parse_info.branch_info.address = exit_branch; | ||
| 142 | parse_info.branch_info.ignore = false; | ||
| 143 | break; | ||
| 144 | } | ||
| 145 | if (state.registered.count(offset) != 0) { | ||
| 146 | parse_info.branch_info.address = offset; | ||
| 147 | parse_info.branch_info.ignore = true; | ||
| 148 | break; | ||
| 149 | } | ||
| 150 | if (IsSchedInstruction(offset, state.start)) { | ||
| 151 | offset++; | ||
| 152 | continue; | ||
| 153 | } | ||
| 154 | const Instruction instr = {state.program_code[offset]}; | ||
| 155 | const auto opcode = OpCode::Decode(instr); | ||
| 156 | if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { | ||
| 157 | offset++; | ||
| 158 | continue; | ||
| 159 | } | ||
| 160 | |||
| 161 | switch (opcode->get().GetId()) { | ||
| 162 | case OpCode::Id::EXIT: { | ||
| 163 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 164 | parse_info.branch_info.condition.predicate = | ||
| 165 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 166 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 167 | offset++; | ||
| 168 | continue; | ||
| 169 | } | ||
| 170 | const ConditionCode cc = instr.flow_condition_code; | ||
| 171 | parse_info.branch_info.condition.cc = cc; | ||
| 172 | if (cc == ConditionCode::F) { | ||
| 173 | offset++; | ||
| 174 | continue; | ||
| 175 | } | ||
| 176 | parse_info.branch_info.address = exit_branch; | ||
| 177 | parse_info.branch_info.kill = false; | ||
| 178 | parse_info.branch_info.is_sync = false; | ||
| 179 | parse_info.branch_info.is_brk = false; | ||
| 180 | parse_info.branch_info.ignore = false; | ||
| 181 | parse_info.end_address = offset; | ||
| 182 | |||
| 183 | return {ParseResult::ControlCaught, parse_info}; | ||
| 184 | } | ||
| 185 | case OpCode::Id::BRA: { | ||
| 186 | if (instr.bra.constant_buffer != 0) { | ||
| 187 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 188 | } | ||
| 189 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 190 | parse_info.branch_info.condition.predicate = | ||
| 191 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 192 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 193 | offset++; | ||
| 194 | continue; | ||
| 195 | } | ||
| 196 | const ConditionCode cc = instr.flow_condition_code; | ||
| 197 | parse_info.branch_info.condition.cc = cc; | ||
| 198 | if (cc == ConditionCode::F) { | ||
| 199 | offset++; | ||
| 200 | continue; | ||
| 201 | } | ||
| 202 | const u32 branch_offset = offset + instr.bra.GetBranchTarget(); | ||
| 203 | if (branch_offset == 0) { | ||
| 204 | parse_info.branch_info.address = exit_branch; | ||
| 205 | } else { | ||
| 206 | parse_info.branch_info.address = branch_offset; | ||
| 207 | } | ||
| 208 | insert_label(state, branch_offset); | ||
| 209 | parse_info.branch_info.kill = false; | ||
| 210 | parse_info.branch_info.is_sync = false; | ||
| 211 | parse_info.branch_info.is_brk = false; | ||
| 212 | parse_info.branch_info.ignore = false; | ||
| 213 | parse_info.end_address = offset; | ||
| 214 | |||
| 215 | return {ParseResult::ControlCaught, parse_info}; | ||
| 216 | } | ||
| 217 | case OpCode::Id::SYNC: { | ||
| 218 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 219 | parse_info.branch_info.condition.predicate = | ||
| 220 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 221 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 222 | offset++; | ||
| 223 | continue; | ||
| 224 | } | ||
| 225 | const ConditionCode cc = instr.flow_condition_code; | ||
| 226 | parse_info.branch_info.condition.cc = cc; | ||
| 227 | if (cc == ConditionCode::F) { | ||
| 228 | offset++; | ||
| 229 | continue; | ||
| 230 | } | ||
| 231 | parse_info.branch_info.address = unassigned_branch; | ||
| 232 | parse_info.branch_info.kill = false; | ||
| 233 | parse_info.branch_info.is_sync = true; | ||
| 234 | parse_info.branch_info.is_brk = false; | ||
| 235 | parse_info.branch_info.ignore = false; | ||
| 236 | parse_info.end_address = offset; | ||
| 237 | |||
| 238 | return {ParseResult::ControlCaught, parse_info}; | ||
| 239 | } | ||
| 240 | case OpCode::Id::BRK: { | ||
| 241 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 242 | parse_info.branch_info.condition.predicate = | ||
| 243 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 244 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 245 | offset++; | ||
| 246 | continue; | ||
| 247 | } | ||
| 248 | const ConditionCode cc = instr.flow_condition_code; | ||
| 249 | parse_info.branch_info.condition.cc = cc; | ||
| 250 | if (cc == ConditionCode::F) { | ||
| 251 | offset++; | ||
| 252 | continue; | ||
| 253 | } | ||
| 254 | parse_info.branch_info.address = unassigned_branch; | ||
| 255 | parse_info.branch_info.kill = false; | ||
| 256 | parse_info.branch_info.is_sync = false; | ||
| 257 | parse_info.branch_info.is_brk = true; | ||
| 258 | parse_info.branch_info.ignore = false; | ||
| 259 | parse_info.end_address = offset; | ||
| 260 | |||
| 261 | return {ParseResult::ControlCaught, parse_info}; | ||
| 262 | } | ||
| 263 | case OpCode::Id::KIL: { | ||
| 264 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 265 | parse_info.branch_info.condition.predicate = | ||
| 266 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 267 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 268 | offset++; | ||
| 269 | continue; | ||
| 270 | } | ||
| 271 | const ConditionCode cc = instr.flow_condition_code; | ||
| 272 | parse_info.branch_info.condition.cc = cc; | ||
| 273 | if (cc == ConditionCode::F) { | ||
| 274 | offset++; | ||
| 275 | continue; | ||
| 276 | } | ||
| 277 | parse_info.branch_info.address = exit_branch; | ||
| 278 | parse_info.branch_info.kill = true; | ||
| 279 | parse_info.branch_info.is_sync = false; | ||
| 280 | parse_info.branch_info.is_brk = false; | ||
| 281 | parse_info.branch_info.ignore = false; | ||
| 282 | parse_info.end_address = offset; | ||
| 283 | |||
| 284 | return {ParseResult::ControlCaught, parse_info}; | ||
| 285 | } | ||
| 286 | case OpCode::Id::SSY: { | ||
| 287 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 288 | insert_label(state, target); | ||
| 289 | state.ssy_labels.emplace(offset, target); | ||
| 290 | break; | ||
| 291 | } | ||
| 292 | case OpCode::Id::PBK: { | ||
| 293 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 294 | insert_label(state, target); | ||
| 295 | state.pbk_labels.emplace(offset, target); | ||
| 296 | break; | ||
| 297 | } | ||
| 298 | case OpCode::Id::BRX: { | ||
| 299 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 300 | } | ||
| 301 | default: | ||
| 302 | break; | ||
| 303 | } | ||
| 304 | |||
| 305 | offset++; | ||
| 306 | } | ||
| 307 | parse_info.branch_info.kill = false; | ||
| 308 | parse_info.branch_info.is_sync = false; | ||
| 309 | parse_info.branch_info.is_brk = false; | ||
| 310 | parse_info.end_address = offset - 1; | ||
| 311 | return {ParseResult::BlockEnd, parse_info}; | ||
| 312 | } | ||
| 313 | |||
/// Pops one address from the inspect queue and ensures a block covering it
/// exists, splitting or creating blocks as needed. Returns false when the
/// control flow is abnormal and CFG reconstruction must be abandoned.
bool TryInspectAddress(CFGRebuildState& state) {
    if (state.inspect_queries.empty()) {
        return false;
    }

    const u32 address = state.inspect_queries.front();
    state.inspect_queries.pop_front();
    const auto [result, block_index] = TryGetBlock(state, address);
    switch (result) {
    case BlockCollision::Found: {
        return true;
    }
    case BlockCollision::Inside: {
        // This case is the tricky one:
        // We need to Split the block in 2 sepparate blocks
        // NOTE: CreateBlockInfo may reallocate block_info, so `end` is copied
        // out first and the old block's reference is re-fetched afterwards.
        const u32 end = state.block_info[block_index].end;
        BlockInfo& new_block = CreateBlockInfo(state, address, end);
        BlockInfo& current_block = state.block_info[block_index];
        current_block.end = address - 1;
        // The second half inherits the original branch; the first half simply
        // falls through into it.
        new_block.branch = current_block.branch;
        BlockBranchInfo forward_branch{};
        forward_branch.address = address;
        forward_branch.ignore = true;
        current_block.branch = forward_branch;
        return true;
    }
    default:
        break;
    }
    const auto [parse_result, parse_info] = ParseCode(state, address);
    if (parse_result == ParseResult::AbnormalFlow) {
        // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction
        return false;
    }

    BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
    block_info.branch = parse_info.branch_info;
    if (parse_info.branch_info.condition.IsUnconditional()) {
        return true;
    }

    // Conditional branch: the fallthrough path also needs to be inspected
    const u32 fallthrough_address = parse_info.end_address + 1;
    state.inspect_queries.push_front(fallthrough_address);
    return true;
}
| 359 | |||
/// Processes one pending query, propagating the SSY/PBK stacks through the
/// block at the query's address. Returns false when a revisited block is
/// reached with mismatching stacks, meaning the runtime flow stack cannot be
/// removed for this shader.
bool TryQuery(CFGRebuildState& state) {
    // Pushes every SSY/PBK target declared inside the block onto the stack
    const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels,
                                  BlockInfo& block) {
        auto gather_start = labels.lower_bound(block.start);
        const auto gather_end = labels.upper_bound(block.end);
        while (gather_start != gather_end) {
            cc.push(gather_start->second);
            ++gather_start;
        }
    };
    if (state.queries.empty()) {
        return false;
    }

    Query& q = state.queries.front();
    const u32 block_index = state.registered[q.address];
    BlockInfo& block = state.block_info[block_index];
    // If the block is visited, check if the stacks match, else gather the ssy/pbk
    // labels into the current stack and look if the branch at the end of the block
    // consumes a label. Schedule new queries accordingly
    if (block.visited) {
        BlockStack& stack = state.stacks[q.address];
        const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) &&
                              (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack);
        state.queries.pop_front();
        return all_okay;
    }
    block.visited = true;
    state.stacks.insert_or_assign(q.address, BlockStack{q});

    // Copy the query before popping it: `q` dangles after pop_front()
    Query q2(q);
    state.queries.pop_front();
    gather_labels(q2.ssy_stack, state.ssy_labels, block);
    gather_labels(q2.pbk_stack, state.pbk_labels, block);
    if (!block.branch.condition.IsUnconditional()) {
        // Conditional branch: the fallthrough path continues with the same stacks
        q2.address = block.end + 1;
        state.queries.push_back(q2);
    }

    Query conditional_query{q2};
    if (block.branch.is_sync) {
        if (block.branch.address == unassigned_branch) {
            // First SYNC reaching this block resolves its target from the stack
            block.branch.address = conditional_query.ssy_stack.top();
        }
        // NOTE(review): assumes the SSY stack is non-empty here -- verify for
        // malformed shaders
        conditional_query.ssy_stack.pop();
    }
    if (block.branch.is_brk) {
        if (block.branch.address == unassigned_branch) {
            // First BRK reaching this block resolves its target from the stack
            block.branch.address = conditional_query.pbk_stack.top();
        }
        // NOTE(review): assumes the PBK stack is non-empty here -- verify for
        // malformed shaders
        conditional_query.pbk_stack.pop();
    }
    conditional_query.address = block.branch.address;
    state.queries.push_back(std::move(conditional_query));
    return true;
}
| 416 | } // Anonymous namespace | ||
| 417 | |||
/// Rebuilds the control-flow graph of the shader starting at `start_address`.
/// Returns std::nullopt when an abnormal (statically unanalyzable) flow is
/// found; otherwise returns the discovered blocks, labels and coverage range.
std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
                                              std::size_t program_size, u32 start_address) {
    CFGRebuildState state{program_code, program_size, start_address};

    // Inspect Code and generate blocks
    state.labels.clear();
    state.labels.emplace(start_address);
    state.inspect_queries.push_back(state.start);
    while (!state.inspect_queries.empty()) {
        if (!TryInspectAddress(state)) {
            return {};
        }
    }

    // Decompile Stacks
    // Propagate SSY/PBK stacks through the CFG; if any block is reachable with
    // inconsistent stacks, the shader keeps using the runtime flow stack.
    state.queries.push_back(Query{state.start, {}, {}});
    bool decompiled = true;
    while (!state.queries.empty()) {
        if (!TryQuery(state)) {
            decompiled = false;
            break;
        }
    }

    // Sort and organize results
    std::sort(state.block_info.begin(), state.block_info.end(),
              [](const BlockInfo& a, const BlockInfo& b) { return a.start < b.start; });
    ShaderCharacteristics result_out{};
    result_out.decompilable = decompiled;
    result_out.start = start_address;
    result_out.end = start_address;
    for (const auto& block : state.block_info) {
        ShaderBlock new_block{};
        new_block.start = block.start;
        new_block.end = block.end;
        new_block.ignore_branch = block.branch.ignore;
        if (!new_block.ignore_branch) {
            new_block.branch.cond = block.branch.condition;
            new_block.branch.kills = block.branch.kill;
            new_block.branch.address = block.branch.address;
        }
        result_out.end = std::max(result_out.end, block.end);
        result_out.blocks.push_back(new_block);
    }
    if (result_out.decompilable) {
        result_out.labels = std::move(state.labels);
        return {std::move(result_out)};
    }

    // If it's not decompilable, merge the unlabelled blocks together
    auto back = result_out.blocks.begin();
    auto next = std::next(back);
    while (next != result_out.blocks.end()) {
        if (state.labels.count(next->start) == 0 && next->start == back->end + 1) {
            back->end = next->end;
            next = result_out.blocks.erase(next);
            continue;
        }
        back = next;
        ++next;
    }
    return {std::move(result_out)};
}
| 481 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h new file mode 100644 index 000000000..b0a5e4f8c --- /dev/null +++ b/src/video_core/shader/control_flow.h | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <list> | ||
| 8 | #include <optional> | ||
| 9 | #include <unordered_set> | ||
| 10 | |||
| 11 | #include "video_core/engines/shader_bytecode.h" | ||
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | using Tegra::Shader::ConditionCode; | ||
| 17 | using Tegra::Shader::Pred; | ||
| 18 | |||
| 19 | constexpr s32 exit_branch = -1; | ||
| 20 | |||
| 21 | struct Condition { | ||
| 22 | Pred predicate{Pred::UnusedIndex}; | ||
| 23 | ConditionCode cc{ConditionCode::T}; | ||
| 24 | |||
| 25 | bool IsUnconditional() const { | ||
| 26 | return predicate == Pred::UnusedIndex && cc == ConditionCode::T; | ||
| 27 | } | ||
| 28 | |||
| 29 | bool operator==(const Condition& other) const { | ||
| 30 | return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); | ||
| 31 | } | ||
| 32 | |||
| 33 | bool operator!=(const Condition& other) const { | ||
| 34 | return !operator==(other); | ||
| 35 | } | ||
| 36 | }; | ||
| 37 | |||
| 38 | struct ShaderBlock { | ||
| 39 | struct Branch { | ||
| 40 | Condition cond{}; | ||
| 41 | bool kills{}; | ||
| 42 | s32 address{}; | ||
| 43 | |||
| 44 | bool operator==(const Branch& b) const { | ||
| 45 | return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); | ||
| 46 | } | ||
| 47 | |||
| 48 | bool operator!=(const Branch& b) const { | ||
| 49 | return !operator==(b); | ||
| 50 | } | ||
| 51 | }; | ||
| 52 | |||
| 53 | u32 start{}; | ||
| 54 | u32 end{}; | ||
| 55 | bool ignore_branch{}; | ||
| 56 | Branch branch{}; | ||
| 57 | |||
| 58 | bool operator==(const ShaderBlock& sb) const { | ||
| 59 | return std::tie(start, end, ignore_branch, branch) == | ||
| 60 | std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); | ||
| 61 | } | ||
| 62 | |||
| 63 | bool operator!=(const ShaderBlock& sb) const { | ||
| 64 | return !operator==(sb); | ||
| 65 | } | ||
| 66 | }; | ||
| 67 | |||
/// Result of a successful control-flow scan.
struct ShaderCharacteristics {
    std::list<ShaderBlock> blocks{};  // Discovered blocks, sorted by start address
    bool decompilable{};              // True when the runtime flow stack can be removed
    u32 start{};                      // First covered instruction offset
    u32 end{};                        // Last covered instruction offset
    std::unordered_set<u32> labels{}; // Block start addresses (filled only when decompilable)
};
| 75 | |||
/// Rebuilds the control-flow graph of a shader program starting at
/// `start_address`. Returns std::nullopt when the flow cannot be analyzed
/// statically; otherwise returns the discovered blocks, labels and coverage.
std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
                                              std::size_t program_size, u32 start_address);
| 78 | |||
| 79 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2c9ff28f2..b547d8323 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/engines/shader_header.h" | 13 | #include "video_core/engines/shader_header.h" |
| 14 | #include "video_core/shader/control_flow.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | 15 | #include "video_core/shader/node_helper.h" |
| 15 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 16 | 17 | ||
| @@ -21,20 +22,6 @@ using Tegra::Shader::OpCode; | |||
| 21 | 22 | ||
| 22 | namespace { | 23 | namespace { |
| 23 | 24 | ||
| 24 | /// Merges exit method of two parallel branches. | ||
| 25 | constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { | ||
| 26 | if (a == ExitMethod::Undetermined) { | ||
| 27 | return b; | ||
| 28 | } | ||
| 29 | if (b == ExitMethod::Undetermined) { | ||
| 30 | return a; | ||
| 31 | } | ||
| 32 | if (a == b) { | ||
| 33 | return a; | ||
| 34 | } | ||
| 35 | return ExitMethod::Conditional; | ||
| 36 | } | ||
| 37 | |||
| 38 | /** | 25 | /** |
| 39 | * Returns whether the instruction at the specified offset is a 'sched' instruction. | 26 | * Returns whether the instruction at the specified offset is a 'sched' instruction. |
| 40 | * Sched instructions always appear before a sequence of 3 instructions. | 27 | * Sched instructions always appear before a sequence of 3 instructions. |
| @@ -51,85 +38,104 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||
| 51 | void ShaderIR::Decode() { | 38 | void ShaderIR::Decode() { |
| 52 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | 39 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
| 53 | 40 | ||
| 54 | std::set<u32> labels; | 41 | disable_flow_stack = false; |
| 55 | const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); | 42 | const auto info = ScanFlow(program_code, program_size, main_offset); |
| 56 | if (exit_method != ExitMethod::AlwaysEnd) { | 43 | if (info) { |
| 57 | UNREACHABLE_MSG("Program does not always end"); | 44 | const auto& shader_info = *info; |
| 58 | } | 45 | coverage_begin = shader_info.start; |
| 59 | 46 | coverage_end = shader_info.end; | |
| 60 | if (labels.empty()) { | 47 | if (shader_info.decompilable) { |
| 61 | basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); | 48 | disable_flow_stack = true; |
| 49 | const auto insert_block = [this](NodeBlock& nodes, u32 label) { | ||
| 50 | if (label == static_cast<u32>(exit_branch)) { | ||
| 51 | return; | ||
| 52 | } | ||
| 53 | basic_blocks.insert({label, nodes}); | ||
| 54 | }; | ||
| 55 | const auto& blocks = shader_info.blocks; | ||
| 56 | NodeBlock current_block; | ||
| 57 | u32 current_label = static_cast<u32>(exit_branch); | ||
| 58 | for (auto& block : blocks) { | ||
| 59 | if (shader_info.labels.count(block.start) != 0) { | ||
| 60 | insert_block(current_block, current_label); | ||
| 61 | current_block.clear(); | ||
| 62 | current_label = block.start; | ||
| 63 | } | ||
| 64 | if (!block.ignore_branch) { | ||
| 65 | DecodeRangeInner(current_block, block.start, block.end); | ||
| 66 | InsertControlFlow(current_block, block); | ||
| 67 | } else { | ||
| 68 | DecodeRangeInner(current_block, block.start, block.end + 1); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | insert_block(current_block, current_label); | ||
| 72 | return; | ||
| 73 | } | ||
| 74 | LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method"); | ||
| 75 | // we can't decompile it, fallback to standard method | ||
| 76 | for (const auto& block : shader_info.blocks) { | ||
| 77 | basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); | ||
| 78 | } | ||
| 62 | return; | 79 | return; |
| 63 | } | 80 | } |
| 81 | LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling"); | ||
| 82 | |||
| 83 | // Now we need to deal with an undecompilable shader. We need to brute force | ||
| 84 | // a shader that captures every position. | ||
| 85 | coverage_begin = main_offset; | ||
| 86 | const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); | ||
| 87 | coverage_end = shader_end; | ||
| 88 | for (u32 label = main_offset; label < shader_end; label++) { | ||
| 89 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); | ||
| 90 | } | ||
| 91 | } | ||
| 64 | 92 | ||
| 65 | labels.insert(main_offset); | 93 | NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { |
| 66 | 94 | NodeBlock basic_block; | |
| 67 | for (const u32 label : labels) { | 95 | DecodeRangeInner(basic_block, begin, end); |
| 68 | const auto next_it = labels.lower_bound(label + 1); | 96 | return basic_block; |
| 69 | const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; | 97 | } |
| 70 | 98 | ||
| 71 | basic_blocks.insert({label, DecodeRange(label, next_label)}); | 99 | void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { |
| 100 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 101 | pc = DecodeInstr(bb, pc); | ||
| 72 | } | 102 | } |
| 73 | } | 103 | } |
| 74 | 104 | ||
| 75 | ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { | 105 | void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { |
| 76 | const auto [iter, inserted] = | 106 | const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { |
| 77 | exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); | 107 | Node result = n; |
| 78 | ExitMethod& exit_method = iter->second; | 108 | if (cond.cc != ConditionCode::T) { |
| 79 | if (!inserted) | 109 | result = Conditional(GetConditionCode(cond.cc), {result}); |
| 80 | return exit_method; | ||
| 81 | |||
| 82 | for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) { | ||
| 83 | coverage_begin = std::min(coverage_begin, offset); | ||
| 84 | coverage_end = std::max(coverage_end, offset + 1); | ||
| 85 | |||
| 86 | const Instruction instr = {program_code[offset]}; | ||
| 87 | const auto opcode = OpCode::Decode(instr); | ||
| 88 | if (!opcode) | ||
| 89 | continue; | ||
| 90 | switch (opcode->get().GetId()) { | ||
| 91 | case OpCode::Id::EXIT: { | ||
| 92 | // The EXIT instruction can be predicated, which means that the shader can conditionally | ||
| 93 | // end on this instruction. We have to consider the case where the condition is not met | ||
| 94 | // and check the exit method of that other basic block. | ||
| 95 | using Tegra::Shader::Pred; | ||
| 96 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 97 | return exit_method = ExitMethod::AlwaysEnd; | ||
| 98 | } else { | ||
| 99 | const ExitMethod not_met = Scan(offset + 1, end, labels); | ||
| 100 | return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met); | ||
| 101 | } | ||
| 102 | } | 110 | } |
| 103 | case OpCode::Id::BRA: { | 111 | if (cond.predicate != Pred::UnusedIndex) { |
| 104 | const u32 target = offset + instr.bra.GetBranchTarget(); | 112 | u32 pred = static_cast<u32>(cond.predicate); |
| 105 | labels.insert(target); | 113 | const bool is_neg = pred > 7; |
| 106 | const ExitMethod no_jmp = Scan(offset + 1, end, labels); | 114 | if (is_neg) { |
| 107 | const ExitMethod jmp = Scan(target, end, labels); | 115 | pred -= 8; |
| 108 | return exit_method = ParallelExit(no_jmp, jmp); | 116 | } |
| 109 | } | 117 | result = Conditional(GetPredicate(pred, is_neg), {result}); |
| 110 | case OpCode::Id::SSY: | ||
| 111 | case OpCode::Id::PBK: { | ||
| 112 | // The SSY and PBK use a similar encoding as the BRA instruction. | ||
| 113 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 114 | "Constant buffer branching is not supported"); | ||
| 115 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 116 | labels.insert(target); | ||
| 117 | // Continue scanning for an exit method. | ||
| 118 | break; | ||
| 119 | } | 118 | } |
| 120 | default: | 119 | return result; |
| 121 | break; | 120 | }; |
| 121 | if (block.branch.address < 0) { | ||
| 122 | if (block.branch.kills) { | ||
| 123 | Node n = Operation(OperationCode::Discard); | ||
| 124 | n = apply_conditions(block.branch.cond, n); | ||
| 125 | bb.push_back(n); | ||
| 126 | global_code.push_back(n); | ||
| 127 | return; | ||
| 122 | } | 128 | } |
| 129 | Node n = Operation(OperationCode::Exit); | ||
| 130 | n = apply_conditions(block.branch.cond, n); | ||
| 131 | bb.push_back(n); | ||
| 132 | global_code.push_back(n); | ||
| 133 | return; | ||
| 123 | } | 134 | } |
| 124 | return exit_method = ExitMethod::AlwaysReturn; | 135 | Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); |
| 125 | } | 136 | n = apply_conditions(block.branch.cond, n); |
| 126 | 137 | bb.push_back(n); | |
| 127 | NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { | 138 | global_code.push_back(n); |
| 128 | NodeBlock basic_block; | ||
| 129 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 130 | pc = DecodeInstr(basic_block, pc); | ||
| 131 | } | ||
| 132 | return basic_block; | ||
| 133 | } | 139 | } |
| 134 | 140 | ||
| 135 | u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | 141 | u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { |
| @@ -140,15 +146,18 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 140 | 146 | ||
| 141 | const Instruction instr = {program_code[pc]}; | 147 | const Instruction instr = {program_code[pc]}; |
| 142 | const auto opcode = OpCode::Decode(instr); | 148 | const auto opcode = OpCode::Decode(instr); |
| 149 | const u32 nv_address = ConvertAddressToNvidiaSpace(pc); | ||
| 143 | 150 | ||
| 144 | // Decoding failure | 151 | // Decoding failure |
| 145 | if (!opcode) { | 152 | if (!opcode) { |
| 146 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); | 153 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); |
| 154 | bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", | ||
| 155 | nv_address, instr.value))); | ||
| 147 | return pc + 1; | 156 | return pc + 1; |
| 148 | } | 157 | } |
| 149 | 158 | ||
| 150 | bb.push_back( | 159 | bb.push_back(Comment( |
| 151 | Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value))); | 160 | fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); |
| 152 | 161 | ||
| 153 | using Tegra::Shader::Pred; | 162 | using Tegra::Shader::Pred; |
| 154 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, | 163 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, |
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 87d8fecaa..1473c282a 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp | |||
| @@ -42,11 +42,14 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { | |||
| 42 | case OpCode::Id::FMUL_R: | 42 | case OpCode::Id::FMUL_R: |
| 43 | case OpCode::Id::FMUL_IMM: { | 43 | case OpCode::Id::FMUL_IMM: { |
| 44 | // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. | 44 | // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. |
| 45 | UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", | 45 | if (instr.fmul.tab5cb8_2 != 0) { |
| 46 | instr.fmul.tab5cb8_2.Value()); | 46 | LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", |
| 47 | UNIMPLEMENTED_IF_MSG( | 47 | instr.fmul.tab5cb8_2.Value()); |
| 48 | instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", | 48 | } |
| 49 | instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default | 49 | if (instr.fmul.tab5c68_0 != 1) { |
| 50 | LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", | ||
| 51 | instr.fmul.tab5c68_0.Value()); | ||
| 52 | } | ||
| 50 | 53 | ||
| 51 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); | 54 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); |
| 52 | 55 | ||
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 7bcf38f23..6466fc011 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp | |||
| @@ -23,7 +23,9 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { | |||
| 23 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); | 23 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); |
| 24 | } | 24 | } |
| 25 | } else { | 25 | } else { |
| 26 | UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); | 26 | if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) { |
| 27 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); | ||
| 28 | } | ||
| 27 | } | 29 | } |
| 28 | 30 | ||
| 29 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); | 31 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); |
diff --git a/src/video_core/shader/decode/decode_integer_set.cpp b/src/video_core/shader/decode/decode_integer_set.cpp deleted file mode 100644 index e69de29bb..000000000 --- a/src/video_core/shader/decode/decode_integer_set.cpp +++ /dev/null | |||
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index 29be25ca3..ca2f39e8d 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp | |||
| @@ -18,10 +18,12 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { | |||
| 18 | const auto opcode = OpCode::Decode(instr); | 18 | const auto opcode = OpCode::Decode(instr); |
| 19 | 19 | ||
| 20 | UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); | 20 | UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); |
| 21 | UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", | 21 | if (instr.ffma.tab5980_0 != 1) { |
| 22 | instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO | 22 | LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); |
| 23 | UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", | 23 | } |
| 24 | instr.ffma.tab5980_1.Value()); | 24 | if (instr.ffma.tab5980_1 != 0) { |
| 25 | LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); | ||
| 26 | } | ||
| 25 | 27 | ||
| 26 | const Node op_a = GetRegister(instr.gpr8); | 28 | const Node op_a = GetRegister(instr.gpr8); |
| 27 | 29 | ||
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index d59d15bd8..a82a6a15c 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp | |||
| @@ -18,43 +18,56 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { | |||
| 18 | const Instruction instr = {program_code[pc]}; | 18 | const Instruction instr = {program_code[pc]}; |
| 19 | const auto opcode = OpCode::Decode(instr); | 19 | const auto opcode = OpCode::Decode(instr); |
| 20 | 20 | ||
| 21 | UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); | 21 | DEBUG_ASSERT(instr.hsetp2.ftz == 0); |
| 22 | 22 | ||
| 23 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); | 23 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); |
| 24 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); | 24 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); |
| 25 | 25 | ||
| 26 | Node op_b = [&]() { | 26 | Tegra::Shader::PredCondition cond{}; |
| 27 | switch (opcode->get().GetId()) { | 27 | bool h_and{}; |
| 28 | case OpCode::Id::HSETP2_R: | 28 | Node op_b{}; |
| 29 | return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, | 29 | switch (opcode->get().GetId()) { |
| 30 | instr.hsetp2.negate_b); | 30 | case OpCode::Id::HSETP2_C: |
| 31 | default: | 31 | cond = instr.hsetp2.cbuf_and_imm.cond; |
| 32 | UNREACHABLE(); | 32 | h_and = instr.hsetp2.cbuf_and_imm.h_and; |
| 33 | return Immediate(0); | 33 | op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), |
| 34 | } | 34 | instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); |
| 35 | }(); | 35 | break; |
| 36 | op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); | 36 | case OpCode::Id::HSETP2_IMM: |
| 37 | 37 | cond = instr.hsetp2.cbuf_and_imm.cond; | |
| 38 | // We can't use the constant predicate as destination. | 38 | h_and = instr.hsetp2.cbuf_and_imm.h_and; |
| 39 | ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); | 39 | op_b = UnpackHalfImmediate(instr, true); |
| 40 | 40 | break; | |
| 41 | const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); | 41 | case OpCode::Id::HSETP2_R: |
| 42 | cond = instr.hsetp2.reg.cond; | ||
| 43 | h_and = instr.hsetp2.reg.h_and; | ||
| 44 | op_b = | ||
| 45 | UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b, | ||
| 46 | instr.hsetp2.reg.negate_b), | ||
| 47 | instr.hsetp2.reg.type_b); | ||
| 48 | break; | ||
| 49 | default: | ||
| 50 | UNREACHABLE(); | ||
| 51 | op_b = Immediate(0); | ||
| 52 | } | ||
| 42 | 53 | ||
| 43 | const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); | 54 | const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); |
| 44 | const OperationCode pair_combiner = | 55 | const Node pred39 = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); |
| 45 | instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; | ||
| 46 | |||
| 47 | const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b); | ||
| 48 | const Node first_pred = Operation(pair_combiner, comparison); | ||
| 49 | 56 | ||
| 50 | // Set the primary predicate to the result of Predicate OP SecondPredicate | 57 | const auto Write = [&](u64 dest, Node src) { |
| 51 | const Node value = Operation(combiner, first_pred, second_pred); | 58 | SetPredicate(bb, dest, Operation(combiner, std::move(src), pred39)); |
| 52 | SetPredicate(bb, instr.hsetp2.pred3, value); | 59 | }; |
| 53 | 60 | ||
| 54 | if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | 61 | const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); |
| 55 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled | 62 | const u64 first = instr.hsetp2.pred0; |
| 56 | const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred); | 63 | const u64 second = instr.hsetp2.pred3; |
| 57 | SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred)); | 64 | if (h_and) { |
| 65 | const Node joined = Operation(OperationCode::LogicalAnd2, comparison); | ||
| 66 | Write(first, joined); | ||
| 67 | Write(second, Operation(OperationCode::LogicalNegate, joined)); | ||
| 68 | } else { | ||
| 69 | Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u))); | ||
| 70 | Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u))); | ||
| 58 | } | 71 | } |
| 59 | 72 | ||
| 60 | return pc; | 73 | return pc; |
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index c3bcf1ae9..5b44cb79c 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp | |||
| @@ -22,9 +22,9 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { | |||
| 22 | const auto opcode = OpCode::Decode(instr); | 22 | const auto opcode = OpCode::Decode(instr); |
| 23 | 23 | ||
| 24 | if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { | 24 | if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { |
| 25 | UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None); | 25 | DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); |
| 26 | } else { | 26 | } else { |
| 27 | UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None); | 27 | DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); |
| 28 | } | 28 | } |
| 29 | 29 | ||
| 30 | constexpr auto identity = HalfType::H0_H1; | 30 | constexpr auto identity = HalfType::H0_H1; |
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 24f022cc0..77151a24b 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp | |||
| @@ -95,12 +95,8 @@ const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::Image | |||
| 95 | const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, | 95 | const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, |
| 96 | Tegra::Shader::ImageType type) { | 96 | Tegra::Shader::ImageType type) { |
| 97 | const Node image_register{GetRegister(reg)}; | 97 | const Node image_register{GetRegister(reg)}; |
| 98 | const Node base_image{ | 98 | const auto [base_image, cbuf_index, cbuf_offset]{ |
| 99 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; | 99 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; |
| 100 | const auto cbuf{std::get_if<CbufNode>(&*base_image)}; | ||
| 101 | const auto cbuf_offset_imm{std::get_if<ImmediateNode>(&*cbuf->GetOffset())}; | ||
| 102 | const auto cbuf_offset{cbuf_offset_imm->GetValue()}; | ||
| 103 | const auto cbuf_index{cbuf->GetIndex()}; | ||
| 104 | const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; | 100 | const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; |
| 105 | 101 | ||
| 106 | // If this image has already been used, return the existing mapping. | 102 | // If this image has already been used, return the existing mapping. |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 80fc0ccfc..ed108bea8 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -95,10 +95,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 95 | const Node op_b = | 95 | const Node op_b = |
| 96 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); | 96 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); |
| 97 | 97 | ||
| 98 | SetTemporal(bb, 0, op_a); | 98 | SetTemporary(bb, 0, op_a); |
| 99 | SetTemporal(bb, 1, op_b); | 99 | SetTemporary(bb, 1, op_b); |
| 100 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | 100 | SetRegister(bb, instr.gpr0, GetTemporary(0)); |
| 101 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); | 101 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); |
| 102 | break; | 102 | break; |
| 103 | } | 103 | } |
| 104 | default: | 104 | default: |
| @@ -136,9 +136,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 136 | } | 136 | } |
| 137 | }(); | 137 | }(); |
| 138 | for (u32 i = 0; i < count; ++i) | 138 | for (u32 i = 0; i < count; ++i) |
| 139 | SetTemporal(bb, i, GetLmem(i * 4)); | 139 | SetTemporary(bb, i, GetLmem(i * 4)); |
| 140 | for (u32 i = 0; i < count; ++i) | 140 | for (u32 i = 0; i < count; ++i) |
| 141 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 141 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 142 | break; | 142 | break; |
| 143 | } | 143 | } |
| 144 | default: | 144 | default: |
| @@ -172,10 +172,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 172 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | 172 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); |
| 173 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 173 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 174 | 174 | ||
| 175 | SetTemporal(bb, i, gmem); | 175 | SetTemporary(bb, i, gmem); |
| 176 | } | 176 | } |
| 177 | for (u32 i = 0; i < count; ++i) { | 177 | for (u32 i = 0; i < count; ++i) { |
| 178 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 178 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 179 | } | 179 | } |
| 180 | break; | 180 | break; |
| 181 | } | 181 | } |
| @@ -253,11 +253,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 253 | TrackAndGetGlobalMemory(bb, instr, true); | 253 | TrackAndGetGlobalMemory(bb, instr, true); |
| 254 | 254 | ||
| 255 | // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} | 255 | // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} |
| 256 | SetTemporal(bb, 0, real_address_base); | 256 | SetTemporary(bb, 0, real_address_base); |
| 257 | 257 | ||
| 258 | const u32 count = GetUniformTypeElementsCount(type); | 258 | const u32 count = GetUniformTypeElementsCount(type); |
| 259 | for (u32 i = 0; i < count; ++i) { | 259 | for (u32 i = 0; i < count; ++i) { |
| 260 | SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); | 260 | SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); |
| 261 | } | 261 | } |
| 262 | for (u32 i = 0; i < count; ++i) { | 262 | for (u32 i = 0; i < count; ++i) { |
| 263 | const Node it_offset = Immediate(i * 4); | 263 | const Node it_offset = Immediate(i * 4); |
| @@ -265,7 +265,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 265 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | 265 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); |
| 266 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 266 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 267 | 267 | ||
| 268 | bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); | 268 | bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1))); |
| 269 | } | 269 | } |
| 270 | break; | 270 | break; |
| 271 | } | 271 | } |
| @@ -297,18 +297,13 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB | |||
| 297 | const auto addr_register{GetRegister(instr.gmem.gpr)}; | 297 | const auto addr_register{GetRegister(instr.gmem.gpr)}; |
| 298 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; | 298 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; |
| 299 | 299 | ||
| 300 | const Node base_address{ | 300 | const auto [base_address, index, offset] = |
| 301 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; | 301 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); |
| 302 | const auto cbuf = std::get_if<CbufNode>(&*base_address); | 302 | ASSERT(base_address != nullptr); |
| 303 | ASSERT(cbuf != nullptr); | ||
| 304 | const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); | ||
| 305 | ASSERT(cbuf_offset_imm != nullptr); | ||
| 306 | const auto cbuf_offset = cbuf_offset_imm->GetValue(); | ||
| 307 | 303 | ||
| 308 | bb.push_back( | 304 | bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); |
| 309 | Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); | ||
| 310 | 305 | ||
| 311 | const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; | 306 | const GlobalMemoryBase descriptor{index, offset}; |
| 312 | const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); | 307 | const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); |
| 313 | auto& usage = entry->second; | 308 | auto& usage = entry->second; |
| 314 | if (is_write) { | 309 | if (is_write) { |
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d46a8ab82..c0f64d7a0 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -91,11 +91,46 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 91 | break; | 91 | break; |
| 92 | } | 92 | } |
| 93 | case OpCode::Id::BRA: { | 93 | case OpCode::Id::BRA: { |
| 94 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | 94 | Node branch; |
| 95 | "BRA with constant buffers are not implemented"); | 95 | if (instr.bra.constant_buffer == 0) { |
| 96 | const u32 target = pc + instr.bra.GetBranchTarget(); | ||
| 97 | branch = Operation(OperationCode::Branch, Immediate(target)); | ||
| 98 | } else { | ||
| 99 | const u32 target = pc + 1; | ||
| 100 | const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); | ||
| 101 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 102 | PRECISE, op_a, Immediate(3)); | ||
| 103 | const Node operand = | ||
| 104 | Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 105 | branch = Operation(OperationCode::BranchIndirect, operand); | ||
| 106 | } | ||
| 96 | 107 | ||
| 97 | const u32 target = pc + instr.bra.GetBranchTarget(); | 108 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 98 | const Node branch = Operation(OperationCode::Branch, Immediate(target)); | 109 | if (cc != Tegra::Shader::ConditionCode::T) { |
| 110 | bb.push_back(Conditional(GetConditionCode(cc), {branch})); | ||
| 111 | } else { | ||
| 112 | bb.push_back(branch); | ||
| 113 | } | ||
| 114 | break; | ||
| 115 | } | ||
| 116 | case OpCode::Id::BRX: { | ||
| 117 | Node operand; | ||
| 118 | if (instr.brx.constant_buffer != 0) { | ||
| 119 | const s32 target = pc + 1; | ||
| 120 | const Node index = GetRegister(instr.gpr8); | ||
| 121 | const Node op_a = | ||
| 122 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); | ||
| 123 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 124 | PRECISE, op_a, Immediate(3)); | ||
| 125 | operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 126 | } else { | ||
| 127 | const s32 target = pc + instr.brx.GetBranchExtend(); | ||
| 128 | const Node op_a = GetRegister(instr.gpr8); | ||
| 129 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 130 | PRECISE, op_a, Immediate(3)); | ||
| 131 | operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 132 | } | ||
| 133 | const Node branch = Operation(OperationCode::BranchIndirect, operand); | ||
| 99 | 134 | ||
| 100 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | 135 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 101 | if (cc != Tegra::Shader::ConditionCode::T) { | 136 | if (cc != Tegra::Shader::ConditionCode::T) { |
| @@ -109,6 +144,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 109 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | 144 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |
| 110 | "Constant buffer flow is not supported"); | 145 | "Constant buffer flow is not supported"); |
| 111 | 146 | ||
| 147 | if (disable_flow_stack) { | ||
| 148 | break; | ||
| 149 | } | ||
| 150 | |||
| 112 | // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. | 151 | // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. |
| 113 | const u32 target = pc + instr.bra.GetBranchTarget(); | 152 | const u32 target = pc + instr.bra.GetBranchTarget(); |
| 114 | bb.push_back( | 153 | bb.push_back( |
| @@ -119,6 +158,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 119 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | 158 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |
| 120 | "Constant buffer PBK is not supported"); | 159 | "Constant buffer PBK is not supported"); |
| 121 | 160 | ||
| 161 | if (disable_flow_stack) { | ||
| 162 | break; | ||
| 163 | } | ||
| 164 | |||
| 122 | // PBK pushes to a stack the address where BRK will jump to. | 165 | // PBK pushes to a stack the address where BRK will jump to. |
| 123 | const u32 target = pc + instr.bra.GetBranchTarget(); | 166 | const u32 target = pc + instr.bra.GetBranchTarget(); |
| 124 | bb.push_back( | 167 | bb.push_back( |
| @@ -130,6 +173,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 130 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", | 173 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", |
| 131 | static_cast<u32>(cc)); | 174 | static_cast<u32>(cc)); |
| 132 | 175 | ||
| 176 | if (disable_flow_stack) { | ||
| 177 | break; | ||
| 178 | } | ||
| 179 | |||
| 133 | // The SYNC opcode jumps to the address previously set by the SSY opcode | 180 | // The SYNC opcode jumps to the address previously set by the SSY opcode |
| 134 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); | 181 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); |
| 135 | break; | 182 | break; |
| @@ -138,6 +185,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 138 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | 185 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 139 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", | 186 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", |
| 140 | static_cast<u32>(cc)); | 187 | static_cast<u32>(cc)); |
| 188 | if (disable_flow_stack) { | ||
| 189 | break; | ||
| 190 | } | ||
| 141 | 191 | ||
| 142 | // The BRK opcode jumps to the address previously set by the PBK opcode | 192 | // The BRK opcode jumps to the address previously set by the PBK opcode |
| 143 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); | 193 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index cb480be9b..0b934a069 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -181,10 +181,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 181 | const Node value = | 181 | const Node value = |
| 182 | Operation(OperationCode::TextureQueryDimensions, meta, | 182 | Operation(OperationCode::TextureQueryDimensions, meta, |
| 183 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); | 183 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); |
| 184 | SetTemporal(bb, indexer++, value); | 184 | SetTemporary(bb, indexer++, value); |
| 185 | } | 185 | } |
| 186 | for (u32 i = 0; i < indexer; ++i) { | 186 | for (u32 i = 0; i < indexer; ++i) { |
| 187 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 187 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 188 | } | 188 | } |
| 189 | break; | 189 | break; |
| 190 | } | 190 | } |
| @@ -238,10 +238,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 238 | auto params = coords; | 238 | auto params = coords; |
| 239 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; | 239 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; |
| 240 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | 240 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); |
| 241 | SetTemporal(bb, indexer++, value); | 241 | SetTemporary(bb, indexer++, value); |
| 242 | } | 242 | } |
| 243 | for (u32 i = 0; i < indexer; ++i) { | 243 | for (u32 i = 0; i < indexer; ++i) { |
| 244 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 244 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 245 | } | 245 | } |
| 246 | break; | 246 | break; |
| 247 | } | 247 | } |
| @@ -269,7 +269,13 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 269 | LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); | 269 | LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); |
| 270 | } | 270 | } |
| 271 | 271 | ||
| 272 | WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); | 272 | const Node4 components = GetTldsCode(instr, texture_type, is_array); |
| 273 | |||
| 274 | if (instr.tlds.fp32_flag) { | ||
| 275 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 276 | } else { | ||
| 277 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 278 | } | ||
| 273 | break; | 279 | break; |
| 274 | } | 280 | } |
| 275 | default: | 281 | default: |
| @@ -302,13 +308,9 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu | |||
| 302 | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, | 308 | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, |
| 303 | bool is_array, bool is_shadow) { | 309 | bool is_array, bool is_shadow) { |
| 304 | const Node sampler_register = GetRegister(reg); | 310 | const Node sampler_register = GetRegister(reg); |
| 305 | const Node base_sampler = | 311 | const auto [base_sampler, cbuf_index, cbuf_offset] = |
| 306 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); | 312 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); |
| 307 | const auto cbuf = std::get_if<CbufNode>(&*base_sampler); | 313 | ASSERT(base_sampler != nullptr); |
| 308 | const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); | ||
| 309 | ASSERT(cbuf_offset_imm != nullptr); | ||
| 310 | const auto cbuf_offset = cbuf_offset_imm->GetValue(); | ||
| 311 | const auto cbuf_index = cbuf->GetIndex(); | ||
| 312 | const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); | 314 | const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); |
| 313 | 315 | ||
| 314 | // If this sampler has already been used, return the existing mapping. | 316 | // If this sampler has already been used, return the existing mapping. |
| @@ -334,11 +336,11 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const | |||
| 334 | // Skip disabled components | 336 | // Skip disabled components |
| 335 | continue; | 337 | continue; |
| 336 | } | 338 | } |
| 337 | SetTemporal(bb, dest_elem++, components[elem]); | 339 | SetTemporary(bb, dest_elem++, components[elem]); |
| 338 | } | 340 | } |
| 339 | // After writing values in temporals, move them to the real registers | 341 | // After writing values in temporals, move them to the real registers |
| 340 | for (u32 i = 0; i < dest_elem; ++i) { | 342 | for (u32 i = 0; i < dest_elem; ++i) { |
| 341 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 343 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 342 | } | 344 | } |
| 343 | } | 345 | } |
| 344 | 346 | ||
| @@ -351,17 +353,17 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, | |||
| 351 | for (u32 component = 0; component < 4; ++component) { | 353 | for (u32 component = 0; component < 4; ++component) { |
| 352 | if (!instr.texs.IsComponentEnabled(component)) | 354 | if (!instr.texs.IsComponentEnabled(component)) |
| 353 | continue; | 355 | continue; |
| 354 | SetTemporal(bb, dest_elem++, components[component]); | 356 | SetTemporary(bb, dest_elem++, components[component]); |
| 355 | } | 357 | } |
| 356 | 358 | ||
| 357 | for (u32 i = 0; i < dest_elem; ++i) { | 359 | for (u32 i = 0; i < dest_elem; ++i) { |
| 358 | if (i < 2) { | 360 | if (i < 2) { |
| 359 | // Write the first two swizzle components to gpr0 and gpr0+1 | 361 | // Write the first two swizzle components to gpr0 and gpr0+1 |
| 360 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); | 362 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); |
| 361 | } else { | 363 | } else { |
| 362 | ASSERT(instr.texs.HasTwoDestinations()); | 364 | ASSERT(instr.texs.HasTwoDestinations()); |
| 363 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | 365 | // Write the rest of the swizzle components to gpr28 and gpr28+1 |
| 364 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); | 366 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); |
| 365 | } | 367 | } |
| 366 | } | 368 | } |
| 367 | } | 369 | } |
| @@ -389,11 +391,11 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | |||
| 389 | return; | 391 | return; |
| 390 | } | 392 | } |
| 391 | 393 | ||
| 392 | SetTemporal(bb, 0, first_value); | 394 | SetTemporary(bb, 0, first_value); |
| 393 | SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | 395 | SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); |
| 394 | 396 | ||
| 395 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | 397 | SetRegister(bb, instr.gpr0, GetTemporary(0)); |
| 396 | SetRegister(bb, instr.gpr28, GetTemporal(1)); | 398 | SetRegister(bb, instr.gpr28, GetTemporary(1)); |
| 397 | } | 399 | } |
| 398 | 400 | ||
| 399 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | 401 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, |
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 93dee77d1..206961909 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp | |||
| @@ -73,8 +73,8 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | |||
| 73 | if (is_psl) { | 73 | if (is_psl) { |
| 74 | product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); | 74 | product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); |
| 75 | } | 75 | } |
| 76 | SetTemporal(bb, 0, product); | 76 | SetTemporary(bb, 0, product); |
| 77 | product = GetTemporal(0); | 77 | product = GetTemporary(0); |
| 78 | 78 | ||
| 79 | const Node original_c = op_c; | 79 | const Node original_c = op_c; |
| 80 | const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error | 80 | const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error |
| @@ -98,13 +98,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | |||
| 98 | } | 98 | } |
| 99 | }(); | 99 | }(); |
| 100 | 100 | ||
| 101 | SetTemporal(bb, 1, op_c); | 101 | SetTemporary(bb, 1, op_c); |
| 102 | op_c = GetTemporal(1); | 102 | op_c = GetTemporary(1); |
| 103 | 103 | ||
| 104 | // TODO(Rodrigo): Use an appropiate sign for this operation | 104 | // TODO(Rodrigo): Use an appropiate sign for this operation |
| 105 | Node sum = Operation(OperationCode::IAdd, product, op_c); | 105 | Node sum = Operation(OperationCode::IAdd, product, op_c); |
| 106 | SetTemporal(bb, 2, sum); | 106 | SetTemporary(bb, 2, sum); |
| 107 | sum = GetTemporal(2); | 107 | sum = GetTemporary(2); |
| 108 | if (is_merge) { | 108 | if (is_merge) { |
| 109 | const Node a = BitfieldExtract(sum, 0, 16); | 109 | const Node a = BitfieldExtract(sum, 0, 16); |
| 110 | const Node b = | 110 | const Node b = |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 0ac83fcf0..715184d67 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -101,8 +101,7 @@ enum class OperationCode { | |||
| 101 | LogicalXor, /// (bool a, bool b) -> bool | 101 | LogicalXor, /// (bool a, bool b) -> bool |
| 102 | LogicalNegate, /// (bool a) -> bool | 102 | LogicalNegate, /// (bool a) -> bool |
| 103 | LogicalPick2, /// (bool2 pair, uint index) -> bool | 103 | LogicalPick2, /// (bool2 pair, uint index) -> bool |
| 104 | LogicalAll2, /// (bool2 a) -> bool | 104 | LogicalAnd2, /// (bool2 a) -> bool |
| 105 | LogicalAny2, /// (bool2 a) -> bool | ||
| 106 | 105 | ||
| 107 | LogicalFLessThan, /// (float a, float b) -> bool | 106 | LogicalFLessThan, /// (float a, float b) -> bool |
| 108 | LogicalFEqual, /// (float a, float b) -> bool | 107 | LogicalFEqual, /// (float a, float b) -> bool |
| @@ -148,11 +147,12 @@ enum class OperationCode { | |||
| 148 | 147 | ||
| 149 | ImageStore, /// (MetaImage, float[N] coords) -> void | 148 | ImageStore, /// (MetaImage, float[N] coords) -> void |
| 150 | 149 | ||
| 151 | Branch, /// (uint branch_target) -> void | 150 | Branch, /// (uint branch_target) -> void |
| 152 | PushFlowStack, /// (uint branch_target) -> void | 151 | BranchIndirect, /// (uint branch_target) -> void |
| 153 | PopFlowStack, /// () -> void | 152 | PushFlowStack, /// (uint branch_target) -> void |
| 154 | Exit, /// () -> void | 153 | PopFlowStack, /// () -> void |
| 155 | Discard, /// () -> void | 154 | Exit, /// () -> void |
| 155 | Discard, /// () -> void | ||
| 156 | 156 | ||
| 157 | EmitVertex, /// () -> void | 157 | EmitVertex, /// () -> void |
| 158 | EndPrimitive, /// () -> void | 158 | EndPrimitive, /// () -> void |
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp index 6fccbbba3..b3dcd291c 100644 --- a/src/video_core/shader/node_helper.cpp +++ b/src/video_core/shader/node_helper.cpp | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | namespace VideoCommon::Shader { | 12 | namespace VideoCommon::Shader { |
| 13 | 13 | ||
| 14 | Node Conditional(Node condition, std::vector<Node> code) { | 14 | Node Conditional(Node condition, std::vector<Node> code) { |
| 15 | return MakeNode<ConditionalNode>(condition, std::move(code)); | 15 | return MakeNode<ConditionalNode>(std::move(condition), std::move(code)); |
| 16 | } | 16 | } |
| 17 | 17 | ||
| 18 | Node Comment(std::string text) { | 18 | Node Comment(std::string text) { |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 11b545cca..5e91fe129 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -22,8 +22,8 @@ using Tegra::Shader::PredCondition; | |||
| 22 | using Tegra::Shader::PredOperation; | 22 | using Tegra::Shader::PredOperation; |
| 23 | using Tegra::Shader::Register; | 23 | using Tegra::Shader::Register; |
| 24 | 24 | ||
| 25 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset) | 25 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size) |
| 26 | : program_code{program_code}, main_offset{main_offset} { | 26 | : program_code{program_code}, main_offset{main_offset}, program_size{size} { |
| 27 | Decode(); | 27 | Decode(); |
| 28 | } | 28 | } |
| 29 | 29 | ||
| @@ -61,8 +61,17 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { | |||
| 61 | const auto [entry, is_new] = used_cbufs.try_emplace(index); | 61 | const auto [entry, is_new] = used_cbufs.try_emplace(index); |
| 62 | entry->second.MarkAsUsedIndirect(); | 62 | entry->second.MarkAsUsedIndirect(); |
| 63 | 63 | ||
| 64 | const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); | 64 | Node final_offset = [&] { |
| 65 | return MakeNode<CbufNode>(index, final_offset); | 65 | // Attempt to inline constant buffer without a variable offset. This is done to allow |
| 66 | // tracking LDC calls. | ||
| 67 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 68 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 69 | return Immediate(offset); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset)); | ||
| 73 | }(); | ||
| 74 | return MakeNode<CbufNode>(index, std::move(final_offset)); | ||
| 66 | } | 75 | } |
| 67 | 76 | ||
| 68 | Node ShaderIR::GetPredicate(u64 pred_, bool negated) { | 77 | Node ShaderIR::GetPredicate(u64 pred_, bool negated) { |
| @@ -80,7 +89,7 @@ Node ShaderIR::GetPredicate(bool immediate) { | |||
| 80 | 89 | ||
| 81 | Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { | 90 | Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { |
| 82 | used_input_attributes.emplace(index); | 91 | used_input_attributes.emplace(index); |
| 83 | return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); | 92 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); |
| 84 | } | 93 | } |
| 85 | 94 | ||
| 86 | Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { | 95 | Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { |
| @@ -89,6 +98,22 @@ Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_addres | |||
| 89 | } | 98 | } |
| 90 | 99 | ||
| 91 | Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { | 100 | Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { |
| 101 | if (index == Attribute::Index::LayerViewportPointSize) { | ||
| 102 | switch (element) { | ||
| 103 | case 0: | ||
| 104 | UNIMPLEMENTED(); | ||
| 105 | break; | ||
| 106 | case 1: | ||
| 107 | uses_layer = true; | ||
| 108 | break; | ||
| 109 | case 2: | ||
| 110 | uses_viewport_index = true; | ||
| 111 | break; | ||
| 112 | case 3: | ||
| 113 | uses_point_size = true; | ||
| 114 | break; | ||
| 115 | } | ||
| 116 | } | ||
| 92 | if (index == Attribute::Index::ClipDistances0123 || | 117 | if (index == Attribute::Index::ClipDistances0123 || |
| 93 | index == Attribute::Index::ClipDistances4567) { | 118 | index == Attribute::Index::ClipDistances4567) { |
| 94 | const auto clip_index = | 119 | const auto clip_index = |
| @@ -97,7 +122,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff | |||
| 97 | } | 122 | } |
| 98 | used_output_attributes.insert(index); | 123 | used_output_attributes.insert(index); |
| 99 | 124 | ||
| 100 | return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); | 125 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); |
| 101 | } | 126 | } |
| 102 | 127 | ||
| 103 | Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { | 128 | Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { |
| @@ -109,19 +134,19 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { | |||
| 109 | } | 134 | } |
| 110 | 135 | ||
| 111 | Node ShaderIR::GetLocalMemory(Node address) { | 136 | Node ShaderIR::GetLocalMemory(Node address) { |
| 112 | return MakeNode<LmemNode>(address); | 137 | return MakeNode<LmemNode>(std::move(address)); |
| 113 | } | 138 | } |
| 114 | 139 | ||
| 115 | Node ShaderIR::GetTemporal(u32 id) { | 140 | Node ShaderIR::GetTemporary(u32 id) { |
| 116 | return GetRegister(Register::ZeroIndex + 1 + id); | 141 | return GetRegister(Register::ZeroIndex + 1 + id); |
| 117 | } | 142 | } |
| 118 | 143 | ||
| 119 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { | 144 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { |
| 120 | if (absolute) { | 145 | if (absolute) { |
| 121 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, value); | 146 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value)); |
| 122 | } | 147 | } |
| 123 | if (negate) { | 148 | if (negate) { |
| 124 | value = Operation(OperationCode::FNegate, NO_PRECISE, value); | 149 | value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value)); |
| 125 | } | 150 | } |
| 126 | return value; | 151 | return value; |
| 127 | } | 152 | } |
| @@ -130,24 +155,26 @@ Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { | |||
| 130 | if (!saturate) { | 155 | if (!saturate) { |
| 131 | return value; | 156 | return value; |
| 132 | } | 157 | } |
| 133 | const Node positive_zero = Immediate(std::copysignf(0, 1)); | 158 | |
| 134 | const Node positive_one = Immediate(1.0f); | 159 | Node positive_zero = Immediate(std::copysignf(0, 1)); |
| 135 | return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one); | 160 | Node positive_one = Immediate(1.0f); |
| 161 | return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero), | ||
| 162 | std::move(positive_one)); | ||
| 136 | } | 163 | } |
| 137 | 164 | ||
| 138 | Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) { | 165 | Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) { |
| 139 | switch (size) { | 166 | switch (size) { |
| 140 | case Register::Size::Byte: | 167 | case Register::Size::Byte: |
| 141 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, | 168 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, |
| 142 | Immediate(24)); | 169 | std::move(value), Immediate(24)); |
| 143 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, | 170 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, |
| 144 | Immediate(24)); | 171 | std::move(value), Immediate(24)); |
| 145 | return value; | 172 | return value; |
| 146 | case Register::Size::Short: | 173 | case Register::Size::Short: |
| 147 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, | 174 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, |
| 148 | Immediate(16)); | 175 | std::move(value), Immediate(16)); |
| 149 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, | 176 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, |
| 150 | Immediate(16)); | 177 | std::move(value), Immediate(16)); |
| 151 | case Register::Size::Word: | 178 | case Register::Size::Word: |
| 152 | // Default - do nothing | 179 | // Default - do nothing |
| 153 | return value; | 180 | return value; |
| @@ -163,27 +190,29 @@ Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, b | |||
| 163 | return value; | 190 | return value; |
| 164 | } | 191 | } |
| 165 | if (absolute) { | 192 | if (absolute) { |
| 166 | value = Operation(OperationCode::IAbsolute, NO_PRECISE, value); | 193 | value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value)); |
| 167 | } | 194 | } |
| 168 | if (negate) { | 195 | if (negate) { |
| 169 | value = Operation(OperationCode::INegate, NO_PRECISE, value); | 196 | value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value)); |
| 170 | } | 197 | } |
| 171 | return value; | 198 | return value; |
| 172 | } | 199 | } |
| 173 | 200 | ||
| 174 | Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { | 201 | Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { |
| 175 | const Node value = Immediate(instr.half_imm.PackImmediates()); | 202 | Node value = Immediate(instr.half_imm.PackImmediates()); |
| 176 | if (!has_negation) { | 203 | if (!has_negation) { |
| 177 | return value; | 204 | return value; |
| 178 | } | 205 | } |
| 179 | const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); | ||
| 180 | const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); | ||
| 181 | 206 | ||
| 182 | return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate); | 207 | Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); |
| 208 | Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); | ||
| 209 | |||
| 210 | return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate), | ||
| 211 | std::move(second_negate)); | ||
| 183 | } | 212 | } |
| 184 | 213 | ||
| 185 | Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { | 214 | Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { |
| 186 | return Operation(OperationCode::HUnpack, type, value); | 215 | return Operation(OperationCode::HUnpack, type, std::move(value)); |
| 187 | } | 216 | } |
| 188 | 217 | ||
| 189 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | 218 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { |
| @@ -191,11 +220,11 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | |||
| 191 | case Tegra::Shader::HalfMerge::H0_H1: | 220 | case Tegra::Shader::HalfMerge::H0_H1: |
| 192 | return src; | 221 | return src; |
| 193 | case Tegra::Shader::HalfMerge::F32: | 222 | case Tegra::Shader::HalfMerge::F32: |
| 194 | return Operation(OperationCode::HMergeF32, src); | 223 | return Operation(OperationCode::HMergeF32, std::move(src)); |
| 195 | case Tegra::Shader::HalfMerge::Mrg_H0: | 224 | case Tegra::Shader::HalfMerge::Mrg_H0: |
| 196 | return Operation(OperationCode::HMergeH0, dest, src); | 225 | return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src)); |
| 197 | case Tegra::Shader::HalfMerge::Mrg_H1: | 226 | case Tegra::Shader::HalfMerge::Mrg_H1: |
| 198 | return Operation(OperationCode::HMergeH1, dest, src); | 227 | return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src)); |
| 199 | } | 228 | } |
| 200 | UNREACHABLE(); | 229 | UNREACHABLE(); |
| 201 | return src; | 230 | return src; |
| @@ -203,10 +232,10 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | |||
| 203 | 232 | ||
| 204 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { | 233 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { |
| 205 | if (absolute) { | 234 | if (absolute) { |
| 206 | value = Operation(OperationCode::HAbsolute, NO_PRECISE, value); | 235 | value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value)); |
| 207 | } | 236 | } |
| 208 | if (negate) { | 237 | if (negate) { |
| 209 | value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true), | 238 | value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true), |
| 210 | GetPredicate(true)); | 239 | GetPredicate(true)); |
| 211 | } | 240 | } |
| 212 | return value; | 241 | return value; |
| @@ -216,9 +245,11 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { | |||
| 216 | if (!saturate) { | 245 | if (!saturate) { |
| 217 | return value; | 246 | return value; |
| 218 | } | 247 | } |
| 219 | const Node positive_zero = Immediate(std::copysignf(0, 1)); | 248 | |
| 220 | const Node positive_one = Immediate(1.0f); | 249 | Node positive_zero = Immediate(std::copysignf(0, 1)); |
| 221 | return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one); | 250 | Node positive_one = Immediate(1.0f); |
| 251 | return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero), | ||
| 252 | std::move(positive_one)); | ||
| 222 | } | 253 | } |
| 223 | 254 | ||
| 224 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { | 255 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { |
| @@ -246,7 +277,6 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N | |||
| 246 | condition == PredCondition::LessEqualWithNan || | 277 | condition == PredCondition::LessEqualWithNan || |
| 247 | condition == PredCondition::GreaterThanWithNan || | 278 | condition == PredCondition::GreaterThanWithNan || |
| 248 | condition == PredCondition::GreaterEqualWithNan) { | 279 | condition == PredCondition::GreaterEqualWithNan) { |
| 249 | |||
| 250 | predicate = Operation(OperationCode::LogicalOr, predicate, | 280 | predicate = Operation(OperationCode::LogicalOr, predicate, |
| 251 | Operation(OperationCode::LogicalFIsNan, op_a)); | 281 | Operation(OperationCode::LogicalFIsNan, op_a)); |
| 252 | predicate = Operation(OperationCode::LogicalOr, predicate, | 282 | predicate = Operation(OperationCode::LogicalOr, predicate, |
| @@ -275,7 +305,8 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si | |||
| 275 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | 305 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), |
| 276 | "Unknown predicate comparison operation"); | 306 | "Unknown predicate comparison operation"); |
| 277 | 307 | ||
| 278 | Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b); | 308 | Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), |
| 309 | std::move(op_b)); | ||
| 279 | 310 | ||
| 280 | UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || | 311 | UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || |
| 281 | condition == PredCondition::NotEqualWithNan || | 312 | condition == PredCondition::NotEqualWithNan || |
| @@ -305,9 +336,7 @@ Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition | |||
| 305 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | 336 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), |
| 306 | "Unknown predicate comparison operation"); | 337 | "Unknown predicate comparison operation"); |
| 307 | 338 | ||
| 308 | const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); | 339 | return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); |
| 309 | |||
| 310 | return predicate; | ||
| 311 | } | 340 | } |
| 312 | 341 | ||
| 313 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { | 342 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { |
| @@ -333,31 +362,32 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) { | |||
| 333 | } | 362 | } |
| 334 | 363 | ||
| 335 | void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { | 364 | void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { |
| 336 | bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src)); | 365 | bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src))); |
| 337 | } | 366 | } |
| 338 | 367 | ||
| 339 | void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { | 368 | void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { |
| 340 | bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src)); | 369 | bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src))); |
| 341 | } | 370 | } |
| 342 | 371 | ||
| 343 | void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { | 372 | void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { |
| 344 | bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value)); | 373 | bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value))); |
| 345 | } | 374 | } |
| 346 | 375 | ||
| 347 | void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { | 376 | void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { |
| 348 | bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); | 377 | bb.push_back( |
| 378 | Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); | ||
| 349 | } | 379 | } |
| 350 | 380 | ||
| 351 | void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) { | 381 | void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { |
| 352 | SetRegister(bb, Register::ZeroIndex + 1 + id, value); | 382 | SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); |
| 353 | } | 383 | } |
| 354 | 384 | ||
| 355 | void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { | 385 | void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { |
| 356 | if (!sets_cc) { | 386 | if (!sets_cc) { |
| 357 | return; | 387 | return; |
| 358 | } | 388 | } |
| 359 | const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); | 389 | Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f)); |
| 360 | SetInternalFlag(bb, InternalFlag::Zero, zerop); | 390 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); |
| 361 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | 391 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); |
| 362 | } | 392 | } |
| 363 | 393 | ||
| @@ -365,14 +395,14 @@ void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_ | |||
| 365 | if (!sets_cc) { | 395 | if (!sets_cc) { |
| 366 | return; | 396 | return; |
| 367 | } | 397 | } |
| 368 | const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0)); | 398 | Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); |
| 369 | SetInternalFlag(bb, InternalFlag::Zero, zerop); | 399 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); |
| 370 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | 400 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); |
| 371 | } | 401 | } |
| 372 | 402 | ||
| 373 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { | 403 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { |
| 374 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), | 404 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value), |
| 375 | Immediate(bits)); | 405 | Immediate(offset), Immediate(bits)); |
| 376 | } | 406 | } |
| 377 | 407 | ||
| 378 | } // namespace VideoCommon::Shader | 408 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index e22548208..59a083d90 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -5,13 +5,10 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstring> | ||
| 9 | #include <map> | 8 | #include <map> |
| 10 | #include <optional> | 9 | #include <optional> |
| 11 | #include <set> | 10 | #include <set> |
| 12 | #include <string> | ||
| 13 | #include <tuple> | 11 | #include <tuple> |
| 14 | #include <variant> | ||
| 15 | #include <vector> | 12 | #include <vector> |
| 16 | 13 | ||
| 17 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| @@ -22,18 +19,12 @@ | |||
| 22 | 19 | ||
| 23 | namespace VideoCommon::Shader { | 20 | namespace VideoCommon::Shader { |
| 24 | 21 | ||
| 22 | struct ShaderBlock; | ||
| 23 | |||
| 25 | using ProgramCode = std::vector<u64>; | 24 | using ProgramCode = std::vector<u64>; |
| 26 | 25 | ||
| 27 | constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; | 26 | constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; |
| 28 | 27 | ||
| 29 | /// Describes the behaviour of code path of a given entry point and a return point. | ||
| 30 | enum class ExitMethod { | ||
| 31 | Undetermined, ///< Internal value. Only occur when analyzing JMP loop. | ||
| 32 | AlwaysReturn, ///< All code paths reach the return point. | ||
| 33 | Conditional, ///< Code path reaches the return point or an END instruction conditionally. | ||
| 34 | AlwaysEnd, ///< All code paths reach a END instruction. | ||
| 35 | }; | ||
| 36 | |||
| 37 | class ConstBuffer { | 28 | class ConstBuffer { |
| 38 | public: | 29 | public: |
| 39 | explicit ConstBuffer(u32 max_offset, bool is_indirect) | 30 | explicit ConstBuffer(u32 max_offset, bool is_indirect) |
| @@ -73,7 +64,7 @@ struct GlobalMemoryUsage { | |||
| 73 | 64 | ||
| 74 | class ShaderIR final { | 65 | class ShaderIR final { |
| 75 | public: | 66 | public: |
| 76 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset); | 67 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size); |
| 77 | ~ShaderIR(); | 68 | ~ShaderIR(); |
| 78 | 69 | ||
| 79 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { | 70 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { |
| @@ -121,6 +112,18 @@ public: | |||
| 121 | return static_cast<std::size_t>(coverage_end * sizeof(u64)); | 112 | return static_cast<std::size_t>(coverage_end * sizeof(u64)); |
| 122 | } | 113 | } |
| 123 | 114 | ||
| 115 | bool UsesLayer() const { | ||
| 116 | return uses_layer; | ||
| 117 | } | ||
| 118 | |||
| 119 | bool UsesViewportIndex() const { | ||
| 120 | return uses_viewport_index; | ||
| 121 | } | ||
| 122 | |||
| 123 | bool UsesPointSize() const { | ||
| 124 | return uses_point_size; | ||
| 125 | } | ||
| 126 | |||
| 124 | bool HasPhysicalAttributes() const { | 127 | bool HasPhysicalAttributes() const { |
| 125 | return uses_physical_attributes; | 128 | return uses_physical_attributes; |
| 126 | } | 129 | } |
| @@ -129,12 +132,20 @@ public: | |||
| 129 | return header; | 132 | return header; |
| 130 | } | 133 | } |
| 131 | 134 | ||
| 135 | bool IsFlowStackDisabled() const { | ||
| 136 | return disable_flow_stack; | ||
| 137 | } | ||
| 138 | |||
| 139 | u32 ConvertAddressToNvidiaSpace(const u32 address) const { | ||
| 140 | return (address - main_offset) * sizeof(Tegra::Shader::Instruction); | ||
| 141 | } | ||
| 142 | |||
| 132 | private: | 143 | private: |
| 133 | void Decode(); | 144 | void Decode(); |
| 134 | 145 | ||
| 135 | ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels); | ||
| 136 | |||
| 137 | NodeBlock DecodeRange(u32 begin, u32 end); | 146 | NodeBlock DecodeRange(u32 begin, u32 end); |
| 147 | void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); | ||
| 148 | void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); | ||
| 138 | 149 | ||
| 139 | /** | 150 | /** |
| 140 | * Decodes a single instruction from Tegra to IR. | 151 | * Decodes a single instruction from Tegra to IR. |
| @@ -196,8 +207,8 @@ private: | |||
| 196 | Node GetInternalFlag(InternalFlag flag, bool negated = false); | 207 | Node GetInternalFlag(InternalFlag flag, bool negated = false); |
| 197 | /// Generates a node representing a local memory address | 208 | /// Generates a node representing a local memory address |
| 198 | Node GetLocalMemory(Node address); | 209 | Node GetLocalMemory(Node address); |
| 199 | /// Generates a temporal, internally it uses a post-RZ register | 210 | /// Generates a temporary, internally it uses a post-RZ register |
| 200 | Node GetTemporal(u32 id); | 211 | Node GetTemporary(u32 id); |
| 201 | 212 | ||
| 202 | /// Sets a register. src value must be a number-evaluated node. | 213 | /// Sets a register. src value must be a number-evaluated node. |
| 203 | void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); | 214 | void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); |
| @@ -207,8 +218,8 @@ private: | |||
| 207 | void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); | 218 | void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); |
| 208 | /// Sets a local memory address. address and value must be a number-evaluated node | 219 | /// Sets a local memory address. address and value must be a number-evaluated node |
| 209 | void SetLocalMemory(NodeBlock& bb, Node address, Node value); | 220 | void SetLocalMemory(NodeBlock& bb, Node address, Node value); |
| 210 | /// Sets a temporal. Internally it uses a post-RZ register | 221 | /// Sets a temporary. Internally it uses a post-RZ register |
| 211 | void SetTemporal(NodeBlock& bb, u32 id, Node value); | 222 | void SetTemporary(NodeBlock& bb, u32 id, Node value); |
| 212 | 223 | ||
| 213 | /// Sets internal flags from a float | 224 | /// Sets internal flags from a float |
| 214 | void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); | 225 | void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); |
| @@ -314,7 +325,7 @@ private: | |||
| 314 | void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, | 325 | void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, |
| 315 | Node op_c, Node imm_lut, bool sets_cc); | 326 | Node op_c, Node imm_lut, bool sets_cc); |
| 316 | 327 | ||
| 317 | Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; | 328 | std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; |
| 318 | 329 | ||
| 319 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; | 330 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; |
| 320 | 331 | ||
| @@ -326,10 +337,11 @@ private: | |||
| 326 | 337 | ||
| 327 | const ProgramCode& program_code; | 338 | const ProgramCode& program_code; |
| 328 | const u32 main_offset; | 339 | const u32 main_offset; |
| 340 | const std::size_t program_size; | ||
| 341 | bool disable_flow_stack{}; | ||
| 329 | 342 | ||
| 330 | u32 coverage_begin{}; | 343 | u32 coverage_begin{}; |
| 331 | u32 coverage_end{}; | 344 | u32 coverage_end{}; |
| 332 | std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; | ||
| 333 | 345 | ||
| 334 | std::map<u32, NodeBlock> basic_blocks; | 346 | std::map<u32, NodeBlock> basic_blocks; |
| 335 | NodeBlock global_code; | 347 | NodeBlock global_code; |
| @@ -343,6 +355,9 @@ private: | |||
| 343 | std::set<Image> used_images; | 355 | std::set<Image> used_images; |
| 344 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; | 356 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; |
| 345 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; | 357 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; |
| 358 | bool uses_layer{}; | ||
| 359 | bool uses_viewport_index{}; | ||
| 360 | bool uses_point_size{}; | ||
| 346 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes | 361 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes |
| 347 | 362 | ||
| 348 | Tegra::Shader::Header header; | 363 | Tegra::Shader::Header header; |
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index fc957d980..55f5949e4 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -15,56 +15,63 @@ namespace { | |||
| 15 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | 15 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, |
| 16 | OperationCode operation_code) { | 16 | OperationCode operation_code) { |
| 17 | for (; cursor >= 0; --cursor) { | 17 | for (; cursor >= 0; --cursor) { |
| 18 | const Node node = code.at(cursor); | 18 | Node node = code.at(cursor); |
| 19 | |||
| 19 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | 20 | if (const auto operation = std::get_if<OperationNode>(&*node)) { |
| 20 | if (operation->GetCode() == operation_code) { | 21 | if (operation->GetCode() == operation_code) { |
| 21 | return {node, cursor}; | 22 | return {std::move(node), cursor}; |
| 22 | } | 23 | } |
| 23 | } | 24 | } |
| 25 | |||
| 24 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | 26 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { |
| 25 | const auto& conditional_code = conditional->GetCode(); | 27 | const auto& conditional_code = conditional->GetCode(); |
| 26 | const auto [found, internal_cursor] = FindOperation( | 28 | auto [found, internal_cursor] = FindOperation( |
| 27 | conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); | 29 | conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); |
| 28 | if (found) { | 30 | if (found) { |
| 29 | return {found, cursor}; | 31 | return {std::move(found), cursor}; |
| 30 | } | 32 | } |
| 31 | } | 33 | } |
| 32 | } | 34 | } |
| 33 | return {}; | 35 | return {}; |
| 34 | } | 36 | } |
| 35 | } // namespace | 37 | } // Anonymous namespace |
| 36 | 38 | ||
| 37 | Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { | 39 | std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, |
| 40 | s64 cursor) const { | ||
| 38 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | 41 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { |
| 39 | // Cbuf found, but it has to be immediate | 42 | // Constant buffer found, test if it's an immediate |
| 40 | return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; | 43 | const auto offset = cbuf->GetOffset(); |
| 44 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 45 | return {tracked, cbuf->GetIndex(), immediate->GetValue()}; | ||
| 46 | } | ||
| 47 | return {}; | ||
| 41 | } | 48 | } |
| 42 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { | 49 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { |
| 43 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | 50 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { |
| 44 | return nullptr; | 51 | return {}; |
| 45 | } | 52 | } |
| 46 | // Reduce the cursor in one to avoid infinite loops when the instruction sets the same | 53 | // Reduce the cursor in one to avoid infinite loops when the instruction sets the same |
| 47 | // register that it uses as operand | 54 | // register that it uses as operand |
| 48 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | 55 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); |
| 49 | if (!source) { | 56 | if (!source) { |
| 50 | return nullptr; | 57 | return {}; |
| 51 | } | 58 | } |
| 52 | return TrackCbuf(source, code, new_cursor); | 59 | return TrackCbuf(source, code, new_cursor); |
| 53 | } | 60 | } |
| 54 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { | 61 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { |
| 55 | for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { | 62 | for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { |
| 56 | if (const auto found = TrackCbuf((*operation)[i], code, cursor)) { | 63 | if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) { |
| 57 | // Cbuf found in operand | 64 | // Cbuf found in operand. |
| 58 | return found; | 65 | return found; |
| 59 | } | 66 | } |
| 60 | } | 67 | } |
| 61 | return nullptr; | 68 | return {}; |
| 62 | } | 69 | } |
| 63 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { | 70 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { |
| 64 | const auto& conditional_code = conditional->GetCode(); | 71 | const auto& conditional_code = conditional->GetCode(); |
| 65 | return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); | 72 | return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); |
| 66 | } | 73 | } |
| 67 | return nullptr; | 74 | return {}; |
| 68 | } | 75 | } |
| 69 | 76 | ||
| 70 | std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { | 77 | std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { |
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 7a0fdb19b..683c49207 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp | |||
| @@ -24,9 +24,8 @@ StagingCache::StagingCache() = default; | |||
| 24 | StagingCache::~StagingCache() = default; | 24 | StagingCache::~StagingCache() = default; |
| 25 | 25 | ||
| 26 | SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) | 26 | SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) |
| 27 | : params{params}, mipmap_sizes(params.num_levels), | 27 | : params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr}, |
| 28 | mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ | 28 | mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) { |
| 29 | params.GetHostSizeInBytes()} { | ||
| 30 | std::size_t offset = 0; | 29 | std::size_t offset = 0; |
| 31 | for (u32 level = 0; level < params.num_levels; ++level) { | 30 | for (u32 level = 0; level < params.num_levels; ++level) { |
| 32 | const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; | 31 | const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; |
| @@ -75,9 +74,12 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) | |||
| 75 | 74 | ||
| 76 | // Linear Surface check | 75 | // Linear Surface check |
| 77 | if (!params.is_tiled) { | 76 | if (!params.is_tiled) { |
| 78 | if (std::tie(params.width, params.height, params.pitch) == | 77 | if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) { |
| 79 | std::tie(rhs.width, rhs.height, rhs.pitch)) { | 78 | if (params.width == rhs.width) { |
| 80 | return MatchStructureResult::FullMatch; | 79 | return MatchStructureResult::FullMatch; |
| 80 | } else { | ||
| 81 | return MatchStructureResult::SemiMatch; | ||
| 82 | } | ||
| 81 | } | 83 | } |
| 82 | return MatchStructureResult::None; | 84 | return MatchStructureResult::None; |
| 83 | } | 85 | } |
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 8ba386a8a..bcce8d863 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h | |||
| @@ -200,8 +200,9 @@ public: | |||
| 200 | modification_tick = tick; | 200 | modification_tick = tick; |
| 201 | } | 201 | } |
| 202 | 202 | ||
| 203 | void MarkAsRenderTarget(const bool is_target) { | 203 | void MarkAsRenderTarget(const bool is_target, const u32 index) { |
| 204 | this->is_target = is_target; | 204 | this->is_target = is_target; |
| 205 | this->index = index; | ||
| 205 | } | 206 | } |
| 206 | 207 | ||
| 207 | void MarkAsPicked(const bool is_picked) { | 208 | void MarkAsPicked(const bool is_picked) { |
| @@ -221,6 +222,10 @@ public: | |||
| 221 | return is_target; | 222 | return is_target; |
| 222 | } | 223 | } |
| 223 | 224 | ||
| 225 | u32 GetRenderTarget() const { | ||
| 226 | return index; | ||
| 227 | } | ||
| 228 | |||
| 224 | bool IsRegistered() const { | 229 | bool IsRegistered() const { |
| 225 | return is_registered; | 230 | return is_registered; |
| 226 | } | 231 | } |
| @@ -307,10 +312,13 @@ private: | |||
| 307 | return view; | 312 | return view; |
| 308 | } | 313 | } |
| 309 | 314 | ||
| 315 | static constexpr u32 NO_RT = 0xFFFFFFFF; | ||
| 316 | |||
| 310 | bool is_modified{}; | 317 | bool is_modified{}; |
| 311 | bool is_target{}; | 318 | bool is_target{}; |
| 312 | bool is_registered{}; | 319 | bool is_registered{}; |
| 313 | bool is_picked{}; | 320 | bool is_picked{}; |
| 321 | u32 index{NO_RT}; | ||
| 314 | u64 modification_tick{}; | 322 | u64 modification_tick{}; |
| 315 | }; | 323 | }; |
| 316 | 324 | ||
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 9c56e2b4f..fd5472451 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -290,12 +290,19 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co | |||
| 290 | 290 | ||
| 291 | std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, | 291 | std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, |
| 292 | bool uncompressed) const { | 292 | bool uncompressed) const { |
| 293 | const bool tiled{as_host_size ? false : is_tiled}; | ||
| 294 | const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; | 293 | const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; |
| 295 | const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; | 294 | const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; |
| 296 | const u32 depth{is_layered ? 1U : GetMipDepth(level)}; | 295 | const u32 depth{is_layered ? 1U : GetMipDepth(level)}; |
| 297 | return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, | 296 | if (is_tiled) { |
| 298 | GetMipBlockHeight(level), GetMipBlockDepth(level)); | 297 | return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height, |
| 298 | depth, GetMipBlockHeight(level), | ||
| 299 | GetMipBlockDepth(level)); | ||
| 300 | } else if (as_host_size || IsBuffer()) { | ||
| 301 | return GetBytesPerPixel() * width * height * depth; | ||
| 302 | } else { | ||
| 303 | // Linear Texture Case | ||
| 304 | return pitch * height * depth; | ||
| 305 | } | ||
| 299 | } | 306 | } |
| 300 | 307 | ||
| 301 | bool SurfaceParams::operator==(const SurfaceParams& rhs) const { | 308 | bool SurfaceParams::operator==(const SurfaceParams& rhs) const { |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c9e72531a..a3a3770a7 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -116,10 +116,10 @@ public: | |||
| 116 | std::lock_guard lock{mutex}; | 116 | std::lock_guard lock{mutex}; |
| 117 | auto& maxwell3d = system.GPU().Maxwell3D(); | 117 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 118 | 118 | ||
| 119 | if (!maxwell3d.dirty_flags.zeta_buffer) { | 119 | if (!maxwell3d.dirty.depth_buffer) { |
| 120 | return depth_buffer.view; | 120 | return depth_buffer.view; |
| 121 | } | 121 | } |
| 122 | maxwell3d.dirty_flags.zeta_buffer = false; | 122 | maxwell3d.dirty.depth_buffer = false; |
| 123 | 123 | ||
| 124 | const auto& regs{maxwell3d.regs}; | 124 | const auto& regs{maxwell3d.regs}; |
| 125 | const auto gpu_addr{regs.zeta.Address()}; | 125 | const auto gpu_addr{regs.zeta.Address()}; |
| @@ -133,11 +133,11 @@ public: | |||
| 133 | regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; | 133 | regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; |
| 134 | auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); | 134 | auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); |
| 135 | if (depth_buffer.target) | 135 | if (depth_buffer.target) |
| 136 | depth_buffer.target->MarkAsRenderTarget(false); | 136 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); |
| 137 | depth_buffer.target = surface_view.first; | 137 | depth_buffer.target = surface_view.first; |
| 138 | depth_buffer.view = surface_view.second; | 138 | depth_buffer.view = surface_view.second; |
| 139 | if (depth_buffer.target) | 139 | if (depth_buffer.target) |
| 140 | depth_buffer.target->MarkAsRenderTarget(true); | 140 | depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT); |
| 141 | return surface_view.second; | 141 | return surface_view.second; |
| 142 | } | 142 | } |
| 143 | 143 | ||
| @@ -145,10 +145,10 @@ public: | |||
| 145 | std::lock_guard lock{mutex}; | 145 | std::lock_guard lock{mutex}; |
| 146 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | 146 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); |
| 147 | auto& maxwell3d = system.GPU().Maxwell3D(); | 147 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 148 | if (!maxwell3d.dirty_flags.color_buffer[index]) { | 148 | if (!maxwell3d.dirty.render_target[index]) { |
| 149 | return render_targets[index].view; | 149 | return render_targets[index].view; |
| 150 | } | 150 | } |
| 151 | maxwell3d.dirty_flags.color_buffer.reset(index); | 151 | maxwell3d.dirty.render_target[index] = false; |
| 152 | 152 | ||
| 153 | const auto& regs{maxwell3d.regs}; | 153 | const auto& regs{maxwell3d.regs}; |
| 154 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || | 154 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || |
| @@ -167,11 +167,11 @@ public: | |||
| 167 | auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), | 167 | auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), |
| 168 | preserve_contents, true); | 168 | preserve_contents, true); |
| 169 | if (render_targets[index].target) | 169 | if (render_targets[index].target) |
| 170 | render_targets[index].target->MarkAsRenderTarget(false); | 170 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); |
| 171 | render_targets[index].target = surface_view.first; | 171 | render_targets[index].target = surface_view.first; |
| 172 | render_targets[index].view = surface_view.second; | 172 | render_targets[index].view = surface_view.second; |
| 173 | if (render_targets[index].target) | 173 | if (render_targets[index].target) |
| 174 | render_targets[index].target->MarkAsRenderTarget(true); | 174 | render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index)); |
| 175 | return surface_view.second; | 175 | return surface_view.second; |
| 176 | } | 176 | } |
| 177 | 177 | ||
| @@ -191,7 +191,7 @@ public: | |||
| 191 | if (depth_buffer.target == nullptr) { | 191 | if (depth_buffer.target == nullptr) { |
| 192 | return; | 192 | return; |
| 193 | } | 193 | } |
| 194 | depth_buffer.target->MarkAsRenderTarget(false); | 194 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); |
| 195 | depth_buffer.target = nullptr; | 195 | depth_buffer.target = nullptr; |
| 196 | depth_buffer.view = nullptr; | 196 | depth_buffer.view = nullptr; |
| 197 | } | 197 | } |
| @@ -200,7 +200,7 @@ public: | |||
| 200 | if (render_targets[index].target == nullptr) { | 200 | if (render_targets[index].target == nullptr) { |
| 201 | return; | 201 | return; |
| 202 | } | 202 | } |
| 203 | render_targets[index].target->MarkAsRenderTarget(false); | 203 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); |
| 204 | render_targets[index].target = nullptr; | 204 | render_targets[index].target = nullptr; |
| 205 | render_targets[index].view = nullptr; | 205 | render_targets[index].view = nullptr; |
| 206 | } | 206 | } |
| @@ -270,6 +270,17 @@ protected: | |||
| 270 | // and reading it from a sepparate buffer. | 270 | // and reading it from a sepparate buffer. |
| 271 | virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; | 271 | virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; |
| 272 | 272 | ||
| 273 | void ManageRenderTargetUnregister(TSurface& surface) { | ||
| 274 | auto& maxwell3d = system.GPU().Maxwell3D(); | ||
| 275 | const u32 index = surface->GetRenderTarget(); | ||
| 276 | if (index == DEPTH_RT) { | ||
| 277 | maxwell3d.dirty.depth_buffer = true; | ||
| 278 | } else { | ||
| 279 | maxwell3d.dirty.render_target[index] = true; | ||
| 280 | } | ||
| 281 | maxwell3d.dirty.render_settings = true; | ||
| 282 | } | ||
| 283 | |||
| 273 | void Register(TSurface surface) { | 284 | void Register(TSurface surface) { |
| 274 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); | 285 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); |
| 275 | const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); | 286 | const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); |
| @@ -294,6 +305,9 @@ protected: | |||
| 294 | if (guard_render_targets && surface->IsProtected()) { | 305 | if (guard_render_targets && surface->IsProtected()) { |
| 295 | return; | 306 | return; |
| 296 | } | 307 | } |
| 308 | if (!guard_render_targets && surface->IsRenderTarget()) { | ||
| 309 | ManageRenderTargetUnregister(surface); | ||
| 310 | } | ||
| 297 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); | 311 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); |
| 298 | const CacheAddr cache_ptr = surface->GetCacheAddr(); | 312 | const CacheAddr cache_ptr = surface->GetCacheAddr(); |
| 299 | const std::size_t size = surface->GetSizeInBytes(); | 313 | const std::size_t size = surface->GetSizeInBytes(); |
| @@ -649,15 +663,6 @@ private: | |||
| 649 | } | 663 | } |
| 650 | return {current_surface, *view}; | 664 | return {current_surface, *view}; |
| 651 | } | 665 | } |
| 652 | // The next case is unsafe, so if we r in accurate GPU, just skip it | ||
| 653 | if (Settings::values.use_accurate_gpu_emulation) { | ||
| 654 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 655 | MatchTopologyResult::FullMatch); | ||
| 656 | } | ||
| 657 | // This is the case the texture is a part of the parent. | ||
| 658 | if (current_surface->MatchesSubTexture(params, gpu_addr)) { | ||
| 659 | return RebuildSurface(current_surface, params, is_render); | ||
| 660 | } | ||
| 661 | } else { | 666 | } else { |
| 662 | // If there are many overlaps, odds are they are subtextures of the candidate | 667 | // If there are many overlaps, odds are they are subtextures of the candidate |
| 663 | // surface. We try to construct a new surface based on the candidate parameters, | 668 | // surface. We try to construct a new surface based on the candidate parameters, |
| @@ -793,6 +798,9 @@ private: | |||
| 793 | static constexpr u64 registry_page_size{1 << registry_page_bits}; | 798 | static constexpr u64 registry_page_size{1 << registry_page_bits}; |
| 794 | std::unordered_map<CacheAddr, std::vector<TSurface>> registry; | 799 | std::unordered_map<CacheAddr, std::vector<TSurface>> registry; |
| 795 | 800 | ||
| 801 | static constexpr u32 DEPTH_RT = 8; | ||
| 802 | static constexpr u32 NO_RT = 0xFFFFFFFF; | ||
| 803 | |||
| 796 | // The L1 Cache is used for fast texture lookup before checking the overlaps | 804 | // The L1 Cache is used for fast texture lookup before checking the overlaps |
| 797 | // This avoids calculating size and other stuffs. | 805 | // This avoids calculating size and other stuffs. |
| 798 | std::unordered_map<CacheAddr, TSurface> l1_cache; | 806 | std::unordered_map<CacheAddr, TSurface> l1_cache; |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 73978ff5b..b7f3fdf75 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -436,8 +436,6 @@ void Config::ReadControlValues() { | |||
| 436 | void Config::ReadCoreValues() { | 436 | void Config::ReadCoreValues() { |
| 437 | qt_config->beginGroup(QStringLiteral("Core")); | 437 | qt_config->beginGroup(QStringLiteral("Core")); |
| 438 | 438 | ||
| 439 | Settings::values.cpu_jit_enabled = | ||
| 440 | ReadSetting(QStringLiteral("cpu_jit_enabled"), true).toBool(); | ||
| 441 | Settings::values.use_multi_core = ReadSetting(QStringLiteral("use_multi_core"), false).toBool(); | 439 | Settings::values.use_multi_core = ReadSetting(QStringLiteral("use_multi_core"), false).toBool(); |
| 442 | 440 | ||
| 443 | qt_config->endGroup(); | 441 | qt_config->endGroup(); |
| @@ -831,7 +829,6 @@ void Config::SaveControlValues() { | |||
| 831 | void Config::SaveCoreValues() { | 829 | void Config::SaveCoreValues() { |
| 832 | qt_config->beginGroup(QStringLiteral("Core")); | 830 | qt_config->beginGroup(QStringLiteral("Core")); |
| 833 | 831 | ||
| 834 | WriteSetting(QStringLiteral("cpu_jit_enabled"), Settings::values.cpu_jit_enabled, true); | ||
| 835 | WriteSetting(QStringLiteral("use_multi_core"), Settings::values.use_multi_core, false); | 832 | WriteSetting(QStringLiteral("use_multi_core"), Settings::values.use_multi_core, false); |
| 836 | 833 | ||
| 837 | qt_config->endGroup(); | 834 | qt_config->endGroup(); |
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index ae21f4753..381644694 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -1843,13 +1843,14 @@ void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string det | |||
| 1843 | "data, or other bugs."); | 1843 | "data, or other bugs."); |
| 1844 | switch (result) { | 1844 | switch (result) { |
| 1845 | case Core::System::ResultStatus::ErrorSystemFiles: { | 1845 | case Core::System::ResultStatus::ErrorSystemFiles: { |
| 1846 | QString message = tr("yuzu was unable to locate a Switch system archive"); | 1846 | QString message; |
| 1847 | if (!details.empty()) { | 1847 | if (details.empty()) { |
| 1848 | message.append(tr(": %1. ").arg(QString::fromStdString(details))); | 1848 | message = |
| 1849 | tr("yuzu was unable to locate a Switch system archive. %1").arg(common_message); | ||
| 1849 | } else { | 1850 | } else { |
| 1850 | message.append(tr(". ")); | 1851 | message = tr("yuzu was unable to locate a Switch system archive: %1. %2") |
| 1852 | .arg(QString::fromStdString(details), common_message); | ||
| 1851 | } | 1853 | } |
| 1852 | message.append(common_message); | ||
| 1853 | 1854 | ||
| 1854 | answer = QMessageBox::question(this, tr("System Archive Not Found"), message, | 1855 | answer = QMessageBox::question(this, tr("System Archive Not Found"), message, |
| 1855 | QMessageBox::Yes | QMessageBox::No, QMessageBox::No); | 1856 | QMessageBox::Yes | QMessageBox::No, QMessageBox::No); |
| @@ -1858,8 +1859,8 @@ void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string det | |||
| 1858 | } | 1859 | } |
| 1859 | 1860 | ||
| 1860 | case Core::System::ResultStatus::ErrorSharedFont: { | 1861 | case Core::System::ResultStatus::ErrorSharedFont: { |
| 1861 | QString message = tr("yuzu was unable to locate the Switch shared fonts. "); | 1862 | const QString message = |
| 1862 | message.append(common_message); | 1863 | tr("yuzu was unable to locate the Switch shared fonts. %1").arg(common_message); |
| 1863 | answer = QMessageBox::question(this, tr("Shared Fonts Not Found"), message, | 1864 | answer = QMessageBox::question(this, tr("Shared Fonts Not Found"), message, |
| 1864 | QMessageBox::Yes | QMessageBox::No, QMessageBox::No); | 1865 | QMessageBox::Yes | QMessageBox::No, QMessageBox::No); |
| 1865 | status_message = tr("Shared Font Missing"); | 1866 | status_message = tr("Shared Font Missing"); |
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 30b22341b..067d58d80 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -340,7 +340,6 @@ void Config::ReadValues() { | |||
| 340 | } | 340 | } |
| 341 | 341 | ||
| 342 | // Core | 342 | // Core |
| 343 | Settings::values.cpu_jit_enabled = sdl2_config->GetBoolean("Core", "cpu_jit_enabled", true); | ||
| 344 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); | 343 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); |
| 345 | 344 | ||
| 346 | // Renderer | 345 | // Renderer |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 4f1add434..0cfc111a6 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -76,10 +76,6 @@ motion_device= | |||
| 76 | touch_device= | 76 | touch_device= |
| 77 | 77 | ||
| 78 | [Core] | 78 | [Core] |
| 79 | # Whether to use the Just-In-Time (JIT) compiler for CPU emulation | ||
| 80 | # 0: Interpreter (slow), 1 (default): JIT (fast) | ||
| 81 | cpu_jit_enabled = | ||
| 82 | |||
| 83 | # Whether to use multi-core for CPU emulation | 79 | # Whether to use multi-core for CPU emulation |
| 84 | # 0 (default): Disabled, 1: Enabled | 80 | # 0 (default): Disabled, 1: Enabled |
| 85 | use_multi_core= | 81 | use_multi_core= |
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index b96b7d279..9a11dc6c3 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp | |||
| @@ -114,7 +114,6 @@ void Config::ReadValues() { | |||
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | // Core | 116 | // Core |
| 117 | Settings::values.cpu_jit_enabled = sdl2_config->GetBoolean("Core", "cpu_jit_enabled", true); | ||
| 118 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); | 117 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); |
| 119 | 118 | ||
| 120 | // Renderer | 119 | // Renderer |
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h index 0f880d8c7..9a3e86d68 100644 --- a/src/yuzu_tester/default_ini.h +++ b/src/yuzu_tester/default_ini.h | |||
| @@ -8,10 +8,6 @@ namespace DefaultINI { | |||
| 8 | 8 | ||
| 9 | const char* sdl2_config_file = R"( | 9 | const char* sdl2_config_file = R"( |
| 10 | [Core] | 10 | [Core] |
| 11 | # Whether to use the Just-In-Time (JIT) compiler for CPU emulation | ||
| 12 | # 0: Interpreter (slow), 1 (default): JIT (fast) | ||
| 13 | cpu_jit_enabled = | ||
| 14 | |||
| 15 | # Whether to use multi-core for CPU emulation | 11 | # Whether to use multi-core for CPU emulation |
| 16 | # 0 (default): Disabled, 1: Enabled | 12 | # 0 (default): Disabled, 1: Enabled |
| 17 | use_multi_core= | 13 | use_multi_core= |