diff options
236 files changed, 6092 insertions, 2036 deletions
diff --git a/.ci/scripts/common/post-upload.sh b/.ci/scripts/common/post-upload.sh new file mode 100644 index 000000000..bb4e9d328 --- /dev/null +++ b/.ci/scripts/common/post-upload.sh | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | # Sourced by the platform upload.sh scripts, which set REV_NAME, ARCHIVE_NAME and COMPRESSION_FLAGS | ||
| 3 | # Copy documentation into the staged directory | ||
| 4 | cp license.txt "$REV_NAME" | ||
| 5 | cp README.md "$REV_NAME" | ||
| 6 | |||
| 7 | tar $COMPRESSION_FLAGS "$ARCHIVE_NAME" "$REV_NAME" | ||
| 8 | # Rename the staged directory to the release name before packing the 7z archive | ||
| 9 | mv "$REV_NAME" "$RELEASE_NAME" | ||
| 10 | |||
| 11 | 7z a "$REV_NAME.7z" "$RELEASE_NAME" | ||
| 12 | |||
| 13 | # move both archives into the artifacts directory, which the pipeline publishes | ||
| 14 | mv "$ARCHIVE_NAME" artifacts/ | ||
| 15 | mv "$REV_NAME.7z" artifacts/ | ||
diff --git a/.ci/scripts/common/pre-upload.sh b/.ci/scripts/common/pre-upload.sh new file mode 100644 index 000000000..3c2fc79a2 --- /dev/null +++ b/.ci/scripts/common/pre-upload.sh | |||
| @@ -0,0 +1,6 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | # Sourced by the upload scripts: derives the revision identifiers and creates the output directory | ||
| 3 | GITDATE="$(git show -s --date=short --format='%ad' | sed 's/-//g')" | ||
| 4 | GITREV="$(git show -s --format='%h')" | ||
| 5 | |||
| 6 | mkdir -p artifacts | ||
diff --git a/.ci/scripts/format/docker.sh b/.ci/scripts/format/docker.sh new file mode 100644 index 000000000..778411e4a --- /dev/null +++ b/.ci/scripts/format/docker.sh | |||
| @@ -0,0 +1,6 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | |||
| 3 | # Run the formatting check (script.sh) from the repository root at /yuzu | ||
| 4 | cd /yuzu | ||
| 5 | chmod a+x ./.ci/scripts/format/script.sh | ||
| 6 | ./.ci/scripts/format/script.sh | ||
diff --git a/.ci/scripts/format/exec.sh b/.ci/scripts/format/exec.sh new file mode 100644 index 000000000..5d6393b38 --- /dev/null +++ b/.ci/scripts/format/exec.sh | |||
| @@ -0,0 +1,4 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | # Run the formatting check in the clang-format container; the working directory is mounted at /yuzu | ||
| 3 | chmod a+x ./.ci/scripts/format/docker.sh | ||
| 4 | docker run -v "$(pwd)":/yuzu yuzuemu/build-environments:linux-clang-format /bin/bash -ex /yuzu/.ci/scripts/format/docker.sh | ||
diff --git a/.ci/scripts/format/script.sh b/.ci/scripts/format/script.sh new file mode 100644 index 000000000..5ab828d5e --- /dev/null +++ b/.ci/scripts/format/script.sh | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | # Fail on trailing whitespace first, then on any clang-format deviation in src/ | ||
| 3 | if grep -nrI '\s$' src *.yml *.txt *.md Doxyfile .gitignore .gitmodules .ci* dist/*.desktop \ | ||
| 4 |         dist/*.svg dist/*.xml; then | ||
| 5 |     echo Trailing whitespace found, aborting | ||
| 6 |     exit 1 | ||
| 7 | fi | ||
| 8 | |||
| 9 | # The default clang-format binary points to the old 3.5 release, so name the version explicitly | ||
| 10 | CLANG_FORMAT=clang-format-6.0 | ||
| 11 | $CLANG_FORMAT --version | ||
| 12 | |||
| 13 | if [ "$TRAVIS_EVENT_TYPE" = "pull_request" ]; then | ||
| 14 |     # Get list of every file modified in this pull request | ||
| 15 |     files_to_lint="$(git diff --name-only --diff-filter=ACMRTUXB $TRAVIS_COMMIT_RANGE | grep '^src/[^.]*[.]\(cpp\|h\)$' || true)" | ||
| 16 | else | ||
| 17 |     # Check everything for branch pushes | ||
| 18 |     files_to_lint="$(find src/ -name '*.cpp' -or -name '*.h')" | ||
| 19 | fi | ||
| 20 | |||
| 21 | # Turn off tracing for this because it's too verbose | ||
| 22 | set +x | ||
| 23 | fail=0  # start from a known state instead of whatever the environment exported | ||
| 24 | for f in $files_to_lint; do | ||
| 25 |     d=$(diff -u "$f" <($CLANG_FORMAT "$f") || true) | ||
| 26 |     if [ -n "$d" ]; then | ||
| 27 |         echo "!!! $f not compliant to coding style, here is the fix:" | ||
| 28 |         echo "$d" | ||
| 29 |         fail=1 | ||
| 30 |     fi | ||
| 31 | done | ||
| 32 | |||
| 33 | set -x | ||
| 34 | |||
| 35 | if [ "$fail" = 1 ]; then | ||
| 36 |     exit 1 | ||
| 37 | fi | ||
diff --git a/.ci/scripts/linux/docker.sh b/.ci/scripts/linux/docker.sh new file mode 100644 index 000000000..f538a4081 --- /dev/null +++ b/.ci/scripts/linux/docker.sh | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | # Configure, build and test yuzu from /yuzu using the ccache-wrapped gcc/g++ | ||
| 3 | cd /yuzu | ||
| 4 | # Show ccache statistics before the build | ||
| 5 | ccache -s | ||
| 6 | # Configure in build/ (an already existing directory is tolerated) | ||
| 7 | mkdir build || true && cd build | ||
| 8 | cmake .. -G Ninja -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON | ||
| 9 | |||
| 10 | ninja | ||
| 11 | # Show ccache statistics again after the build | ||
| 12 | ccache -s | ||
| 13 | # Run the tests of the Release configuration with verbose output | ||
| 14 | ctest -VV -C Release | ||
diff --git a/.ci/scripts/linux/exec.sh b/.ci/scripts/linux/exec.sh new file mode 100644 index 000000000..a5a6c34b9 --- /dev/null +++ b/.ci/scripts/linux/exec.sh | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | # Build in the linux-fresh container; bash gets -ex explicitly because the script's shebang options are ignored when it is passed as an argument | ||
| 3 | mkdir -p "ccache" || true | ||
| 4 | chmod a+x ./.ci/scripts/linux/docker.sh | ||
| 5 | docker run -e ENABLE_COMPATIBILITY_REPORTING -e CCACHE_DIR=/yuzu/ccache -v "$(pwd)":/yuzu yuzuemu/build-environments:linux-fresh /bin/bash -ex /yuzu/.ci/scripts/linux/docker.sh | ||
diff --git a/.ci/scripts/linux/upload.sh b/.ci/scripts/linux/upload.sh new file mode 100644 index 000000000..0d131d1dd --- /dev/null +++ b/.ci/scripts/linux/upload.sh | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | # Package the Linux build: stage the binaries under the revision name, then let post-upload.sh archive them | ||
| 3 | . .ci/scripts/common/pre-upload.sh | ||
| 4 | # GITDATE and GITREV come from pre-upload.sh; the three names below are read by post-upload.sh | ||
| 5 | REV_NAME="yuzu-linux-${GITDATE}-${GITREV}" | ||
| 6 | ARCHIVE_NAME="${REV_NAME}.tar.xz" | ||
| 7 | COMPRESSION_FLAGS="-cJvf" | ||
| 8 | |||
| 9 | mkdir "$REV_NAME" | ||
| 10 | # Stage the command-line and the GUI executable | ||
| 11 | cp build/bin/yuzu-cmd "$REV_NAME" | ||
| 12 | cp build/bin/yuzu "$REV_NAME" | ||
| 13 | |||
| 14 | . .ci/scripts/common/post-upload.sh | ||
diff --git a/.ci/scripts/merge/apply-patches-by-label.py b/.ci/scripts/merge/apply-patches-by-label.py new file mode 100644 index 000000000..b346001a5 --- /dev/null +++ b/.ci/scripts/merge/apply-patches-by-label.py | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | # Fetch and squash-merge all pull requests that carry a specific label | ||
| 2 | # Usage: python apply-patches-by-label.py <Label to Match> <Root Path Folder to DL to> (the folder argument is currently unused) | ||
| 3 | |||
| 4 | import requests, sys, json, urllib3.request, shutil, subprocess | ||
| 5 | |||
| 6 | http = urllib3.PoolManager() | ||
| 7 | dl_list = {} | ||
| 8 | |||
| 9 | def check_individual(labels): | ||
| 10 | for label in labels: | ||
| 11 | if (label["name"] == sys.argv[1]): | ||
| 12 | return True | ||
| 13 | return False | ||
| 14 | |||
| 15 | try: | ||
| 16 | url = 'https://api.github.com/repos/yuzu-emu/yuzu/pulls' | ||
| 17 | response = requests.get(url) | ||
| 18 | if (response.ok): | ||
| 19 | j = json.loads(response.content) | ||
| 20 | for pr in j: | ||
| 21 | if (check_individual(pr["labels"])): | ||
| 22 | pn = pr["number"] | ||
| 23 | print("Matched PR# %s" % pn) | ||
| 24 | print(subprocess.check_output(["git", "fetch", "https://github.com/yuzu-emu/yuzu.git", "pull/%s/head:pr-%s" % (pn, pn), "-f"])) | ||
| 25 | print(subprocess.check_output(["git", "merge", "--squash", "pr-%s" % pn])) | ||
| 26 | print(subprocess.check_output(["git", "commit", "-m\"Merge PR %s\"" % pn])) | ||
| 27 | except: | ||
| 28 | sys.exit(-1) | ||
diff --git a/.ci/scripts/merge/check-label-presence.py b/.ci/scripts/merge/check-label-presence.py new file mode 100644 index 000000000..048466d7e --- /dev/null +++ b/.ci/scripts/merge/check-label-presence.py | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | # Checks to see if the specified pull request # has the specified tag | ||
| 2 | # Usage: python check-label-presence.py <Pull Request ID> <Name of Label> | ||
| 3 | |||
| 4 | import requests, json, sys | ||
| 5 | |||
| 6 | try: | ||
| 7 | url = 'https://api.github.com/repos/yuzu-emu/yuzu/issues/%s' % sys.argv[1] | ||
| 8 | response = requests.get(url) | ||
| 9 | if (response.ok): | ||
| 10 | j = json.loads(response.content) | ||
| 11 | for label in j["labels"]: | ||
| 12 | if label["name"] == sys.argv[2]: | ||
| 13 | print('##vso[task.setvariable variable=enabletesting;]true') | ||
| 14 | sys.exit() | ||
| 15 | except: | ||
| 16 | sys.exit(-1) | ||
| 17 | |||
| 18 | print('##vso[task.setvariable variable=enabletesting;]false') | ||
diff --git a/.ci/scripts/merge/yuzubot-git-config.sh b/.ci/scripts/merge/yuzubot-git-config.sh new file mode 100644 index 000000000..d9d595bbc --- /dev/null +++ b/.ci/scripts/merge/yuzubot-git-config.sh | |||
| @@ -0,0 +1,2 @@ | |||
| 1 | git config --global user.email "yuzu@yuzu-emu.org" | ||
| 2 | git config --global user.name "yuzubot" \ No newline at end of file | ||
diff --git a/.ci/scripts/windows/docker.sh b/.ci/scripts/windows/docker.sh new file mode 100644 index 000000000..f7093363b --- /dev/null +++ b/.ci/scripts/windows/docker.sh | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | # Cross-compile yuzu for Windows with the MinGW toolchain file and collect the executables and DLLs in package/ | ||
| 3 | cd /yuzu | ||
| 4 | # Show ccache statistics before the build | ||
| 5 | ccache -s | ||
| 6 | |||
| 7 | # Dirty hack to trick unicorn makefile into believing we are in a MINGW system: uname is replaced by a stub that prints MINGW64 (the real one is kept as /bin/uname1) | ||
| 8 | mv /bin/uname /bin/uname1 && echo -e '#!/bin/sh\necho MINGW64' >> /bin/uname | ||
| 9 | chmod +x /bin/uname | ||
| 10 | |||
| 11 | # Dirty hack to trick unicorn makefile into believing we have cmd | ||
| 12 | echo '' >> /bin/cmd | ||
| 13 | chmod +x /bin/cmd | ||
| 14 | # Configure and build in build/ (an already existing directory is tolerated) | ||
| 15 | mkdir build || true && cd build | ||
| 16 | cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release | ||
| 17 | ninja | ||
| 18 | |||
| 19 | # Clean up the dirty hacks: restore the real uname and remove the fake cmd | ||
| 20 | rm /bin/uname && mv /bin/uname1 /bin/uname | ||
| 21 | rm /bin/cmd | ||
| 22 | # Show ccache statistics again after the build | ||
| 23 | ccache -s | ||
| 24 | |||
| 25 | echo "Tests skipped" | ||
| 26 | #ctest -VV -C Release | ||
| 27 | |||
| 28 | echo 'Prepare binaries...' | ||
| 29 | cd .. | ||
| 30 | mkdir package | ||
| 31 | # Copy every yuzu*.exe found under build/ into package/ | ||
| 32 | QT_PLATFORM_DLL_PATH='/usr/x86_64-w64-mingw32/lib/qt5/plugins/platforms/' | ||
| 33 | find build/ -name "yuzu*.exe" -exec cp {} 'package' \; | ||
| 34 | |||
| 35 | # copy Qt plugins (platform plugin, media services, image formats) | ||
| 36 | mkdir package/platforms | ||
| 37 | cp "${QT_PLATFORM_DLL_PATH}/qwindows.dll" package/platforms/ | ||
| 38 | cp -rv "${QT_PLATFORM_DLL_PATH}/../mediaservice/" package/ | ||
| 39 | cp -rv "${QT_PLATFORM_DLL_PATH}/../imageformats/" package/ | ||
| 40 | rm -f package/mediaservice/*d.dll | ||
| 41 | |||
| 42 | for i in package/*.exe; do | ||
| 43 | # we need to process pdb here, however, cv2pdb | ||
| 44 | # does not work here, so we just simply strip all the debug symbols | ||
| 45 | x86_64-w64-mingw32-strip "${i}" | ||
| 46 | done | ||
| 47 | # scan_dll.py copies the DLLs imported by the given files into package/; it needs the pefile module | ||
| 48 | pip3 install pefile | ||
| 49 | python3 .ci/scripts/windows/scan_dll.py package/*.exe "package/" | ||
| 50 | python3 .ci/scripts/windows/scan_dll.py package/imageformats/*.dll "package/" | ||
diff --git a/.ci/scripts/windows/exec.sh b/.ci/scripts/windows/exec.sh new file mode 100644 index 000000000..d6a994856 --- /dev/null +++ b/.ci/scripts/windows/exec.sh | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | # Build in the linux-mingw container; the working directory is mounted at /yuzu and ccache/ holds the compiler cache | ||
| 3 | mkdir -p "ccache" || true | ||
| 4 | chmod a+x ./.ci/scripts/windows/docker.sh | ||
| 5 | docker run -e CCACHE_DIR=/yuzu/ccache -v "$(pwd)":/yuzu yuzuemu/build-environments:linux-mingw /bin/bash -ex /yuzu/.ci/scripts/windows/docker.sh | ||
diff --git a/.ci/scripts/windows/scan_dll.py b/.ci/scripts/windows/scan_dll.py new file mode 100644 index 000000000..163183f2e --- /dev/null +++ b/.ci/scripts/windows/scan_dll.py | |||
| @@ -0,0 +1,106 @@ | |||
| 1 | import pefile | ||
| 2 | import sys | ||
| 3 | import re | ||
| 4 | import os | ||
| 5 | import queue | ||
| 6 | import shutil | ||
| 7 | |||
| 8 | # constant definitions | ||
| 9 | KNOWN_SYS_DLLS = ['WINMM.DLL', 'MSVCRT.DLL', 'VERSION.DLL', 'MPR.DLL', | ||
| 10 | 'DWMAPI.DLL', 'UXTHEME.DLL', 'DNSAPI.DLL', 'IPHLPAPI.DLL'] | ||
| 11 | # below is for Ubuntu 18.04 with specified PPA enabled, if you are using | ||
| 12 | # other distro or different repositories, change the following accordingly | ||
| 13 | DLL_PATH = [ | ||
| 14 | '/usr/x86_64-w64-mingw32/bin/', | ||
| 15 | '/usr/x86_64-w64-mingw32/lib/', | ||
| 16 | '/usr/lib/gcc/x86_64-w64-mingw32/7.3-posix/' | ||
| 17 | ] | ||
| 18 | |||
| 19 | missing = [] | ||
| 20 | |||
| 21 | |||
| 22 | def parse_imports(file_name): | ||
| 23 | results = [] | ||
| 24 | pe = pefile.PE(file_name, fast_load=True) | ||
| 25 | pe.parse_data_directories() | ||
| 26 | |||
| 27 | for entry in pe.DIRECTORY_ENTRY_IMPORT: | ||
| 28 | current = entry.dll.decode() | ||
| 29 | current_u = current.upper() # b/c Windows is often case insensitive | ||
| 30 | # here we filter out system dlls | ||
| 31 | # dll w/ names like *32.dll are likely to be system dlls | ||
| 32 | if current_u.upper() not in KNOWN_SYS_DLLS and not re.match(string=current_u, pattern=r'.*32\.DLL'): | ||
| 33 | results.append(current) | ||
| 34 | |||
| 35 | return results | ||
| 36 | |||
| 37 | |||
| 38 | def parse_imports_recursive(file_name, path_list=[]): | ||
| 39 | q = queue.Queue() # create a FIFO queue | ||
| 40 | # file_name can be a string or a list for the convience | ||
| 41 | if isinstance(file_name, str): | ||
| 42 | q.put(file_name) | ||
| 43 | elif isinstance(file_name, list): | ||
| 44 | for i in file_name: | ||
| 45 | q.put(i) | ||
| 46 | full_list = [] | ||
| 47 | while q.qsize(): | ||
| 48 | current = q.get_nowait() | ||
| 49 | print('> %s' % current) | ||
| 50 | deps = parse_imports(current) | ||
| 51 | # if this dll does not have any import, ignore it | ||
| 52 | if not deps: | ||
| 53 | continue | ||
| 54 | for dep in deps: | ||
| 55 | # the dependency already included in the list, skip | ||
| 56 | if dep in full_list: | ||
| 57 | continue | ||
| 58 | # find the requested dll in the provided paths | ||
| 59 | full_path = find_dll(dep) | ||
| 60 | if not full_path: | ||
| 61 | missing.append(dep) | ||
| 62 | continue | ||
| 63 | full_list.append(dep) | ||
| 64 | q.put(full_path) | ||
| 65 | path_list.append(full_path) | ||
| 66 | return full_list | ||
| 67 | |||
| 68 | |||
| 69 | def find_dll(name): | ||
| 70 | for path in DLL_PATH: | ||
| 71 | for root, _, files in os.walk(path): | ||
| 72 | for f in files: | ||
| 73 | if name.lower() == f.lower(): | ||
| 74 | return os.path.join(root, f) | ||
| 75 | |||
| 76 | |||
| 77 | def deploy(name, dst, dry_run=False): | ||
| 78 | dlls_path = [] | ||
| 79 | parse_imports_recursive(name, dlls_path) | ||
| 80 | for dll_entry in dlls_path: | ||
| 81 | if not dry_run: | ||
| 82 | shutil.copy(dll_entry, dst) | ||
| 83 | else: | ||
| 84 | print('[Dry-Run] Copy %s to %s' % (dll_entry, dst)) | ||
| 85 | print('Deploy completed.') | ||
| 86 | return dlls_path | ||
| 87 | |||
| 88 | |||
| 89 | def main(): | ||
| 90 | if len(sys.argv) < 3: | ||
| 91 | print('Usage: %s [files to examine ...] [target deploy directory]') | ||
| 92 | return 1 | ||
| 93 | to_deploy = sys.argv[1:-1] | ||
| 94 | tgt_dir = sys.argv[-1] | ||
| 95 | if not os.path.isdir(tgt_dir): | ||
| 96 | print('%s is not a directory.' % tgt_dir) | ||
| 97 | return 1 | ||
| 98 | print('Scanning dependencies...') | ||
| 99 | deploy(to_deploy, tgt_dir) | ||
| 100 | if missing: | ||
| 101 | print('Following DLLs are not found: %s' % ('\n'.join(missing))) | ||
| 102 | return 0 | ||
| 103 | |||
| 104 | |||
| 105 | if __name__ == '__main__': | ||
| 106 | main() | ||
diff --git a/.ci/scripts/windows/upload.sh b/.ci/scripts/windows/upload.sh new file mode 100644 index 000000000..de73d3541 --- /dev/null +++ b/.ci/scripts/windows/upload.sh | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | #!/bin/bash -ex | ||
| 2 | # Package the MinGW Windows build: stage package/ under the revision name, then let post-upload.sh archive it | ||
| 3 | . .ci/scripts/common/pre-upload.sh | ||
| 4 | # GITDATE and GITREV come from pre-upload.sh; the three names below are read by post-upload.sh | ||
| 5 | REV_NAME="yuzu-windows-mingw-${GITDATE}-${GITREV}" | ||
| 6 | ARCHIVE_NAME="${REV_NAME}.tar.gz" | ||
| 7 | COMPRESSION_FLAGS="-czvf" | ||
| 8 | |||
| 9 | mkdir "$REV_NAME" | ||
| 10 | # get around the permission issues | ||
| 11 | cp -r package/* "$REV_NAME" | ||
| 12 | |||
| 13 | . .ci/scripts/common/post-upload.sh | ||
diff --git a/.ci/templates/build-single.yml b/.ci/templates/build-single.yml new file mode 100644 index 000000000..357731eb9 --- /dev/null +++ b/.ci/templates/build-single.yml | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | parameters: | ||
| 2 | artifactSource: 'true' | ||
| 3 | cache: 'false' | ||
| 4 | |||
| 5 | steps: | ||
| 6 | - task: DockerInstaller@0 | ||
| 7 | displayName: 'Prepare Environment' | ||
| 8 | inputs: | ||
| 9 | dockerVersion: '17.09.0-ce' | ||
| 10 | - ${{ if eq(parameters.cache, 'true') }}: | ||
| 11 | - task: CacheBeta@0 | ||
| 12 | displayName: 'Cache Build System' | ||
| 13 | inputs: | ||
| 14 | key: yuzu-v1-$(BuildName)-$(BuildSuffix)-$(CacheSuffix) | ||
| 15 | path: $(System.DefaultWorkingDirectory)/ccache | ||
| 16 | cacheHitVar: CACHE_RESTORED | ||
| 17 | - script: chmod a+x ./.ci/scripts/$(ScriptFolder)/exec.sh && ./.ci/scripts/$(ScriptFolder)/exec.sh | ||
| 18 | displayName: 'Build' | ||
| 19 | - script: chmod a+x ./.ci/scripts/$(ScriptFolder)/upload.sh && RELEASE_NAME=$(BuildName) ./.ci/scripts/$(ScriptFolder)/upload.sh | ||
| 20 | displayName: 'Package Artifacts' | ||
| 21 | - publish: artifacts | ||
| 22 | artifact: 'yuzu-$(BuildName)-$(BuildSuffix)' | ||
| 23 | displayName: 'Upload Artifacts' | ||
diff --git a/.ci/templates/build-standard.yml b/.ci/templates/build-standard.yml new file mode 100644 index 000000000..aa180894e --- /dev/null +++ b/.ci/templates/build-standard.yml | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | jobs: | ||
| 2 | - job: build | ||
| 3 | displayName: 'standard' | ||
| 4 | pool: | ||
| 5 | vmImage: ubuntu-latest | ||
| 6 | strategy: | ||
| 7 | maxParallel: 10 | ||
| 8 | matrix: | ||
| 9 | windows: | ||
| 10 | BuildSuffix: 'windows-mingw' | ||
| 11 | ScriptFolder: 'windows' | ||
| 12 | linux: | ||
| 13 | BuildSuffix: 'linux' | ||
| 14 | ScriptFolder: 'linux' | ||
| 15 | steps: | ||
| 16 | - template: ./sync-source.yml | ||
| 17 | parameters: | ||
| 18 | artifactSource: $(parameters.artifactSource) | ||
| 19 | needSubmodules: 'true' | ||
| 20 | - template: ./build-single.yml | ||
| 21 | parameters: | ||
| 22 | artifactSource: 'false' | ||
| 23 | cache: $(parameters.cache) \ No newline at end of file | ||
diff --git a/.ci/templates/build-testing.yml b/.ci/templates/build-testing.yml new file mode 100644 index 000000000..a307addfd --- /dev/null +++ b/.ci/templates/build-testing.yml | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | jobs: | ||
| 2 | - job: build_test | ||
| 3 | displayName: 'testing' | ||
| 4 | pool: | ||
| 5 | vmImage: ubuntu-latest | ||
| 6 | strategy: | ||
| 7 | maxParallel: 5 | ||
| 8 | matrix: | ||
| 9 | windows: | ||
| 10 | BuildSuffix: 'windows-testing' | ||
| 11 | ScriptFolder: 'windows' | ||
| 12 | steps: | ||
| 13 | - script: sudo apt upgrade python3-pip && pip install requests urllib3 | ||
| 14 | displayName: 'Prepare Environment' | ||
| 15 | - task: PythonScript@0 | ||
| 16 | condition: eq(variables['Build.Reason'], 'PullRequest') | ||
| 17 | displayName: 'Determine Testing Status' | ||
| 18 | inputs: | ||
| 19 | scriptSource: 'filePath' | ||
| 20 | scriptPath: '.ci/scripts/merge/check-label-presence.py' | ||
| 21 | arguments: '$(System.PullRequest.PullRequestNumber) create-testing-build' | ||
| 22 | - ${{ if eq(variables.enabletesting, 'true') }}: | ||
| 23 | - template: ./sync-source.yml | ||
| 24 | parameters: | ||
| 25 | artifactSource: $(parameters.artifactSource) | ||
| 26 | needSubmodules: 'true' | ||
| 27 | - template: ./mergebot.yml | ||
| 28 | parameters: | ||
| 29 | matchLabel: 'testing-merge' | ||
| 30 | - template: ./build-single.yml | ||
| 31 | parameters: | ||
| 32 | artifactSource: 'false' | ||
| 33 | cache: 'false' | ||
diff --git a/.ci/templates/format-check.yml b/.ci/templates/format-check.yml new file mode 100644 index 000000000..5061f1cb8 --- /dev/null +++ b/.ci/templates/format-check.yml | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | parameters: | ||
| 2 | artifactSource: 'true' | ||
| 3 | |||
| 4 | steps: | ||
| 5 | - template: ./sync-source.yml | ||
| 6 | parameters: | ||
| 7 | artifactSource: $(parameters.artifactSource) | ||
| 8 | needSubmodules: 'false' | ||
| 9 | - task: DockerInstaller@0 | ||
| 10 | displayName: 'Prepare Environment' | ||
| 11 | inputs: | ||
| 12 | dockerVersion: '17.09.0-ce' | ||
| 13 | - script: chmod a+x ./.ci/scripts/format/exec.sh && ./.ci/scripts/format/exec.sh | ||
| 14 | displayName: 'Verify Formatting' | ||
diff --git a/.ci/templates/merge.yml b/.ci/templates/merge.yml new file mode 100644 index 000000000..efc82778a --- /dev/null +++ b/.ci/templates/merge.yml | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | jobs: | ||
| 2 | - job: merge | ||
| 3 | displayName: 'pull requests' | ||
| 4 | steps: | ||
| 5 | - checkout: self | ||
| 6 | submodules: recursive | ||
| 7 | - template: ./mergebot.yml | ||
| 8 | parameters: | ||
| 9 | matchLabel: '$(BuildName)-merge' | ||
| 10 | - task: ArchiveFiles@2 | ||
| 11 | displayName: 'Package Source' | ||
| 12 | inputs: | ||
| 13 | rootFolderOrFile: '$(System.DefaultWorkingDirectory)' | ||
| 14 | includeRootFolder: false | ||
| 15 | archiveType: '7z' | ||
| 16 | archiveFile: '$(Build.ArtifactStagingDirectory)/yuzu-$(BuildName)-source.7z' | ||
| 17 | - task: PublishPipelineArtifact@1 | ||
| 18 | displayName: 'Upload Artifacts' | ||
| 19 | inputs: | ||
| 20 | targetPath: '$(Build.ArtifactStagingDirectory)/yuzu-$(BuildName)-source.7z' | ||
| 21 | artifact: 'yuzu-$(BuildName)-source' | ||
| 22 | replaceExistingArchive: true | ||
| 23 | - job: upload_source | ||
| 24 | displayName: 'upload' | ||
| 25 | dependsOn: merge | ||
| 26 | steps: | ||
| 27 | - template: ./sync-source.yml | ||
| 28 | parameters: | ||
| 29 | artifactSource: 'true' | ||
| 30 | needSubmodules: 'true' | ||
| 31 | - script: chmod a+x $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh && $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh | ||
| 32 | displayName: 'Apply Git Configuration' | ||
| 33 | - script: git tag -a $(BuildName)-$(Build.BuildId) -m "yuzu $(BuildName) $(Build.BuildNumber) $(Build.DefinitionName)" | ||
| 34 | displayName: 'Tag Source' | ||
| 35 | - script: git remote add other $(GitRepoPushChangesURL) | ||
| 36 | displayName: 'Register Repository' | ||
| 37 | - script: git push --follow-tags --force other HEAD:$(GitPushBranch) | ||
| 38 | displayName: 'Update Code' | ||
| 39 | - script: git rev-list -n 1 $(BuildName)-$(Build.BuildId) > $(Build.ArtifactStagingDirectory)/tag-commit.sha | ||
| 40 | displayName: 'Calculate Release Point' | ||
| 41 | - task: PublishPipelineArtifact@1 | ||
| 42 | displayName: 'Upload Release Point' | ||
| 43 | inputs: | ||
| 44 | targetPath: '$(Build.ArtifactStagingDirectory)/tag-commit.sha' | ||
| 45 | artifact: 'yuzu-$(BuildName)-release-point' | ||
| 46 | replaceExistingArchive: true \ No newline at end of file | ||
diff --git a/.ci/templates/mergebot.yml b/.ci/templates/mergebot.yml new file mode 100644 index 000000000..5211efcc6 --- /dev/null +++ b/.ci/templates/mergebot.yml | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | parameters: | ||
| 2 | matchLabel: 'dummy-merge' | ||
| 3 | |||
| 4 | steps: | ||
| 5 | - script: mkdir $(System.DefaultWorkingDirectory)/patches && pip install requests urllib3 | ||
| 6 | displayName: 'Prepare Environment' | ||
| 7 | - script: chmod a+x $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh && $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh | ||
| 8 | displayName: 'Apply Git Configuration' | ||
| 9 | - task: PythonScript@0 | ||
| 10 | displayName: 'Discover, Download, and Apply Patches' | ||
| 11 | inputs: | ||
| 12 | scriptSource: 'filePath' | ||
| 13 | scriptPath: '.ci/scripts/merge/apply-patches-by-label.py' | ||
| 14 | arguments: '${{ parameters.matchLabel }} patches' | ||
| 15 | workingDirectory: '$(System.DefaultWorkingDirectory)' | ||
diff --git a/.ci/templates/retrieve-artifact-source.yml b/.ci/templates/retrieve-artifact-source.yml new file mode 100644 index 000000000..47d217e7b --- /dev/null +++ b/.ci/templates/retrieve-artifact-source.yml | |||
| @@ -0,0 +1,16 @@ | |||
| 1 | steps: | ||
| 2 | - checkout: none | ||
| 3 | - task: DownloadPipelineArtifact@2 | ||
| 4 | displayName: 'Download Source' | ||
| 5 | inputs: | ||
| 6 | artifactName: 'yuzu-$(BuildName)-source' | ||
| 7 | buildType: 'current' | ||
| 8 | targetPath: '$(Build.ArtifactStagingDirectory)' | ||
| 9 | - script: rm -rf $(System.DefaultWorkingDirectory) && mkdir $(System.DefaultWorkingDirectory) | ||
| 10 | displayName: 'Clean Working Directory' | ||
| 11 | - task: ExtractFiles@1 | ||
| 12 | displayName: 'Prepare Source' | ||
| 13 | inputs: | ||
| 14 | archiveFilePatterns: '$(Build.ArtifactStagingDirectory)/*.7z' | ||
| 15 | destinationFolder: '$(System.DefaultWorkingDirectory)' | ||
| 16 | cleanDestinationFolder: false \ No newline at end of file | ||
diff --git a/.ci/templates/retrieve-master-source.yml b/.ci/templates/retrieve-master-source.yml new file mode 100644 index 000000000..a08a3f926 --- /dev/null +++ b/.ci/templates/retrieve-master-source.yml | |||
| @@ -0,0 +1,11 @@ | |||
| 1 | parameters: | ||
| 2 | needSubmodules: 'true' | ||
| 3 | |||
| 4 | steps: | ||
| 5 | - checkout: self | ||
| 6 | displayName: 'Checkout Recursive' | ||
| 7 | submodules: recursive | ||
| 8 | # condition: eq(parameters.needSubmodules, 'true') | ||
| 9 | #- checkout: self | ||
| 10 | # displayName: 'Checkout Fast' | ||
| 11 | # condition: ne(parameters.needSubmodules, 'true') | ||
diff --git a/.ci/templates/sync-source.yml b/.ci/templates/sync-source.yml new file mode 100644 index 000000000..409e1cd83 --- /dev/null +++ b/.ci/templates/sync-source.yml | |||
| @@ -0,0 +1,7 @@ | |||
| 1 | steps: | ||
| 2 | - ${{ if eq(parameters.artifactSource, 'true') }}: | ||
| 3 | - template: ./retrieve-artifact-source.yml | ||
| 4 | - ${{ if ne(parameters.artifactSource, 'true') }}: | ||
| 5 | - template: ./retrieve-master-source.yml | ||
| 6 | parameters: | ||
| 7 | needSubmodules: $(parameters.needSubmodules) \ No newline at end of file | ||
diff --git a/.ci/yuzu-mainline.yml b/.ci/yuzu-mainline.yml new file mode 100644 index 000000000..2930a8564 --- /dev/null +++ b/.ci/yuzu-mainline.yml | |||
| @@ -0,0 +1,25 @@ | |||
| 1 | trigger: | ||
| 2 | - master | ||
| 3 | |||
| 4 | stages: | ||
| 5 | - stage: merge | ||
| 6 | displayName: 'merge' | ||
| 7 | jobs: | ||
| 8 | - template: ./templates/merge.yml | ||
| 9 | - stage: format | ||
| 10 | dependsOn: merge | ||
| 11 | displayName: 'format' | ||
| 12 | jobs: | ||
| 13 | - job: format | ||
| 14 | displayName: 'clang' | ||
| 15 | pool: | ||
| 16 | vmImage: ubuntu-latest | ||
| 17 | steps: | ||
| 18 | - template: ./templates/format-check.yml | ||
| 19 | - stage: build | ||
| 20 | displayName: 'build' | ||
| 21 | dependsOn: format | ||
| 22 | jobs: | ||
| 23 | - template: ./templates/build-standard.yml | ||
| 24 | parameters: | ||
| 25 | cache: 'true' | ||
diff --git a/azure-pipelines.yml b/.ci/yuzu-patreon.yml index aa912913d..aa912913d 100644 --- a/azure-pipelines.yml +++ b/.ci/yuzu-patreon.yml | |||
diff --git a/.ci/yuzu-repo-sync.yml b/.ci/yuzu-repo-sync.yml new file mode 100644 index 000000000..602e298a6 --- /dev/null +++ b/.ci/yuzu-repo-sync.yml | |||
| @@ -0,0 +1,19 @@ | |||
| 1 | trigger: | ||
| 2 | - master | ||
| 3 | |||
| 4 | jobs: | ||
| 5 | - job: copy | ||
| 6 | displayName: 'Sync Repository' | ||
| 7 | pool: | ||
| 8 | vmImage: 'ubuntu-latest' | ||
| 9 | steps: | ||
| 10 | - script: echo 'https://$(GitUsername):$(GitAccessToken)@dev.azure.com' > $HOME/.git-credentials | ||
| 11 | displayName: 'Load Credentials' | ||
| 12 | - script: git config --global credential.helper store | ||
| 13 | displayName: 'Register Credential Helper' | ||
| 14 | - script: git remote add other $(GitRepoPushChangesURL) | ||
| 15 | displayName: 'Register Repository' | ||
| 16 | - script: git push --force other HEAD:$(GitPushBranch) | ||
| 17 | displayName: 'Update Code' | ||
| 18 | - script: rm -rf $HOME/.git-credentials | ||
| 19 | displayName: 'Clear Cached Credentials' | ||
diff --git a/.ci/yuzu-verify.yml b/.ci/yuzu-verify.yml new file mode 100644 index 000000000..5492e696a --- /dev/null +++ b/.ci/yuzu-verify.yml | |||
| @@ -0,0 +1,20 @@ | |||
| 1 | stages: | ||
| 2 | - stage: format | ||
| 3 | displayName: 'format' | ||
| 4 | jobs: | ||
| 5 | - job: format | ||
| 6 | displayName: 'clang' | ||
| 7 | pool: | ||
| 8 | vmImage: ubuntu-latest | ||
| 9 | steps: | ||
| 10 | - template: ./templates/format-check.yml | ||
| 11 | parameters: | ||
| 12 | artifactSource: 'false' | ||
| 13 | - stage: build | ||
| 14 | displayName: 'build' | ||
| 15 | dependsOn: format | ||
| 16 | jobs: | ||
| 17 | - template: ./templates/build-standard.yml | ||
| 18 | parameters: | ||
| 19 | cache: 'false' | ||
| 20 | - template: ./templates/build-testing.yml | ||
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index dd65cfe42..a1ace89cb 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake | |||
| @@ -81,7 +81,10 @@ set(HASH_FILES | |||
| 81 | "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" | 81 | "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" |
| 82 | "${VIDEO_CORE}/shader/decode/shift.cpp" | 82 | "${VIDEO_CORE}/shader/decode/shift.cpp" |
| 83 | "${VIDEO_CORE}/shader/decode/video.cpp" | 83 | "${VIDEO_CORE}/shader/decode/video.cpp" |
| 84 | "${VIDEO_CORE}/shader/decode/warp.cpp" | ||
| 84 | "${VIDEO_CORE}/shader/decode/xmad.cpp" | 85 | "${VIDEO_CORE}/shader/decode/xmad.cpp" |
| 86 | "${VIDEO_CORE}/shader/control_flow.cpp" | ||
| 87 | "${VIDEO_CORE}/shader/control_flow.h" | ||
| 85 | "${VIDEO_CORE}/shader/decode.cpp" | 88 | "${VIDEO_CORE}/shader/decode.cpp" |
| 86 | "${VIDEO_CORE}/shader/node.h" | 89 | "${VIDEO_CORE}/shader/node.h" |
| 87 | "${VIDEO_CORE}/shader/node_helper.cpp" | 90 | "${VIDEO_CORE}/shader/node_helper.cpp" |
| @@ -2,6 +2,7 @@ yuzu emulator | |||
| 2 | ============= | 2 | ============= |
| 3 | [](https://travis-ci.org/yuzu-emu/yuzu) | 3 | [](https://travis-ci.org/yuzu-emu/yuzu) |
| 4 | [](https://ci.appveyor.com/project/bunnei/yuzu) | 4 | [](https://ci.appveyor.com/project/bunnei/yuzu) |
| 5 | [](https://dev.azure.com/yuzu-emu/yuzu/) | ||
| 5 | 6 | ||
| 6 | yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/). | 7 | yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/). |
| 7 | 8 | ||
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index 4882a6cd8..da50a0bbc 100644 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp | |||
| @@ -73,13 +73,15 @@ private: | |||
| 73 | EffectInStatus info{}; | 73 | EffectInStatus info{}; |
| 74 | }; | 74 | }; |
| 75 | AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params, | 75 | AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params, |
| 76 | Kernel::SharedPtr<Kernel::WritableEvent> buffer_event) | 76 | Kernel::SharedPtr<Kernel::WritableEvent> buffer_event, |
| 77 | std::size_t instance_number) | ||
| 77 | : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count), | 78 | : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count), |
| 78 | effects(params.effect_count) { | 79 | effects(params.effect_count) { |
| 79 | 80 | ||
| 80 | audio_out = std::make_unique<AudioCore::AudioOut>(); | 81 | audio_out = std::make_unique<AudioCore::AudioOut>(); |
| 81 | stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, | 82 | stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, |
| 82 | "AudioRenderer", [=]() { buffer_event->Signal(); }); | 83 | fmt::format("AudioRenderer-Instance{}", instance_number), |
| 84 | [=]() { buffer_event->Signal(); }); | ||
| 83 | audio_out->StartStream(stream); | 85 | audio_out->StartStream(stream); |
| 84 | 86 | ||
| 85 | QueueMixedBuffer(0); | 87 | QueueMixedBuffer(0); |
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index b2e5d336c..45afbe759 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h | |||
| @@ -215,7 +215,8 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size | |||
| 215 | class AudioRenderer { | 215 | class AudioRenderer { |
| 216 | public: | 216 | public: |
| 217 | AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params, | 217 | AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params, |
| 218 | Kernel::SharedPtr<Kernel::WritableEvent> buffer_event); | 218 | Kernel::SharedPtr<Kernel::WritableEvent> buffer_event, |
| 219 | std::size_t instance_number); | ||
| 219 | ~AudioRenderer(); | 220 | ~AudioRenderer(); |
| 220 | 221 | ||
| 221 | std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params); | 222 | std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params); |
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2554add28..01abdb3bb 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -55,7 +55,10 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 55 | "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" | 55 | "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" |
| 56 | "${VIDEO_CORE}/shader/decode/shift.cpp" | 56 | "${VIDEO_CORE}/shader/decode/shift.cpp" |
| 57 | "${VIDEO_CORE}/shader/decode/video.cpp" | 57 | "${VIDEO_CORE}/shader/decode/video.cpp" |
| 58 | "${VIDEO_CORE}/shader/decode/warp.cpp" | ||
| 58 | "${VIDEO_CORE}/shader/decode/xmad.cpp" | 59 | "${VIDEO_CORE}/shader/decode/xmad.cpp" |
| 60 | "${VIDEO_CORE}/shader/control_flow.cpp" | ||
| 61 | "${VIDEO_CORE}/shader/control_flow.h" | ||
| 59 | "${VIDEO_CORE}/shader/decode.cpp" | 62 | "${VIDEO_CORE}/shader/decode.cpp" |
| 60 | "${VIDEO_CORE}/shader/node.h" | 63 | "${VIDEO_CORE}/shader/node.h" |
| 61 | "${VIDEO_CORE}/shader/node_helper.cpp" | 64 | "${VIDEO_CORE}/shader/node_helper.cpp" |
diff --git a/src/common/alignment.h b/src/common/alignment.h index 617b14d9b..88d5d3a65 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | #pragma once | 3 | #pragma once |
| 4 | 4 | ||
| 5 | #include <cstddef> | 5 | #include <cstddef> |
| 6 | #include <memory> | ||
| 6 | #include <type_traits> | 7 | #include <type_traits> |
| 7 | 8 | ||
| 8 | namespace Common { | 9 | namespace Common { |
| @@ -37,4 +38,63 @@ constexpr bool IsWordAligned(T value) { | |||
| 37 | return (value & 0b11) == 0; | 38 | return (value & 0b11) == 0; |
| 38 | } | 39 | } |
| 39 | 40 | ||
| 41 | template <typename T, std::size_t Align = 16> | ||
| 42 | class AlignmentAllocator { | ||
| 43 | public: | ||
| 44 | using value_type = T; | ||
| 45 | using size_type = std::size_t; | ||
| 46 | using difference_type = std::ptrdiff_t; | ||
| 47 | |||
| 48 | using pointer = T*; | ||
| 49 | using const_pointer = const T*; | ||
| 50 | |||
| 51 | using reference = T&; | ||
| 52 | using const_reference = const T&; | ||
| 53 | |||
| 54 | public: | ||
| 55 | pointer address(reference r) noexcept { | ||
| 56 | return std::addressof(r); | ||
| 57 | } | ||
| 58 | |||
| 59 | const_pointer address(const_reference r) const noexcept { | ||
| 60 | return std::addressof(r); | ||
| 61 | } | ||
| 62 | |||
| 63 | pointer allocate(size_type n) { | ||
| 64 | return static_cast<pointer>(::operator new (n, std::align_val_t{Align})); | ||
| 65 | } | ||
| 66 | |||
| 67 | void deallocate(pointer p, size_type) { | ||
| 68 | ::operator delete (p, std::align_val_t{Align}); | ||
| 69 | } | ||
| 70 | |||
| 71 | void construct(pointer p, const value_type& wert) { | ||
| 72 | new (p) value_type(wert); | ||
| 73 | } | ||
| 74 | |||
| 75 | void destroy(pointer p) { | ||
| 76 | p->~value_type(); | ||
| 77 | } | ||
| 78 | |||
| 79 | size_type max_size() const noexcept { | ||
| 80 | return size_type(-1) / sizeof(value_type); | ||
| 81 | } | ||
| 82 | |||
| 83 | template <typename T2> | ||
| 84 | struct rebind { | ||
| 85 | using other = AlignmentAllocator<T2, Align>; | ||
| 86 | }; | ||
| 87 | |||
| 88 | bool operator!=(const AlignmentAllocator<T, Align>& other) const noexcept { | ||
| 89 | return !(*this == other); | ||
| 90 | } | ||
| 91 | |||
| 92 | // Returns true if and only if storage allocated from *this | ||
| 93 | // can be deallocated from other, and vice versa. | ||
| 94 | // Always returns true for stateless allocators. | ||
| 95 | bool operator==(const AlignmentAllocator<T, Align>& other) const noexcept { | ||
| 96 | return true; | ||
| 97 | } | ||
| 98 | }; | ||
| 99 | |||
| 40 | } // namespace Common | 100 | } // namespace Common |
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index f4325f0f8..5462decee 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt | |||
| @@ -111,6 +111,8 @@ add_library(core STATIC | |||
| 111 | frontend/scope_acquire_window_context.h | 111 | frontend/scope_acquire_window_context.h |
| 112 | gdbstub/gdbstub.cpp | 112 | gdbstub/gdbstub.cpp |
| 113 | gdbstub/gdbstub.h | 113 | gdbstub/gdbstub.h |
| 114 | hardware_interrupt_manager.cpp | ||
| 115 | hardware_interrupt_manager.h | ||
| 114 | hle/ipc.h | 116 | hle/ipc.h |
| 115 | hle/ipc_helpers.h | 117 | hle/ipc_helpers.h |
| 116 | hle/kernel/address_arbiter.cpp | 118 | hle/kernel/address_arbiter.cpp |
| @@ -372,6 +374,7 @@ add_library(core STATIC | |||
| 372 | hle/service/nvdrv/devices/nvmap.h | 374 | hle/service/nvdrv/devices/nvmap.h |
| 373 | hle/service/nvdrv/interface.cpp | 375 | hle/service/nvdrv/interface.cpp |
| 374 | hle/service/nvdrv/interface.h | 376 | hle/service/nvdrv/interface.h |
| 377 | hle/service/nvdrv/nvdata.h | ||
| 375 | hle/service/nvdrv/nvdrv.cpp | 378 | hle/service/nvdrv/nvdrv.cpp |
| 376 | hle/service/nvdrv/nvdrv.h | 379 | hle/service/nvdrv/nvdrv.h |
| 377 | hle/service/nvdrv/nvmemp.cpp | 380 | hle/service/nvdrv/nvmemp.cpp |
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index c6691a8e1..45e94e625 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h | |||
| @@ -44,13 +44,6 @@ public: | |||
| 44 | /// Step CPU by one instruction | 44 | /// Step CPU by one instruction |
| 45 | virtual void Step() = 0; | 45 | virtual void Step() = 0; |
| 46 | 46 | ||
| 47 | /// Maps a backing memory region for the CPU | ||
| 48 | virtual void MapBackingMemory(VAddr address, std::size_t size, u8* memory, | ||
| 49 | Kernel::VMAPermission perms) = 0; | ||
| 50 | |||
| 51 | /// Unmaps a region of memory that was previously mapped using MapBackingMemory | ||
| 52 | virtual void UnmapMemory(VAddr address, std::size_t size) = 0; | ||
| 53 | |||
| 54 | /// Clear all instruction cache | 47 | /// Clear all instruction cache |
| 55 | virtual void ClearInstructionCache() = 0; | 48 | virtual void ClearInstructionCache() = 0; |
| 56 | 49 | ||
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index 44307fa19..f1506b372 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp | |||
| @@ -177,15 +177,6 @@ ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, | |||
| 177 | 177 | ||
| 178 | ARM_Dynarmic::~ARM_Dynarmic() = default; | 178 | ARM_Dynarmic::~ARM_Dynarmic() = default; |
| 179 | 179 | ||
| 180 | void ARM_Dynarmic::MapBackingMemory(u64 address, std::size_t size, u8* memory, | ||
| 181 | Kernel::VMAPermission perms) { | ||
| 182 | inner_unicorn.MapBackingMemory(address, size, memory, perms); | ||
| 183 | } | ||
| 184 | |||
| 185 | void ARM_Dynarmic::UnmapMemory(u64 address, std::size_t size) { | ||
| 186 | inner_unicorn.UnmapMemory(address, size); | ||
| 187 | } | ||
| 188 | |||
| 189 | void ARM_Dynarmic::SetPC(u64 pc) { | 180 | void ARM_Dynarmic::SetPC(u64 pc) { |
| 190 | jit->SetPC(pc); | 181 | jit->SetPC(pc); |
| 191 | } | 182 | } |
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h index b701e97a3..504d46c68 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic.h | |||
| @@ -23,9 +23,6 @@ public: | |||
| 23 | ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); | 23 | ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); |
| 24 | ~ARM_Dynarmic() override; | 24 | ~ARM_Dynarmic() override; |
| 25 | 25 | ||
| 26 | void MapBackingMemory(VAddr address, std::size_t size, u8* memory, | ||
| 27 | Kernel::VMAPermission perms) override; | ||
| 28 | void UnmapMemory(u64 address, std::size_t size) override; | ||
| 29 | void SetPC(u64 pc) override; | 26 | void SetPC(u64 pc) override; |
| 30 | u64 GetPC() const override; | 27 | u64 GetPC() const override; |
| 31 | u64 GetReg(int index) const override; | 28 | u64 GetReg(int index) const override; |
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index 4e07fe8b5..97d5c2a8a 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp | |||
| @@ -50,11 +50,14 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_ | |||
| 50 | 50 | ||
| 51 | static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value, | 51 | static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value, |
| 52 | void* user_data) { | 52 | void* user_data) { |
| 53 | auto* const system = static_cast<System*>(user_data); | ||
| 54 | |||
| 53 | ARM_Interface::ThreadContext ctx{}; | 55 | ARM_Interface::ThreadContext ctx{}; |
| 54 | Core::CurrentArmInterface().SaveContext(ctx); | 56 | system->CurrentArmInterface().SaveContext(ctx); |
| 55 | ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr, | 57 | ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr, |
| 56 | ctx.pc, ctx.cpu_registers[30]); | 58 | ctx.pc, ctx.cpu_registers[30]); |
| 57 | return {}; | 59 | |
| 60 | return false; | ||
| 58 | } | 61 | } |
| 59 | 62 | ||
| 60 | ARM_Unicorn::ARM_Unicorn(System& system) : system{system} { | 63 | ARM_Unicorn::ARM_Unicorn(System& system) : system{system} { |
| @@ -65,7 +68,7 @@ ARM_Unicorn::ARM_Unicorn(System& system) : system{system} { | |||
| 65 | 68 | ||
| 66 | uc_hook hook{}; | 69 | uc_hook hook{}; |
| 67 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1)); | 70 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1)); |
| 68 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1)); | 71 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, &system, 0, -1)); |
| 69 | if (GDBStub::IsServerEnabled()) { | 72 | if (GDBStub::IsServerEnabled()) { |
| 70 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1)); | 73 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1)); |
| 71 | last_bkpt_hit = false; | 74 | last_bkpt_hit = false; |
| @@ -76,15 +79,6 @@ ARM_Unicorn::~ARM_Unicorn() { | |||
| 76 | CHECKED(uc_close(uc)); | 79 | CHECKED(uc_close(uc)); |
| 77 | } | 80 | } |
| 78 | 81 | ||
| 79 | void ARM_Unicorn::MapBackingMemory(VAddr address, std::size_t size, u8* memory, | ||
| 80 | Kernel::VMAPermission perms) { | ||
| 81 | CHECKED(uc_mem_map_ptr(uc, address, size, static_cast<u32>(perms), memory)); | ||
| 82 | } | ||
| 83 | |||
| 84 | void ARM_Unicorn::UnmapMemory(VAddr address, std::size_t size) { | ||
| 85 | CHECKED(uc_mem_unmap(uc, address, size)); | ||
| 86 | } | ||
| 87 | |||
| 88 | void ARM_Unicorn::SetPC(u64 pc) { | 82 | void ARM_Unicorn::SetPC(u64 pc) { |
| 89 | CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &pc)); | 83 | CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &pc)); |
| 90 | } | 84 | } |
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index 34e974b4d..fe2ffd70c 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h | |||
| @@ -18,9 +18,6 @@ public: | |||
| 18 | explicit ARM_Unicorn(System& system); | 18 | explicit ARM_Unicorn(System& system); |
| 19 | ~ARM_Unicorn() override; | 19 | ~ARM_Unicorn() override; |
| 20 | 20 | ||
| 21 | void MapBackingMemory(VAddr address, std::size_t size, u8* memory, | ||
| 22 | Kernel::VMAPermission perms) override; | ||
| 23 | void UnmapMemory(VAddr address, std::size_t size) override; | ||
| 24 | void SetPC(u64 pc) override; | 21 | void SetPC(u64 pc) override; |
| 25 | u64 GetPC() const override; | 22 | u64 GetPC() const override; |
| 26 | u64 GetReg(int index) const override; | 23 | u64 GetReg(int index) const override; |
diff --git a/src/core/core.cpp b/src/core/core.cpp index 4aceee785..20d64f3b0 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "core/file_sys/vfs_concat.h" | 19 | #include "core/file_sys/vfs_concat.h" |
| 20 | #include "core/file_sys/vfs_real.h" | 20 | #include "core/file_sys/vfs_real.h" |
| 21 | #include "core/gdbstub/gdbstub.h" | 21 | #include "core/gdbstub/gdbstub.h" |
| 22 | #include "core/hardware_interrupt_manager.h" | ||
| 22 | #include "core/hle/kernel/client_port.h" | 23 | #include "core/hle/kernel/client_port.h" |
| 23 | #include "core/hle/kernel/kernel.h" | 24 | #include "core/hle/kernel/kernel.h" |
| 24 | #include "core/hle/kernel/process.h" | 25 | #include "core/hle/kernel/process.h" |
| @@ -151,7 +152,7 @@ struct System::Impl { | |||
| 151 | if (!renderer->Init()) { | 152 | if (!renderer->Init()) { |
| 152 | return ResultStatus::ErrorVideoCore; | 153 | return ResultStatus::ErrorVideoCore; |
| 153 | } | 154 | } |
| 154 | 155 | interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system); | |
| 155 | gpu_core = VideoCore::CreateGPU(system); | 156 | gpu_core = VideoCore::CreateGPU(system); |
| 156 | 157 | ||
| 157 | is_powered_on = true; | 158 | is_powered_on = true; |
| @@ -298,6 +299,7 @@ struct System::Impl { | |||
| 298 | std::unique_ptr<VideoCore::RendererBase> renderer; | 299 | std::unique_ptr<VideoCore::RendererBase> renderer; |
| 299 | std::unique_ptr<Tegra::GPU> gpu_core; | 300 | std::unique_ptr<Tegra::GPU> gpu_core; |
| 300 | std::shared_ptr<Tegra::DebugContext> debug_context; | 301 | std::shared_ptr<Tegra::DebugContext> debug_context; |
| 302 | std::unique_ptr<Core::Hardware::InterruptManager> interrupt_manager; | ||
| 301 | CpuCoreManager cpu_core_manager; | 303 | CpuCoreManager cpu_core_manager; |
| 302 | bool is_powered_on = false; | 304 | bool is_powered_on = false; |
| 303 | 305 | ||
| @@ -444,6 +446,14 @@ const Tegra::GPU& System::GPU() const { | |||
| 444 | return *impl->gpu_core; | 446 | return *impl->gpu_core; |
| 445 | } | 447 | } |
| 446 | 448 | ||
| 449 | Core::Hardware::InterruptManager& System::InterruptManager() { | ||
| 450 | return *impl->interrupt_manager; | ||
| 451 | } | ||
| 452 | |||
| 453 | const Core::Hardware::InterruptManager& System::InterruptManager() const { | ||
| 454 | return *impl->interrupt_manager; | ||
| 455 | } | ||
| 456 | |||
| 447 | VideoCore::RendererBase& System::Renderer() { | 457 | VideoCore::RendererBase& System::Renderer() { |
| 448 | return *impl->renderer; | 458 | return *impl->renderer; |
| 449 | } | 459 | } |
diff --git a/src/core/core.h b/src/core/core.h index 11e73278e..0138d93b0 100644 --- a/src/core/core.h +++ b/src/core/core.h | |||
| @@ -70,6 +70,10 @@ namespace Core::Timing { | |||
| 70 | class CoreTiming; | 70 | class CoreTiming; |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | namespace Core::Hardware { | ||
| 74 | class InterruptManager; | ||
| 75 | } | ||
| 76 | |||
| 73 | namespace Core { | 77 | namespace Core { |
| 74 | 78 | ||
| 75 | class ARM_Interface; | 79 | class ARM_Interface; |
| @@ -234,6 +238,12 @@ public: | |||
| 234 | /// Provides a constant reference to the core timing instance. | 238 | /// Provides a constant reference to the core timing instance. |
| 235 | const Timing::CoreTiming& CoreTiming() const; | 239 | const Timing::CoreTiming& CoreTiming() const; |
| 236 | 240 | ||
| 241 | /// Provides a reference to the interrupt manager instance. | ||
| 242 | Core::Hardware::InterruptManager& InterruptManager(); | ||
| 243 | |||
| 244 | /// Provides a constant reference to the interrupt manager instance. | ||
| 245 | const Core::Hardware::InterruptManager& InterruptManager() const; | ||
| 246 | |||
| 237 | /// Provides a reference to the kernel instance. | 247 | /// Provides a reference to the kernel instance. |
| 238 | Kernel::KernelCore& Kernel(); | 248 | Kernel::KernelCore& Kernel(); |
| 239 | 249 | ||
| @@ -327,10 +337,6 @@ private: | |||
| 327 | static System s_instance; | 337 | static System s_instance; |
| 328 | }; | 338 | }; |
| 329 | 339 | ||
| 330 | inline ARM_Interface& CurrentArmInterface() { | ||
| 331 | return System::GetInstance().CurrentArmInterface(); | ||
| 332 | } | ||
| 333 | |||
| 334 | inline Kernel::Process* CurrentProcess() { | 340 | inline Kernel::Process* CurrentProcess() { |
| 335 | return System::GetInstance().CurrentProcess(); | 341 | return System::GetInstance().CurrentProcess(); |
| 336 | } | 342 | } |
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp index 99b7d387d..21c410e34 100644 --- a/src/core/core_cpu.cpp +++ b/src/core/core_cpu.cpp | |||
| @@ -53,16 +53,12 @@ bool CpuBarrier::Rendezvous() { | |||
| 53 | Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, | 53 | Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, |
| 54 | std::size_t core_index) | 54 | std::size_t core_index) |
| 55 | : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} { | 55 | : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} { |
| 56 | if (Settings::values.cpu_jit_enabled) { | ||
| 57 | #ifdef ARCHITECTURE_x86_64 | 56 | #ifdef ARCHITECTURE_x86_64 |
| 58 | arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index); | 57 | arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index); |
| 59 | #else | 58 | #else |
| 60 | arm_interface = std::make_unique<ARM_Unicorn>(system); | 59 | arm_interface = std::make_unique<ARM_Unicorn>(system); |
| 61 | LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); | 60 | LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); |
| 62 | #endif | 61 | #endif |
| 63 | } else { | ||
| 64 | arm_interface = std::make_unique<ARM_Unicorn>(system); | ||
| 65 | } | ||
| 66 | 62 | ||
| 67 | scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface); | 63 | scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface); |
| 68 | } | 64 | } |
| @@ -70,15 +66,12 @@ Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_ba | |||
| 70 | Cpu::~Cpu() = default; | 66 | Cpu::~Cpu() = default; |
| 71 | 67 | ||
| 72 | std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(std::size_t num_cores) { | 68 | std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(std::size_t num_cores) { |
| 73 | if (Settings::values.cpu_jit_enabled) { | ||
| 74 | #ifdef ARCHITECTURE_x86_64 | 69 | #ifdef ARCHITECTURE_x86_64 |
| 75 | return std::make_unique<DynarmicExclusiveMonitor>(num_cores); | 70 | return std::make_unique<DynarmicExclusiveMonitor>(num_cores); |
| 76 | #else | 71 | #else |
| 77 | return nullptr; // TODO(merry): Passthrough exclusive monitor | 72 | // TODO(merry): Passthrough exclusive monitor |
| 73 | return nullptr; | ||
| 78 | #endif | 74 | #endif |
| 79 | } else { | ||
| 80 | return nullptr; // TODO(merry): Passthrough exclusive monitor | ||
| 81 | } | ||
| 82 | } | 75 | } |
| 83 | 76 | ||
| 84 | void Cpu::RunLoop(bool tight_loop) { | 77 | void Cpu::RunLoop(bool tight_loop) { |
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp index eb76174c5..7310b3602 100644 --- a/src/core/file_sys/program_metadata.cpp +++ b/src/core/file_sys/program_metadata.cpp | |||
| @@ -94,6 +94,10 @@ u64 ProgramMetadata::GetFilesystemPermissions() const { | |||
| 94 | return aci_file_access.permissions; | 94 | return aci_file_access.permissions; |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | u32 ProgramMetadata::GetSystemResourceSize() const { | ||
| 98 | return npdm_header.system_resource_size; | ||
| 99 | } | ||
| 100 | |||
| 97 | const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const { | 101 | const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const { |
| 98 | return aci_kernel_capabilities; | 102 | return aci_kernel_capabilities; |
| 99 | } | 103 | } |
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h index 43bf2820a..88ec97d85 100644 --- a/src/core/file_sys/program_metadata.h +++ b/src/core/file_sys/program_metadata.h | |||
| @@ -58,6 +58,7 @@ public: | |||
| 58 | u32 GetMainThreadStackSize() const; | 58 | u32 GetMainThreadStackSize() const; |
| 59 | u64 GetTitleID() const; | 59 | u64 GetTitleID() const; |
| 60 | u64 GetFilesystemPermissions() const; | 60 | u64 GetFilesystemPermissions() const; |
| 61 | u32 GetSystemResourceSize() const; | ||
| 61 | const KernelCapabilityDescriptors& GetKernelCapabilities() const; | 62 | const KernelCapabilityDescriptors& GetKernelCapabilities() const; |
| 62 | 63 | ||
| 63 | void Print() const; | 64 | void Print() const; |
| @@ -76,7 +77,8 @@ private: | |||
| 76 | u8 reserved_3; | 77 | u8 reserved_3; |
| 77 | u8 main_thread_priority; | 78 | u8 main_thread_priority; |
| 78 | u8 main_thread_cpu; | 79 | u8 main_thread_cpu; |
| 79 | std::array<u8, 8> reserved_4; | 80 | std::array<u8, 4> reserved_4; |
| 81 | u32_le system_resource_size; | ||
| 80 | u32_le process_category; | 82 | u32_le process_category; |
| 81 | u32_le main_stack_size; | 83 | u32_le main_stack_size; |
| 82 | std::array<u8, 0x10> application_name; | 84 | std::array<u8, 0x10> application_name; |
diff --git a/src/core/hardware_interrupt_manager.cpp b/src/core/hardware_interrupt_manager.cpp new file mode 100644 index 000000000..c2115db2d --- /dev/null +++ b/src/core/hardware_interrupt_manager.cpp | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2019 Yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "core/core.h" | ||
| 6 | #include "core/core_timing.h" | ||
| 7 | #include "core/hardware_interrupt_manager.h" | ||
| 8 | #include "core/hle/service/nvdrv/interface.h" | ||
| 9 | #include "core/hle/service/sm/sm.h" | ||
| 10 | |||
| 11 | namespace Core::Hardware { | ||
| 12 | |||
| 13 | InterruptManager::InterruptManager(Core::System& system_in) : system(system_in) { | ||
| 14 | gpu_interrupt_event = | ||
| 15 | system.CoreTiming().RegisterEvent("GPUInterrupt", [this](u64 message, s64) { | ||
| 16 | auto nvdrv = system.ServiceManager().GetService<Service::Nvidia::NVDRV>("nvdrv"); | ||
| 17 | const u32 syncpt = static_cast<u32>(message >> 32); | ||
| 18 | const u32 value = static_cast<u32>(message); | ||
| 19 | nvdrv->SignalGPUInterruptSyncpt(syncpt, value); | ||
| 20 | }); | ||
| 21 | } | ||
| 22 | |||
| 23 | InterruptManager::~InterruptManager() = default; | ||
| 24 | |||
| 25 | void InterruptManager::GPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) { | ||
| 26 | const u64 msg = (static_cast<u64>(syncpoint_id) << 32ULL) | value; | ||
| 27 | system.CoreTiming().ScheduleEvent(10, gpu_interrupt_event, msg); | ||
| 28 | } | ||
| 29 | |||
| 30 | } // namespace Core::Hardware | ||
diff --git a/src/core/hardware_interrupt_manager.h b/src/core/hardware_interrupt_manager.h new file mode 100644 index 000000000..494db883a --- /dev/null +++ b/src/core/hardware_interrupt_manager.h | |||
| @@ -0,0 +1,31 @@ | |||
| 1 | // Copyright 2019 Yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Core { | ||
| 10 | class System; | ||
| 11 | } | ||
| 12 | |||
| 13 | namespace Core::Timing { | ||
| 14 | struct EventType; | ||
| 15 | } | ||
| 16 | |||
| 17 | namespace Core::Hardware { | ||
| 18 | |||
| 19 | class InterruptManager { | ||
| 20 | public: | ||
| 21 | explicit InterruptManager(Core::System& system); | ||
| 22 | ~InterruptManager(); | ||
| 23 | |||
| 24 | void GPUInterruptSyncpt(u32 syncpoint_id, u32 value); | ||
| 25 | |||
| 26 | private: | ||
| 27 | Core::System& system; | ||
| 28 | Core::Timing::EventType* gpu_interrupt_event{}; | ||
| 29 | }; | ||
| 30 | |||
| 31 | } // namespace Core::Hardware | ||
diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h index 879957dcb..d8ad54030 100644 --- a/src/core/hle/kernel/code_set.h +++ b/src/core/hle/kernel/code_set.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "core/hle/kernel/physical_memory.h" | ||
| 11 | 12 | ||
| 12 | namespace Kernel { | 13 | namespace Kernel { |
| 13 | 14 | ||
| @@ -77,7 +78,7 @@ struct CodeSet final { | |||
| 77 | } | 78 | } |
| 78 | 79 | ||
| 79 | /// The overall data that backs this code set. | 80 | /// The overall data that backs this code set. |
| 80 | std::vector<u8> memory; | 81 | Kernel::PhysicalMemory memory; |
| 81 | 82 | ||
| 82 | /// The segments that comprise this code set. | 83 | /// The segments that comprise this code set. |
| 83 | std::array<Segment, 3> segments; | 84 | std::array<Segment, 3> segments; |
diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h new file mode 100644 index 000000000..090565310 --- /dev/null +++ b/src/core/hle/kernel/physical_memory.h | |||
| @@ -0,0 +1,19 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/alignment.h" | ||
| 8 | |||
| 9 | namespace Kernel { | ||
| 10 | |||
| 11 | // This encapsulation serves 2 purposes: | ||
| 12 | // - First, to encapsulate host physical memory under a single type and set an | ||
| 13 | // standard for managing it. | ||
| 14 | // - Second to ensure all host backing memory used is aligned to 256 bytes due | ||
| 15 | // to strict alignment restrictions on GPU memory. | ||
| 16 | |||
| 17 | using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>; | ||
| 18 | |||
| 19 | } // namespace Kernel | ||
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index f45ef05f6..e80a12ac3 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp | |||
| @@ -129,20 +129,17 @@ u64 Process::GetTotalPhysicalMemoryAvailable() const { | |||
| 129 | return vm_manager.GetTotalPhysicalMemoryAvailable(); | 129 | return vm_manager.GetTotalPhysicalMemoryAvailable(); |
| 130 | } | 130 | } |
| 131 | 131 | ||
| 132 | u64 Process::GetTotalPhysicalMemoryAvailableWithoutMmHeap() const { | 132 | u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const { |
| 133 | // TODO: Subtract the personal heap size from this when the | 133 | return GetTotalPhysicalMemoryAvailable() - GetSystemResourceSize(); |
| 134 | // personal heap is implemented. | ||
| 135 | return GetTotalPhysicalMemoryAvailable(); | ||
| 136 | } | 134 | } |
| 137 | 135 | ||
| 138 | u64 Process::GetTotalPhysicalMemoryUsed() const { | 136 | u64 Process::GetTotalPhysicalMemoryUsed() const { |
| 139 | return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size; | 137 | return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size + |
| 138 | GetSystemResourceUsage(); | ||
| 140 | } | 139 | } |
| 141 | 140 | ||
| 142 | u64 Process::GetTotalPhysicalMemoryUsedWithoutMmHeap() const { | 141 | u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const { |
| 143 | // TODO: Subtract the personal heap size from this when the | 142 | return GetTotalPhysicalMemoryUsed() - GetSystemResourceUsage(); |
| 144 | // personal heap is implemented. | ||
| 145 | return GetTotalPhysicalMemoryUsed(); | ||
| 146 | } | 143 | } |
| 147 | 144 | ||
| 148 | void Process::RegisterThread(const Thread* thread) { | 145 | void Process::RegisterThread(const Thread* thread) { |
| @@ -172,6 +169,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { | |||
| 172 | program_id = metadata.GetTitleID(); | 169 | program_id = metadata.GetTitleID(); |
| 173 | ideal_core = metadata.GetMainThreadCore(); | 170 | ideal_core = metadata.GetMainThreadCore(); |
| 174 | is_64bit_process = metadata.Is64BitProgram(); | 171 | is_64bit_process = metadata.Is64BitProgram(); |
| 172 | system_resource_size = metadata.GetSystemResourceSize(); | ||
| 175 | 173 | ||
| 176 | vm_manager.Reset(metadata.GetAddressSpaceType()); | 174 | vm_manager.Reset(metadata.GetAddressSpaceType()); |
| 177 | 175 | ||
| @@ -186,19 +184,11 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { | |||
| 186 | } | 184 | } |
| 187 | 185 | ||
| 188 | void Process::Run(s32 main_thread_priority, u64 stack_size) { | 186 | void Process::Run(s32 main_thread_priority, u64 stack_size) { |
| 189 | // The kernel always ensures that the given stack size is page aligned. | 187 | AllocateMainThreadStack(stack_size); |
| 190 | main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); | 188 | tls_region_address = CreateTLSRegion(); |
| 191 | |||
| 192 | // Allocate and map the main thread stack | ||
| 193 | // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part | ||
| 194 | // of the user address space. | ||
| 195 | const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size; | ||
| 196 | vm_manager | ||
| 197 | .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size), | ||
| 198 | 0, main_thread_stack_size, MemoryState::Stack) | ||
| 199 | .Unwrap(); | ||
| 200 | 189 | ||
| 201 | vm_manager.LogLayout(); | 190 | vm_manager.LogLayout(); |
| 191 | |||
| 202 | ChangeStatus(ProcessStatus::Running); | 192 | ChangeStatus(ProcessStatus::Running); |
| 203 | 193 | ||
| 204 | SetupMainThread(*this, kernel, main_thread_priority); | 194 | SetupMainThread(*this, kernel, main_thread_priority); |
| @@ -228,6 +218,9 @@ void Process::PrepareForTermination() { | |||
| 228 | stop_threads(system.Scheduler(2).GetThreadList()); | 218 | stop_threads(system.Scheduler(2).GetThreadList()); |
| 229 | stop_threads(system.Scheduler(3).GetThreadList()); | 219 | stop_threads(system.Scheduler(3).GetThreadList()); |
| 230 | 220 | ||
| 221 | FreeTLSRegion(tls_region_address); | ||
| 222 | tls_region_address = 0; | ||
| 223 | |||
| 231 | ChangeStatus(ProcessStatus::Exited); | 224 | ChangeStatus(ProcessStatus::Exited); |
| 232 | } | 225 | } |
| 233 | 226 | ||
| @@ -254,7 +247,7 @@ VAddr Process::CreateTLSRegion() { | |||
| 254 | ASSERT(region_address.Succeeded()); | 247 | ASSERT(region_address.Succeeded()); |
| 255 | 248 | ||
| 256 | const auto map_result = vm_manager.MapMemoryBlock( | 249 | const auto map_result = vm_manager.MapMemoryBlock( |
| 257 | *region_address, std::make_shared<std::vector<u8>>(Memory::PAGE_SIZE), 0, | 250 | *region_address, std::make_shared<PhysicalMemory>(Memory::PAGE_SIZE), 0, |
| 258 | Memory::PAGE_SIZE, MemoryState::ThreadLocal); | 251 | Memory::PAGE_SIZE, MemoryState::ThreadLocal); |
| 259 | ASSERT(map_result.Succeeded()); | 252 | ASSERT(map_result.Succeeded()); |
| 260 | 253 | ||
| @@ -284,7 +277,7 @@ void Process::FreeTLSRegion(VAddr tls_address) { | |||
| 284 | } | 277 | } |
| 285 | 278 | ||
| 286 | void Process::LoadModule(CodeSet module_, VAddr base_addr) { | 279 | void Process::LoadModule(CodeSet module_, VAddr base_addr) { |
| 287 | const auto memory = std::make_shared<std::vector<u8>>(std::move(module_.memory)); | 280 | const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory)); |
| 288 | 281 | ||
| 289 | const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions, | 282 | const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions, |
| 290 | MemoryState memory_state) { | 283 | MemoryState memory_state) { |
| @@ -327,4 +320,16 @@ void Process::ChangeStatus(ProcessStatus new_status) { | |||
| 327 | WakeupAllWaitingThreads(); | 320 | WakeupAllWaitingThreads(); |
| 328 | } | 321 | } |
| 329 | 322 | ||
| 323 | void Process::AllocateMainThreadStack(u64 stack_size) { | ||
| 324 | // The kernel always ensures that the given stack size is page aligned. | ||
| 325 | main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); | ||
| 326 | |||
| 327 | // Allocate and map the main thread stack | ||
| 328 | const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size; | ||
| 329 | vm_manager | ||
| 330 | .MapMemoryBlock(mapping_address, std::make_shared<PhysicalMemory>(main_thread_stack_size), | ||
| 331 | 0, main_thread_stack_size, MemoryState::Stack) | ||
| 332 | .Unwrap(); | ||
| 333 | } | ||
| 334 | |||
| 330 | } // namespace Kernel | 335 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index 83ea02bee..c2df451f3 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h | |||
| @@ -135,6 +135,11 @@ public: | |||
| 135 | return mutex; | 135 | return mutex; |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | /// Gets the address to the process' dedicated TLS region. | ||
| 139 | VAddr GetTLSRegionAddress() const { | ||
| 140 | return tls_region_address; | ||
| 141 | } | ||
| 142 | |||
| 138 | /// Gets the current status of the process | 143 | /// Gets the current status of the process |
| 139 | ProcessStatus GetStatus() const { | 144 | ProcessStatus GetStatus() const { |
| 140 | return status; | 145 | return status; |
| @@ -168,8 +173,24 @@ public: | |||
| 168 | return capabilities.GetPriorityMask(); | 173 | return capabilities.GetPriorityMask(); |
| 169 | } | 174 | } |
| 170 | 175 | ||
| 171 | u32 IsVirtualMemoryEnabled() const { | 176 | /// Gets the amount of secure memory to allocate for memory management. |
| 172 | return is_virtual_address_memory_enabled; | 177 | u32 GetSystemResourceSize() const { |
| 178 | return system_resource_size; | ||
| 179 | } | ||
| 180 | |||
| 181 | /// Gets the amount of secure memory currently in use for memory management. | ||
| 182 | u32 GetSystemResourceUsage() const { | ||
| 183 | // On hardware, this returns the amount of system resource memory that has | ||
| 184 | // been used by the kernel. This is problematic for Yuzu to emulate, because | ||
| 185 | // system resource memory is used for page tables -- and yuzu doesn't really | ||
| 186 | // have a way to calculate how much memory is required for page tables for | ||
| 187 | // the current process at any given time. | ||
| 188 | // TODO: Is this even worth implementing? Games may retrieve this value via | ||
| 189 | // an SDK function that gets used + available system resource size for debug | ||
| 190 | // or diagnostic purposes. However, it seems unlikely that a game would make | ||
| 191 | // decisions based on how much system memory is dedicated to its page tables. | ||
| 192 | // Is returning a value other than zero wise? | ||
| 193 | return 0; | ||
| 173 | } | 194 | } |
| 174 | 195 | ||
| 175 | /// Whether this process is an AArch64 or AArch32 process. | 196 | /// Whether this process is an AArch64 or AArch32 process. |
| @@ -196,15 +217,15 @@ public: | |||
| 196 | u64 GetTotalPhysicalMemoryAvailable() const; | 217 | u64 GetTotalPhysicalMemoryAvailable() const; |
| 197 | 218 | ||
| 198 | /// Retrieves the total physical memory available to this process in bytes, | 219 | /// Retrieves the total physical memory available to this process in bytes, |
| 199 | /// without the size of the personal heap added to it. | 220 | /// without the size of the personal system resource heap added to it. |
| 200 | u64 GetTotalPhysicalMemoryAvailableWithoutMmHeap() const; | 221 | u64 GetTotalPhysicalMemoryAvailableWithoutSystemResource() const; |
| 201 | 222 | ||
| 202 | /// Retrieves the total physical memory used by this process in bytes. | 223 | /// Retrieves the total physical memory used by this process in bytes. |
| 203 | u64 GetTotalPhysicalMemoryUsed() const; | 224 | u64 GetTotalPhysicalMemoryUsed() const; |
| 204 | 225 | ||
| 205 | /// Retrieves the total physical memory used by this process in bytes, | 226 | /// Retrieves the total physical memory used by this process in bytes, |
| 206 | /// without the size of the personal heap added to it. | 227 | /// without the size of the personal system resource heap added to it. |
| 207 | u64 GetTotalPhysicalMemoryUsedWithoutMmHeap() const; | 228 | u64 GetTotalPhysicalMemoryUsedWithoutSystemResource() const; |
| 208 | 229 | ||
| 209 | /// Gets the list of all threads created with this process as their owner. | 230 | /// Gets the list of all threads created with this process as their owner. |
| 210 | const std::list<const Thread*>& GetThreadList() const { | 231 | const std::list<const Thread*>& GetThreadList() const { |
| @@ -280,6 +301,9 @@ private: | |||
| 280 | /// a process signal. | 301 | /// a process signal. |
| 281 | void ChangeStatus(ProcessStatus new_status); | 302 | void ChangeStatus(ProcessStatus new_status); |
| 282 | 303 | ||
| 304 | /// Allocates the main thread stack for the process, given the stack size in bytes. | ||
| 305 | void AllocateMainThreadStack(u64 stack_size); | ||
| 306 | |||
| 283 | /// Memory manager for this process. | 307 | /// Memory manager for this process. |
| 284 | Kernel::VMManager vm_manager; | 308 | Kernel::VMManager vm_manager; |
| 285 | 309 | ||
| @@ -298,12 +322,16 @@ private: | |||
| 298 | /// Title ID corresponding to the process | 322 | /// Title ID corresponding to the process |
| 299 | u64 program_id = 0; | 323 | u64 program_id = 0; |
| 300 | 324 | ||
| 325 | /// Specifies additional memory to be reserved for the process's memory management by the | ||
| 326 | /// system. When this is non-zero, secure memory is allocated and used for page table allocation | ||
| 327 | /// instead of using the normal global page tables/memory block management. | ||
| 328 | u32 system_resource_size = 0; | ||
| 329 | |||
| 301 | /// Resource limit descriptor for this process | 330 | /// Resource limit descriptor for this process |
| 302 | SharedPtr<ResourceLimit> resource_limit; | 331 | SharedPtr<ResourceLimit> resource_limit; |
| 303 | 332 | ||
| 304 | /// The ideal CPU core for this process, threads are scheduled on this core by default. | 333 | /// The ideal CPU core for this process, threads are scheduled on this core by default. |
| 305 | u8 ideal_core = 0; | 334 | u8 ideal_core = 0; |
| 306 | u32 is_virtual_address_memory_enabled = 0; | ||
| 307 | 335 | ||
| 308 | /// The Thread Local Storage area is allocated as processes create threads, | 336 | /// The Thread Local Storage area is allocated as processes create threads, |
| 309 | /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part | 337 | /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part |
| @@ -338,6 +366,9 @@ private: | |||
| 338 | /// variable related facilities. | 366 | /// variable related facilities. |
| 339 | Mutex mutex; | 367 | Mutex mutex; |
| 340 | 368 | ||
| 369 | /// Address indicating the location of the process' dedicated TLS region. | ||
| 370 | VAddr tls_region_address = 0; | ||
| 371 | |||
| 341 | /// Random values for svcGetInfo RandomEntropy | 372 | /// Random values for svcGetInfo RandomEntropy |
| 342 | std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{}; | 373 | std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{}; |
| 343 | 374 | ||
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp index f15c5ee36..a815c4eea 100644 --- a/src/core/hle/kernel/shared_memory.cpp +++ b/src/core/hle/kernel/shared_memory.cpp | |||
| @@ -28,7 +28,7 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_ | |||
| 28 | shared_memory->other_permissions = other_permissions; | 28 | shared_memory->other_permissions = other_permissions; |
| 29 | 29 | ||
| 30 | if (address == 0) { | 30 | if (address == 0) { |
| 31 | shared_memory->backing_block = std::make_shared<std::vector<u8>>(size); | 31 | shared_memory->backing_block = std::make_shared<Kernel::PhysicalMemory>(size); |
| 32 | shared_memory->backing_block_offset = 0; | 32 | shared_memory->backing_block_offset = 0; |
| 33 | 33 | ||
| 34 | // Refresh the address mappings for the current process. | 34 | // Refresh the address mappings for the current process. |
| @@ -59,8 +59,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_ | |||
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | SharedPtr<SharedMemory> SharedMemory::CreateForApplet( | 61 | SharedPtr<SharedMemory> SharedMemory::CreateForApplet( |
| 62 | KernelCore& kernel, std::shared_ptr<std::vector<u8>> heap_block, std::size_t offset, u64 size, | 62 | KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset, |
| 63 | MemoryPermission permissions, MemoryPermission other_permissions, std::string name) { | 63 | u64 size, MemoryPermission permissions, MemoryPermission other_permissions, std::string name) { |
| 64 | SharedPtr<SharedMemory> shared_memory(new SharedMemory(kernel)); | 64 | SharedPtr<SharedMemory> shared_memory(new SharedMemory(kernel)); |
| 65 | 65 | ||
| 66 | shared_memory->owner_process = nullptr; | 66 | shared_memory->owner_process = nullptr; |
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h index c2b6155e1..01ca6dcd2 100644 --- a/src/core/hle/kernel/shared_memory.h +++ b/src/core/hle/kernel/shared_memory.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "core/hle/kernel/object.h" | 12 | #include "core/hle/kernel/object.h" |
| 13 | #include "core/hle/kernel/physical_memory.h" | ||
| 13 | #include "core/hle/kernel/process.h" | 14 | #include "core/hle/kernel/process.h" |
| 14 | #include "core/hle/result.h" | 15 | #include "core/hle/result.h" |
| 15 | 16 | ||
| @@ -62,12 +63,10 @@ public: | |||
| 62 | * block. | 63 | * block. |
| 63 | * @param name Optional object name, used for debugging purposes. | 64 | * @param name Optional object name, used for debugging purposes. |
| 64 | */ | 65 | */ |
| 65 | static SharedPtr<SharedMemory> CreateForApplet(KernelCore& kernel, | 66 | static SharedPtr<SharedMemory> CreateForApplet( |
| 66 | std::shared_ptr<std::vector<u8>> heap_block, | 67 | KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset, |
| 67 | std::size_t offset, u64 size, | 68 | u64 size, MemoryPermission permissions, MemoryPermission other_permissions, |
| 68 | MemoryPermission permissions, | 69 | std::string name = "Unknown Applet"); |
| 69 | MemoryPermission other_permissions, | ||
| 70 | std::string name = "Unknown Applet"); | ||
| 71 | 70 | ||
| 72 | std::string GetTypeName() const override { | 71 | std::string GetTypeName() const override { |
| 73 | return "SharedMemory"; | 72 | return "SharedMemory"; |
| @@ -135,7 +134,7 @@ private: | |||
| 135 | ~SharedMemory() override; | 134 | ~SharedMemory() override; |
| 136 | 135 | ||
| 137 | /// Backing memory for this shared memory block. | 136 | /// Backing memory for this shared memory block. |
| 138 | std::shared_ptr<std::vector<u8>> backing_block; | 137 | std::shared_ptr<PhysicalMemory> backing_block; |
| 139 | /// Offset into the backing block for this shared memory. | 138 | /// Offset into the backing block for this shared memory. |
| 140 | std::size_t backing_block_offset = 0; | 139 | std::size_t backing_block_offset = 0; |
| 141 | /// Size of the memory block. Page-aligned. | 140 | /// Size of the memory block. Page-aligned. |
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 332573a95..1fd1a732a 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -318,7 +318,14 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad | |||
| 318 | return result; | 318 | return result; |
| 319 | } | 319 | } |
| 320 | 320 | ||
| 321 | return vm_manager.UnmapRange(dst_addr, size); | 321 | const auto unmap_res = vm_manager.UnmapRange(dst_addr, size); |
| 322 | |||
| 323 | // Reprotect the source mapping on success | ||
| 324 | if (unmap_res.IsSuccess()) { | ||
| 325 | ASSERT(vm_manager.ReprotectRange(src_addr, size, VMAPermission::ReadWrite).IsSuccess()); | ||
| 326 | } | ||
| 327 | |||
| 328 | return unmap_res; | ||
| 322 | } | 329 | } |
| 323 | 330 | ||
| 324 | /// Connect to an OS service given the port name, returns the handle to the port to out | 331 | /// Connect to an OS service given the port name, returns the handle to the port to out |
| @@ -729,16 +736,16 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 729 | StackRegionBaseAddr = 14, | 736 | StackRegionBaseAddr = 14, |
| 730 | StackRegionSize = 15, | 737 | StackRegionSize = 15, |
| 731 | // 3.0.0+ | 738 | // 3.0.0+ |
| 732 | IsVirtualAddressMemoryEnabled = 16, | 739 | SystemResourceSize = 16, |
| 733 | PersonalMmHeapUsage = 17, | 740 | SystemResourceUsage = 17, |
| 734 | TitleId = 18, | 741 | TitleId = 18, |
| 735 | // 4.0.0+ | 742 | // 4.0.0+ |
| 736 | PrivilegedProcessId = 19, | 743 | PrivilegedProcessId = 19, |
| 737 | // 5.0.0+ | 744 | // 5.0.0+ |
| 738 | UserExceptionContextAddr = 20, | 745 | UserExceptionContextAddr = 20, |
| 739 | // 6.0.0+ | 746 | // 6.0.0+ |
| 740 | TotalPhysicalMemoryAvailableWithoutMmHeap = 21, | 747 | TotalPhysicalMemoryAvailableWithoutSystemResource = 21, |
| 741 | TotalPhysicalMemoryUsedWithoutMmHeap = 22, | 748 | TotalPhysicalMemoryUsedWithoutSystemResource = 22, |
| 742 | }; | 749 | }; |
| 743 | 750 | ||
| 744 | const auto info_id_type = static_cast<GetInfoType>(info_id); | 751 | const auto info_id_type = static_cast<GetInfoType>(info_id); |
| @@ -756,12 +763,12 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 756 | case GetInfoType::StackRegionSize: | 763 | case GetInfoType::StackRegionSize: |
| 757 | case GetInfoType::TotalPhysicalMemoryAvailable: | 764 | case GetInfoType::TotalPhysicalMemoryAvailable: |
| 758 | case GetInfoType::TotalPhysicalMemoryUsed: | 765 | case GetInfoType::TotalPhysicalMemoryUsed: |
| 759 | case GetInfoType::IsVirtualAddressMemoryEnabled: | 766 | case GetInfoType::SystemResourceSize: |
| 760 | case GetInfoType::PersonalMmHeapUsage: | 767 | case GetInfoType::SystemResourceUsage: |
| 761 | case GetInfoType::TitleId: | 768 | case GetInfoType::TitleId: |
| 762 | case GetInfoType::UserExceptionContextAddr: | 769 | case GetInfoType::UserExceptionContextAddr: |
| 763 | case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap: | 770 | case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource: |
| 764 | case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: { | 771 | case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource: { |
| 765 | if (info_sub_id != 0) { | 772 | if (info_sub_id != 0) { |
| 766 | return ERR_INVALID_ENUM_VALUE; | 773 | return ERR_INVALID_ENUM_VALUE; |
| 767 | } | 774 | } |
| @@ -822,8 +829,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 822 | *result = process->GetTotalPhysicalMemoryUsed(); | 829 | *result = process->GetTotalPhysicalMemoryUsed(); |
| 823 | return RESULT_SUCCESS; | 830 | return RESULT_SUCCESS; |
| 824 | 831 | ||
| 825 | case GetInfoType::IsVirtualAddressMemoryEnabled: | 832 | case GetInfoType::SystemResourceSize: |
| 826 | *result = process->IsVirtualMemoryEnabled(); | 833 | *result = process->GetSystemResourceSize(); |
| 834 | return RESULT_SUCCESS; | ||
| 835 | |||
| 836 | case GetInfoType::SystemResourceUsage: | ||
| 837 | LOG_WARNING(Kernel_SVC, "(STUBBED) Attempted to query system resource usage"); | ||
| 838 | *result = process->GetSystemResourceUsage(); | ||
| 827 | return RESULT_SUCCESS; | 839 | return RESULT_SUCCESS; |
| 828 | 840 | ||
| 829 | case GetInfoType::TitleId: | 841 | case GetInfoType::TitleId: |
| @@ -831,17 +843,15 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 831 | return RESULT_SUCCESS; | 843 | return RESULT_SUCCESS; |
| 832 | 844 | ||
| 833 | case GetInfoType::UserExceptionContextAddr: | 845 | case GetInfoType::UserExceptionContextAddr: |
| 834 | LOG_WARNING(Kernel_SVC, | 846 | *result = process->GetTLSRegionAddress(); |
| 835 | "(STUBBED) Attempted to query user exception context address, returned 0"); | ||
| 836 | *result = 0; | ||
| 837 | return RESULT_SUCCESS; | 847 | return RESULT_SUCCESS; |
| 838 | 848 | ||
| 839 | case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap: | 849 | case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource: |
| 840 | *result = process->GetTotalPhysicalMemoryAvailable(); | 850 | *result = process->GetTotalPhysicalMemoryAvailableWithoutSystemResource(); |
| 841 | return RESULT_SUCCESS; | 851 | return RESULT_SUCCESS; |
| 842 | 852 | ||
| 843 | case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: | 853 | case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource: |
| 844 | *result = process->GetTotalPhysicalMemoryUsedWithoutMmHeap(); | 854 | *result = process->GetTotalPhysicalMemoryUsedWithoutSystemResource(); |
| 845 | return RESULT_SUCCESS; | 855 | return RESULT_SUCCESS; |
| 846 | 856 | ||
| 847 | default: | 857 | default: |
| @@ -946,6 +956,86 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 946 | } | 956 | } |
| 947 | } | 957 | } |
| 948 | 958 | ||
| 959 | /// Maps memory at a desired address | ||
| 960 | static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { | ||
| 961 | LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); | ||
| 962 | |||
| 963 | if (!Common::Is4KBAligned(addr)) { | ||
| 964 | LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr); | ||
| 965 | return ERR_INVALID_ADDRESS; | ||
| 966 | } | ||
| 967 | |||
| 968 | if (!Common::Is4KBAligned(size)) { | ||
| 969 | LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); | ||
| 970 | return ERR_INVALID_SIZE; | ||
| 971 | } | ||
| 972 | |||
| 973 | if (size == 0) { | ||
| 974 | LOG_ERROR(Kernel_SVC, "Size is zero"); | ||
| 975 | return ERR_INVALID_SIZE; | ||
| 976 | } | ||
| 977 | |||
| 978 | if (!(addr < addr + size)) { | ||
| 979 | LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); | ||
| 980 | return ERR_INVALID_MEMORY_RANGE; | ||
| 981 | } | ||
| 982 | |||
| 983 | Process* const current_process = system.Kernel().CurrentProcess(); | ||
| 984 | auto& vm_manager = current_process->VMManager(); | ||
| 985 | |||
| 986 | if (current_process->GetSystemResourceSize() == 0) { | ||
| 987 | LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); | ||
| 988 | return ERR_INVALID_STATE; | ||
| 989 | } | ||
| 990 | |||
| 991 | if (!vm_manager.IsWithinMapRegion(addr, size)) { | ||
| 992 | LOG_ERROR(Kernel_SVC, "Range not within map region"); | ||
| 993 | return ERR_INVALID_MEMORY_RANGE; | ||
| 994 | } | ||
| 995 | |||
| 996 | return vm_manager.MapPhysicalMemory(addr, size); | ||
| 997 | } | ||
| 998 | |||
| 999 | /// Unmaps memory previously mapped via MapPhysicalMemory | ||
| 1000 | static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { | ||
| 1001 | LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); | ||
| 1002 | |||
| 1003 | if (!Common::Is4KBAligned(addr)) { | ||
| 1004 | LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr); | ||
| 1005 | return ERR_INVALID_ADDRESS; | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | if (!Common::Is4KBAligned(size)) { | ||
| 1009 | LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); | ||
| 1010 | return ERR_INVALID_SIZE; | ||
| 1011 | } | ||
| 1012 | |||
| 1013 | if (size == 0) { | ||
| 1014 | LOG_ERROR(Kernel_SVC, "Size is zero"); | ||
| 1015 | return ERR_INVALID_SIZE; | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | if (!(addr < addr + size)) { | ||
| 1019 | LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); | ||
| 1020 | return ERR_INVALID_MEMORY_RANGE; | ||
| 1021 | } | ||
| 1022 | |||
| 1023 | Process* const current_process = system.Kernel().CurrentProcess(); | ||
| 1024 | auto& vm_manager = current_process->VMManager(); | ||
| 1025 | |||
| 1026 | if (current_process->GetSystemResourceSize() == 0) { | ||
| 1027 | LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); | ||
| 1028 | return ERR_INVALID_STATE; | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | if (!vm_manager.IsWithinMapRegion(addr, size)) { | ||
| 1032 | LOG_ERROR(Kernel_SVC, "Range not within map region"); | ||
| 1033 | return ERR_INVALID_MEMORY_RANGE; | ||
| 1034 | } | ||
| 1035 | |||
| 1036 | return vm_manager.UnmapPhysicalMemory(addr, size); | ||
| 1037 | } | ||
| 1038 | |||
| 949 | /// Sets the thread activity | 1039 | /// Sets the thread activity |
| 950 | static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) { | 1040 | static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) { |
| 951 | LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity); | 1041 | LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity); |
| @@ -1647,8 +1737,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var | |||
| 1647 | // Wait for an address (via Address Arbiter) | 1737 | // Wait for an address (via Address Arbiter) |
| 1648 | static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, | 1738 | static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, |
| 1649 | s64 timeout) { | 1739 | s64 timeout) { |
| 1650 | LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", | 1740 | LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address, |
| 1651 | address, type, value, timeout); | 1741 | type, value, timeout); |
| 1652 | 1742 | ||
| 1653 | // If the passed address is a kernel virtual address, return invalid memory state. | 1743 | // If the passed address is a kernel virtual address, return invalid memory state. |
| 1654 | if (Memory::IsKernelVirtualAddress(address)) { | 1744 | if (Memory::IsKernelVirtualAddress(address)) { |
| @@ -1670,8 +1760,8 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, | |||
| 1670 | // Signals to an address (via Address Arbiter) | 1760 | // Signals to an address (via Address Arbiter) |
| 1671 | static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value, | 1761 | static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value, |
| 1672 | s32 num_to_wake) { | 1762 | s32 num_to_wake) { |
| 1673 | LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}", | 1763 | LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}", |
| 1674 | address, type, value, num_to_wake); | 1764 | address, type, value, num_to_wake); |
| 1675 | 1765 | ||
| 1676 | // If the passed address is a kernel virtual address, return invalid memory state. | 1766 | // If the passed address is a kernel virtual address, return invalid memory state. |
| 1677 | if (Memory::IsKernelVirtualAddress(address)) { | 1767 | if (Memory::IsKernelVirtualAddress(address)) { |
| @@ -2303,8 +2393,8 @@ static const FunctionDef SVC_Table[] = { | |||
| 2303 | {0x29, SvcWrap<GetInfo>, "GetInfo"}, | 2393 | {0x29, SvcWrap<GetInfo>, "GetInfo"}, |
| 2304 | {0x2A, nullptr, "FlushEntireDataCache"}, | 2394 | {0x2A, nullptr, "FlushEntireDataCache"}, |
| 2305 | {0x2B, nullptr, "FlushDataCache"}, | 2395 | {0x2B, nullptr, "FlushDataCache"}, |
| 2306 | {0x2C, nullptr, "MapPhysicalMemory"}, | 2396 | {0x2C, SvcWrap<MapPhysicalMemory>, "MapPhysicalMemory"}, |
| 2307 | {0x2D, nullptr, "UnmapPhysicalMemory"}, | 2397 | {0x2D, SvcWrap<UnmapPhysicalMemory>, "UnmapPhysicalMemory"}, |
| 2308 | {0x2E, nullptr, "GetFutureThreadInfo"}, | 2398 | {0x2E, nullptr, "GetFutureThreadInfo"}, |
| 2309 | {0x2F, nullptr, "GetLastThreadInfo"}, | 2399 | {0x2F, nullptr, "GetLastThreadInfo"}, |
| 2310 | {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, | 2400 | {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, |
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index 865473c6f..c2d8d0dc3 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h | |||
| @@ -32,6 +32,11 @@ void SvcWrap(Core::System& system) { | |||
| 32 | FuncReturn(system, func(system, Param(system, 0)).raw); | 32 | FuncReturn(system, func(system, Param(system, 0)).raw); |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | template <ResultCode func(Core::System&, u64, u64)> | ||
| 36 | void SvcWrap(Core::System& system) { | ||
| 37 | FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw); | ||
| 38 | } | ||
| 39 | |||
| 35 | template <ResultCode func(Core::System&, u32)> | 40 | template <ResultCode func(Core::System&, u32)> |
| 36 | void SvcWrap(Core::System& system) { | 41 | void SvcWrap(Core::System& system) { |
| 37 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); | 42 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); |
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp index 26c4e5e67..1113c815e 100644 --- a/src/core/hle/kernel/transfer_memory.cpp +++ b/src/core/hle/kernel/transfer_memory.cpp | |||
| @@ -47,7 +47,7 @@ ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission p | |||
| 47 | return ERR_INVALID_STATE; | 47 | return ERR_INVALID_STATE; |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | backing_block = std::make_shared<std::vector<u8>>(size); | 50 | backing_block = std::make_shared<PhysicalMemory>(size); |
| 51 | 51 | ||
| 52 | const auto map_state = owner_permissions == MemoryPermission::None | 52 | const auto map_state = owner_permissions == MemoryPermission::None |
| 53 | ? MemoryState::TransferMemoryIsolated | 53 | ? MemoryState::TransferMemoryIsolated |
diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h index a140b1e2b..6be9dc094 100644 --- a/src/core/hle/kernel/transfer_memory.h +++ b/src/core/hle/kernel/transfer_memory.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "core/hle/kernel/object.h" | 10 | #include "core/hle/kernel/object.h" |
| 11 | #include "core/hle/kernel/physical_memory.h" | ||
| 11 | 12 | ||
| 12 | union ResultCode; | 13 | union ResultCode; |
| 13 | 14 | ||
| @@ -82,7 +83,7 @@ private: | |||
| 82 | ~TransferMemory() override; | 83 | ~TransferMemory() override; |
| 83 | 84 | ||
| 84 | /// Memory block backing this instance. | 85 | /// Memory block backing this instance. |
| 85 | std::shared_ptr<std::vector<u8>> backing_block; | 86 | std::shared_ptr<PhysicalMemory> backing_block; |
| 86 | 87 | ||
| 87 | /// The base address for the memory managed by this instance. | 88 | /// The base address for the memory managed by this instance. |
| 88 | VAddr base_address = 0; | 89 | VAddr base_address = 0; |
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index 501544090..40cea1e7c 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp | |||
| @@ -5,13 +5,15 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <iterator> | 6 | #include <iterator> |
| 7 | #include <utility> | 7 | #include <utility> |
| 8 | #include "common/alignment.h" | ||
| 8 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 9 | #include "common/logging/log.h" | 10 | #include "common/logging/log.h" |
| 10 | #include "common/memory_hook.h" | 11 | #include "common/memory_hook.h" |
| 11 | #include "core/arm/arm_interface.h" | ||
| 12 | #include "core/core.h" | 12 | #include "core/core.h" |
| 13 | #include "core/file_sys/program_metadata.h" | 13 | #include "core/file_sys/program_metadata.h" |
| 14 | #include "core/hle/kernel/errors.h" | 14 | #include "core/hle/kernel/errors.h" |
| 15 | #include "core/hle/kernel/process.h" | ||
| 16 | #include "core/hle/kernel/resource_limit.h" | ||
| 15 | #include "core/hle/kernel/vm_manager.h" | 17 | #include "core/hle/kernel/vm_manager.h" |
| 16 | #include "core/memory.h" | 18 | #include "core/memory.h" |
| 17 | #include "core/memory_setup.h" | 19 | #include "core/memory_setup.h" |
| @@ -49,10 +51,14 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { | |||
| 49 | type != next.type) { | 51 | type != next.type) { |
| 50 | return false; | 52 | return false; |
| 51 | } | 53 | } |
| 52 | if (type == VMAType::AllocatedMemoryBlock && | 54 | if ((attribute & MemoryAttribute::DeviceMapped) == MemoryAttribute::DeviceMapped) { |
| 53 | (backing_block != next.backing_block || offset + size != next.offset)) { | 55 | // TODO: Can device mapped memory be merged sanely? |
| 56 | // Not merging it may cause inaccuracies versus hardware when memory layout is queried. | ||
| 54 | return false; | 57 | return false; |
| 55 | } | 58 | } |
| 59 | if (type == VMAType::AllocatedMemoryBlock) { | ||
| 60 | return true; | ||
| 61 | } | ||
| 56 | if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) { | 62 | if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) { |
| 57 | return false; | 63 | return false; |
| 58 | } | 64 | } |
| @@ -98,9 +104,9 @@ bool VMManager::IsValidHandle(VMAHandle handle) const { | |||
| 98 | } | 104 | } |
| 99 | 105 | ||
| 100 | ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, | 106 | ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, |
| 101 | std::shared_ptr<std::vector<u8>> block, | 107 | std::shared_ptr<PhysicalMemory> block, |
| 102 | std::size_t offset, u64 size, | 108 | std::size_t offset, u64 size, |
| 103 | MemoryState state) { | 109 | MemoryState state, VMAPermission perm) { |
| 104 | ASSERT(block != nullptr); | 110 | ASSERT(block != nullptr); |
| 105 | ASSERT(offset + size <= block->size()); | 111 | ASSERT(offset + size <= block->size()); |
| 106 | 112 | ||
| @@ -109,17 +115,8 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, | |||
| 109 | VirtualMemoryArea& final_vma = vma_handle->second; | 115 | VirtualMemoryArea& final_vma = vma_handle->second; |
| 110 | ASSERT(final_vma.size == size); | 116 | ASSERT(final_vma.size == size); |
| 111 | 117 | ||
| 112 | system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset, | ||
| 113 | VMAPermission::ReadWriteExecute); | ||
| 114 | system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset, | ||
| 115 | VMAPermission::ReadWriteExecute); | ||
| 116 | system.ArmInterface(2).MapBackingMemory(target, size, block->data() + offset, | ||
| 117 | VMAPermission::ReadWriteExecute); | ||
| 118 | system.ArmInterface(3).MapBackingMemory(target, size, block->data() + offset, | ||
| 119 | VMAPermission::ReadWriteExecute); | ||
| 120 | |||
| 121 | final_vma.type = VMAType::AllocatedMemoryBlock; | 118 | final_vma.type = VMAType::AllocatedMemoryBlock; |
| 122 | final_vma.permissions = VMAPermission::ReadWrite; | 119 | final_vma.permissions = perm; |
| 123 | final_vma.state = state; | 120 | final_vma.state = state; |
| 124 | final_vma.backing_block = std::move(block); | 121 | final_vma.backing_block = std::move(block); |
| 125 | final_vma.offset = offset; | 122 | final_vma.offset = offset; |
| @@ -137,11 +134,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me | |||
| 137 | VirtualMemoryArea& final_vma = vma_handle->second; | 134 | VirtualMemoryArea& final_vma = vma_handle->second; |
| 138 | ASSERT(final_vma.size == size); | 135 | ASSERT(final_vma.size == size); |
| 139 | 136 | ||
| 140 | system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); | ||
| 141 | system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); | ||
| 142 | system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); | ||
| 143 | system.ArmInterface(3).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); | ||
| 144 | |||
| 145 | final_vma.type = VMAType::BackingMemory; | 137 | final_vma.type = VMAType::BackingMemory; |
| 146 | final_vma.permissions = VMAPermission::ReadWrite; | 138 | final_vma.permissions = VMAPermission::ReadWrite; |
| 147 | final_vma.state = state; | 139 | final_vma.state = state; |
| @@ -230,11 +222,6 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) { | |||
| 230 | 222 | ||
| 231 | ASSERT(FindVMA(target)->second.size >= size); | 223 | ASSERT(FindVMA(target)->second.size >= size); |
| 232 | 224 | ||
| 233 | system.ArmInterface(0).UnmapMemory(target, size); | ||
| 234 | system.ArmInterface(1).UnmapMemory(target, size); | ||
| 235 | system.ArmInterface(2).UnmapMemory(target, size); | ||
| 236 | system.ArmInterface(3).UnmapMemory(target, size); | ||
| 237 | |||
| 238 | return RESULT_SUCCESS; | 225 | return RESULT_SUCCESS; |
| 239 | } | 226 | } |
| 240 | 227 | ||
| @@ -274,7 +261,7 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) { | |||
| 274 | 261 | ||
| 275 | if (heap_memory == nullptr) { | 262 | if (heap_memory == nullptr) { |
| 276 | // Initialize heap | 263 | // Initialize heap |
| 277 | heap_memory = std::make_shared<std::vector<u8>>(size); | 264 | heap_memory = std::make_shared<PhysicalMemory>(size); |
| 278 | heap_end = heap_region_base + size; | 265 | heap_end = heap_region_base + size; |
| 279 | } else { | 266 | } else { |
| 280 | UnmapRange(heap_region_base, GetCurrentHeapSize()); | 267 | UnmapRange(heap_region_base, GetCurrentHeapSize()); |
| @@ -308,6 +295,166 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) { | |||
| 308 | return MakeResult<VAddr>(heap_region_base); | 295 | return MakeResult<VAddr>(heap_region_base); |
| 309 | } | 296 | } |
| 310 | 297 | ||
| 298 | ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) { | ||
| 299 | const auto end_addr = target + size; | ||
| 300 | const auto last_addr = end_addr - 1; | ||
| 301 | VAddr cur_addr = target; | ||
| 302 | |||
| 303 | ResultCode result = RESULT_SUCCESS; | ||
| 304 | |||
| 305 | // Check how much of [target, target + size) is already mapped (state != Unmapped). | ||
| 306 | const auto mapped_size_result = SizeOfAllocatedVMAsInRange(target, size); | ||
| 307 | if (mapped_size_result.Failed()) { | ||
| 308 | return mapped_size_result.Code(); | ||
| 309 | } | ||
| 310 | |||
| 311 | // If the whole range is already mapped, there is nothing left to do. | ||
| 312 | const std::size_t mapped_size = *mapped_size_result; | ||
| 313 | if (mapped_size == size) { | ||
| 314 | return RESULT_SUCCESS; | ||
| 315 | } | ||
| 316 | |||
| 317 | // Check that the process' resource limit has room for the part that still has to be mapped. | ||
| 318 | const auto res_limit = system.CurrentProcess()->GetResourceLimit(); | ||
| 319 | const u64 physmem_remaining = res_limit->GetMaxResourceValue(ResourceType::PhysicalMemory) - | ||
| 320 | res_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory); | ||
| 321 | if (physmem_remaining < (size - mapped_size)) { | ||
| 322 | return ERR_RESOURCE_LIMIT_EXCEEDED; | ||
| 323 | } | ||
| 324 | |||
| 325 | // Keep track of the memory regions we map, so they can be unmapped again on failure. | ||
| 326 | std::vector<std::pair<u64, u64>> mapped_regions; | ||
| 327 | |||
| 328 | // Walk the VMAs covering the range, mapping a new block over every unmapped one. | ||
| 329 | { | ||
| 330 | cur_addr = target; | ||
| 331 | |||
| 332 | auto iter = FindVMA(target); | ||
| 333 | ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end"); | ||
| 334 | |||
| 335 | while (true) { | ||
| 336 | const auto& vma = iter->second; | ||
| 337 | const auto vma_start = vma.base; | ||
| 338 | const auto vma_end = vma_start + vma.size; | ||
| 339 | const auto vma_last = vma_end - 1; | ||
| 340 | |||
| 341 | // Map the part of this VMA inside the requested range as ReadWrite heap, if it is unmapped. | ||
| 342 | const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr); | ||
| 343 | if (vma.state == MemoryState::Unmapped) { | ||
| 344 | const auto map_res = | ||
| 345 | MapMemoryBlock(cur_addr, std::make_shared<PhysicalMemory>(map_size, 0), 0, | ||
| 346 | map_size, MemoryState::Heap, VMAPermission::ReadWrite); | ||
| 347 | result = map_res.Code(); | ||
| 348 | if (result.IsError()) { | ||
| 349 | break; | ||
| 350 | } | ||
| 351 | |||
| 352 | mapped_regions.emplace_back(cur_addr, map_size); | ||
| 353 | } | ||
| 354 | |||
| 355 | // Break once this VMA (bounds captured above, before mapping) reaches the end of the range. | ||
| 356 | if (last_addr <= vma_last) { | ||
| 357 | break; | ||
| 358 | } | ||
| 359 | |||
| 360 | // Advance to the next block; the VMA is looked up again by address rather than by iterator. | ||
| 361 | cur_addr = vma_end; | ||
| 362 | iter = FindVMA(cur_addr); | ||
| 363 | ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end"); | ||
| 364 | } | ||
| 365 | } | ||
| 366 | |||
| 367 | // If we failed, unmap the regions mapped by this call and return the error. | ||
| 368 | if (result.IsError()) { | ||
| 369 | for (const auto [unmap_address, unmap_size] : mapped_regions) { | ||
| 370 | ASSERT_MSG(UnmapRange(unmap_address, unmap_size).IsSuccess(), | ||
| 371 | "MapPhysicalMemory un-map on error"); | ||
| 372 | } | ||
| 373 | |||
| 374 | return result; | ||
| 375 | } | ||
| 376 | |||
| 377 | // Update amount of mapped physical memory by the size that was newly mapped. | ||
| 378 | physical_memory_mapped += size - mapped_size; | ||
| 379 | |||
| 380 | return RESULT_SUCCESS; | ||
| 381 | } | ||
| 382 | |||
| 383 | ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) { | ||
| 384 | const auto end_addr = target + size; | ||
| 385 | const auto last_addr = end_addr - 1; | ||
| 386 | VAddr cur_addr = target; | ||
| 387 | |||
| 388 | ResultCode result = RESULT_SUCCESS; | ||
| 389 | |||
| 390 | // Validate the range and measure how much of it is heap memory that can be unmapped. | ||
| 391 | const auto mapped_size_result = SizeOfUnmappablePhysicalMemoryInRange(target, size); | ||
| 392 | if (mapped_size_result.Failed()) { | ||
| 393 | return mapped_size_result.Code(); | ||
| 394 | } | ||
| 395 | |||
| 396 | // If nothing in the range is mapped, there is nothing to unmap. | ||
| 397 | const std::size_t mapped_size = *mapped_size_result; | ||
| 398 | if (mapped_size == 0) { | ||
| 399 | return RESULT_SUCCESS; | ||
| 400 | } | ||
| 401 | |||
| 402 | // Keep track of the memory regions we unmap, so they can be re-mapped on failure. | ||
| 403 | std::vector<std::pair<u64, u64>> unmapped_regions; | ||
| 404 | |||
| 405 | // Walk the VMAs covering the range, unmapping the heap parts. | ||
| 406 | { | ||
| 407 | cur_addr = target; | ||
| 408 | |||
| 409 | auto iter = FindVMA(target); | ||
| 410 | ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end"); | ||
| 411 | |||
| 412 | while (true) { | ||
| 413 | const auto& vma = iter->second; | ||
| 414 | const auto vma_start = vma.base; | ||
| 415 | const auto vma_end = vma_start + vma.size; | ||
| 416 | const auto vma_last = vma_end - 1; | ||
| 417 | |||
| 418 | // Unmap the part of this VMA inside the requested range, but only if it is heap memory. | ||
| 419 | const auto unmap_size = std::min(end_addr - cur_addr, vma_end - cur_addr); | ||
| 420 | if (vma.state == MemoryState::Heap) { | ||
| 421 | result = UnmapRange(cur_addr, unmap_size); | ||
| 422 | if (result.IsError()) { | ||
| 423 | break; | ||
| 424 | } | ||
| 425 | |||
| 426 | unmapped_regions.emplace_back(cur_addr, unmap_size); | ||
| 427 | } | ||
| 428 | |||
| 429 | // Break once this VMA (bounds captured above, before unmapping) reaches the end of the range. | ||
| 430 | if (last_addr <= vma_last) { | ||
| 431 | break; | ||
| 432 | } | ||
| 433 | |||
| 434 | // Advance to the next block; the VMA is looked up again by address rather than by iterator. | ||
| 435 | cur_addr = vma_end; | ||
| 436 | iter = FindVMA(cur_addr); | ||
| 437 | ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end"); | ||
| 438 | } | ||
| 439 | } | ||
| 440 | |||
| 441 | // If we failed, re-map the regions unmapped so far and report the error to the caller. | ||
| 442 | // TODO: Preserve memory contents? NOTE(review): re-map uses VMAPermission::None - confirm. | ||
| 443 | if (result.IsError()) { | ||
| 444 | for (const auto [map_address, map_size] : unmapped_regions) { | ||
| 445 | const auto remap_res = | ||
| 446 | MapMemoryBlock(map_address, std::make_shared<PhysicalMemory>(map_size, 0), 0, | ||
| 447 | map_size, MemoryState::Heap, VMAPermission::None); | ||
| 448 | ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error"); | ||
| 449 | } | ||
| 450 | return result; | ||
| 451 | } | ||
| 452 | |||
| 453 | // Update mapped amount; only reached when every heap region was unmapped successfully. | ||
| 454 | physical_memory_mapped -= mapped_size; | ||
| 455 | return RESULT_SUCCESS; | ||
| 456 | } | ||
| 457 | |||
| 311 | ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) { | 458 | ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) { |
| 312 | constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped; | 459 | constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped; |
| 313 | const auto src_check_result = CheckRangeState( | 460 | const auto src_check_result = CheckRangeState( |
| @@ -447,7 +594,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem | |||
| 447 | ASSERT_MSG(vma_offset + size <= vma->second.size, | 594 | ASSERT_MSG(vma_offset + size <= vma->second.size, |
| 448 | "Shared memory exceeds bounds of mapped block"); | 595 | "Shared memory exceeds bounds of mapped block"); |
| 449 | 596 | ||
| 450 | const std::shared_ptr<std::vector<u8>>& backing_block = vma->second.backing_block; | 597 | const std::shared_ptr<PhysicalMemory>& backing_block = vma->second.backing_block; |
| 451 | const std::size_t backing_block_offset = vma->second.offset + vma_offset; | 598 | const std::size_t backing_block_offset = vma->second.offset + vma_offset; |
| 452 | 599 | ||
| 453 | CASCADE_RESULT(auto new_vma, | 600 | CASCADE_RESULT(auto new_vma, |
| @@ -455,12 +602,12 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem | |||
| 455 | // Protect mirror with permissions from old region | 602 | // Protect mirror with permissions from old region |
| 456 | Reprotect(new_vma, vma->second.permissions); | 603 | Reprotect(new_vma, vma->second.permissions); |
| 457 | // Remove permissions from old region | 604 | // Remove permissions from old region |
| 458 | Reprotect(vma, VMAPermission::None); | 605 | ReprotectRange(src_addr, size, VMAPermission::None); |
| 459 | 606 | ||
| 460 | return RESULT_SUCCESS; | 607 | return RESULT_SUCCESS; |
| 461 | } | 608 | } |
| 462 | 609 | ||
| 463 | void VMManager::RefreshMemoryBlockMappings(const std::vector<u8>* block) { | 610 | void VMManager::RefreshMemoryBlockMappings(const PhysicalMemory* block) { |
| 464 | // If this ever proves to have a noticeable performance impact, allow users of the function to | 611 | // If this ever proves to have a noticeable performance impact, allow users of the function to |
| 465 | // specify a specific range of addresses to limit the scan to. | 612 | // specify a specific range of addresses to limit the scan to. |
| 466 | for (const auto& p : vma_map) { | 613 | for (const auto& p : vma_map) { |
| @@ -588,14 +735,14 @@ VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) { | |||
| 588 | VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { | 735 | VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { |
| 589 | const VMAIter next_vma = std::next(iter); | 736 | const VMAIter next_vma = std::next(iter); |
| 590 | if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) { | 737 | if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) { |
| 591 | iter->second.size += next_vma->second.size; | 738 | MergeAdjacentVMA(iter->second, next_vma->second); |
| 592 | vma_map.erase(next_vma); | 739 | vma_map.erase(next_vma); |
| 593 | } | 740 | } |
| 594 | 741 | ||
| 595 | if (iter != vma_map.begin()) { | 742 | if (iter != vma_map.begin()) { |
| 596 | VMAIter prev_vma = std::prev(iter); | 743 | VMAIter prev_vma = std::prev(iter); |
| 597 | if (prev_vma->second.CanBeMergedWith(iter->second)) { | 744 | if (prev_vma->second.CanBeMergedWith(iter->second)) { |
| 598 | prev_vma->second.size += iter->second.size; | 745 | MergeAdjacentVMA(prev_vma->second, iter->second); |
| 599 | vma_map.erase(iter); | 746 | vma_map.erase(iter); |
| 600 | iter = prev_vma; | 747 | iter = prev_vma; |
| 601 | } | 748 | } |
| @@ -604,6 +751,38 @@ VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { | |||
| 604 | return iter; | 751 | return iter; |
| 605 | } | 752 | } |
| 606 | 753 | ||
| 754 | void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right) { | ||
| 755 | ASSERT(left.CanBeMergedWith(right)); | ||
| 756 | |||
| 757 | // Allocated blocks whose backing storage is not contiguous get their storage joined before merging. | ||
| 758 | if (left.type == VMAType::AllocatedMemoryBlock && | ||
| 759 | (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) { | ||
| 760 | // If left spans its whole backing block we can append to it instead of copying both sides. | ||
| 761 | if (left.offset == 0 && left.size == left.backing_block->size()) { | ||
| 762 | // Fast case: left is an entire backing block; append right's bytes to it in place. | ||
| 763 | left.backing_block->insert(left.backing_block->end(), | ||
| 764 | right.backing_block->begin() + right.offset, | ||
| 765 | right.backing_block->begin() + right.offset + right.size); | ||
| 766 | } else { | ||
| 767 | // Slow case: copy left's and right's bytes into a new block that becomes left's backing block. | ||
| 768 | auto new_memory = std::make_shared<PhysicalMemory>(); | ||
| 769 | new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset, | ||
| 770 | left.backing_block->begin() + left.offset + left.size); | ||
| 771 | new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset, | ||
| 772 | right.backing_block->begin() + right.offset + right.size); | ||
| 773 | left.backing_block = new_memory; | ||
| 774 | left.offset = 0; | ||
| 775 | } | ||
| 776 | |||
| 777 | // Grow left first, then refresh the page table, because the backing memory changed. | ||
| 778 | left.size += right.size; | ||
| 779 | UpdatePageTableForVMA(left); | ||
| 780 | } else { | ||
| 781 | // Storage is already contiguous, or this is not an allocated block: just update the size. | ||
| 782 | left.size += right.size; | ||
| 783 | } | ||
| 784 | } | ||
| 785 | |||
| 607 | void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { | 786 | void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { |
| 608 | switch (vma.type) { | 787 | switch (vma.type) { |
| 609 | case VMAType::Free: | 788 | case VMAType::Free: |
| @@ -778,6 +957,84 @@ VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, Memo | |||
| 778 | std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask)); | 957 | std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask)); |
| 779 | } | 958 | } |
| 780 | 959 | ||
| 960 | ResultVal<std::size_t> VMManager::SizeOfAllocatedVMAsInRange(VAddr address, | ||
| 961 | std::size_t size) const { | ||
| 962 | const VAddr range_end = address + size; | ||
| 963 | const VAddr range_last = range_end - 1; | ||
| 964 | |||
| 965 | VAddr cursor = address; | ||
| 966 | std::size_t total = 0; | ||
| 967 | auto iter = FindVMA(cursor); | ||
| 968 | ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end"); | ||
| 969 | |||
| 970 | for (;;) { | ||
| 971 | const auto& area = iter->second; | ||
| 972 | const VAddr area_end = area.base + area.size; | ||
| 973 | |||
| 974 | // Count only the part of a mapped area that falls inside the range. | ||
| 975 | if (area.state != MemoryState::Unmapped) { | ||
| 976 | const VAddr to_range_end = range_end - cursor; | ||
| 977 | const VAddr to_area_end = area_end - cursor; | ||
| 978 | total += std::min(to_range_end, to_area_end); | ||
| 979 | } | ||
| 980 | |||
| 981 | // The area containing the last address of the range ends the walk. | ||
| 982 | if (range_last <= area_end - 1) { | ||
| 983 | break; | ||
| 984 | } | ||
| 985 | |||
| 986 | // Continue from the start of the following area. | ||
| 987 | cursor = area_end; | ||
| 988 | ++iter; | ||
| 989 | ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end"); | ||
| 990 | } | ||
| 991 | |||
| 992 | return MakeResult(total); | ||
| 993 | } | ||
| 994 | |||
| 995 | ResultVal<std::size_t> VMManager::SizeOfUnmappablePhysicalMemoryInRange(VAddr address, | ||
| 996 | std::size_t size) const { | ||
| 997 | const VAddr range_end = address + size; | ||
| 998 | const VAddr range_last = range_end - 1; | ||
| 999 | |||
| 1000 | VAddr cursor = address; | ||
| 1001 | std::size_t total = 0; | ||
| 1002 | auto iter = FindVMA(cursor); | ||
| 1003 | ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end"); | ||
| 1004 | |||
| 1005 | for (;;) { | ||
| 1006 | const auto& area = iter->second; | ||
| 1007 | const auto area_end = area.base + area.size; | ||
| 1008 | const bool is_unmapped = area.state == MemoryState::Unmapped; | ||
| 1009 | const bool is_plain_heap = | ||
| 1010 | area.state == MemoryState::Heap && area.attribute == MemoryAttribute::None; | ||
| 1011 | |||
| 1012 | // Anything other than unmapped memory or attribute-free heap makes the range invalid. | ||
| 1013 | if (!is_unmapped && !is_plain_heap) { | ||
| 1014 | return ERR_INVALID_ADDRESS_STATE; | ||
| 1015 | } | ||
| 1016 | |||
| 1017 | // Only the heap part contributes; count the portion of it that lies inside the range. | ||
| 1018 | if (!is_unmapped) { | ||
| 1019 | const auto to_range_end = range_end - cursor; | ||
| 1020 | const auto to_area_end = area_end - cursor; | ||
| 1021 | total += std::min(to_range_end, to_area_end); | ||
| 1022 | } | ||
| 1023 | |||
| 1024 | // The area containing the last address of the range ends the walk. | ||
| 1025 | if (range_last <= area_end - 1) { | ||
| 1026 | break; | ||
| 1027 | } | ||
| 1028 | |||
| 1029 | // Continue from the start of the following area. | ||
| 1030 | cursor = area_end; | ||
| 1031 | ++iter; | ||
| 1032 | ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end"); | ||
| 1033 | } | ||
| 1034 | |||
| 1035 | return MakeResult(total); | ||
| 1036 | } | ||
| 1037 | |||
| 781 | u64 VMManager::GetTotalPhysicalMemoryAvailable() const { | 1038 | u64 VMManager::GetTotalPhysicalMemoryAvailable() const { |
| 782 | LOG_WARNING(Kernel, "(STUBBED) called"); | 1039 | LOG_WARNING(Kernel, "(STUBBED) called"); |
| 783 | return 0xF8000000; | 1040 | return 0xF8000000; |
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index 9fe6ac3f4..b18cde619 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/memory_hook.h" | 12 | #include "common/memory_hook.h" |
| 13 | #include "common/page_table.h" | 13 | #include "common/page_table.h" |
| 14 | #include "core/hle/kernel/physical_memory.h" | ||
| 14 | #include "core/hle/result.h" | 15 | #include "core/hle/result.h" |
| 15 | #include "core/memory.h" | 16 | #include "core/memory.h" |
| 16 | 17 | ||
| @@ -290,7 +291,7 @@ struct VirtualMemoryArea { | |||
| 290 | 291 | ||
| 291 | // Settings for type = AllocatedMemoryBlock | 292 | // Settings for type = AllocatedMemoryBlock |
| 292 | /// Memory block backing this VMA. | 293 | /// Memory block backing this VMA. |
| 293 | std::shared_ptr<std::vector<u8>> backing_block = nullptr; | 294 | std::shared_ptr<PhysicalMemory> backing_block = nullptr; |
| 294 | /// Offset into the backing_memory the mapping starts from. | 295 | /// Offset into the backing_memory the mapping starts from. |
| 295 | std::size_t offset = 0; | 296 | std::size_t offset = 0; |
| 296 | 297 | ||
| @@ -348,8 +349,9 @@ public: | |||
| 348 | * @param size Size of the mapping. | 349 | * @param size Size of the mapping. |
| 349 | * @param state MemoryState tag to attach to the VMA. | 350 | * @param state MemoryState tag to attach to the VMA. |
| 350 | */ | 351 | */ |
| 351 | ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, | 352 | ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<PhysicalMemory> block, |
| 352 | std::size_t offset, u64 size, MemoryState state); | 353 | std::size_t offset, u64 size, MemoryState state, |
| 354 | VMAPermission perm = VMAPermission::ReadWrite); | ||
| 353 | 355 | ||
| 354 | /** | 356 | /** |
| 355 | * Maps an unmanaged host memory pointer at a given address. | 357 | * Maps an unmanaged host memory pointer at a given address. |
| @@ -450,6 +452,34 @@ public: | |||
| 450 | /// | 452 | /// |
| 451 | ResultVal<VAddr> SetHeapSize(u64 size); | 453 | ResultVal<VAddr> SetHeapSize(u64 size); |
| 452 | 454 | ||
| 455 | /// Maps memory at a given address. | ||
| 456 | /// | ||
| 457 | /// @param target The virtual address to map memory at. | ||
| 458 | /// @param size The amount of memory to map. | ||
| 459 | /// | ||
| 460 | /// @note The destination address must lie within the Map region. | ||
| 461 | /// | ||
| 462 | /// @note This function requires that SystemResourceSize be non-zero, | ||
| 463 | /// however, this is just because if it were not then the | ||
| 464 | /// resulting page tables could be exploited on hardware by | ||
| 465 | /// a malicious program. SystemResource usage does not need | ||
| 466 | /// to be explicitly checked or updated here. | ||
| 467 | ResultCode MapPhysicalMemory(VAddr target, u64 size); | ||
| 468 | |||
| 469 | /// Unmaps memory at a given address. | ||
| 470 | /// | ||
| 471 | /// @param target The virtual address to unmap memory at. | ||
| 472 | /// @param size The amount of memory to unmap. | ||
| 473 | /// | ||
| 474 | /// @note The destination address must lie within the Map region. | ||
| 475 | /// | ||
| 476 | /// @note This function requires that SystemResourceSize be non-zero, | ||
| 477 | /// however, this is just because if it were not then the | ||
| 478 | /// resulting page tables could be exploited on hardware by | ||
| 479 | /// a malicious program. SystemResource usage does not need | ||
| 480 | /// to be explicitly checked or updated here. | ||
| 481 | ResultCode UnmapPhysicalMemory(VAddr target, u64 size); | ||
| 482 | |||
| 453 | /// Maps a region of memory as code memory. | 483 | /// Maps a region of memory as code memory. |
| 454 | /// | 484 | /// |
| 455 | /// @param dst_address The base address of the region to create the aliasing memory region. | 485 | /// @param dst_address The base address of the region to create the aliasing memory region. |
| @@ -518,7 +548,7 @@ public: | |||
| 518 | * Scans all VMAs and updates the page table range of any that use the given vector as backing | 548 | * Scans all VMAs and updates the page table range of any that use the given vector as backing |
| 519 | * memory. This should be called after any operation that causes reallocation of the vector. | 549 | * memory. This should be called after any operation that causes reallocation of the vector. |
| 520 | */ | 550 | */ |
| 521 | void RefreshMemoryBlockMappings(const std::vector<u8>* block); | 551 | void RefreshMemoryBlockMappings(const PhysicalMemory* block); |
| 522 | 552 | ||
| 523 | /// Dumps the address space layout to the log, for debugging | 553 | /// Dumps the address space layout to the log, for debugging |
| 524 | void LogLayout() const; | 554 | void LogLayout() const; |
| @@ -657,6 +687,11 @@ private: | |||
| 657 | */ | 687 | */ |
| 658 | VMAIter MergeAdjacent(VMAIter vma); | 688 | VMAIter MergeAdjacent(VMAIter vma); |
| 659 | 689 | ||
| 690 | /** | ||
| 691 | * Merges two adjacent VMAs. | ||
| 692 | */ | ||
| 693 | void MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right); | ||
| 694 | |||
| 660 | /// Updates the pages corresponding to this VMA so they match the VMA's attributes. | 695 | /// Updates the pages corresponding to this VMA so they match the VMA's attributes. |
| 661 | void UpdatePageTableForVMA(const VirtualMemoryArea& vma); | 696 | void UpdatePageTableForVMA(const VirtualMemoryArea& vma); |
| 662 | 697 | ||
| @@ -701,6 +736,13 @@ private: | |||
| 701 | MemoryAttribute attribute_mask, MemoryAttribute attribute, | 736 | MemoryAttribute attribute_mask, MemoryAttribute attribute, |
| 702 | MemoryAttribute ignore_mask) const; | 737 | MemoryAttribute ignore_mask) const; |
| 703 | 738 | ||
| 739 | /// Gets the amount of memory currently mapped (state != Unmapped) in a range. | ||
| 740 | ResultVal<std::size_t> SizeOfAllocatedVMAsInRange(VAddr address, std::size_t size) const; | ||
| 741 | |||
| 742 | /// Gets the amount of memory unmappable by UnmapPhysicalMemory in a range. | ||
| 743 | ResultVal<std::size_t> SizeOfUnmappablePhysicalMemoryInRange(VAddr address, | ||
| 744 | std::size_t size) const; | ||
| 745 | |||
| 704 | /** | 746 | /** |
| 705 | * A map covering the entirety of the managed address space, keyed by the `base` field of each | 747 | * A map covering the entirety of the managed address space, keyed by the `base` field of each |
| 706 | * VMA. It must always be modified by splitting or merging VMAs, so that the invariant | 748 | * VMA. It must always be modified by splitting or merging VMAs, so that the invariant |
| @@ -736,12 +778,17 @@ private: | |||
| 736 | // the entire virtual address space extents that bound the allocations, including any holes. | 778 | // the entire virtual address space extents that bound the allocations, including any holes. |
| 737 | // This makes deallocation and reallocation of holes fast and keeps process memory contiguous | 779 | // This makes deallocation and reallocation of holes fast and keeps process memory contiguous |
| 738 | // in the emulator address space, allowing Memory::GetPointer to be reasonably safe. | 780 | // in the emulator address space, allowing Memory::GetPointer to be reasonably safe. |
| 739 | std::shared_ptr<std::vector<u8>> heap_memory; | 781 | std::shared_ptr<PhysicalMemory> heap_memory; |
| 740 | 782 | ||
| 741 | // The end of the currently allocated heap. This is not an inclusive | 783 | // The end of the currently allocated heap. This is not an inclusive |
| 742 | // end of the range. This is essentially 'base_address + current_size'. | 784 | // end of the range. This is essentially 'base_address + current_size'. |
| 743 | VAddr heap_end = 0; | 785 | VAddr heap_end = 0; |
| 744 | 786 | ||
| 787 | // The current amount of memory mapped via MapPhysicalMemory. | ||
| 788 | // This is used here (and in Nintendo's kernel) only for debugging, and does not impact | ||
| 789 | // any behavior. | ||
| 790 | u64 physical_memory_mapped = 0; | ||
| 791 | |||
| 745 | Core::System& system; | 792 | Core::System& system; |
| 746 | }; | 793 | }; |
| 747 | } // namespace Kernel | 794 | } // namespace Kernel |
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 9fdcf2965..a192a1f5f 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp | |||
| @@ -266,8 +266,8 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger | |||
| 266 | {65, nullptr, "ReportUserIsActive"}, | 266 | {65, nullptr, "ReportUserIsActive"}, |
| 267 | {66, nullptr, "GetCurrentIlluminance"}, | 267 | {66, nullptr, "GetCurrentIlluminance"}, |
| 268 | {67, nullptr, "IsIlluminanceAvailable"}, | 268 | {67, nullptr, "IsIlluminanceAvailable"}, |
| 269 | {68, nullptr, "SetAutoSleepDisabled"}, | 269 | {68, &ISelfController::SetAutoSleepDisabled, "SetAutoSleepDisabled"}, |
| 270 | {69, nullptr, "IsAutoSleepDisabled"}, | 270 | {69, &ISelfController::IsAutoSleepDisabled, "IsAutoSleepDisabled"}, |
| 271 | {70, nullptr, "ReportMultimediaError"}, | 271 | {70, nullptr, "ReportMultimediaError"}, |
| 272 | {71, nullptr, "GetCurrentIlluminanceEx"}, | 272 | {71, nullptr, "GetCurrentIlluminanceEx"}, |
| 273 | {80, nullptr, "SetWirelessPriorityMode"}, | 273 | {80, nullptr, "SetWirelessPriorityMode"}, |
| @@ -454,6 +454,34 @@ void ISelfController::GetIdleTimeDetectionExtension(Kernel::HLERequestContext& c | |||
| 454 | rb.Push<u32>(idle_time_detection_extension); | 454 | rb.Push<u32>(idle_time_detection_extension); |
| 455 | } | 455 | } |
| 456 | 456 | ||
| 457 | void ISelfController::SetAutoSleepDisabled(Kernel::HLERequestContext& ctx) { | ||
| 458 | IPC::RequestParser request{ctx}; | ||
| 459 | is_auto_sleep_disabled = request.Pop<bool>(); | ||
| 460 | |||
| 461 | // On real hardware, a value that differs from the previous state of | ||
| 462 | // is_auto_sleep_disabled signals the internal window manager to update | ||
| 463 | // (which also increments some statistics such as update counts). | ||
| 464 | // | ||
| 465 | // The change is also reported to an idle handling context. | ||
| 466 | // | ||
| 467 | // Since this behavior is only being emulated, most of that can be skipped: | ||
| 468 | // storing the value in the member variable is enough for it to be queried | ||
| 469 | // later via IsAutoSleepDisabled(). | ||
| 470 | |||
| 471 | LOG_DEBUG(Service_AM, "called. is_auto_sleep_disabled={}", is_auto_sleep_disabled); | ||
| 472 | |||
| 473 | IPC::ResponseBuilder response{ctx, 2}; | ||
| 474 | response.Push(RESULT_SUCCESS); | ||
| 475 | } | ||
| 476 | |||
| 477 | void ISelfController::IsAutoSleepDisabled(Kernel::HLERequestContext& ctx) { | ||
| 478 | LOG_DEBUG(Service_AM, "called."); | ||
| 479 | // Reply with the result code followed by the flag stored by SetAutoSleepDisabled(). | ||
| 480 | IPC::ResponseBuilder response{ctx, 3}; | ||
| 481 | response.Push(RESULT_SUCCESS); | ||
| 482 | response.Push(is_auto_sleep_disabled); | ||
| 483 | } | ||
| 484 | |||
| 457 | void ISelfController::GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx) { | 485 | void ISelfController::GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx) { |
| 458 | LOG_DEBUG(Service_AM, "called."); | 486 | LOG_DEBUG(Service_AM, "called."); |
| 459 | 487 | ||
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h index 14b010164..6cb582483 100644 --- a/src/core/hle/service/am/am.h +++ b/src/core/hle/service/am/am.h | |||
| @@ -133,6 +133,8 @@ private: | |||
| 133 | void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx); | 133 | void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx); |
| 134 | void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); | 134 | void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); |
| 135 | void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); | 135 | void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); |
| 136 | void SetAutoSleepDisabled(Kernel::HLERequestContext& ctx); | ||
| 137 | void IsAutoSleepDisabled(Kernel::HLERequestContext& ctx); | ||
| 136 | void GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx); | 138 | void GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx); |
| 137 | void GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx); | 139 | void GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx); |
| 138 | 140 | ||
| @@ -142,6 +144,7 @@ private: | |||
| 142 | 144 | ||
| 143 | u32 idle_time_detection_extension = 0; | 145 | u32 idle_time_detection_extension = 0; |
| 144 | u64 num_fatal_sections_entered = 0; | 146 | u64 num_fatal_sections_entered = 0; |
| 147 | bool is_auto_sleep_disabled = false; | ||
| 145 | }; | 148 | }; |
| 146 | 149 | ||
| 147 | class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> { | 150 | class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> { |
diff --git a/src/core/hle/service/audio/audio.cpp b/src/core/hle/service/audio/audio.cpp index 128df7db5..1781bec83 100644 --- a/src/core/hle/service/audio/audio.cpp +++ b/src/core/hle/service/audio/audio.cpp | |||
| @@ -19,16 +19,16 @@ | |||
| 19 | 19 | ||
| 20 | namespace Service::Audio { | 20 | namespace Service::Audio { |
| 21 | 21 | ||
| 22 | void InstallInterfaces(SM::ServiceManager& service_manager) { | 22 | void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) { |
| 23 | std::make_shared<AudCtl>()->InstallAsService(service_manager); | 23 | std::make_shared<AudCtl>()->InstallAsService(service_manager); |
| 24 | std::make_shared<AudOutA>()->InstallAsService(service_manager); | 24 | std::make_shared<AudOutA>()->InstallAsService(service_manager); |
| 25 | std::make_shared<AudOutU>()->InstallAsService(service_manager); | 25 | std::make_shared<AudOutU>(system)->InstallAsService(service_manager); |
| 26 | std::make_shared<AudInA>()->InstallAsService(service_manager); | 26 | std::make_shared<AudInA>()->InstallAsService(service_manager); |
| 27 | std::make_shared<AudInU>()->InstallAsService(service_manager); | 27 | std::make_shared<AudInU>()->InstallAsService(service_manager); |
| 28 | std::make_shared<AudRecA>()->InstallAsService(service_manager); | 28 | std::make_shared<AudRecA>()->InstallAsService(service_manager); |
| 29 | std::make_shared<AudRecU>()->InstallAsService(service_manager); | 29 | std::make_shared<AudRecU>()->InstallAsService(service_manager); |
| 30 | std::make_shared<AudRenA>()->InstallAsService(service_manager); | 30 | std::make_shared<AudRenA>()->InstallAsService(service_manager); |
| 31 | std::make_shared<AudRenU>()->InstallAsService(service_manager); | 31 | std::make_shared<AudRenU>(system)->InstallAsService(service_manager); |
| 32 | std::make_shared<CodecCtl>()->InstallAsService(service_manager); | 32 | std::make_shared<CodecCtl>()->InstallAsService(service_manager); |
| 33 | std::make_shared<HwOpus>()->InstallAsService(service_manager); | 33 | std::make_shared<HwOpus>()->InstallAsService(service_manager); |
| 34 | 34 | ||
diff --git a/src/core/hle/service/audio/audio.h b/src/core/hle/service/audio/audio.h index f5bd3bf5f..b6d13912e 100644 --- a/src/core/hle/service/audio/audio.h +++ b/src/core/hle/service/audio/audio.h | |||
| @@ -4,6 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | namespace Core { | ||
| 8 | class System; | ||
| 9 | } | ||
| 10 | |||
| 7 | namespace Service::SM { | 11 | namespace Service::SM { |
| 8 | class ServiceManager; | 12 | class ServiceManager; |
| 9 | } | 13 | } |
| @@ -11,6 +15,6 @@ class ServiceManager; | |||
| 11 | namespace Service::Audio { | 15 | namespace Service::Audio { |
| 12 | 16 | ||
| 13 | /// Registers all Audio services with the specified service manager. | 17 | /// Registers all Audio services with the specified service manager. |
| 14 | void InstallInterfaces(SM::ServiceManager& service_manager); | 18 | void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system); |
| 15 | 19 | ||
| 16 | } // namespace Service::Audio | 20 | } // namespace Service::Audio |
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 7db6eb08d..fb84a8f13 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp | |||
| @@ -40,8 +40,8 @@ enum class AudioState : u32 { | |||
| 40 | 40 | ||
| 41 | class IAudioOut final : public ServiceFramework<IAudioOut> { | 41 | class IAudioOut final : public ServiceFramework<IAudioOut> { |
| 42 | public: | 42 | public: |
| 43 | IAudioOut(AudoutParams audio_params, AudioCore::AudioOut& audio_core, std::string&& device_name, | 43 | IAudioOut(Core::System& system, AudoutParams audio_params, AudioCore::AudioOut& audio_core, |
| 44 | std::string&& unique_name) | 44 | std::string&& device_name, std::string&& unique_name) |
| 45 | : ServiceFramework("IAudioOut"), audio_core(audio_core), | 45 | : ServiceFramework("IAudioOut"), audio_core(audio_core), |
| 46 | device_name(std::move(device_name)), audio_params(audio_params) { | 46 | device_name(std::move(device_name)), audio_params(audio_params) { |
| 47 | // clang-format off | 47 | // clang-format off |
| @@ -65,7 +65,6 @@ public: | |||
| 65 | RegisterHandlers(functions); | 65 | RegisterHandlers(functions); |
| 66 | 66 | ||
| 67 | // This is the event handle used to check if the audio buffer was released | 67 | // This is the event handle used to check if the audio buffer was released |
| 68 | auto& system = Core::System::GetInstance(); | ||
| 69 | buffer_event = Kernel::WritableEvent::CreateEventPair( | 68 | buffer_event = Kernel::WritableEvent::CreateEventPair( |
| 70 | system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased"); | 69 | system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased"); |
| 71 | 70 | ||
| @@ -212,6 +211,22 @@ private: | |||
| 212 | Kernel::EventPair buffer_event; | 211 | Kernel::EventPair buffer_event; |
| 213 | }; | 212 | }; |
| 214 | 213 | ||
| 214 | AudOutU::AudOutU(Core::System& system_) : ServiceFramework("audout:u"), system{system_} { | ||
| 215 | // clang-format off | ||
| 216 | static const FunctionInfo functions[] = { | ||
| 217 | {0, &AudOutU::ListAudioOutsImpl, "ListAudioOuts"}, | ||
| 218 | {1, &AudOutU::OpenAudioOutImpl, "OpenAudioOut"}, | ||
| 219 | {2, &AudOutU::ListAudioOutsImpl, "ListAudioOutsAuto"}, | ||
| 220 | {3, &AudOutU::OpenAudioOutImpl, "OpenAudioOutAuto"}, | ||
| 221 | }; | ||
| 222 | // clang-format on | ||
| 223 | |||
| 224 | RegisterHandlers(functions); | ||
| 225 | audio_core = std::make_unique<AudioCore::AudioOut>(); | ||
| 226 | } | ||
| 227 | |||
| 228 | AudOutU::~AudOutU() = default; | ||
| 229 | |||
| 215 | void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) { | 230 | void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) { |
| 216 | LOG_DEBUG(Service_Audio, "called"); | 231 | LOG_DEBUG(Service_Audio, "called"); |
| 217 | 232 | ||
| @@ -248,7 +263,7 @@ void AudOutU::OpenAudioOutImpl(Kernel::HLERequestContext& ctx) { | |||
| 248 | 263 | ||
| 249 | std::string unique_name{fmt::format("{}-{}", device_name, audio_out_interfaces.size())}; | 264 | std::string unique_name{fmt::format("{}-{}", device_name, audio_out_interfaces.size())}; |
| 250 | auto audio_out_interface = std::make_shared<IAudioOut>( | 265 | auto audio_out_interface = std::make_shared<IAudioOut>( |
| 251 | params, *audio_core, std::move(device_name), std::move(unique_name)); | 266 | system, params, *audio_core, std::move(device_name), std::move(unique_name)); |
| 252 | 267 | ||
| 253 | IPC::ResponseBuilder rb{ctx, 6, 0, 1}; | 268 | IPC::ResponseBuilder rb{ctx, 6, 0, 1}; |
| 254 | rb.Push(RESULT_SUCCESS); | 269 | rb.Push(RESULT_SUCCESS); |
| @@ -256,20 +271,9 @@ void AudOutU::OpenAudioOutImpl(Kernel::HLERequestContext& ctx) { | |||
| 256 | rb.Push<u32>(params.channel_count); | 271 | rb.Push<u32>(params.channel_count); |
| 257 | rb.Push<u32>(static_cast<u32>(AudioCore::Codec::PcmFormat::Int16)); | 272 | rb.Push<u32>(static_cast<u32>(AudioCore::Codec::PcmFormat::Int16)); |
| 258 | rb.Push<u32>(static_cast<u32>(AudioState::Stopped)); | 273 | rb.Push<u32>(static_cast<u32>(AudioState::Stopped)); |
| 259 | rb.PushIpcInterface<Audio::IAudioOut>(audio_out_interface); | 274 | rb.PushIpcInterface<IAudioOut>(audio_out_interface); |
| 260 | 275 | ||
| 261 | audio_out_interfaces.push_back(std::move(audio_out_interface)); | 276 | audio_out_interfaces.push_back(std::move(audio_out_interface)); |
| 262 | } | 277 | } |
| 263 | 278 | ||
| 264 | AudOutU::AudOutU() : ServiceFramework("audout:u") { | ||
| 265 | static const FunctionInfo functions[] = {{0, &AudOutU::ListAudioOutsImpl, "ListAudioOuts"}, | ||
| 266 | {1, &AudOutU::OpenAudioOutImpl, "OpenAudioOut"}, | ||
| 267 | {2, &AudOutU::ListAudioOutsImpl, "ListAudioOutsAuto"}, | ||
| 268 | {3, &AudOutU::OpenAudioOutImpl, "OpenAudioOutAuto"}}; | ||
| 269 | RegisterHandlers(functions); | ||
| 270 | audio_core = std::make_unique<AudioCore::AudioOut>(); | ||
| 271 | } | ||
| 272 | |||
| 273 | AudOutU::~AudOutU() = default; | ||
| 274 | |||
| 275 | } // namespace Service::Audio | 279 | } // namespace Service::Audio |
diff --git a/src/core/hle/service/audio/audout_u.h b/src/core/hle/service/audio/audout_u.h index aed4c43b2..c9f532ccd 100644 --- a/src/core/hle/service/audio/audout_u.h +++ b/src/core/hle/service/audio/audout_u.h | |||
| @@ -11,6 +11,10 @@ namespace AudioCore { | |||
| 11 | class AudioOut; | 11 | class AudioOut; |
| 12 | } | 12 | } |
| 13 | 13 | ||
| 14 | namespace Core { | ||
| 15 | class System; | ||
| 16 | } | ||
| 17 | |||
| 14 | namespace Kernel { | 18 | namespace Kernel { |
| 15 | class HLERequestContext; | 19 | class HLERequestContext; |
| 16 | } | 20 | } |
| @@ -21,15 +25,17 @@ class IAudioOut; | |||
| 21 | 25 | ||
| 22 | class AudOutU final : public ServiceFramework<AudOutU> { | 26 | class AudOutU final : public ServiceFramework<AudOutU> { |
| 23 | public: | 27 | public: |
| 24 | AudOutU(); | 28 | explicit AudOutU(Core::System& system_); |
| 25 | ~AudOutU() override; | 29 | ~AudOutU() override; |
| 26 | 30 | ||
| 27 | private: | 31 | private: |
| 32 | void ListAudioOutsImpl(Kernel::HLERequestContext& ctx); | ||
| 33 | void OpenAudioOutImpl(Kernel::HLERequestContext& ctx); | ||
| 34 | |||
| 28 | std::vector<std::shared_ptr<IAudioOut>> audio_out_interfaces; | 35 | std::vector<std::shared_ptr<IAudioOut>> audio_out_interfaces; |
| 29 | std::unique_ptr<AudioCore::AudioOut> audio_core; | 36 | std::unique_ptr<AudioCore::AudioOut> audio_core; |
| 30 | 37 | ||
| 31 | void ListAudioOutsImpl(Kernel::HLERequestContext& ctx); | 38 | Core::System& system; |
| 32 | void OpenAudioOutImpl(Kernel::HLERequestContext& ctx); | ||
| 33 | }; | 39 | }; |
| 34 | 40 | ||
| 35 | } // namespace Service::Audio | 41 | } // namespace Service::Audio |
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 3711e1ea1..5b0b7f17e 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <string_view> | ||
| 8 | 9 | ||
| 9 | #include "audio_core/audio_renderer.h" | 10 | #include "audio_core/audio_renderer.h" |
| 10 | #include "common/alignment.h" | 11 | #include "common/alignment.h" |
| @@ -25,7 +26,8 @@ namespace Service::Audio { | |||
| 25 | 26 | ||
| 26 | class IAudioRenderer final : public ServiceFramework<IAudioRenderer> { | 27 | class IAudioRenderer final : public ServiceFramework<IAudioRenderer> { |
| 27 | public: | 28 | public: |
| 28 | explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params) | 29 | explicit IAudioRenderer(Core::System& system, AudioCore::AudioRendererParameter audren_params, |
| 30 | const std::size_t instance_number) | ||
| 29 | : ServiceFramework("IAudioRenderer") { | 31 | : ServiceFramework("IAudioRenderer") { |
| 30 | // clang-format off | 32 | // clang-format off |
| 31 | static const FunctionInfo functions[] = { | 33 | static const FunctionInfo functions[] = { |
| @@ -45,11 +47,10 @@ public: | |||
| 45 | // clang-format on | 47 | // clang-format on |
| 46 | RegisterHandlers(functions); | 48 | RegisterHandlers(functions); |
| 47 | 49 | ||
| 48 | auto& system = Core::System::GetInstance(); | ||
| 49 | system_event = Kernel::WritableEvent::CreateEventPair( | 50 | system_event = Kernel::WritableEvent::CreateEventPair( |
| 50 | system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent"); | 51 | system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent"); |
| 51 | renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params, | 52 | renderer = std::make_unique<AudioCore::AudioRenderer>( |
| 52 | system_event.writable); | 53 | system.CoreTiming(), audren_params, system_event.writable, instance_number); |
| 53 | } | 54 | } |
| 54 | 55 | ||
| 55 | private: | 56 | private: |
| @@ -159,7 +160,8 @@ private: | |||
| 159 | 160 | ||
| 160 | class IAudioDevice final : public ServiceFramework<IAudioDevice> { | 161 | class IAudioDevice final : public ServiceFramework<IAudioDevice> { |
| 161 | public: | 162 | public: |
| 162 | IAudioDevice() : ServiceFramework("IAudioDevice") { | 163 | explicit IAudioDevice(Core::System& system, u32_le revision_num) |
| 164 | : ServiceFramework("IAudioDevice"), revision{revision_num} { | ||
| 163 | static const FunctionInfo functions[] = { | 165 | static const FunctionInfo functions[] = { |
| 164 | {0, &IAudioDevice::ListAudioDeviceName, "ListAudioDeviceName"}, | 166 | {0, &IAudioDevice::ListAudioDeviceName, "ListAudioDeviceName"}, |
| 165 | {1, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolume"}, | 167 | {1, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolume"}, |
| @@ -177,7 +179,7 @@ public: | |||
| 177 | }; | 179 | }; |
| 178 | RegisterHandlers(functions); | 180 | RegisterHandlers(functions); |
| 179 | 181 | ||
| 180 | auto& kernel = Core::System::GetInstance().Kernel(); | 182 | auto& kernel = system.Kernel(); |
| 181 | buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, | 183 | buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, |
| 182 | "IAudioOutBufferReleasedEvent"); | 184 | "IAudioOutBufferReleasedEvent"); |
| 183 | 185 | ||
| @@ -188,15 +190,47 @@ public: | |||
| 188 | } | 190 | } |
| 189 | 191 | ||
| 190 | private: | 192 | private: |
| 193 | using AudioDeviceName = std::array<char, 256>; | ||
| 194 | static constexpr std::array<std::string_view, 4> audio_device_names{{ | ||
| 195 | "AudioStereoJackOutput", | ||
| 196 | "AudioBuiltInSpeakerOutput", | ||
| 197 | "AudioTvOutput", | ||
| 198 | "AudioUsbDeviceOutput", | ||
| 199 | }}; | ||
| 200 | enum class DeviceType { | ||
| 201 | AHUBHeadphones, | ||
| 202 | AHUBSpeakers, | ||
| 203 | HDA, | ||
| 204 | USBOutput, | ||
| 205 | }; | ||
| 206 | |||
| 191 | void ListAudioDeviceName(Kernel::HLERequestContext& ctx) { | 207 | void ListAudioDeviceName(Kernel::HLERequestContext& ctx) { |
| 192 | LOG_WARNING(Service_Audio, "(STUBBED) called"); | 208 | LOG_DEBUG(Service_Audio, "called"); |
| 193 | 209 | ||
| 194 | constexpr std::array<char, 15> audio_interface{{"AudioInterface"}}; | 210 | const bool usb_output_supported = |
| 195 | ctx.WriteBuffer(audio_interface); | 211 | IsFeatureSupported(AudioFeatures::AudioUSBDeviceOutput, revision); |
| 212 | const std::size_t count = ctx.GetWriteBufferSize() / sizeof(AudioDeviceName); | ||
| 213 | |||
| 214 | std::vector<AudioDeviceName> name_buffer; | ||
| 215 | name_buffer.reserve(audio_device_names.size()); | ||
| 216 | |||
| 217 | for (std::size_t i = 0; i < count && i < audio_device_names.size(); i++) { | ||
| 218 | const auto type = static_cast<DeviceType>(i); | ||
| 219 | |||
| 220 | if (!usb_output_supported && type == DeviceType::USBOutput) { | ||
| 221 | continue; | ||
| 222 | } | ||
| 223 | |||
| 224 | const auto& device_name = audio_device_names[i]; | ||
| 225 | auto& entry = name_buffer.emplace_back(); | ||
| 226 | device_name.copy(entry.data(), device_name.size()); | ||
| 227 | } | ||
| 228 | |||
| 229 | ctx.WriteBuffer(name_buffer); | ||
| 196 | 230 | ||
| 197 | IPC::ResponseBuilder rb{ctx, 3}; | 231 | IPC::ResponseBuilder rb{ctx, 3}; |
| 198 | rb.Push(RESULT_SUCCESS); | 232 | rb.Push(RESULT_SUCCESS); |
| 199 | rb.Push<u32>(1); | 233 | rb.Push(static_cast<u32>(name_buffer.size())); |
| 200 | } | 234 | } |
| 201 | 235 | ||
| 202 | void SetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) { | 236 | void SetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) { |
| @@ -215,12 +249,16 @@ private: | |||
| 215 | void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) { | 249 | void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) { |
| 216 | LOG_WARNING(Service_Audio, "(STUBBED) called"); | 250 | LOG_WARNING(Service_Audio, "(STUBBED) called"); |
| 217 | 251 | ||
| 218 | constexpr std::array<char, 12> audio_interface{{"AudioDevice"}}; | 252 | // Currently set to always be TV audio output. |
| 219 | ctx.WriteBuffer(audio_interface); | 253 | const auto& device_name = audio_device_names[2]; |
| 220 | 254 | ||
| 221 | IPC::ResponseBuilder rb{ctx, 3}; | 255 | AudioDeviceName out_device_name{}; |
| 256 | device_name.copy(out_device_name.data(), device_name.size()); | ||
| 257 | |||
| 258 | ctx.WriteBuffer(out_device_name); | ||
| 259 | |||
| 260 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 222 | rb.Push(RESULT_SUCCESS); | 261 | rb.Push(RESULT_SUCCESS); |
| 223 | rb.Push<u32>(1); | ||
| 224 | } | 262 | } |
| 225 | 263 | ||
| 226 | void QueryAudioDeviceSystemEvent(Kernel::HLERequestContext& ctx) { | 264 | void QueryAudioDeviceSystemEvent(Kernel::HLERequestContext& ctx) { |
| @@ -249,12 +287,13 @@ private: | |||
| 249 | rb.PushCopyObjects(audio_output_device_switch_event.readable); | 287 | rb.PushCopyObjects(audio_output_device_switch_event.readable); |
| 250 | } | 288 | } |
| 251 | 289 | ||
| 290 | u32_le revision = 0; | ||
| 252 | Kernel::EventPair buffer_event; | 291 | Kernel::EventPair buffer_event; |
| 253 | Kernel::EventPair audio_output_device_switch_event; | 292 | Kernel::EventPair audio_output_device_switch_event; |
| 254 | 293 | ||
| 255 | }; // namespace Audio | 294 | }; // namespace Audio |
| 256 | 295 | ||
| 257 | AudRenU::AudRenU() : ServiceFramework("audren:u") { | 296 | AudRenU::AudRenU(Core::System& system_) : ServiceFramework("audren:u"), system{system_} { |
| 258 | // clang-format off | 297 | // clang-format off |
| 259 | static const FunctionInfo functions[] = { | 298 | static const FunctionInfo functions[] = { |
| 260 | {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"}, | 299 | {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"}, |
| @@ -327,7 +366,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { | |||
| 327 | }; | 366 | }; |
| 328 | 367 | ||
| 329 | // Calculates the portion of the size related to the mix data (and the sorting thereof). | 368 | // Calculates the portion of the size related to the mix data (and the sorting thereof). |
| 330 | const auto calculate_mix_info_size = [this](const AudioCore::AudioRendererParameter& params) { | 369 | const auto calculate_mix_info_size = [](const AudioCore::AudioRendererParameter& params) { |
| 331 | // The size of the mixing info data structure. | 370 | // The size of the mixing info data structure. |
| 332 | constexpr u64 mix_info_size = 0x940; | 371 | constexpr u64 mix_info_size = 0x940; |
| 333 | 372 | ||
| @@ -399,7 +438,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { | |||
| 399 | 438 | ||
| 400 | // Calculates the part of the size related to the splitter context. | 439 | // Calculates the part of the size related to the splitter context. |
| 401 | const auto calculate_splitter_context_size = | 440 | const auto calculate_splitter_context_size = |
| 402 | [this](const AudioCore::AudioRendererParameter& params) -> u64 { | 441 | [](const AudioCore::AudioRendererParameter& params) -> u64 { |
| 403 | if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { | 442 | if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { |
| 404 | return 0; | 443 | return 0; |
| 405 | } | 444 | } |
| @@ -446,7 +485,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { | |||
| 446 | }; | 485 | }; |
| 447 | 486 | ||
| 448 | // Calculates the part of the size related to performance statistics. | 487 | // Calculates the part of the size related to performance statistics. |
| 449 | const auto calculate_perf_size = [this](const AudioCore::AudioRendererParameter& params) { | 488 | const auto calculate_perf_size = [](const AudioCore::AudioRendererParameter& params) { |
| 450 | // Extra size value appended to the end of the calculation. | 489 | // Extra size value appended to the end of the calculation. |
| 451 | constexpr u64 appended = 128; | 490 | constexpr u64 appended = 128; |
| 452 | 491 | ||
| @@ -473,78 +512,76 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { | |||
| 473 | }; | 512 | }; |
| 474 | 513 | ||
| 475 | // Calculates the part of the size that relates to the audio command buffer. | 514 | // Calculates the part of the size that relates to the audio command buffer. |
| 476 | const auto calculate_command_buffer_size = | 515 | const auto calculate_command_buffer_size = [](const AudioCore::AudioRendererParameter& params) { |
| 477 | [this](const AudioCore::AudioRendererParameter& params) { | 516 | constexpr u64 alignment = (buffer_alignment_size - 1) * 2; |
| 478 | constexpr u64 alignment = (buffer_alignment_size - 1) * 2; | ||
| 479 | 517 | ||
| 480 | if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) { | 518 | if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) { |
| 481 | constexpr u64 command_buffer_size = 0x18000; | 519 | constexpr u64 command_buffer_size = 0x18000; |
| 482 | 520 | ||
| 483 | return command_buffer_size + alignment; | 521 | return command_buffer_size + alignment; |
| 484 | } | 522 | } |
| 485 | 523 | ||
| 486 | // When the variadic command buffer is supported, this means | 524 | // When the variadic command buffer is supported, this means |
| 487 | // the command generator for the audio renderer can issue commands | 525 | // the command generator for the audio renderer can issue commands |
| 488 | // that are (as one would expect), variable in size. So what we need to do | 526 | // that are (as one would expect), variable in size. So what we need to do |
| 489 | // is determine the maximum possible size for a few command data structures | 527 | // is determine the maximum possible size for a few command data structures |
| 490 | // then multiply them by the amount of present commands indicated by the given | 528 | // then multiply them by the amount of present commands indicated by the given |
| 491 | // respective audio parameters. | 529 | // respective audio parameters. |
| 492 | 530 | ||
| 493 | constexpr u64 max_biquad_filters = 2; | 531 | constexpr u64 max_biquad_filters = 2; |
| 494 | constexpr u64 max_mix_buffers = 24; | 532 | constexpr u64 max_mix_buffers = 24; |
| 495 | 533 | ||
| 496 | constexpr u64 biquad_filter_command_size = 0x2C; | 534 | constexpr u64 biquad_filter_command_size = 0x2C; |
| 497 | 535 | ||
| 498 | constexpr u64 depop_mix_command_size = 0x24; | 536 | constexpr u64 depop_mix_command_size = 0x24; |
| 499 | constexpr u64 depop_setup_command_size = 0x50; | 537 | constexpr u64 depop_setup_command_size = 0x50; |
| 500 | 538 | ||
| 501 | constexpr u64 effect_command_max_size = 0x540; | 539 | constexpr u64 effect_command_max_size = 0x540; |
| 502 | 540 | ||
| 503 | constexpr u64 mix_command_size = 0x1C; | 541 | constexpr u64 mix_command_size = 0x1C; |
| 504 | constexpr u64 mix_ramp_command_size = 0x24; | 542 | constexpr u64 mix_ramp_command_size = 0x24; |
| 505 | constexpr u64 mix_ramp_grouped_command_size = 0x13C; | 543 | constexpr u64 mix_ramp_grouped_command_size = 0x13C; |
| 506 | 544 | ||
| 507 | constexpr u64 perf_command_size = 0x28; | 545 | constexpr u64 perf_command_size = 0x28; |
| 508 | 546 | ||
| 509 | constexpr u64 sink_command_size = 0x130; | 547 | constexpr u64 sink_command_size = 0x130; |
| 510 | 548 | ||
| 511 | constexpr u64 submix_command_max_size = | 549 | constexpr u64 submix_command_max_size = |
| 512 | depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers; | 550 | depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers; |
| 513 | 551 | ||
| 514 | constexpr u64 volume_command_size = 0x1C; | 552 | constexpr u64 volume_command_size = 0x1C; |
| 515 | constexpr u64 volume_ramp_command_size = 0x20; | 553 | constexpr u64 volume_ramp_command_size = 0x20; |
| 516 | 554 | ||
| 517 | constexpr u64 voice_biquad_filter_command_size = | 555 | constexpr u64 voice_biquad_filter_command_size = |
| 518 | biquad_filter_command_size * max_biquad_filters; | 556 | biquad_filter_command_size * max_biquad_filters; |
| 519 | constexpr u64 voice_data_command_size = 0x9C; | 557 | constexpr u64 voice_data_command_size = 0x9C; |
| 520 | const u64 voice_command_max_size = | 558 | const u64 voice_command_max_size = |
| 521 | (params.splitter_count * depop_setup_command_size) + | 559 | (params.splitter_count * depop_setup_command_size) + |
| 522 | (voice_data_command_size + voice_biquad_filter_command_size + | 560 | (voice_data_command_size + voice_biquad_filter_command_size + volume_ramp_command_size + |
| 523 | volume_ramp_command_size + mix_ramp_grouped_command_size); | 561 | mix_ramp_grouped_command_size); |
| 524 | 562 | ||
| 525 | // Now calculate the individual elements that comprise the size and add them together. | 563 | // Now calculate the individual elements that comprise the size and add them together. |
| 526 | const u64 effect_commands_size = params.effect_count * effect_command_max_size; | 564 | const u64 effect_commands_size = params.effect_count * effect_command_max_size; |
| 527 | 565 | ||
| 528 | const u64 final_mix_commands_size = | 566 | const u64 final_mix_commands_size = |
| 529 | depop_mix_command_size + volume_command_size * max_mix_buffers; | 567 | depop_mix_command_size + volume_command_size * max_mix_buffers; |
| 530 | 568 | ||
| 531 | const u64 perf_commands_size = | 569 | const u64 perf_commands_size = |
| 532 | perf_command_size * | 570 | perf_command_size * (CalculateNumPerformanceEntries(params) + max_perf_detail_entries); |
| 533 | (CalculateNumPerformanceEntries(params) + max_perf_detail_entries); | ||
| 534 | 571 | ||
| 535 | const u64 sink_commands_size = params.sink_count * sink_command_size; | 572 | const u64 sink_commands_size = params.sink_count * sink_command_size; |
| 536 | 573 | ||
| 537 | const u64 splitter_commands_size = | 574 | const u64 splitter_commands_size = |
| 538 | params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size; | 575 | params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size; |
| 539 | 576 | ||
| 540 | const u64 submix_commands_size = params.submix_count * submix_command_max_size; | 577 | const u64 submix_commands_size = params.submix_count * submix_command_max_size; |
| 541 | 578 | ||
| 542 | const u64 voice_commands_size = params.voice_count * voice_command_max_size; | 579 | const u64 voice_commands_size = params.voice_count * voice_command_max_size; |
| 543 | 580 | ||
| 544 | return effect_commands_size + final_mix_commands_size + perf_commands_size + | 581 | return effect_commands_size + final_mix_commands_size + perf_commands_size + |
| 545 | sink_commands_size + splitter_commands_size + submix_commands_size + | 582 | sink_commands_size + splitter_commands_size + submix_commands_size + |
| 546 | voice_commands_size + alignment; | 583 | voice_commands_size + alignment; |
| 547 | }; | 584 | }; |
| 548 | 585 | ||
| 549 | IPC::RequestParser rp{ctx}; | 586 | IPC::RequestParser rp{ctx}; |
| 550 | const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); | 587 | const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); |
| @@ -577,12 +614,16 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { | |||
| 577 | } | 614 | } |
| 578 | 615 | ||
| 579 | void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) { | 616 | void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) { |
| 580 | LOG_DEBUG(Service_Audio, "called"); | 617 | IPC::RequestParser rp{ctx}; |
| 618 | const u64 aruid = rp.Pop<u64>(); | ||
| 581 | 619 | ||
| 582 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 620 | LOG_DEBUG(Service_Audio, "called. aruid={:016X}", aruid); |
| 583 | 621 | ||
| 622 | // Revisionless variant of GetAudioDeviceServiceWithRevisionInfo that | ||
| 623 | // always assumes the initial release revision (REV1). | ||
| 624 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | ||
| 584 | rb.Push(RESULT_SUCCESS); | 625 | rb.Push(RESULT_SUCCESS); |
| 585 | rb.PushIpcInterface<Audio::IAudioDevice>(); | 626 | rb.PushIpcInterface<IAudioDevice>(system, Common::MakeMagic('R', 'E', 'V', '1')); |
| 586 | } | 627 | } |
| 587 | 628 | ||
| 588 | void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) { | 629 | void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) { |
| @@ -592,13 +633,19 @@ void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) { | |||
| 592 | } | 633 | } |
| 593 | 634 | ||
| 594 | void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) { | 635 | void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) { |
| 595 | LOG_WARNING(Service_Audio, "(STUBBED) called"); | 636 | struct Parameters { |
| 637 | u32 revision; | ||
| 638 | u64 aruid; | ||
| 639 | }; | ||
| 596 | 640 | ||
| 597 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 641 | IPC::RequestParser rp{ctx}; |
| 642 | const auto [revision, aruid] = rp.PopRaw<Parameters>(); | ||
| 643 | |||
| 644 | LOG_DEBUG(Service_Audio, "called. revision={:08X}, aruid={:016X}", revision, aruid); | ||
| 598 | 645 | ||
| 646 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | ||
| 599 | rb.Push(RESULT_SUCCESS); | 647 | rb.Push(RESULT_SUCCESS); |
| 600 | rb.PushIpcInterface<Audio::IAudioDevice>(); // TODO(ogniK): Figure out what is different | 648 | rb.PushIpcInterface<IAudioDevice>(system, revision); |
| 601 | // based on the current revision | ||
| 602 | } | 649 | } |
| 603 | 650 | ||
| 604 | void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) { | 651 | void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) { |
| @@ -607,14 +654,16 @@ void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) { | |||
| 607 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 654 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 608 | 655 | ||
| 609 | rb.Push(RESULT_SUCCESS); | 656 | rb.Push(RESULT_SUCCESS); |
| 610 | rb.PushIpcInterface<IAudioRenderer>(params); | 657 | rb.PushIpcInterface<IAudioRenderer>(system, params, audren_instance_count++); |
| 611 | } | 658 | } |
| 612 | 659 | ||
| 613 | bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { | 660 | bool IsFeatureSupported(AudioFeatures feature, u32_le revision) { |
| 614 | // Byte swap | 661 | // Byte swap |
| 615 | const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0'); | 662 | const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0'); |
| 616 | 663 | ||
| 617 | switch (feature) { | 664 | switch (feature) { |
| 665 | case AudioFeatures::AudioUSBDeviceOutput: | ||
| 666 | return version_num >= 4U; | ||
| 618 | case AudioFeatures::Splitter: | 667 | case AudioFeatures::Splitter: |
| 619 | return version_num >= 2U; | 668 | return version_num >= 2U; |
| 620 | case AudioFeatures::PerformanceMetricsVersion2: | 669 | case AudioFeatures::PerformanceMetricsVersion2: |
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h index 1d3c8df61..4e0ccc792 100644 --- a/src/core/hle/service/audio/audren_u.h +++ b/src/core/hle/service/audio/audren_u.h | |||
| @@ -6,6 +6,10 @@ | |||
| 6 | 6 | ||
| 7 | #include "core/hle/service/service.h" | 7 | #include "core/hle/service/service.h" |
| 8 | 8 | ||
| 9 | namespace Core { | ||
| 10 | class System; | ||
| 11 | } | ||
| 12 | |||
| 9 | namespace Kernel { | 13 | namespace Kernel { |
| 10 | class HLERequestContext; | 14 | class HLERequestContext; |
| 11 | } | 15 | } |
| @@ -14,7 +18,7 @@ namespace Service::Audio { | |||
| 14 | 18 | ||
| 15 | class AudRenU final : public ServiceFramework<AudRenU> { | 19 | class AudRenU final : public ServiceFramework<AudRenU> { |
| 16 | public: | 20 | public: |
| 17 | explicit AudRenU(); | 21 | explicit AudRenU(Core::System& system_); |
| 18 | ~AudRenU() override; | 22 | ~AudRenU() override; |
| 19 | 23 | ||
| 20 | private: | 24 | private: |
| @@ -26,13 +30,19 @@ private: | |||
| 26 | 30 | ||
| 27 | void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx); | 31 | void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx); |
| 28 | 32 | ||
| 29 | enum class AudioFeatures : u32 { | 33 | std::size_t audren_instance_count = 0; |
| 30 | Splitter, | 34 | Core::System& system; |
| 31 | PerformanceMetricsVersion2, | 35 | }; |
| 32 | VariadicCommandBuffer, | ||
| 33 | }; | ||
| 34 | 36 | ||
| 35 | bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const; | 37 | // Describes a particular audio feature that may be supported in a particular revision. |
| 38 | enum class AudioFeatures : u32 { | ||
| 39 | AudioUSBDeviceOutput, | ||
| 40 | Splitter, | ||
| 41 | PerformanceMetricsVersion2, | ||
| 42 | VariadicCommandBuffer, | ||
| 36 | }; | 43 | }; |
| 37 | 44 | ||
| 45 | // Tests if a particular audio feature is supported with a given audio revision. | ||
| 46 | bool IsFeatureSupported(AudioFeatures feature, u32_le revision); | ||
| 47 | |||
| 38 | } // namespace Service::Audio | 48 | } // namespace Service::Audio |
diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp index dec541f2e..d1ec12ef9 100644 --- a/src/core/hle/service/friend/friend.cpp +++ b/src/core/hle/service/friend/friend.cpp | |||
| @@ -22,7 +22,7 @@ public: | |||
| 22 | {0, nullptr, "GetCompletionEvent"}, | 22 | {0, nullptr, "GetCompletionEvent"}, |
| 23 | {1, nullptr, "Cancel"}, | 23 | {1, nullptr, "Cancel"}, |
| 24 | {10100, nullptr, "GetFriendListIds"}, | 24 | {10100, nullptr, "GetFriendListIds"}, |
| 25 | {10101, nullptr, "GetFriendList"}, | 25 | {10101, &IFriendService::GetFriendList, "GetFriendList"}, |
| 26 | {10102, nullptr, "UpdateFriendInfo"}, | 26 | {10102, nullptr, "UpdateFriendInfo"}, |
| 27 | {10110, nullptr, "GetFriendProfileImage"}, | 27 | {10110, nullptr, "GetFriendProfileImage"}, |
| 28 | {10200, nullptr, "SendFriendRequestForApplication"}, | 28 | {10200, nullptr, "SendFriendRequestForApplication"}, |
| @@ -99,6 +99,23 @@ public: | |||
| 99 | } | 99 | } |
| 100 | 100 | ||
| 101 | private: | 101 | private: |
| 102 | enum class PresenceFilter : u32 { | ||
| 103 | None = 0, | ||
| 104 | Online = 1, | ||
| 105 | OnlinePlay = 2, | ||
| 106 | OnlineOrOnlinePlay = 3, | ||
| 107 | }; | ||
| 108 | |||
| 109 | struct SizedFriendFilter { | ||
| 110 | PresenceFilter presence; | ||
| 111 | u8 is_favorite; | ||
| 112 | u8 same_app; | ||
| 113 | u8 same_app_played; | ||
| 114 | u8 arbitary_app_played; | ||
| 115 | u64 group_id; | ||
| 116 | }; | ||
| 117 | static_assert(sizeof(SizedFriendFilter) == 0x10, "SizedFriendFilter is an invalid size"); | ||
| 118 | |||
| 102 | void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) { | 119 | void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) { |
| 103 | // Stub used by Splatoon 2 | 120 | // Stub used by Splatoon 2 |
| 104 | LOG_WARNING(Service_ACC, "(STUBBED) called"); | 121 | LOG_WARNING(Service_ACC, "(STUBBED) called"); |
| @@ -112,6 +129,22 @@ private: | |||
| 112 | IPC::ResponseBuilder rb{ctx, 2}; | 129 | IPC::ResponseBuilder rb{ctx, 2}; |
| 113 | rb.Push(RESULT_SUCCESS); | 130 | rb.Push(RESULT_SUCCESS); |
| 114 | } | 131 | } |
| 132 | |||
| 133 | void GetFriendList(Kernel::HLERequestContext& ctx) { | ||
| 134 | IPC::RequestParser rp{ctx}; | ||
| 135 | const auto friend_offset = rp.Pop<u32>(); | ||
| 136 | const auto uuid = rp.PopRaw<Common::UUID>(); | ||
| 137 | [[maybe_unused]] const auto filter = rp.PopRaw<SizedFriendFilter>(); | ||
| 138 | const auto pid = rp.Pop<u64>(); | ||
| 139 | LOG_WARNING(Service_ACC, "(STUBBED) called, offset={}, uuid={}, pid={}", friend_offset, | ||
| 140 | uuid.Format(), pid); | ||
| 141 | |||
| 142 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 143 | rb.Push(RESULT_SUCCESS); | ||
| 144 | |||
| 145 | rb.Push<u32>(0); // Friend count | ||
| 146 | // TODO(ogniK): Return a buffer of u64s which are the "NetworkServiceAccountId" | ||
| 147 | } | ||
| 115 | }; | 148 | }; |
| 116 | 149 | ||
| 117 | class INotificationService final : public ServiceFramework<INotificationService> { | 150 | class INotificationService final : public ServiceFramework<INotificationService> { |
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp index b839303ac..8ddad8682 100644 --- a/src/core/hle/service/ldr/ldr.cpp +++ b/src/core/hle/service/ldr/ldr.cpp | |||
| @@ -345,14 +345,16 @@ public: | |||
| 345 | vm_manager | 345 | vm_manager |
| 346 | .MirrorMemory(*map_address, nro_address, nro_size, Kernel::MemoryState::ModuleCode) | 346 | .MirrorMemory(*map_address, nro_address, nro_size, Kernel::MemoryState::ModuleCode) |
| 347 | .IsSuccess()); | 347 | .IsSuccess()); |
| 348 | ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess()); | 348 | ASSERT(vm_manager.ReprotectRange(nro_address, nro_size, Kernel::VMAPermission::None) |
| 349 | .IsSuccess()); | ||
| 349 | 350 | ||
| 350 | if (bss_size > 0) { | 351 | if (bss_size > 0) { |
| 351 | ASSERT(vm_manager | 352 | ASSERT(vm_manager |
| 352 | .MirrorMemory(*map_address + nro_size, bss_address, bss_size, | 353 | .MirrorMemory(*map_address + nro_size, bss_address, bss_size, |
| 353 | Kernel::MemoryState::ModuleCode) | 354 | Kernel::MemoryState::ModuleCode) |
| 354 | .IsSuccess()); | 355 | .IsSuccess()); |
| 355 | ASSERT(vm_manager.UnmapRange(bss_address, bss_size).IsSuccess()); | 356 | ASSERT(vm_manager.ReprotectRange(bss_address, bss_size, Kernel::VMAPermission::None) |
| 357 | .IsSuccess()); | ||
| 356 | } | 358 | } |
| 357 | 359 | ||
| 358 | vm_manager.ReprotectRange(*map_address, header.text_size, | 360 | vm_manager.ReprotectRange(*map_address, header.text_size, |
| @@ -364,7 +366,8 @@ public: | |||
| 364 | 366 | ||
| 365 | Core::System::GetInstance().InvalidateCpuInstructionCaches(); | 367 | Core::System::GetInstance().InvalidateCpuInstructionCaches(); |
| 366 | 368 | ||
| 367 | nro.insert_or_assign(*map_address, NROInfo{hash, nro_size + bss_size}); | 369 | nro.insert_or_assign(*map_address, |
| 370 | NROInfo{hash, nro_address, nro_size, bss_address, bss_size}); | ||
| 368 | 371 | ||
| 369 | IPC::ResponseBuilder rb{ctx, 4}; | 372 | IPC::ResponseBuilder rb{ctx, 4}; |
| 370 | rb.Push(RESULT_SUCCESS); | 373 | rb.Push(RESULT_SUCCESS); |
| @@ -409,9 +412,23 @@ public: | |||
| 409 | } | 412 | } |
| 410 | 413 | ||
| 411 | auto& vm_manager = Core::CurrentProcess()->VMManager(); | 414 | auto& vm_manager = Core::CurrentProcess()->VMManager(); |
| 412 | const auto& nro_size = iter->second.size; | 415 | const auto& nro_info = iter->second; |
| 413 | 416 | ||
| 414 | ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess()); | 417 | // Unmap the mirrored memory |
| 418 | ASSERT( | ||
| 419 | vm_manager.UnmapRange(nro_address, nro_info.nro_size + nro_info.bss_size).IsSuccess()); | ||
| 420 | |||
| 421 | // Reprotect the source memory | ||
| 422 | ASSERT(vm_manager | ||
| 423 | .ReprotectRange(nro_info.nro_address, nro_info.nro_size, | ||
| 424 | Kernel::VMAPermission::ReadWrite) | ||
| 425 | .IsSuccess()); | ||
| 426 | if (nro_info.bss_size > 0) { | ||
| 427 | ASSERT(vm_manager | ||
| 428 | .ReprotectRange(nro_info.bss_address, nro_info.bss_size, | ||
| 429 | Kernel::VMAPermission::ReadWrite) | ||
| 430 | .IsSuccess()); | ||
| 431 | } | ||
| 415 | 432 | ||
| 416 | Core::System::GetInstance().InvalidateCpuInstructionCaches(); | 433 | Core::System::GetInstance().InvalidateCpuInstructionCaches(); |
| 417 | 434 | ||
| @@ -473,7 +490,10 @@ private: | |||
| 473 | 490 | ||
| 474 | struct NROInfo { | 491 | struct NROInfo { |
| 475 | SHA256Hash hash; | 492 | SHA256Hash hash; |
| 476 | u64 size; | 493 | VAddr nro_address; |
| 494 | u64 nro_size; | ||
| 495 | VAddr bss_address; | ||
| 496 | u64 bss_size; | ||
| 477 | }; | 497 | }; |
| 478 | 498 | ||
| 479 | bool initialized = false; | 499 | bool initialized = false; |
diff --git a/src/core/hle/service/mii/mii.cpp b/src/core/hle/service/mii/mii.cpp index ce84e25ed..0b3923ad9 100644 --- a/src/core/hle/service/mii/mii.cpp +++ b/src/core/hle/service/mii/mii.cpp | |||
| @@ -48,7 +48,7 @@ public: | |||
| 48 | {19, nullptr, "Export"}, | 48 | {19, nullptr, "Export"}, |
| 49 | {20, nullptr, "IsBrokenDatabaseWithClearFlag"}, | 49 | {20, nullptr, "IsBrokenDatabaseWithClearFlag"}, |
| 50 | {21, &IDatabaseService::GetIndex, "GetIndex"}, | 50 | {21, &IDatabaseService::GetIndex, "GetIndex"}, |
| 51 | {22, nullptr, "SetInterfaceVersion"}, | 51 | {22, &IDatabaseService::SetInterfaceVersion, "SetInterfaceVersion"}, |
| 52 | {23, nullptr, "Convert"}, | 52 | {23, nullptr, "Convert"}, |
| 53 | }; | 53 | }; |
| 54 | // clang-format on | 54 | // clang-format on |
| @@ -350,8 +350,22 @@ private: | |||
| 350 | rb.Push(index); | 350 | rb.Push(index); |
| 351 | } | 351 | } |
| 352 | 352 | ||
| 353 | void SetInterfaceVersion(Kernel::HLERequestContext& ctx) { | ||
| 354 | IPC::RequestParser rp{ctx}; | ||
| 355 | current_interface_version = rp.PopRaw<u32>(); | ||
| 356 | |||
| 357 | LOG_DEBUG(Service_Mii, "called, interface_version={:08X}", current_interface_version); | ||
| 358 | |||
| 359 | UNIMPLEMENTED_IF(current_interface_version != 1); | ||
| 360 | |||
| 361 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 362 | rb.Push(RESULT_SUCCESS); | ||
| 363 | } | ||
| 364 | |||
| 353 | MiiManager db; | 365 | MiiManager db; |
| 354 | 366 | ||
| 367 | u32 current_interface_version = 0; | ||
| 368 | |||
| 355 | // Last read offsets of Get functions | 369 | // Last read offsets of Get functions |
| 356 | std::array<u32, 4> offsets{}; | 370 | std::array<u32, 4> offsets{}; |
| 357 | }; | 371 | }; |
diff --git a/src/core/hle/service/ns/pl_u.cpp b/src/core/hle/service/ns/pl_u.cpp index ad176f89d..2a522136d 100644 --- a/src/core/hle/service/ns/pl_u.cpp +++ b/src/core/hle/service/ns/pl_u.cpp | |||
| @@ -77,7 +77,7 @@ enum class LoadState : u32 { | |||
| 77 | Done = 1, | 77 | Done = 1, |
| 78 | }; | 78 | }; |
| 79 | 79 | ||
| 80 | static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output, | 80 | static void DecryptSharedFont(const std::vector<u32>& input, Kernel::PhysicalMemory& output, |
| 81 | std::size_t& offset) { | 81 | std::size_t& offset) { |
| 82 | ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE, | 82 | ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE, |
| 83 | "Shared fonts exceeds 17mb!"); | 83 | "Shared fonts exceeds 17mb!"); |
| @@ -94,7 +94,7 @@ static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& ou | |||
| 94 | offset += transformed_font.size() * sizeof(u32); | 94 | offset += transformed_font.size() * sizeof(u32); |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | static void EncryptSharedFont(const std::vector<u8>& input, std::vector<u8>& output, | 97 | static void EncryptSharedFont(const std::vector<u8>& input, Kernel::PhysicalMemory& output, |
| 98 | std::size_t& offset) { | 98 | std::size_t& offset) { |
| 99 | ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!"); | 99 | ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!"); |
| 100 | const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT; | 100 | const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT; |
| @@ -121,7 +121,7 @@ struct PL_U::Impl { | |||
| 121 | return shared_font_regions.at(index); | 121 | return shared_font_regions.at(index); |
| 122 | } | 122 | } |
| 123 | 123 | ||
| 124 | void BuildSharedFontsRawRegions(const std::vector<u8>& input) { | 124 | void BuildSharedFontsRawRegions(const Kernel::PhysicalMemory& input) { |
| 125 | // As we can derive the xor key we can just populate the offsets | 125 | // As we can derive the xor key we can just populate the offsets |
| 126 | // based on the shared memory dump | 126 | // based on the shared memory dump |
| 127 | unsigned cur_offset = 0; | 127 | unsigned cur_offset = 0; |
| @@ -144,7 +144,7 @@ struct PL_U::Impl { | |||
| 144 | Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem; | 144 | Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem; |
| 145 | 145 | ||
| 146 | /// Backing memory for the shared font data | 146 | /// Backing memory for the shared font data |
| 147 | std::shared_ptr<std::vector<u8>> shared_font; | 147 | std::shared_ptr<Kernel::PhysicalMemory> shared_font; |
| 148 | 148 | ||
| 149 | // Automatically populated based on shared_fonts dump or system archives. | 149 | // Automatically populated based on shared_fonts dump or system archives. |
| 150 | std::vector<FontRegion> shared_font_regions; | 150 | std::vector<FontRegion> shared_font_regions; |
| @@ -166,7 +166,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} { | |||
| 166 | // Rebuild shared fonts from data ncas | 166 | // Rebuild shared fonts from data ncas |
| 167 | if (nand->HasEntry(static_cast<u64>(FontArchives::Standard), | 167 | if (nand->HasEntry(static_cast<u64>(FontArchives::Standard), |
| 168 | FileSys::ContentRecordType::Data)) { | 168 | FileSys::ContentRecordType::Data)) { |
| 169 | impl->shared_font = std::make_shared<std::vector<u8>>(SHARED_FONT_MEM_SIZE); | 169 | impl->shared_font = std::make_shared<Kernel::PhysicalMemory>(SHARED_FONT_MEM_SIZE); |
| 170 | for (auto font : SHARED_FONTS) { | 170 | for (auto font : SHARED_FONTS) { |
| 171 | const auto nca = | 171 | const auto nca = |
| 172 | nand->GetEntry(static_cast<u64>(font.first), FileSys::ContentRecordType::Data); | 172 | nand->GetEntry(static_cast<u64>(font.first), FileSys::ContentRecordType::Data); |
| @@ -207,7 +207,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} { | |||
| 207 | } | 207 | } |
| 208 | 208 | ||
| 209 | } else { | 209 | } else { |
| 210 | impl->shared_font = std::make_shared<std::vector<u8>>( | 210 | impl->shared_font = std::make_shared<Kernel::PhysicalMemory>( |
| 211 | SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size | 211 | SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size |
| 212 | 212 | ||
| 213 | const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir); | 213 | const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir); |
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h index 4f6042b00..5b8248433 100644 --- a/src/core/hle/service/nvdrv/devices/nvdevice.h +++ b/src/core/hle/service/nvdrv/devices/nvdevice.h | |||
| @@ -8,6 +8,11 @@ | |||
| 8 | #include "common/bit_field.h" | 8 | #include "common/bit_field.h" |
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "common/swap.h" | 10 | #include "common/swap.h" |
| 11 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 12 | |||
| 13 | namespace Core { | ||
| 14 | class System; | ||
| 15 | } | ||
| 11 | 16 | ||
| 12 | namespace Service::Nvidia::Devices { | 17 | namespace Service::Nvidia::Devices { |
| 13 | 18 | ||
| @@ -15,7 +20,7 @@ namespace Service::Nvidia::Devices { | |||
| 15 | /// implement the ioctl interface. | 20 | /// implement the ioctl interface. |
| 16 | class nvdevice { | 21 | class nvdevice { |
| 17 | public: | 22 | public: |
| 18 | nvdevice() = default; | 23 | explicit nvdevice(Core::System& system) : system{system} {}; |
| 19 | virtual ~nvdevice() = default; | 24 | virtual ~nvdevice() = default; |
| 20 | union Ioctl { | 25 | union Ioctl { |
| 21 | u32_le raw; | 26 | u32_le raw; |
| @@ -33,7 +38,11 @@ public: | |||
| 33 | * @param output A buffer where the output data will be written to. | 38 | * @param output A buffer where the output data will be written to. |
| 34 | * @returns The result code of the ioctl. | 39 | * @returns The result code of the ioctl. |
| 35 | */ | 40 | */ |
| 36 | virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) = 0; | 41 | virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 42 | IoctlCtrl& ctrl) = 0; | ||
| 43 | |||
| 44 | protected: | ||
| 45 | Core::System& system; | ||
| 37 | }; | 46 | }; |
| 38 | 47 | ||
| 39 | } // namespace Service::Nvidia::Devices | 48 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 20c7c39aa..926a1285d 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | |||
| @@ -13,10 +13,12 @@ | |||
| 13 | 13 | ||
| 14 | namespace Service::Nvidia::Devices { | 14 | namespace Service::Nvidia::Devices { |
| 15 | 15 | ||
| 16 | nvdisp_disp0::nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} | 16 | nvdisp_disp0::nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) |
| 17 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} | ||
| 17 | nvdisp_disp0 ::~nvdisp_disp0() = default; | 18 | nvdisp_disp0 ::~nvdisp_disp0() = default; |
| 18 | 19 | ||
| 19 | u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 20 | u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 21 | IoctlCtrl& ctrl) { | ||
| 20 | UNIMPLEMENTED_MSG("Unimplemented ioctl"); | 22 | UNIMPLEMENTED_MSG("Unimplemented ioctl"); |
| 21 | return 0; | 23 | return 0; |
| 22 | } | 24 | } |
| @@ -34,9 +36,8 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 | |||
| 34 | addr, offset, width, height, stride, static_cast<PixelFormat>(format), | 36 | addr, offset, width, height, stride, static_cast<PixelFormat>(format), |
| 35 | transform, crop_rect}; | 37 | transform, crop_rect}; |
| 36 | 38 | ||
| 37 | auto& instance = Core::System::GetInstance(); | 39 | system.GetPerfStats().EndGameFrame(); |
| 38 | instance.GetPerfStats().EndGameFrame(); | 40 | system.GPU().SwapBuffers(&framebuffer); |
| 39 | instance.GPU().SwapBuffers(framebuffer); | ||
| 40 | } | 41 | } |
| 41 | 42 | ||
| 42 | } // namespace Service::Nvidia::Devices | 43 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index 12f3ef825..e79e490ff 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h | |||
| @@ -17,10 +17,11 @@ class nvmap; | |||
| 17 | 17 | ||
| 18 | class nvdisp_disp0 final : public nvdevice { | 18 | class nvdisp_disp0 final : public nvdevice { |
| 19 | public: | 19 | public: |
| 20 | explicit nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev); | 20 | explicit nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); |
| 21 | ~nvdisp_disp0() override; | 21 | ~nvdisp_disp0() override; |
| 22 | 22 | ||
| 23 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 23 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 24 | IoctlCtrl& ctrl) override; | ||
| 24 | 25 | ||
| 25 | /// Performs a screen flip, drawing the buffer pointed to by the handle. | 26 | /// Performs a screen flip, drawing the buffer pointed to by the handle. |
| 26 | void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, | 27 | void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index af62d33d2..24ab3f2e9 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | |||
| @@ -22,10 +22,12 @@ enum { | |||
| 22 | }; | 22 | }; |
| 23 | } | 23 | } |
| 24 | 24 | ||
| 25 | nvhost_as_gpu::nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} | 25 | nvhost_as_gpu::nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) |
| 26 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} | ||
| 26 | nvhost_as_gpu::~nvhost_as_gpu() = default; | 27 | nvhost_as_gpu::~nvhost_as_gpu() = default; |
| 27 | 28 | ||
| 28 | u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 29 | u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 30 | IoctlCtrl& ctrl) { | ||
| 29 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 31 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 30 | command.raw, input.size(), output.size()); | 32 | command.raw, input.size(), output.size()); |
| 31 | 33 | ||
| @@ -65,7 +67,7 @@ u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>& | |||
| 65 | LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, | 67 | LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, |
| 66 | params.page_size, params.flags); | 68 | params.page_size, params.flags); |
| 67 | 69 | ||
| 68 | auto& gpu = Core::System::GetInstance().GPU(); | 70 | auto& gpu = system.GPU(); |
| 69 | const u64 size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)}; | 71 | const u64 size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)}; |
| 70 | if (params.flags & 1) { | 72 | if (params.flags & 1) { |
| 71 | params.offset = gpu.MemoryManager().AllocateSpace(params.offset, size, 1); | 73 | params.offset = gpu.MemoryManager().AllocateSpace(params.offset, size, 1); |
| @@ -85,7 +87,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output) | |||
| 85 | std::vector<IoctlRemapEntry> entries(num_entries); | 87 | std::vector<IoctlRemapEntry> entries(num_entries); |
| 86 | std::memcpy(entries.data(), input.data(), input.size()); | 88 | std::memcpy(entries.data(), input.data(), input.size()); |
| 87 | 89 | ||
| 88 | auto& gpu = Core::System::GetInstance().GPU(); | 90 | auto& gpu = system.GPU(); |
| 89 | for (const auto& entry : entries) { | 91 | for (const auto& entry : entries) { |
| 90 | LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", | 92 | LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", |
| 91 | entry.offset, entry.nvmap_handle, entry.pages); | 93 | entry.offset, entry.nvmap_handle, entry.pages); |
| @@ -136,7 +138,7 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 136 | // case to prevent unexpected behavior. | 138 | // case to prevent unexpected behavior. |
| 137 | ASSERT(object->id == params.nvmap_handle); | 139 | ASSERT(object->id == params.nvmap_handle); |
| 138 | 140 | ||
| 139 | auto& gpu = Core::System::GetInstance().GPU(); | 141 | auto& gpu = system.GPU(); |
| 140 | 142 | ||
| 141 | if (params.flags & 1) { | 143 | if (params.flags & 1) { |
| 142 | params.offset = gpu.MemoryManager().MapBufferEx(object->addr, params.offset, object->size); | 144 | params.offset = gpu.MemoryManager().MapBufferEx(object->addr, params.offset, object->size); |
| @@ -173,8 +175,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 173 | return 0; | 175 | return 0; |
| 174 | } | 176 | } |
| 175 | 177 | ||
| 176 | params.offset = Core::System::GetInstance().GPU().MemoryManager().UnmapBuffer(params.offset, | 178 | params.offset = system.GPU().MemoryManager().UnmapBuffer(params.offset, itr->second.size); |
| 177 | itr->second.size); | ||
| 178 | buffer_mappings.erase(itr->second.offset); | 179 | buffer_mappings.erase(itr->second.offset); |
| 179 | 180 | ||
| 180 | std::memcpy(output.data(), &params, output.size()); | 181 | std::memcpy(output.data(), &params, output.size()); |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index eb14b1da8..30ca5f4c3 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | |||
| @@ -17,10 +17,11 @@ class nvmap; | |||
| 17 | 17 | ||
| 18 | class nvhost_as_gpu final : public nvdevice { | 18 | class nvhost_as_gpu final : public nvdevice { |
| 19 | public: | 19 | public: |
| 20 | explicit nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev); | 20 | explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); |
| 21 | ~nvhost_as_gpu() override; | 21 | ~nvhost_as_gpu() override; |
| 22 | 22 | ||
| 23 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 23 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 24 | IoctlCtrl& ctrl) override; | ||
| 24 | 25 | ||
| 25 | private: | 26 | private: |
| 26 | enum class IoctlCommand : u32_le { | 27 | enum class IoctlCommand : u32_le { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index b39fb9ef9..9a66a5f88 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | |||
| @@ -7,14 +7,20 @@ | |||
| 7 | 7 | ||
| 8 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 9 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 10 | #include "core/core.h" | ||
| 11 | #include "core/hle/kernel/readable_event.h" | ||
| 12 | #include "core/hle/kernel/writable_event.h" | ||
| 10 | #include "core/hle/service/nvdrv/devices/nvhost_ctrl.h" | 13 | #include "core/hle/service/nvdrv/devices/nvhost_ctrl.h" |
| 14 | #include "video_core/gpu.h" | ||
| 11 | 15 | ||
| 12 | namespace Service::Nvidia::Devices { | 16 | namespace Service::Nvidia::Devices { |
| 13 | 17 | ||
| 14 | nvhost_ctrl::nvhost_ctrl() = default; | 18 | nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface) |
| 19 | : nvdevice(system), events_interface{events_interface} {} | ||
| 15 | nvhost_ctrl::~nvhost_ctrl() = default; | 20 | nvhost_ctrl::~nvhost_ctrl() = default; |
| 16 | 21 | ||
| 17 | u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 22 | u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 23 | IoctlCtrl& ctrl) { | ||
| 18 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 24 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 19 | command.raw, input.size(), output.size()); | 25 | command.raw, input.size(), output.size()); |
| 20 | 26 | ||
| @@ -22,11 +28,15 @@ u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector< | |||
| 22 | case IoctlCommand::IocGetConfigCommand: | 28 | case IoctlCommand::IocGetConfigCommand: |
| 23 | return NvOsGetConfigU32(input, output); | 29 | return NvOsGetConfigU32(input, output); |
| 24 | case IoctlCommand::IocCtrlEventWaitCommand: | 30 | case IoctlCommand::IocCtrlEventWaitCommand: |
| 25 | return IocCtrlEventWait(input, output, false); | 31 | return IocCtrlEventWait(input, output, false, ctrl); |
| 26 | case IoctlCommand::IocCtrlEventWaitAsyncCommand: | 32 | case IoctlCommand::IocCtrlEventWaitAsyncCommand: |
| 27 | return IocCtrlEventWait(input, output, true); | 33 | return IocCtrlEventWait(input, output, true, ctrl); |
| 28 | case IoctlCommand::IocCtrlEventRegisterCommand: | 34 | case IoctlCommand::IocCtrlEventRegisterCommand: |
| 29 | return IocCtrlEventRegister(input, output); | 35 | return IocCtrlEventRegister(input, output); |
| 36 | case IoctlCommand::IocCtrlEventUnregisterCommand: | ||
| 37 | return IocCtrlEventUnregister(input, output); | ||
| 38 | case IoctlCommand::IocCtrlEventSignalCommand: | ||
| 39 | return IocCtrlEventSignal(input, output); | ||
| 30 | } | 40 | } |
| 31 | UNIMPLEMENTED_MSG("Unimplemented ioctl"); | 41 | UNIMPLEMENTED_MSG("Unimplemented ioctl"); |
| 32 | return 0; | 42 | return 0; |
| @@ -41,23 +51,137 @@ u32 nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& | |||
| 41 | } | 51 | } |
| 42 | 52 | ||
| 43 | u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, | 53 | u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, |
| 44 | bool is_async) { | 54 | bool is_async, IoctlCtrl& ctrl) { |
| 45 | IocCtrlEventWaitParams params{}; | 55 | IocCtrlEventWaitParams params{}; |
| 46 | std::memcpy(&params, input.data(), sizeof(params)); | 56 | std::memcpy(&params, input.data(), sizeof(params)); |
| 47 | LOG_WARNING(Service_NVDRV, | 57 | LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}", |
| 48 | "(STUBBED) called, syncpt_id={}, threshold={}, timeout={}, is_async={}", | 58 | params.syncpt_id, params.threshold, params.timeout, is_async); |
| 49 | params.syncpt_id, params.threshold, params.timeout, is_async); | ||
| 50 | 59 | ||
| 51 | // TODO(Subv): Implement actual syncpt waiting. | 60 | if (params.syncpt_id >= MaxSyncPoints) { |
| 52 | params.value = 0; | 61 | return NvResult::BadParameter; |
| 62 | } | ||
| 63 | |||
| 64 | auto& gpu = system.GPU(); | ||
| 65 | // This is mostly to take into account unimplemented features. As synced | ||
| 66 | // gpu is always synced. | ||
| 67 | if (!gpu.IsAsync()) { | ||
| 68 | return NvResult::Success; | ||
| 69 | } | ||
| 70 | auto lock = gpu.LockSync(); | ||
| 71 | const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); | ||
| 72 | const s32 diff = current_syncpoint_value - params.threshold; | ||
| 73 | if (diff >= 0) { | ||
| 74 | params.value = current_syncpoint_value; | ||
| 75 | std::memcpy(output.data(), &params, sizeof(params)); | ||
| 76 | return NvResult::Success; | ||
| 77 | } | ||
| 78 | const u32 target_value = current_syncpoint_value - diff; | ||
| 79 | |||
| 80 | if (!is_async) { | ||
| 81 | params.value = 0; | ||
| 82 | } | ||
| 83 | |||
| 84 | if (params.timeout == 0) { | ||
| 85 | std::memcpy(output.data(), &params, sizeof(params)); | ||
| 86 | return NvResult::Timeout; | ||
| 87 | } | ||
| 88 | |||
| 89 | u32 event_id; | ||
| 90 | if (is_async) { | ||
| 91 | event_id = params.value & 0x00FF; | ||
| 92 | if (event_id >= MaxNvEvents) { | ||
| 93 | std::memcpy(output.data(), &params, sizeof(params)); | ||
| 94 | return NvResult::BadParameter; | ||
| 95 | } | ||
| 96 | } else { | ||
| 97 | if (ctrl.fresh_call) { | ||
| 98 | const auto result = events_interface.GetFreeEvent(); | ||
| 99 | if (result) { | ||
| 100 | event_id = *result; | ||
| 101 | } else { | ||
| 102 | LOG_CRITICAL(Service_NVDRV, "No Free Events available!"); | ||
| 103 | event_id = params.value & 0x00FF; | ||
| 104 | } | ||
| 105 | } else { | ||
| 106 | event_id = ctrl.event_id; | ||
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 | EventState status = events_interface.status[event_id]; | ||
| 111 | if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) { | ||
| 112 | events_interface.SetEventStatus(event_id, EventState::Waiting); | ||
| 113 | events_interface.assigned_syncpt[event_id] = params.syncpt_id; | ||
| 114 | events_interface.assigned_value[event_id] = target_value; | ||
| 115 | if (is_async) { | ||
| 116 | params.value = params.syncpt_id << 4; | ||
| 117 | } else { | ||
| 118 | params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; | ||
| 119 | } | ||
| 120 | params.value |= event_id; | ||
| 121 | events_interface.events[event_id].writable->Clear(); | ||
| 122 | gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); | ||
| 123 | if (!is_async && ctrl.fresh_call) { | ||
| 124 | ctrl.must_delay = true; | ||
| 125 | ctrl.timeout = params.timeout; | ||
| 126 | ctrl.event_id = event_id; | ||
| 127 | return NvResult::Timeout; | ||
| 128 | } | ||
| 129 | std::memcpy(output.data(), &params, sizeof(params)); | ||
| 130 | return NvResult::Timeout; | ||
| 131 | } | ||
| 53 | std::memcpy(output.data(), &params, sizeof(params)); | 132 | std::memcpy(output.data(), &params, sizeof(params)); |
| 54 | return 0; | 133 | return NvResult::BadParameter; |
| 55 | } | 134 | } |
| 56 | 135 | ||
| 57 | u32 nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) { | 136 | u32 nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) { |
| 58 | LOG_WARNING(Service_NVDRV, "(STUBBED) called"); | 137 | IocCtrlEventRegisterParams params{}; |
| 59 | // TODO(bunnei): Implement this. | 138 | std::memcpy(&params, input.data(), sizeof(params)); |
| 60 | return 0; | 139 | const u32 event_id = params.user_event_id & 0x00FF; |
| 140 | LOG_DEBUG(Service_NVDRV, " called, user_event_id: {:X}", event_id); | ||
| 141 | if (event_id >= MaxNvEvents) { | ||
| 142 | return NvResult::BadParameter; | ||
| 143 | } | ||
| 144 | if (events_interface.registered[event_id]) { | ||
| 145 | return NvResult::BadParameter; | ||
| 146 | } | ||
| 147 | events_interface.RegisterEvent(event_id); | ||
| 148 | return NvResult::Success; | ||
| 149 | } | ||
| 150 | |||
| 151 | u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output) { | ||
| 152 | IocCtrlEventUnregisterParams params{}; | ||
| 153 | std::memcpy(&params, input.data(), sizeof(params)); | ||
| 154 | const u32 event_id = params.user_event_id & 0x00FF; | ||
| 155 | LOG_DEBUG(Service_NVDRV, " called, user_event_id: {:X}", event_id); | ||
| 156 | if (event_id >= MaxNvEvents) { | ||
| 157 | return NvResult::BadParameter; | ||
| 158 | } | ||
| 159 | if (!events_interface.registered[event_id]) { | ||
| 160 | return NvResult::BadParameter; | ||
| 161 | } | ||
| 162 | events_interface.UnregisterEvent(event_id); | ||
| 163 | return NvResult::Success; | ||
| 164 | } | ||
| 165 | |||
| 166 | u32 nvhost_ctrl::IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output) { | ||
| 167 | IocCtrlEventSignalParams params{}; | ||
| 168 | std::memcpy(&params, input.data(), sizeof(params)); | ||
| 169 | // TODO(Blinkhawk): This is normally called when an NvEvents timeout on WaitSynchronization | ||
| 170 | // It is believed from RE to cancel the GPU Event. However, better research is required | ||
| 171 | u32 event_id = params.user_event_id & 0x00FF; | ||
| 172 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, user_event_id: {:X}", event_id); | ||
| 173 | if (event_id >= MaxNvEvents) { | ||
| 174 | return NvResult::BadParameter; | ||
| 175 | } | ||
| 176 | if (events_interface.status[event_id] == EventState::Waiting) { | ||
| 177 | auto& gpu = system.GPU(); | ||
| 178 | if (gpu.CancelSyncptInterrupt(events_interface.assigned_syncpt[event_id], | ||
| 179 | events_interface.assigned_value[event_id])) { | ||
| 180 | events_interface.LiberateEvent(event_id); | ||
| 181 | events_interface.events[event_id].writable->Signal(); | ||
| 182 | } | ||
| 183 | } | ||
| 184 | return NvResult::Success; | ||
| 61 | } | 185 | } |
| 62 | 186 | ||
| 63 | } // namespace Service::Nvidia::Devices | 187 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h index 6d0de2212..14e6e7e57 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | |||
| @@ -8,15 +8,17 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 10 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 11 | #include "core/hle/service/nvdrv/nvdrv.h" | ||
| 11 | 12 | ||
| 12 | namespace Service::Nvidia::Devices { | 13 | namespace Service::Nvidia::Devices { |
| 13 | 14 | ||
| 14 | class nvhost_ctrl final : public nvdevice { | 15 | class nvhost_ctrl final : public nvdevice { |
| 15 | public: | 16 | public: |
| 16 | nvhost_ctrl(); | 17 | explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface); |
| 17 | ~nvhost_ctrl() override; | 18 | ~nvhost_ctrl() override; |
| 18 | 19 | ||
| 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 20 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 21 | IoctlCtrl& ctrl) override; | ||
| 20 | 22 | ||
| 21 | private: | 23 | private: |
| 22 | enum class IoctlCommand : u32_le { | 24 | enum class IoctlCommand : u32_le { |
| @@ -132,9 +134,16 @@ private: | |||
| 132 | 134 | ||
| 133 | u32 NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output); | 135 | u32 NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output); |
| 134 | 136 | ||
| 135 | u32 IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async); | 137 | u32 IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async, |
| 138 | IoctlCtrl& ctrl); | ||
| 136 | 139 | ||
| 137 | u32 IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output); | 140 | u32 IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output); |
| 141 | |||
| 142 | u32 IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output); | ||
| 143 | |||
| 144 | u32 IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output); | ||
| 145 | |||
| 146 | EventInterface& events_interface; | ||
| 138 | }; | 147 | }; |
| 139 | 148 | ||
| 140 | } // namespace Service::Nvidia::Devices | 149 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp index 0e28755bd..988effd90 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp | |||
| @@ -12,10 +12,11 @@ | |||
| 12 | 12 | ||
| 13 | namespace Service::Nvidia::Devices { | 13 | namespace Service::Nvidia::Devices { |
| 14 | 14 | ||
| 15 | nvhost_ctrl_gpu::nvhost_ctrl_gpu() = default; | 15 | nvhost_ctrl_gpu::nvhost_ctrl_gpu(Core::System& system) : nvdevice(system) {} |
| 16 | nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default; | 16 | nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default; |
| 17 | 17 | ||
| 18 | u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 18 | u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 19 | IoctlCtrl& ctrl) { | ||
| 19 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 20 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 20 | command.raw, input.size(), output.size()); | 21 | command.raw, input.size(), output.size()); |
| 21 | 22 | ||
| @@ -185,7 +186,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o | |||
| 185 | 186 | ||
| 186 | IoctlGetGpuTime params{}; | 187 | IoctlGetGpuTime params{}; |
| 187 | std::memcpy(&params, input.data(), input.size()); | 188 | std::memcpy(&params, input.data(), input.size()); |
| 188 | const auto ns = Core::Timing::CyclesToNs(Core::System::GetInstance().CoreTiming().GetTicks()); | 189 | const auto ns = Core::Timing::CyclesToNs(system.CoreTiming().GetTicks()); |
| 189 | params.gpu_time = static_cast<u64_le>(ns.count()); | 190 | params.gpu_time = static_cast<u64_le>(ns.count()); |
| 190 | std::memcpy(output.data(), &params, output.size()); | 191 | std::memcpy(output.data(), &params, output.size()); |
| 191 | return 0; | 192 | return 0; |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h index 240435eea..2b035ae3f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h | |||
| @@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices { | |||
| 13 | 13 | ||
| 14 | class nvhost_ctrl_gpu final : public nvdevice { | 14 | class nvhost_ctrl_gpu final : public nvdevice { |
| 15 | public: | 15 | public: |
| 16 | nvhost_ctrl_gpu(); | 16 | explicit nvhost_ctrl_gpu(Core::System& system); |
| 17 | ~nvhost_ctrl_gpu() override; | 17 | ~nvhost_ctrl_gpu() override; |
| 18 | 18 | ||
| 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 20 | IoctlCtrl& ctrl) override; | ||
| 20 | 21 | ||
| 21 | private: | 22 | private: |
| 22 | enum class IoctlCommand : u32_le { | 23 | enum class IoctlCommand : u32_le { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 8ce7bc7a5..241dac881 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | |||
| @@ -13,10 +13,12 @@ | |||
| 13 | 13 | ||
| 14 | namespace Service::Nvidia::Devices { | 14 | namespace Service::Nvidia::Devices { |
| 15 | 15 | ||
| 16 | nvhost_gpu::nvhost_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} | 16 | nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) |
| 17 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} | ||
| 17 | nvhost_gpu::~nvhost_gpu() = default; | 18 | nvhost_gpu::~nvhost_gpu() = default; |
| 18 | 19 | ||
| 19 | u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 20 | u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 21 | IoctlCtrl& ctrl) { | ||
| 20 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 22 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 21 | command.raw, input.size(), output.size()); | 23 | command.raw, input.size(), output.size()); |
| 22 | 24 | ||
| @@ -119,8 +121,10 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 119 | params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, | 121 | params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, |
| 120 | params.unk3); | 122 | params.unk3); |
| 121 | 123 | ||
| 122 | params.fence_out.id = 0; | 124 | auto& gpu = system.GPU(); |
| 123 | params.fence_out.value = 0; | 125 | params.fence_out.id = assigned_syncpoints; |
| 126 | params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints); | ||
| 127 | assigned_syncpoints++; | ||
| 124 | std::memcpy(output.data(), &params, output.size()); | 128 | std::memcpy(output.data(), &params, output.size()); |
| 125 | return 0; | 129 | return 0; |
| 126 | } | 130 | } |
| @@ -143,7 +147,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp | |||
| 143 | IoctlSubmitGpfifo params{}; | 147 | IoctlSubmitGpfifo params{}; |
| 144 | std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); | 148 | std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); |
| 145 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", | 149 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", |
| 146 | params.address, params.num_entries, params.flags); | 150 | params.address, params.num_entries, params.flags.raw); |
| 147 | 151 | ||
| 148 | ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + | 152 | ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + |
| 149 | params.num_entries * sizeof(Tegra::CommandListHeader), | 153 | params.num_entries * sizeof(Tegra::CommandListHeader), |
| @@ -153,10 +157,18 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp | |||
| 153 | std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], | 157 | std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], |
| 154 | params.num_entries * sizeof(Tegra::CommandListHeader)); | 158 | params.num_entries * sizeof(Tegra::CommandListHeader)); |
| 155 | 159 | ||
| 156 | Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); | 160 | UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); |
| 161 | UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); | ||
| 162 | |||
| 163 | auto& gpu = system.GPU(); | ||
| 164 | u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); | ||
| 165 | if (params.flags.increment.Value()) { | ||
| 166 | params.fence_out.value += current_syncpoint_value; | ||
| 167 | } else { | ||
| 168 | params.fence_out.value = current_syncpoint_value; | ||
| 169 | } | ||
| 170 | gpu.PushGPUEntries(std::move(entries)); | ||
| 157 | 171 | ||
| 158 | params.fence_out.id = 0; | ||
| 159 | params.fence_out.value = 0; | ||
| 160 | std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); | 172 | std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); |
| 161 | return 0; | 173 | return 0; |
| 162 | } | 174 | } |
| @@ -168,16 +180,24 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output) | |||
| 168 | IoctlSubmitGpfifo params{}; | 180 | IoctlSubmitGpfifo params{}; |
| 169 | std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); | 181 | std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); |
| 170 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", | 182 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", |
| 171 | params.address, params.num_entries, params.flags); | 183 | params.address, params.num_entries, params.flags.raw); |
| 172 | 184 | ||
| 173 | Tegra::CommandList entries(params.num_entries); | 185 | Tegra::CommandList entries(params.num_entries); |
| 174 | Memory::ReadBlock(params.address, entries.data(), | 186 | Memory::ReadBlock(params.address, entries.data(), |
| 175 | params.num_entries * sizeof(Tegra::CommandListHeader)); | 187 | params.num_entries * sizeof(Tegra::CommandListHeader)); |
| 176 | 188 | ||
| 177 | Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); | 189 | UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); |
| 190 | UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); | ||
| 191 | |||
| 192 | auto& gpu = system.GPU(); | ||
| 193 | u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); | ||
| 194 | if (params.flags.increment.Value()) { | ||
| 195 | params.fence_out.value += current_syncpoint_value; | ||
| 196 | } else { | ||
| 197 | params.fence_out.value = current_syncpoint_value; | ||
| 198 | } | ||
| 199 | gpu.PushGPUEntries(std::move(entries)); | ||
| 178 | 200 | ||
| 179 | params.fence_out.id = 0; | ||
| 180 | params.fence_out.value = 0; | ||
| 181 | std::memcpy(output.data(), &params, output.size()); | 201 | std::memcpy(output.data(), &params, output.size()); |
| 182 | return 0; | 202 | return 0; |
| 183 | } | 203 | } |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 62beb5c0c..d2e8fbae9 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "common/swap.h" | 11 | #include "common/swap.h" |
| 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 13 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 13 | 14 | ||
| 14 | namespace Service::Nvidia::Devices { | 15 | namespace Service::Nvidia::Devices { |
| 15 | 16 | ||
| @@ -20,10 +21,11 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b); | |||
| 20 | 21 | ||
| 21 | class nvhost_gpu final : public nvdevice { | 22 | class nvhost_gpu final : public nvdevice { |
| 22 | public: | 23 | public: |
| 23 | explicit nvhost_gpu(std::shared_ptr<nvmap> nvmap_dev); | 24 | explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); |
| 24 | ~nvhost_gpu() override; | 25 | ~nvhost_gpu() override; |
| 25 | 26 | ||
| 26 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 27 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 28 | IoctlCtrl& ctrl) override; | ||
| 27 | 29 | ||
| 28 | private: | 30 | private: |
| 29 | enum class IoctlCommand : u32_le { | 31 | enum class IoctlCommand : u32_le { |
| @@ -113,11 +115,7 @@ private: | |||
| 113 | static_assert(sizeof(IoctlGetErrorNotification) == 16, | 115 | static_assert(sizeof(IoctlGetErrorNotification) == 16, |
| 114 | "IoctlGetErrorNotification is incorrect size"); | 116 | "IoctlGetErrorNotification is incorrect size"); |
| 115 | 117 | ||
| 116 | struct IoctlFence { | 118 | static_assert(sizeof(Fence) == 8, "Fence is incorrect size"); |
| 117 | u32_le id; | ||
| 118 | u32_le value; | ||
| 119 | }; | ||
| 120 | static_assert(sizeof(IoctlFence) == 8, "IoctlFence is incorrect size"); | ||
| 121 | 119 | ||
| 122 | struct IoctlAllocGpfifoEx { | 120 | struct IoctlAllocGpfifoEx { |
| 123 | u32_le num_entries; | 121 | u32_le num_entries; |
| @@ -132,13 +130,13 @@ private: | |||
| 132 | static_assert(sizeof(IoctlAllocGpfifoEx) == 32, "IoctlAllocGpfifoEx is incorrect size"); | 130 | static_assert(sizeof(IoctlAllocGpfifoEx) == 32, "IoctlAllocGpfifoEx is incorrect size"); |
| 133 | 131 | ||
| 134 | struct IoctlAllocGpfifoEx2 { | 132 | struct IoctlAllocGpfifoEx2 { |
| 135 | u32_le num_entries; // in | 133 | u32_le num_entries; // in |
| 136 | u32_le flags; // in | 134 | u32_le flags; // in |
| 137 | u32_le unk0; // in (1 works) | 135 | u32_le unk0; // in (1 works) |
| 138 | IoctlFence fence_out; // out | 136 | Fence fence_out; // out |
| 139 | u32_le unk1; // in | 137 | u32_le unk1; // in |
| 140 | u32_le unk2; // in | 138 | u32_le unk2; // in |
| 141 | u32_le unk3; // in | 139 | u32_le unk3; // in |
| 142 | }; | 140 | }; |
| 143 | static_assert(sizeof(IoctlAllocGpfifoEx2) == 32, "IoctlAllocGpfifoEx2 is incorrect size"); | 141 | static_assert(sizeof(IoctlAllocGpfifoEx2) == 32, "IoctlAllocGpfifoEx2 is incorrect size"); |
| 144 | 142 | ||
| @@ -153,10 +151,16 @@ private: | |||
| 153 | struct IoctlSubmitGpfifo { | 151 | struct IoctlSubmitGpfifo { |
| 154 | u64_le address; // pointer to gpfifo entry structs | 152 | u64_le address; // pointer to gpfifo entry structs |
| 155 | u32_le num_entries; // number of fence objects being submitted | 153 | u32_le num_entries; // number of fence objects being submitted |
| 156 | u32_le flags; | 154 | union { |
| 157 | IoctlFence fence_out; // returned new fence object for others to wait on | 155 | u32_le raw; |
| 158 | }; | 156 | BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list |
| 159 | static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(IoctlFence), | 157 | BitField<1, 1, u32_le> add_increment; // append an increment to the list |
| 158 | BitField<2, 1, u32_le> new_hw_format; // Mostly ignored | ||
| 159 | BitField<8, 1, u32_le> increment; // increment the returned fence | ||
| 160 | } flags; | ||
| 161 | Fence fence_out; // returned new fence object for others to wait on | ||
| 162 | }; | ||
| 163 | static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence), | ||
| 160 | "IoctlSubmitGpfifo is incorrect size"); | 164 | "IoctlSubmitGpfifo is incorrect size"); |
| 161 | 165 | ||
| 162 | struct IoctlGetWaitbase { | 166 | struct IoctlGetWaitbase { |
| @@ -184,6 +188,7 @@ private: | |||
| 184 | u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output); | 188 | u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output); |
| 185 | 189 | ||
| 186 | std::shared_ptr<nvmap> nvmap_dev; | 190 | std::shared_ptr<nvmap> nvmap_dev; |
| 191 | u32 assigned_syncpoints{}; | ||
| 187 | }; | 192 | }; |
| 188 | 193 | ||
| 189 | } // namespace Service::Nvidia::Devices | 194 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index f5e8ea7c3..f572ad30f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | |||
| @@ -10,10 +10,11 @@ | |||
| 10 | 10 | ||
| 11 | namespace Service::Nvidia::Devices { | 11 | namespace Service::Nvidia::Devices { |
| 12 | 12 | ||
| 13 | nvhost_nvdec::nvhost_nvdec() = default; | 13 | nvhost_nvdec::nvhost_nvdec(Core::System& system) : nvdevice(system) {} |
| 14 | nvhost_nvdec::~nvhost_nvdec() = default; | 14 | nvhost_nvdec::~nvhost_nvdec() = default; |
| 15 | 15 | ||
| 16 | u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 16 | u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 17 | IoctlCtrl& ctrl) { | ||
| 17 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 18 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 18 | command.raw, input.size(), output.size()); | 19 | command.raw, input.size(), output.size()); |
| 19 | 20 | ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index 0e7b284f8..2710f0511 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | |||
| @@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices { | |||
| 13 | 13 | ||
| 14 | class nvhost_nvdec final : public nvdevice { | 14 | class nvhost_nvdec final : public nvdevice { |
| 15 | public: | 15 | public: |
| 16 | nvhost_nvdec(); | 16 | explicit nvhost_nvdec(Core::System& system); |
| 17 | ~nvhost_nvdec() override; | 17 | ~nvhost_nvdec() override; |
| 18 | 18 | ||
| 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 20 | IoctlCtrl& ctrl) override; | ||
| 20 | 21 | ||
| 21 | private: | 22 | private: |
| 22 | enum class IoctlCommand : u32_le { | 23 | enum class IoctlCommand : u32_le { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp index 3e0951ab0..38282956f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp | |||
| @@ -10,10 +10,11 @@ | |||
| 10 | 10 | ||
| 11 | namespace Service::Nvidia::Devices { | 11 | namespace Service::Nvidia::Devices { |
| 12 | 12 | ||
| 13 | nvhost_nvjpg::nvhost_nvjpg() = default; | 13 | nvhost_nvjpg::nvhost_nvjpg(Core::System& system) : nvdevice(system) {} |
| 14 | nvhost_nvjpg::~nvhost_nvjpg() = default; | 14 | nvhost_nvjpg::~nvhost_nvjpg() = default; |
| 15 | 15 | ||
| 16 | u32 nvhost_nvjpg::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 16 | u32 nvhost_nvjpg::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 17 | IoctlCtrl& ctrl) { | ||
| 17 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 18 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 18 | command.raw, input.size(), output.size()); | 19 | command.raw, input.size(), output.size()); |
| 19 | 20 | ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h index 89fd5e95e..379766693 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h | |||
| @@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices { | |||
| 13 | 13 | ||
| 14 | class nvhost_nvjpg final : public nvdevice { | 14 | class nvhost_nvjpg final : public nvdevice { |
| 15 | public: | 15 | public: |
| 16 | nvhost_nvjpg(); | 16 | explicit nvhost_nvjpg(Core::System& system); |
| 17 | ~nvhost_nvjpg() override; | 17 | ~nvhost_nvjpg() override; |
| 18 | 18 | ||
| 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 20 | IoctlCtrl& ctrl) override; | ||
| 20 | 21 | ||
| 21 | private: | 22 | private: |
| 22 | enum class IoctlCommand : u32_le { | 23 | enum class IoctlCommand : u32_le { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index d544f0f31..70e8091db 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | |||
| @@ -10,10 +10,11 @@ | |||
| 10 | 10 | ||
| 11 | namespace Service::Nvidia::Devices { | 11 | namespace Service::Nvidia::Devices { |
| 12 | 12 | ||
| 13 | nvhost_vic::nvhost_vic() = default; | 13 | nvhost_vic::nvhost_vic(Core::System& system) : nvdevice(system) {} |
| 14 | nvhost_vic::~nvhost_vic() = default; | 14 | nvhost_vic::~nvhost_vic() = default; |
| 15 | 15 | ||
| 16 | u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 16 | u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 17 | IoctlCtrl& ctrl) { | ||
| 17 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 18 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 18 | command.raw, input.size(), output.size()); | 19 | command.raw, input.size(), output.size()); |
| 19 | 20 | ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index fc24c3f9c..7d111977e 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h | |||
| @@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices { | |||
| 13 | 13 | ||
| 14 | class nvhost_vic final : public nvdevice { | 14 | class nvhost_vic final : public nvdevice { |
| 15 | public: | 15 | public: |
| 16 | nvhost_vic(); | 16 | explicit nvhost_vic(Core::System& system); |
| 17 | ~nvhost_vic() override; | 17 | ~nvhost_vic() override; |
| 18 | 18 | ||
| 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 20 | IoctlCtrl& ctrl) override; | ||
| 20 | 21 | ||
| 21 | private: | 22 | private: |
| 22 | enum class IoctlCommand : u32_le { | 23 | enum class IoctlCommand : u32_le { |
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp index 1ec796fc6..223b496b7 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.cpp +++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp | |||
| @@ -18,7 +18,7 @@ enum { | |||
| 18 | }; | 18 | }; |
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | nvmap::nvmap() = default; | 21 | nvmap::nvmap(Core::System& system) : nvdevice(system) {} |
| 22 | nvmap::~nvmap() = default; | 22 | nvmap::~nvmap() = default; |
| 23 | 23 | ||
| 24 | VAddr nvmap::GetObjectAddress(u32 handle) const { | 24 | VAddr nvmap::GetObjectAddress(u32 handle) const { |
| @@ -28,7 +28,8 @@ VAddr nvmap::GetObjectAddress(u32 handle) const { | |||
| 28 | return object->addr; | 28 | return object->addr; |
| 29 | } | 29 | } |
| 30 | 30 | ||
| 31 | u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 31 | u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 32 | IoctlCtrl& ctrl) { | ||
| 32 | switch (static_cast<IoctlCommand>(command.raw)) { | 33 | switch (static_cast<IoctlCommand>(command.raw)) { |
| 33 | case IoctlCommand::Create: | 34 | case IoctlCommand::Create: |
| 34 | return IocCreate(input, output); | 35 | return IocCreate(input, output); |
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h index 396230c19..bf4a101c2 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.h +++ b/src/core/hle/service/nvdrv/devices/nvmap.h | |||
| @@ -16,13 +16,14 @@ namespace Service::Nvidia::Devices { | |||
| 16 | 16 | ||
| 17 | class nvmap final : public nvdevice { | 17 | class nvmap final : public nvdevice { |
| 18 | public: | 18 | public: |
| 19 | nvmap(); | 19 | explicit nvmap(Core::System& system); |
| 20 | ~nvmap() override; | 20 | ~nvmap() override; |
| 21 | 21 | ||
| 22 | /// Returns the allocated address of an nvmap object given its handle. | 22 | /// Returns the allocated address of an nvmap object given its handle. |
| 23 | VAddr GetObjectAddress(u32 handle) const; | 23 | VAddr GetObjectAddress(u32 handle) const; |
| 24 | 24 | ||
| 25 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 25 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 26 | IoctlCtrl& ctrl) override; | ||
| 26 | 27 | ||
| 27 | /// Represents an nvmap object. | 28 | /// Represents an nvmap object. |
| 28 | struct Object { | 29 | struct Object { |
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp index b60fc748b..d5be64ed2 100644 --- a/src/core/hle/service/nvdrv/interface.cpp +++ b/src/core/hle/service/nvdrv/interface.cpp | |||
| @@ -8,12 +8,18 @@ | |||
| 8 | #include "core/hle/ipc_helpers.h" | 8 | #include "core/hle/ipc_helpers.h" |
| 9 | #include "core/hle/kernel/kernel.h" | 9 | #include "core/hle/kernel/kernel.h" |
| 10 | #include "core/hle/kernel/readable_event.h" | 10 | #include "core/hle/kernel/readable_event.h" |
| 11 | #include "core/hle/kernel/thread.h" | ||
| 11 | #include "core/hle/kernel/writable_event.h" | 12 | #include "core/hle/kernel/writable_event.h" |
| 12 | #include "core/hle/service/nvdrv/interface.h" | 13 | #include "core/hle/service/nvdrv/interface.h" |
| 14 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 13 | #include "core/hle/service/nvdrv/nvdrv.h" | 15 | #include "core/hle/service/nvdrv/nvdrv.h" |
| 14 | 16 | ||
| 15 | namespace Service::Nvidia { | 17 | namespace Service::Nvidia { |
| 16 | 18 | ||
| 19 | void NVDRV::SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) { | ||
| 20 | nvdrv->SignalSyncpt(syncpoint_id, value); | ||
| 21 | } | ||
| 22 | |||
| 17 | void NVDRV::Open(Kernel::HLERequestContext& ctx) { | 23 | void NVDRV::Open(Kernel::HLERequestContext& ctx) { |
| 18 | LOG_DEBUG(Service_NVDRV, "called"); | 24 | LOG_DEBUG(Service_NVDRV, "called"); |
| 19 | 25 | ||
| @@ -36,11 +42,31 @@ void NVDRV::Ioctl(Kernel::HLERequestContext& ctx) { | |||
| 36 | 42 | ||
| 37 | std::vector<u8> output(ctx.GetWriteBufferSize()); | 43 | std::vector<u8> output(ctx.GetWriteBufferSize()); |
| 38 | 44 | ||
| 45 | IoctlCtrl ctrl{}; | ||
| 46 | |||
| 47 | u32 result = nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output, ctrl); | ||
| 48 | |||
| 49 | if (ctrl.must_delay) { | ||
| 50 | ctrl.fresh_call = false; | ||
| 51 | ctx.SleepClientThread( | ||
| 52 | "NVServices::DelayedResponse", ctrl.timeout, | ||
| 53 | [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx, | ||
| 54 | Kernel::ThreadWakeupReason reason) { | ||
| 55 | IoctlCtrl ctrl2{ctrl}; | ||
| 56 | std::vector<u8> output2 = output; | ||
| 57 | u32 result = nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output2, ctrl2); | ||
| 58 | ctx.WriteBuffer(output2); | ||
| 59 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 60 | rb.Push(RESULT_SUCCESS); | ||
| 61 | rb.Push(result); | ||
| 62 | }, | ||
| 63 | nvdrv->GetEventWriteable(ctrl.event_id)); | ||
| 64 | } else { | ||
| 65 | ctx.WriteBuffer(output); | ||
| 66 | } | ||
| 39 | IPC::ResponseBuilder rb{ctx, 3}; | 67 | IPC::ResponseBuilder rb{ctx, 3}; |
| 40 | rb.Push(RESULT_SUCCESS); | 68 | rb.Push(RESULT_SUCCESS); |
| 41 | rb.Push(nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output)); | 69 | rb.Push(result); |
| 42 | |||
| 43 | ctx.WriteBuffer(output); | ||
| 44 | } | 70 | } |
| 45 | 71 | ||
| 46 | void NVDRV::Close(Kernel::HLERequestContext& ctx) { | 72 | void NVDRV::Close(Kernel::HLERequestContext& ctx) { |
| @@ -66,13 +92,19 @@ void NVDRV::Initialize(Kernel::HLERequestContext& ctx) { | |||
| 66 | void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) { | 92 | void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) { |
| 67 | IPC::RequestParser rp{ctx}; | 93 | IPC::RequestParser rp{ctx}; |
| 68 | u32 fd = rp.Pop<u32>(); | 94 | u32 fd = rp.Pop<u32>(); |
| 69 | u32 event_id = rp.Pop<u32>(); | 95 | // TODO(Blinkhawk): Figure out the meaning of the flag at bit 16 |
| 96 | u32 event_id = rp.Pop<u32>() & 0x000000FF; | ||
| 70 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id); | 97 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id); |
| 71 | 98 | ||
| 72 | IPC::ResponseBuilder rb{ctx, 3, 1}; | 99 | IPC::ResponseBuilder rb{ctx, 3, 1}; |
| 73 | rb.Push(RESULT_SUCCESS); | 100 | rb.Push(RESULT_SUCCESS); |
| 74 | rb.PushCopyObjects(query_event.readable); | 101 | if (event_id < MaxNvEvents) { |
| 75 | rb.Push<u32>(0); | 102 | rb.PushCopyObjects(nvdrv->GetEvent(event_id)); |
| 103 | rb.Push<u32>(NvResult::Success); | ||
| 104 | } else { | ||
| 105 | rb.Push<u32>(0); | ||
| 106 | rb.Push<u32>(NvResult::BadParameter); | ||
| 107 | } | ||
| 76 | } | 108 | } |
| 77 | 109 | ||
| 78 | void NVDRV::SetClientPID(Kernel::HLERequestContext& ctx) { | 110 | void NVDRV::SetClientPID(Kernel::HLERequestContext& ctx) { |
| @@ -127,10 +159,6 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name) | |||
| 127 | {13, &NVDRV::FinishInitialize, "FinishInitialize"}, | 159 | {13, &NVDRV::FinishInitialize, "FinishInitialize"}, |
| 128 | }; | 160 | }; |
| 129 | RegisterHandlers(functions); | 161 | RegisterHandlers(functions); |
| 130 | |||
| 131 | auto& kernel = Core::System::GetInstance().Kernel(); | ||
| 132 | query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, | ||
| 133 | "NVDRV::query_event"); | ||
| 134 | } | 162 | } |
| 135 | 163 | ||
| 136 | NVDRV::~NVDRV() = default; | 164 | NVDRV::~NVDRV() = default; |
diff --git a/src/core/hle/service/nvdrv/interface.h b/src/core/hle/service/nvdrv/interface.h index 5b4889910..10a0ecd52 100644 --- a/src/core/hle/service/nvdrv/interface.h +++ b/src/core/hle/service/nvdrv/interface.h | |||
| @@ -19,6 +19,8 @@ public: | |||
| 19 | NVDRV(std::shared_ptr<Module> nvdrv, const char* name); | 19 | NVDRV(std::shared_ptr<Module> nvdrv, const char* name); |
| 20 | ~NVDRV() override; | 20 | ~NVDRV() override; |
| 21 | 21 | ||
| 22 | void SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value); | ||
| 23 | |||
| 22 | private: | 24 | private: |
| 23 | void Open(Kernel::HLERequestContext& ctx); | 25 | void Open(Kernel::HLERequestContext& ctx); |
| 24 | void Ioctl(Kernel::HLERequestContext& ctx); | 26 | void Ioctl(Kernel::HLERequestContext& ctx); |
| @@ -33,8 +35,6 @@ private: | |||
| 33 | std::shared_ptr<Module> nvdrv; | 35 | std::shared_ptr<Module> nvdrv; |
| 34 | 36 | ||
| 35 | u64 pid{}; | 37 | u64 pid{}; |
| 36 | |||
| 37 | Kernel::EventPair query_event; | ||
| 38 | }; | 38 | }; |
| 39 | 39 | ||
| 40 | } // namespace Service::Nvidia | 40 | } // namespace Service::Nvidia |
diff --git a/src/core/hle/service/nvdrv/nvdata.h b/src/core/hle/service/nvdrv/nvdata.h new file mode 100644 index 000000000..ac03cbc23 --- /dev/null +++ b/src/core/hle/service/nvdrv/nvdata.h | |||
| @@ -0,0 +1,48 @@ | |||
| 1 | #pragma once | ||
| 2 | |||
| 3 | #include <array> | ||
| 4 | #include "common/common_types.h" | ||
| 5 | |||
| 6 | namespace Service::Nvidia { | ||
| 7 | |||
| 8 | constexpr u32 MaxSyncPoints = 192; | ||
| 9 | constexpr u32 MaxNvEvents = 64; | ||
| 10 | |||
| 11 | struct Fence { | ||
| 12 | s32 id; | ||
| 13 | u32 value; | ||
| 14 | }; | ||
| 15 | |||
| 16 | static_assert(sizeof(Fence) == 8, "Fence has wrong size"); | ||
| 17 | |||
| 18 | struct MultiFence { | ||
| 19 | u32 num_fences; | ||
| 20 | std::array<Fence, 4> fences; | ||
| 21 | }; | ||
| 22 | |||
| 23 | enum NvResult : u32 { | ||
| 24 | Success = 0, | ||
| 25 | BadParameter = 4, | ||
| 26 | Timeout = 5, | ||
| 27 | ResourceError = 15, | ||
| 28 | }; | ||
| 29 | |||
| 30 | enum class EventState { | ||
| 31 | Free = 0, | ||
| 32 | Registered = 1, | ||
| 33 | Waiting = 2, | ||
| 34 | Busy = 3, | ||
| 35 | }; | ||
| 36 | |||
| 37 | struct IoctlCtrl { | ||
| 38 | // First call done to the service for services that call themselves again after a call. | ||
| 39 | bool fresh_call{true}; | ||
| 40 | // Tells the Ioctl Wrapper that it must delay the IPC response and send the thread to sleep | ||
| 41 | bool must_delay{}; | ||
| 42 | // Timeout for the delay | ||
| 43 | s64 timeout{}; | ||
| 44 | // NV Event Id | ||
| 45 | s32 event_id{-1}; | ||
| 46 | }; | ||
| 47 | |||
| 48 | } // namespace Service::Nvidia | ||
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 6e4b8f2c6..2011a226a 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp | |||
| @@ -4,7 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #include <utility> | 5 | #include <utility> |
| 6 | 6 | ||
| 7 | #include <fmt/format.h> | ||
| 7 | #include "core/hle/ipc_helpers.h" | 8 | #include "core/hle/ipc_helpers.h" |
| 9 | #include "core/hle/kernel/readable_event.h" | ||
| 10 | #include "core/hle/kernel/writable_event.h" | ||
| 8 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 11 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 9 | #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" | 12 | #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" |
| 10 | #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" | 13 | #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" |
| @@ -22,8 +25,9 @@ | |||
| 22 | 25 | ||
| 23 | namespace Service::Nvidia { | 26 | namespace Service::Nvidia { |
| 24 | 27 | ||
| 25 | void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger) { | 28 | void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger, |
| 26 | auto module_ = std::make_shared<Module>(); | 29 | Core::System& system) { |
| 30 | auto module_ = std::make_shared<Module>(system); | ||
| 27 | std::make_shared<NVDRV>(module_, "nvdrv")->InstallAsService(service_manager); | 31 | std::make_shared<NVDRV>(module_, "nvdrv")->InstallAsService(service_manager); |
| 28 | std::make_shared<NVDRV>(module_, "nvdrv:a")->InstallAsService(service_manager); | 32 | std::make_shared<NVDRV>(module_, "nvdrv:a")->InstallAsService(service_manager); |
| 29 | std::make_shared<NVDRV>(module_, "nvdrv:s")->InstallAsService(service_manager); | 33 | std::make_shared<NVDRV>(module_, "nvdrv:s")->InstallAsService(service_manager); |
| @@ -32,17 +36,25 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger | |||
| 32 | nvflinger.SetNVDrvInstance(module_); | 36 | nvflinger.SetNVDrvInstance(module_); |
| 33 | } | 37 | } |
| 34 | 38 | ||
| 35 | Module::Module() { | 39 | Module::Module(Core::System& system) { |
| 36 | auto nvmap_dev = std::make_shared<Devices::nvmap>(); | 40 | auto& kernel = system.Kernel(); |
| 37 | devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(nvmap_dev); | 41 | for (u32 i = 0; i < MaxNvEvents; i++) { |
| 38 | devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(nvmap_dev); | 42 | std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); |
| 39 | devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(); | 43 | events_interface.events[i] = Kernel::WritableEvent::CreateEventPair( |
| 44 | kernel, Kernel::ResetType::Automatic, event_label); | ||
| 45 | events_interface.status[i] = EventState::Free; | ||
| 46 | events_interface.registered[i] = false; | ||
| 47 | } | ||
| 48 | auto nvmap_dev = std::make_shared<Devices::nvmap>(system); | ||
| 49 | devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev); | ||
| 50 | devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev); | ||
| 51 | devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system); | ||
| 40 | devices["/dev/nvmap"] = nvmap_dev; | 52 | devices["/dev/nvmap"] = nvmap_dev; |
| 41 | devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(nvmap_dev); | 53 | devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); |
| 42 | devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(); | 54 | devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface); |
| 43 | devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(); | 55 | devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system); |
| 44 | devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(); | 56 | devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); |
| 45 | devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(); | 57 | devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system); |
| 46 | } | 58 | } |
| 47 | 59 | ||
| 48 | Module::~Module() = default; | 60 | Module::~Module() = default; |
| @@ -59,12 +71,13 @@ u32 Module::Open(const std::string& device_name) { | |||
| 59 | return fd; | 71 | return fd; |
| 60 | } | 72 | } |
| 61 | 73 | ||
| 62 | u32 Module::Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output) { | 74 | u32 Module::Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output, |
| 75 | IoctlCtrl& ctrl) { | ||
| 63 | auto itr = open_files.find(fd); | 76 | auto itr = open_files.find(fd); |
| 64 | ASSERT_MSG(itr != open_files.end(), "Tried to talk to an invalid device"); | 77 | ASSERT_MSG(itr != open_files.end(), "Tried to talk to an invalid device"); |
| 65 | 78 | ||
| 66 | auto& device = itr->second; | 79 | auto& device = itr->second; |
| 67 | return device->ioctl({command}, input, output); | 80 | return device->ioctl({command}, input, output, ctrl); |
| 68 | } | 81 | } |
| 69 | 82 | ||
| 70 | ResultCode Module::Close(u32 fd) { | 83 | ResultCode Module::Close(u32 fd) { |
| @@ -77,4 +90,22 @@ ResultCode Module::Close(u32 fd) { | |||
| 77 | return RESULT_SUCCESS; | 90 | return RESULT_SUCCESS; |
| 78 | } | 91 | } |
| 79 | 92 | ||
| 93 | void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) { | ||
| 94 | for (u32 i = 0; i < MaxNvEvents; i++) { | ||
| 95 | if (events_interface.assigned_syncpt[i] == syncpoint_id && | ||
| 96 | events_interface.assigned_value[i] == value) { | ||
| 97 | events_interface.LiberateEvent(i); | ||
| 98 | events_interface.events[i].writable->Signal(); | ||
| 99 | } | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | Kernel::SharedPtr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const { | ||
| 104 | return events_interface.events[event_id].readable; | ||
| 105 | } | ||
| 106 | |||
| 107 | Kernel::SharedPtr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const { | ||
| 108 | return events_interface.events[event_id].writable; | ||
| 109 | } | ||
| 110 | |||
| 80 | } // namespace Service::Nvidia | 111 | } // namespace Service::Nvidia |
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index 53564f696..a339ab672 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h | |||
| @@ -8,8 +8,14 @@ | |||
| 8 | #include <unordered_map> | 8 | #include <unordered_map> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "core/hle/kernel/writable_event.h" | ||
| 12 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 11 | #include "core/hle/service/service.h" | 13 | #include "core/hle/service/service.h" |
| 12 | 14 | ||
| 15 | namespace Core { | ||
| 16 | class System; | ||
| 17 | } | ||
| 18 | |||
| 13 | namespace Service::NVFlinger { | 19 | namespace Service::NVFlinger { |
| 14 | class NVFlinger; | 20 | class NVFlinger; |
| 15 | } | 21 | } |
| @@ -20,16 +26,72 @@ namespace Devices { | |||
| 20 | class nvdevice; | 26 | class nvdevice; |
| 21 | } | 27 | } |
| 22 | 28 | ||
| 23 | struct IoctlFence { | 29 | struct EventInterface { |
| 24 | u32 id; | 30 | // Mask representing currently busy events |
| 25 | u32 value; | 31 | u64 events_mask{}; |
| 32 | // Each kernel event associated to an NV event | ||
| 33 | std::array<Kernel::EventPair, MaxNvEvents> events; | ||
| 34 | // The status of the current NVEvent | ||
| 35 | std::array<EventState, MaxNvEvents> status{}; | ||
| 36 | // Tells if an NVEvent is registered or not | ||
| 37 | std::array<bool, MaxNvEvents> registered{}; | ||
| 38 | // When an NVEvent is waiting on GPU interrupt, this is the sync_point | ||
| 39 | // associated with it. | ||
| 40 | std::array<u32, MaxNvEvents> assigned_syncpt{}; | ||
| 41 | // This is the value of the GPU interrupt that the NVEvent is waiting | ||
| 42 | // for. | ||
| 43 | std::array<u32, MaxNvEvents> assigned_value{}; | ||
| 44 | // Constant to denote an unassigned syncpoint. | ||
| 45 | static constexpr u32 unassigned_syncpt = 0xFFFFFFFF; | ||
| 46 | std::optional<u32> GetFreeEvent() const { | ||
| 47 | u64 mask = events_mask; | ||
| 48 | for (u32 i = 0; i < MaxNvEvents; i++) { | ||
| 49 | const bool is_free = (mask & 0x1) == 0; | ||
| 50 | if (is_free) { | ||
| 51 | if (status[i] == EventState::Registered || status[i] == EventState::Free) { | ||
| 52 | return {i}; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | mask = mask >> 1; | ||
| 56 | } | ||
| 57 | return {}; | ||
| 58 | } | ||
| 59 | void SetEventStatus(const u32 event_id, EventState new_status) { | ||
| 60 | EventState old_status = status[event_id]; | ||
| 61 | if (old_status == new_status) { | ||
| 62 | return; | ||
| 63 | } | ||
| 64 | status[event_id] = new_status; | ||
| 65 | if (new_status == EventState::Registered) { | ||
| 66 | registered[event_id] = true; | ||
| 67 | } | ||
| 68 | if (new_status == EventState::Waiting || new_status == EventState::Busy) { | ||
| 69 | events_mask |= (1ULL << event_id); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | void RegisterEvent(const u32 event_id) { | ||
| 73 | registered[event_id] = true; | ||
| 74 | if (status[event_id] == EventState::Free) { | ||
| 75 | status[event_id] = EventState::Registered; | ||
| 76 | } | ||
| 77 | } | ||
| 78 | void UnregisterEvent(const u32 event_id) { | ||
| 79 | registered[event_id] = false; | ||
| 80 | if (status[event_id] == EventState::Registered) { | ||
| 81 | status[event_id] = EventState::Free; | ||
| 82 | } | ||
| 83 | } | ||
| 84 | void LiberateEvent(const u32 event_id) { | ||
| 85 | status[event_id] = registered[event_id] ? EventState::Registered : EventState::Free; | ||
| 86 | events_mask &= ~(1ULL << event_id); | ||
| 87 | assigned_syncpt[event_id] = unassigned_syncpt; | ||
| 88 | assigned_value[event_id] = 0; | ||
| 89 | } | ||
| 26 | }; | 90 | }; |
| 27 | 91 | ||
| 28 | static_assert(sizeof(IoctlFence) == 8, "IoctlFence has wrong size"); | ||
| 29 | |||
| 30 | class Module final { | 92 | class Module final { |
| 31 | public: | 93 | public: |
| 32 | Module(); | 94 | Module(Core::System& system); |
| 33 | ~Module(); | 95 | ~Module(); |
| 34 | 96 | ||
| 35 | /// Returns a pointer to one of the available devices, identified by its name. | 97 | /// Returns a pointer to one of the available devices, identified by its name. |
| @@ -44,10 +106,17 @@ public: | |||
| 44 | /// Opens a device node and returns a file descriptor to it. | 106 | /// Opens a device node and returns a file descriptor to it. |
| 45 | u32 Open(const std::string& device_name); | 107 | u32 Open(const std::string& device_name); |
| 46 | /// Sends an ioctl command to the specified file descriptor. | 108 | /// Sends an ioctl command to the specified file descriptor. |
| 47 | u32 Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output); | 109 | u32 Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output, |
| 110 | IoctlCtrl& ctrl); | ||
| 48 | /// Closes a device file descriptor and returns operation success. | 111 | /// Closes a device file descriptor and returns operation success. |
| 49 | ResultCode Close(u32 fd); | 112 | ResultCode Close(u32 fd); |
| 50 | 113 | ||
| 114 | void SignalSyncpt(const u32 syncpoint_id, const u32 value); | ||
| 115 | |||
| 116 | Kernel::SharedPtr<Kernel::ReadableEvent> GetEvent(u32 event_id) const; | ||
| 117 | |||
| 118 | Kernel::SharedPtr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const; | ||
| 119 | |||
| 51 | private: | 120 | private: |
| 52 | /// Id to use for the next open file descriptor. | 121 | /// Id to use for the next open file descriptor. |
| 53 | u32 next_fd = 1; | 122 | u32 next_fd = 1; |
| @@ -57,9 +126,12 @@ private: | |||
| 57 | 126 | ||
| 58 | /// Mapping of device node names to their implementation. | 127 | /// Mapping of device node names to their implementation. |
| 59 | std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices; | 128 | std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices; |
| 129 | |||
| 130 | EventInterface events_interface; | ||
| 60 | }; | 131 | }; |
| 61 | 132 | ||
| 62 | /// Registers all NVDRV services with the specified service manager. | 133 | /// Registers all NVDRV services with the specified service manager. |
| 63 | void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger); | 134 | void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger, |
| 135 | Core::System& system); | ||
| 64 | 136 | ||
| 65 | } // namespace Service::Nvidia | 137 | } // namespace Service::Nvidia |
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index 5731e815f..e1a07d3ee 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp | |||
| @@ -34,7 +34,8 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) | |||
| 34 | buffer_wait_event.writable->Signal(); | 34 | buffer_wait_event.writable->Signal(); |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | std::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) { | 37 | std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width, |
| 38 | u32 height) { | ||
| 38 | auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) { | 39 | auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) { |
| 39 | // Only consider free buffers. Buffers become free once again after they've been Acquired | 40 | // Only consider free buffers. Buffers become free once again after they've been Acquired |
| 40 | // and Released by the compositor, see the NVFlinger::Compose method. | 41 | // and Released by the compositor, see the NVFlinger::Compose method. |
| @@ -51,7 +52,7 @@ std::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) { | |||
| 51 | } | 52 | } |
| 52 | 53 | ||
| 53 | itr->status = Buffer::Status::Dequeued; | 54 | itr->status = Buffer::Status::Dequeued; |
| 54 | return itr->slot; | 55 | return {{itr->slot, &itr->multi_fence}}; |
| 55 | } | 56 | } |
| 56 | 57 | ||
| 57 | const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const { | 58 | const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const { |
| @@ -63,7 +64,8 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const { | |||
| 63 | } | 64 | } |
| 64 | 65 | ||
| 65 | void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, | 66 | void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, |
| 66 | const Common::Rectangle<int>& crop_rect) { | 67 | const Common::Rectangle<int>& crop_rect, u32 swap_interval, |
| 68 | Service::Nvidia::MultiFence& multi_fence) { | ||
| 67 | auto itr = std::find_if(queue.begin(), queue.end(), | 69 | auto itr = std::find_if(queue.begin(), queue.end(), |
| 68 | [&](const Buffer& buffer) { return buffer.slot == slot; }); | 70 | [&](const Buffer& buffer) { return buffer.slot == slot; }); |
| 69 | ASSERT(itr != queue.end()); | 71 | ASSERT(itr != queue.end()); |
| @@ -71,12 +73,21 @@ void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, | |||
| 71 | itr->status = Buffer::Status::Queued; | 73 | itr->status = Buffer::Status::Queued; |
| 72 | itr->transform = transform; | 74 | itr->transform = transform; |
| 73 | itr->crop_rect = crop_rect; | 75 | itr->crop_rect = crop_rect; |
| 76 | itr->swap_interval = swap_interval; | ||
| 77 | itr->multi_fence = multi_fence; | ||
| 78 | queue_sequence.push_back(slot); | ||
| 74 | } | 79 | } |
| 75 | 80 | ||
| 76 | std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { | 81 | std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { |
| 77 | auto itr = std::find_if(queue.begin(), queue.end(), [](const Buffer& buffer) { | 82 | auto itr = queue.end(); |
| 78 | return buffer.status == Buffer::Status::Queued; | 83 | // Iterate to find a queued buffer matching the requested slot. |
| 79 | }); | 84 | while (itr == queue.end() && !queue_sequence.empty()) { |
| 85 | u32 slot = queue_sequence.front(); | ||
| 86 | itr = std::find_if(queue.begin(), queue.end(), [&slot](const Buffer& buffer) { | ||
| 87 | return buffer.status == Buffer::Status::Queued && buffer.slot == slot; | ||
| 88 | }); | ||
| 89 | queue_sequence.pop_front(); | ||
| 90 | } | ||
| 80 | if (itr == queue.end()) | 91 | if (itr == queue.end()) |
| 81 | return {}; | 92 | return {}; |
| 82 | itr->status = Buffer::Status::Acquired; | 93 | itr->status = Buffer::Status::Acquired; |
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index e1ccb6171..356bedb81 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <list> | ||
| 7 | #include <optional> | 8 | #include <optional> |
| 8 | #include <vector> | 9 | #include <vector> |
| 9 | 10 | ||
| @@ -12,6 +13,7 @@ | |||
| 12 | #include "common/swap.h" | 13 | #include "common/swap.h" |
| 13 | #include "core/hle/kernel/object.h" | 14 | #include "core/hle/kernel/object.h" |
| 14 | #include "core/hle/kernel/writable_event.h" | 15 | #include "core/hle/kernel/writable_event.h" |
| 16 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 15 | 17 | ||
| 16 | namespace Service::NVFlinger { | 18 | namespace Service::NVFlinger { |
| 17 | 19 | ||
| @@ -68,13 +70,17 @@ public: | |||
| 68 | IGBPBuffer igbp_buffer; | 70 | IGBPBuffer igbp_buffer; |
| 69 | BufferTransformFlags transform; | 71 | BufferTransformFlags transform; |
| 70 | Common::Rectangle<int> crop_rect; | 72 | Common::Rectangle<int> crop_rect; |
| 73 | u32 swap_interval; | ||
| 74 | Service::Nvidia::MultiFence multi_fence; | ||
| 71 | }; | 75 | }; |
| 72 | 76 | ||
| 73 | void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); | 77 | void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); |
| 74 | std::optional<u32> DequeueBuffer(u32 width, u32 height); | 78 | std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> DequeueBuffer(u32 width, |
| 79 | u32 height); | ||
| 75 | const IGBPBuffer& RequestBuffer(u32 slot) const; | 80 | const IGBPBuffer& RequestBuffer(u32 slot) const; |
| 76 | void QueueBuffer(u32 slot, BufferTransformFlags transform, | 81 | void QueueBuffer(u32 slot, BufferTransformFlags transform, |
| 77 | const Common::Rectangle<int>& crop_rect); | 82 | const Common::Rectangle<int>& crop_rect, u32 swap_interval, |
| 83 | Service::Nvidia::MultiFence& multi_fence); | ||
| 78 | std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); | 84 | std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); |
| 79 | void ReleaseBuffer(u32 slot); | 85 | void ReleaseBuffer(u32 slot); |
| 80 | u32 Query(QueryType type); | 86 | u32 Query(QueryType type); |
| @@ -92,6 +98,7 @@ private: | |||
| 92 | u64 layer_id; | 98 | u64 layer_id; |
| 93 | 99 | ||
| 94 | std::vector<Buffer> queue; | 100 | std::vector<Buffer> queue; |
| 101 | std::list<u32> queue_sequence; | ||
| 95 | Kernel::EventPair buffer_wait_event; | 102 | Kernel::EventPair buffer_wait_event; |
| 96 | }; | 103 | }; |
| 97 | 104 | ||
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 3c5c53e24..f9db79370 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp | |||
| @@ -37,15 +37,14 @@ NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_t | |||
| 37 | displays.emplace_back(4, "Null"); | 37 | displays.emplace_back(4, "Null"); |
| 38 | 38 | ||
| 39 | // Schedule the screen composition events | 39 | // Schedule the screen composition events |
| 40 | const auto ticks = Settings::values.force_30fps_mode ? frame_ticks_30fps : frame_ticks; | 40 | composition_event = core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, |
| 41 | 41 | s64 cycles_late) { | |
| 42 | composition_event = core_timing.RegisterEvent( | 42 | Compose(); |
| 43 | "ScreenComposition", [this, ticks](u64 userdata, s64 cycles_late) { | 43 | const auto ticks = Settings::values.force_30fps_mode ? frame_ticks_30fps : GetNextTicks(); |
| 44 | Compose(); | 44 | this->core_timing.ScheduleEvent(std::max<s64>(0LL, ticks - cycles_late), composition_event); |
| 45 | this->core_timing.ScheduleEvent(ticks - cycles_late, composition_event); | 45 | }); |
| 46 | }); | 46 | |
| 47 | 47 | core_timing.ScheduleEvent(frame_ticks, composition_event); | |
| 48 | core_timing.ScheduleEvent(ticks, composition_event); | ||
| 49 | } | 48 | } |
| 50 | 49 | ||
| 51 | NVFlinger::~NVFlinger() { | 50 | NVFlinger::~NVFlinger() { |
| @@ -206,8 +205,14 @@ void NVFlinger::Compose() { | |||
| 206 | igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, | 205 | igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, |
| 207 | buffer->get().transform, buffer->get().crop_rect); | 206 | buffer->get().transform, buffer->get().crop_rect); |
| 208 | 207 | ||
| 208 | swap_interval = buffer->get().swap_interval; | ||
| 209 | buffer_queue.ReleaseBuffer(buffer->get().slot); | 209 | buffer_queue.ReleaseBuffer(buffer->get().slot); |
| 210 | } | 210 | } |
| 211 | } | 211 | } |
| 212 | 212 | ||
| 213 | s64 NVFlinger::GetNextTicks() const { | ||
| 214 | constexpr s64 max_hertz = 120LL; | ||
| 215 | return (Core::Timing::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz; | ||
| 216 | } | ||
| 217 | |||
| 213 | } // namespace Service::NVFlinger | 218 | } // namespace Service::NVFlinger |
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index c0a83fffb..988be8726 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h | |||
| @@ -74,6 +74,8 @@ public: | |||
| 74 | /// finished. | 74 | /// finished. |
| 75 | void Compose(); | 75 | void Compose(); |
| 76 | 76 | ||
| 77 | s64 GetNextTicks() const; | ||
| 78 | |||
| 77 | private: | 79 | private: |
| 78 | /// Finds the display identified by the specified ID. | 80 | /// Finds the display identified by the specified ID. |
| 79 | VI::Display* FindDisplay(u64 display_id); | 81 | VI::Display* FindDisplay(u64 display_id); |
| @@ -98,6 +100,8 @@ private: | |||
| 98 | /// layers. | 100 | /// layers. |
| 99 | u32 next_buffer_queue_id = 1; | 101 | u32 next_buffer_queue_id = 1; |
| 100 | 102 | ||
| 103 | u32 swap_interval = 1; | ||
| 104 | |||
| 101 | /// Event that handles screen composition. | 105 | /// Event that handles screen composition. |
| 102 | Core::Timing::EventType* composition_event; | 106 | Core::Timing::EventType* composition_event; |
| 103 | 107 | ||
diff --git a/src/core/hle/service/pm/pm.cpp b/src/core/hle/service/pm/pm.cpp index ebcc41a43..fe6b5f798 100644 --- a/src/core/hle/service/pm/pm.cpp +++ b/src/core/hle/service/pm/pm.cpp | |||
| @@ -3,11 +3,44 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/hle/ipc_helpers.h" | 5 | #include "core/hle/ipc_helpers.h" |
| 6 | #include "core/hle/kernel/kernel.h" | ||
| 7 | #include "core/hle/kernel/process.h" | ||
| 6 | #include "core/hle/service/pm/pm.h" | 8 | #include "core/hle/service/pm/pm.h" |
| 7 | #include "core/hle/service/service.h" | 9 | #include "core/hle/service/service.h" |
| 8 | 10 | ||
| 9 | namespace Service::PM { | 11 | namespace Service::PM { |
| 10 | 12 | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | constexpr ResultCode ERROR_PROCESS_NOT_FOUND{ErrorModule::PM, 1}; | ||
| 16 | |||
| 17 | constexpr u64 NO_PROCESS_FOUND_PID{0}; | ||
| 18 | |||
| 19 | std::optional<Kernel::SharedPtr<Kernel::Process>> SearchProcessList( | ||
| 20 | const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list, | ||
| 21 | std::function<bool(const Kernel::SharedPtr<Kernel::Process>&)> predicate) { | ||
| 22 | const auto iter = std::find_if(process_list.begin(), process_list.end(), predicate); | ||
| 23 | |||
| 24 | if (iter == process_list.end()) { | ||
| 25 | return std::nullopt; | ||
| 26 | } | ||
| 27 | |||
| 28 | return *iter; | ||
| 29 | } | ||
| 30 | |||
| 31 | void GetApplicationPidGeneric(Kernel::HLERequestContext& ctx, | ||
| 32 | const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list) { | ||
| 33 | const auto process = SearchProcessList(process_list, [](const auto& process) { | ||
| 34 | return process->GetProcessID() == Kernel::Process::ProcessIDMin; | ||
| 35 | }); | ||
| 36 | |||
| 37 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 38 | rb.Push(RESULT_SUCCESS); | ||
| 39 | rb.Push(process.has_value() ? (*process)->GetProcessID() : NO_PROCESS_FOUND_PID); | ||
| 40 | } | ||
| 41 | |||
| 42 | } // Anonymous namespace | ||
| 43 | |||
| 11 | class BootMode final : public ServiceFramework<BootMode> { | 44 | class BootMode final : public ServiceFramework<BootMode> { |
| 12 | public: | 45 | public: |
| 13 | explicit BootMode() : ServiceFramework{"pm:bm"} { | 46 | explicit BootMode() : ServiceFramework{"pm:bm"} { |
| @@ -41,14 +74,15 @@ private: | |||
| 41 | 74 | ||
| 42 | class DebugMonitor final : public ServiceFramework<DebugMonitor> { | 75 | class DebugMonitor final : public ServiceFramework<DebugMonitor> { |
| 43 | public: | 76 | public: |
| 44 | explicit DebugMonitor() : ServiceFramework{"pm:dmnt"} { | 77 | explicit DebugMonitor(const Kernel::KernelCore& kernel) |
| 78 | : ServiceFramework{"pm:dmnt"}, kernel(kernel) { | ||
| 45 | // clang-format off | 79 | // clang-format off |
| 46 | static const FunctionInfo functions[] = { | 80 | static const FunctionInfo functions[] = { |
| 47 | {0, nullptr, "GetDebugProcesses"}, | 81 | {0, nullptr, "GetDebugProcesses"}, |
| 48 | {1, nullptr, "StartDebugProcess"}, | 82 | {1, nullptr, "StartDebugProcess"}, |
| 49 | {2, nullptr, "GetTitlePid"}, | 83 | {2, &DebugMonitor::GetTitlePid, "GetTitlePid"}, |
| 50 | {3, nullptr, "EnableDebugForTitleId"}, | 84 | {3, nullptr, "EnableDebugForTitleId"}, |
| 51 | {4, nullptr, "GetApplicationPid"}, | 85 | {4, &DebugMonitor::GetApplicationPid, "GetApplicationPid"}, |
| 52 | {5, nullptr, "EnableDebugForApplication"}, | 86 | {5, nullptr, "EnableDebugForApplication"}, |
| 53 | {6, nullptr, "DisableDebug"}, | 87 | {6, nullptr, "DisableDebug"}, |
| 54 | }; | 88 | }; |
| @@ -56,21 +90,77 @@ public: | |||
| 56 | 90 | ||
| 57 | RegisterHandlers(functions); | 91 | RegisterHandlers(functions); |
| 58 | } | 92 | } |
| 93 | |||
| 94 | private: | ||
| 95 | void GetTitlePid(Kernel::HLERequestContext& ctx) { | ||
| 96 | IPC::RequestParser rp{ctx}; | ||
| 97 | const auto title_id = rp.PopRaw<u64>(); | ||
| 98 | |||
| 99 | LOG_DEBUG(Service_PM, "called, title_id={:016X}", title_id); | ||
| 100 | |||
| 101 | const auto process = | ||
| 102 | SearchProcessList(kernel.GetProcessList(), [title_id](const auto& process) { | ||
| 103 | return process->GetTitleID() == title_id; | ||
| 104 | }); | ||
| 105 | |||
| 106 | if (!process.has_value()) { | ||
| 107 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 108 | rb.Push(ERROR_PROCESS_NOT_FOUND); | ||
| 109 | return; | ||
| 110 | } | ||
| 111 | |||
| 112 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 113 | rb.Push(RESULT_SUCCESS); | ||
| 114 | rb.Push((*process)->GetProcessID()); | ||
| 115 | } | ||
| 116 | |||
| 117 | void GetApplicationPid(Kernel::HLERequestContext& ctx) { | ||
| 118 | LOG_DEBUG(Service_PM, "called"); | ||
| 119 | GetApplicationPidGeneric(ctx, kernel.GetProcessList()); | ||
| 120 | } | ||
| 121 | |||
| 122 | const Kernel::KernelCore& kernel; | ||
| 59 | }; | 123 | }; |
| 60 | 124 | ||
| 61 | class Info final : public ServiceFramework<Info> { | 125 | class Info final : public ServiceFramework<Info> { |
| 62 | public: | 126 | public: |
| 63 | explicit Info() : ServiceFramework{"pm:info"} { | 127 | explicit Info(const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list) |
| 128 | : ServiceFramework{"pm:info"}, process_list(process_list) { | ||
| 64 | static const FunctionInfo functions[] = { | 129 | static const FunctionInfo functions[] = { |
| 65 | {0, nullptr, "GetTitleId"}, | 130 | {0, &Info::GetTitleId, "GetTitleId"}, |
| 66 | }; | 131 | }; |
| 67 | RegisterHandlers(functions); | 132 | RegisterHandlers(functions); |
| 68 | } | 133 | } |
| 134 | |||
| 135 | private: | ||
| 136 | void GetTitleId(Kernel::HLERequestContext& ctx) { | ||
| 137 | IPC::RequestParser rp{ctx}; | ||
| 138 | const auto process_id = rp.PopRaw<u64>(); | ||
| 139 | |||
| 140 | LOG_DEBUG(Service_PM, "called, process_id={:016X}", process_id); | ||
| 141 | |||
| 142 | const auto process = SearchProcessList(process_list, [process_id](const auto& process) { | ||
| 143 | return process->GetProcessID() == process_id; | ||
| 144 | }); | ||
| 145 | |||
| 146 | if (!process.has_value()) { | ||
| 147 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 148 | rb.Push(ERROR_PROCESS_NOT_FOUND); | ||
| 149 | return; | ||
| 150 | } | ||
| 151 | |||
| 152 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 153 | rb.Push(RESULT_SUCCESS); | ||
| 154 | rb.Push((*process)->GetTitleID()); | ||
| 155 | } | ||
| 156 | |||
| 157 | const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list; | ||
| 69 | }; | 158 | }; |
| 70 | 159 | ||
| 71 | class Shell final : public ServiceFramework<Shell> { | 160 | class Shell final : public ServiceFramework<Shell> { |
| 72 | public: | 161 | public: |
| 73 | explicit Shell() : ServiceFramework{"pm:shell"} { | 162 | explicit Shell(const Kernel::KernelCore& kernel) |
| 163 | : ServiceFramework{"pm:shell"}, kernel(kernel) { | ||
| 74 | // clang-format off | 164 | // clang-format off |
| 75 | static const FunctionInfo functions[] = { | 165 | static const FunctionInfo functions[] = { |
| 76 | {0, nullptr, "LaunchProcess"}, | 166 | {0, nullptr, "LaunchProcess"}, |
| @@ -79,21 +169,31 @@ public: | |||
| 79 | {3, nullptr, "GetProcessEventWaiter"}, | 169 | {3, nullptr, "GetProcessEventWaiter"}, |
| 80 | {4, nullptr, "GetProcessEventType"}, | 170 | {4, nullptr, "GetProcessEventType"}, |
| 81 | {5, nullptr, "NotifyBootFinished"}, | 171 | {5, nullptr, "NotifyBootFinished"}, |
| 82 | {6, nullptr, "GetApplicationPid"}, | 172 | {6, &Shell::GetApplicationPid, "GetApplicationPid"}, |
| 83 | {7, nullptr, "BoostSystemMemoryResourceLimit"}, | 173 | {7, nullptr, "BoostSystemMemoryResourceLimit"}, |
| 84 | {8, nullptr, "EnableAdditionalSystemThreads"}, | 174 | {8, nullptr, "EnableAdditionalSystemThreads"}, |
| 175 | {9, nullptr, "GetUnimplementedEventHandle"}, | ||
| 85 | }; | 176 | }; |
| 86 | // clang-format on | 177 | // clang-format on |
| 87 | 178 | ||
| 88 | RegisterHandlers(functions); | 179 | RegisterHandlers(functions); |
| 89 | } | 180 | } |
| 181 | |||
| 182 | private: | ||
| 183 | void GetApplicationPid(Kernel::HLERequestContext& ctx) { | ||
| 184 | LOG_DEBUG(Service_PM, "called"); | ||
| 185 | GetApplicationPidGeneric(ctx, kernel.GetProcessList()); | ||
| 186 | } | ||
| 187 | |||
| 188 | const Kernel::KernelCore& kernel; | ||
| 90 | }; | 189 | }; |
| 91 | 190 | ||
| 92 | void InstallInterfaces(SM::ServiceManager& sm) { | 191 | void InstallInterfaces(Core::System& system) { |
| 93 | std::make_shared<BootMode>()->InstallAsService(sm); | 192 | std::make_shared<BootMode>()->InstallAsService(system.ServiceManager()); |
| 94 | std::make_shared<DebugMonitor>()->InstallAsService(sm); | 193 | std::make_shared<DebugMonitor>(system.Kernel())->InstallAsService(system.ServiceManager()); |
| 95 | std::make_shared<Info>()->InstallAsService(sm); | 194 | std::make_shared<Info>(system.Kernel().GetProcessList()) |
| 96 | std::make_shared<Shell>()->InstallAsService(sm); | 195 | ->InstallAsService(system.ServiceManager()); |
| 196 | std::make_shared<Shell>(system.Kernel())->InstallAsService(system.ServiceManager()); | ||
| 97 | } | 197 | } |
| 98 | 198 | ||
| 99 | } // namespace Service::PM | 199 | } // namespace Service::PM |
diff --git a/src/core/hle/service/pm/pm.h b/src/core/hle/service/pm/pm.h index cc8d3f215..852e7050c 100644 --- a/src/core/hle/service/pm/pm.h +++ b/src/core/hle/service/pm/pm.h | |||
| @@ -4,8 +4,8 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | namespace Service::SM { | 7 | namespace Core { |
| 8 | class ServiceManager; | 8 | class System; |
| 9 | } | 9 | } |
| 10 | 10 | ||
| 11 | namespace Service::PM { | 11 | namespace Service::PM { |
| @@ -16,6 +16,6 @@ enum class SystemBootMode { | |||
| 16 | }; | 16 | }; |
| 17 | 17 | ||
| 18 | /// Registers all PM services with the specified service manager. | 18 | /// Registers all PM services with the specified service manager. |
| 19 | void InstallInterfaces(SM::ServiceManager& service_manager); | 19 | void InstallInterfaces(Core::System& system); |
| 20 | 20 | ||
| 21 | } // namespace Service::PM | 21 | } // namespace Service::PM |
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp index 952c03e27..3a0f8c3f6 100644 --- a/src/core/hle/service/service.cpp +++ b/src/core/hle/service/service.cpp | |||
| @@ -206,7 +206,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system) { | |||
| 206 | AM::InstallInterfaces(*sm, nv_flinger, system); | 206 | AM::InstallInterfaces(*sm, nv_flinger, system); |
| 207 | AOC::InstallInterfaces(*sm); | 207 | AOC::InstallInterfaces(*sm); |
| 208 | APM::InstallInterfaces(system); | 208 | APM::InstallInterfaces(system); |
| 209 | Audio::InstallInterfaces(*sm); | 209 | Audio::InstallInterfaces(*sm, system); |
| 210 | BCAT::InstallInterfaces(*sm); | 210 | BCAT::InstallInterfaces(*sm); |
| 211 | BPC::InstallInterfaces(*sm); | 211 | BPC::InstallInterfaces(*sm); |
| 212 | BtDrv::InstallInterfaces(*sm); | 212 | BtDrv::InstallInterfaces(*sm); |
| @@ -236,12 +236,12 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system) { | |||
| 236 | NIM::InstallInterfaces(*sm); | 236 | NIM::InstallInterfaces(*sm); |
| 237 | NPNS::InstallInterfaces(*sm); | 237 | NPNS::InstallInterfaces(*sm); |
| 238 | NS::InstallInterfaces(*sm); | 238 | NS::InstallInterfaces(*sm); |
| 239 | Nvidia::InstallInterfaces(*sm, *nv_flinger); | 239 | Nvidia::InstallInterfaces(*sm, *nv_flinger, system); |
| 240 | PCIe::InstallInterfaces(*sm); | 240 | PCIe::InstallInterfaces(*sm); |
| 241 | PCTL::InstallInterfaces(*sm); | 241 | PCTL::InstallInterfaces(*sm); |
| 242 | PCV::InstallInterfaces(*sm); | 242 | PCV::InstallInterfaces(*sm); |
| 243 | PlayReport::InstallInterfaces(*sm); | 243 | PlayReport::InstallInterfaces(*sm); |
| 244 | PM::InstallInterfaces(*sm); | 244 | PM::InstallInterfaces(system); |
| 245 | PSC::InstallInterfaces(*sm); | 245 | PSC::InstallInterfaces(*sm); |
| 246 | PSM::InstallInterfaces(*sm); | 246 | PSM::InstallInterfaces(*sm); |
| 247 | Set::InstallInterfaces(*sm); | 247 | Set::InstallInterfaces(*sm); |
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index f1fa6ccd1..199b30635 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include "core/hle/kernel/readable_event.h" | 21 | #include "core/hle/kernel/readable_event.h" |
| 22 | #include "core/hle/kernel/thread.h" | 22 | #include "core/hle/kernel/thread.h" |
| 23 | #include "core/hle/kernel/writable_event.h" | 23 | #include "core/hle/kernel/writable_event.h" |
| 24 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 24 | #include "core/hle/service/nvdrv/nvdrv.h" | 25 | #include "core/hle/service/nvdrv/nvdrv.h" |
| 25 | #include "core/hle/service/nvflinger/buffer_queue.h" | 26 | #include "core/hle/service/nvflinger/buffer_queue.h" |
| 26 | #include "core/hle/service/nvflinger/nvflinger.h" | 27 | #include "core/hle/service/nvflinger/nvflinger.h" |
| @@ -328,32 +329,22 @@ public: | |||
| 328 | Data data; | 329 | Data data; |
| 329 | }; | 330 | }; |
| 330 | 331 | ||
| 331 | struct BufferProducerFence { | ||
| 332 | u32 is_valid; | ||
| 333 | std::array<Nvidia::IoctlFence, 4> fences; | ||
| 334 | }; | ||
| 335 | static_assert(sizeof(BufferProducerFence) == 36, "BufferProducerFence has wrong size"); | ||
| 336 | |||
| 337 | class IGBPDequeueBufferResponseParcel : public Parcel { | 332 | class IGBPDequeueBufferResponseParcel : public Parcel { |
| 338 | public: | 333 | public: |
| 339 | explicit IGBPDequeueBufferResponseParcel(u32 slot) : slot(slot) {} | 334 | explicit IGBPDequeueBufferResponseParcel(u32 slot, Service::Nvidia::MultiFence& multi_fence) |
| 335 | : slot(slot), multi_fence(multi_fence) {} | ||
| 340 | ~IGBPDequeueBufferResponseParcel() override = default; | 336 | ~IGBPDequeueBufferResponseParcel() override = default; |
| 341 | 337 | ||
| 342 | protected: | 338 | protected: |
| 343 | void SerializeData() override { | 339 | void SerializeData() override { |
| 344 | // TODO(Subv): Find out how this Fence is used. | ||
| 345 | BufferProducerFence fence = {}; | ||
| 346 | fence.is_valid = 1; | ||
| 347 | for (auto& fence_ : fence.fences) | ||
| 348 | fence_.id = -1; | ||
| 349 | |||
| 350 | Write(slot); | 340 | Write(slot); |
| 351 | Write<u32_le>(1); | 341 | Write<u32_le>(1); |
| 352 | WriteObject(fence); | 342 | WriteObject(multi_fence); |
| 353 | Write<u32_le>(0); | 343 | Write<u32_le>(0); |
| 354 | } | 344 | } |
| 355 | 345 | ||
| 356 | u32_le slot; | 346 | u32_le slot; |
| 347 | Service::Nvidia::MultiFence multi_fence; | ||
| 357 | }; | 348 | }; |
| 358 | 349 | ||
| 359 | class IGBPRequestBufferRequestParcel : public Parcel { | 350 | class IGBPRequestBufferRequestParcel : public Parcel { |
| @@ -400,12 +391,6 @@ public: | |||
| 400 | data = Read<Data>(); | 391 | data = Read<Data>(); |
| 401 | } | 392 | } |
| 402 | 393 | ||
| 403 | struct Fence { | ||
| 404 | u32_le id; | ||
| 405 | u32_le value; | ||
| 406 | }; | ||
| 407 | static_assert(sizeof(Fence) == 8, "Fence has wrong size"); | ||
| 408 | |||
| 409 | struct Data { | 394 | struct Data { |
| 410 | u32_le slot; | 395 | u32_le slot; |
| 411 | INSERT_PADDING_WORDS(3); | 396 | INSERT_PADDING_WORDS(3); |
| @@ -418,15 +403,15 @@ public: | |||
| 418 | s32_le scaling_mode; | 403 | s32_le scaling_mode; |
| 419 | NVFlinger::BufferQueue::BufferTransformFlags transform; | 404 | NVFlinger::BufferQueue::BufferTransformFlags transform; |
| 420 | u32_le sticky_transform; | 405 | u32_le sticky_transform; |
| 421 | INSERT_PADDING_WORDS(2); | 406 | INSERT_PADDING_WORDS(1); |
| 422 | u32_le fence_is_valid; | 407 | u32_le swap_interval; |
| 423 | std::array<Fence, 2> fences; | 408 | Service::Nvidia::MultiFence multi_fence; |
| 424 | 409 | ||
| 425 | Common::Rectangle<int> GetCropRect() const { | 410 | Common::Rectangle<int> GetCropRect() const { |
| 426 | return {crop_left, crop_top, crop_right, crop_bottom}; | 411 | return {crop_left, crop_top, crop_right, crop_bottom}; |
| 427 | } | 412 | } |
| 428 | }; | 413 | }; |
| 429 | static_assert(sizeof(Data) == 80, "ParcelData has wrong size"); | 414 | static_assert(sizeof(Data) == 96, "ParcelData has wrong size"); |
| 430 | 415 | ||
| 431 | Data data; | 416 | Data data; |
| 432 | }; | 417 | }; |
| @@ -547,11 +532,11 @@ private: | |||
| 547 | IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; | 532 | IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; |
| 548 | const u32 width{request.data.width}; | 533 | const u32 width{request.data.width}; |
| 549 | const u32 height{request.data.height}; | 534 | const u32 height{request.data.height}; |
| 550 | std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height); | 535 | auto result = buffer_queue.DequeueBuffer(width, height); |
| 551 | 536 | ||
| 552 | if (slot) { | 537 | if (result) { |
| 553 | // Buffer is available | 538 | // Buffer is available |
| 554 | IGBPDequeueBufferResponseParcel response{*slot}; | 539 | IGBPDequeueBufferResponseParcel response{result->first, *result->second}; |
| 555 | ctx.WriteBuffer(response.Serialize()); | 540 | ctx.WriteBuffer(response.Serialize()); |
| 556 | } else { | 541 | } else { |
| 557 | // Wait the current thread until a buffer becomes available | 542 | // Wait the current thread until a buffer becomes available |
| @@ -561,10 +546,10 @@ private: | |||
| 561 | Kernel::ThreadWakeupReason reason) { | 546 | Kernel::ThreadWakeupReason reason) { |
| 562 | // Repeat TransactParcel DequeueBuffer when a buffer is available | 547 | // Repeat TransactParcel DequeueBuffer when a buffer is available |
| 563 | auto& buffer_queue = nv_flinger->FindBufferQueue(id); | 548 | auto& buffer_queue = nv_flinger->FindBufferQueue(id); |
| 564 | std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height); | 549 | auto result = buffer_queue.DequeueBuffer(width, height); |
| 565 | ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer."); | 550 | ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer."); |
| 566 | 551 | ||
| 567 | IGBPDequeueBufferResponseParcel response{*slot}; | 552 | IGBPDequeueBufferResponseParcel response{result->first, *result->second}; |
| 568 | ctx.WriteBuffer(response.Serialize()); | 553 | ctx.WriteBuffer(response.Serialize()); |
| 569 | IPC::ResponseBuilder rb{ctx, 2}; | 554 | IPC::ResponseBuilder rb{ctx, 2}; |
| 570 | rb.Push(RESULT_SUCCESS); | 555 | rb.Push(RESULT_SUCCESS); |
| @@ -582,7 +567,8 @@ private: | |||
| 582 | IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; | 567 | IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; |
| 583 | 568 | ||
| 584 | buffer_queue.QueueBuffer(request.data.slot, request.data.transform, | 569 | buffer_queue.QueueBuffer(request.data.slot, request.data.transform, |
| 585 | request.data.GetCropRect()); | 570 | request.data.GetCropRect(), request.data.swap_interval, |
| 571 | request.data.multi_fence); | ||
| 586 | 572 | ||
| 587 | IGBPQueueBufferResponseParcel response{1280, 720}; | 573 | IGBPQueueBufferResponseParcel response{1280, 720}; |
| 588 | ctx.WriteBuffer(response.Serialize()); | 574 | ctx.WriteBuffer(response.Serialize()); |
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index 6d4b02375..f1795fdd6 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp | |||
| @@ -295,7 +295,7 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) { | |||
| 295 | } | 295 | } |
| 296 | } | 296 | } |
| 297 | 297 | ||
| 298 | std::vector<u8> program_image(total_image_size); | 298 | Kernel::PhysicalMemory program_image(total_image_size); |
| 299 | std::size_t current_image_position = 0; | 299 | std::size_t current_image_position = 0; |
| 300 | 300 | ||
| 301 | Kernel::CodeSet codeset; | 301 | Kernel::CodeSet codeset; |
diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp index 70051c13a..474b55cb1 100644 --- a/src/core/loader/kip.cpp +++ b/src/core/loader/kip.cpp | |||
| @@ -69,7 +69,7 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) { | |||
| 69 | 69 | ||
| 70 | const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); | 70 | const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); |
| 71 | Kernel::CodeSet codeset; | 71 | Kernel::CodeSet codeset; |
| 72 | std::vector<u8> program_image; | 72 | Kernel::PhysicalMemory program_image; |
| 73 | 73 | ||
| 74 | const auto load_segment = [&program_image](Kernel::CodeSet::Segment& segment, | 74 | const auto load_segment = [&program_image](Kernel::CodeSet::Segment& segment, |
| 75 | const std::vector<u8>& data, u32 offset) { | 75 | const std::vector<u8>& data, u32 offset) { |
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 6a0ca389b..e92e2e06e 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp | |||
| @@ -143,7 +143,7 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data, | |||
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | // Build program image | 145 | // Build program image |
| 146 | std::vector<u8> program_image(PageAlignSize(nro_header.file_size)); | 146 | Kernel::PhysicalMemory program_image(PageAlignSize(nro_header.file_size)); |
| 147 | std::memcpy(program_image.data(), data.data(), program_image.size()); | 147 | std::memcpy(program_image.data(), data.data(), program_image.size()); |
| 148 | if (program_image.size() != PageAlignSize(nro_header.file_size)) { | 148 | if (program_image.size() != PageAlignSize(nro_header.file_size)) { |
| 149 | return {}; | 149 | return {}; |
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index 29311404a..70c90109f 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp | |||
| @@ -89,7 +89,7 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, | |||
| 89 | 89 | ||
| 90 | // Build program image | 90 | // Build program image |
| 91 | Kernel::CodeSet codeset; | 91 | Kernel::CodeSet codeset; |
| 92 | std::vector<u8> program_image; | 92 | Kernel::PhysicalMemory program_image; |
| 93 | for (std::size_t i = 0; i < nso_header.segments.size(); ++i) { | 93 | for (std::size_t i = 0; i < nso_header.segments.size(); ++i) { |
| 94 | std::vector<u8> data = | 94 | std::vector<u8> data = |
| 95 | file.ReadBytes(nso_header.segments_compressed_size[i], nso_header.segments[i].offset); | 95 | file.ReadBytes(nso_header.segments_compressed_size[i], nso_header.segments[i].offset); |
diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 63aa59690..0dd1632ac 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp | |||
| @@ -85,7 +85,6 @@ void LogSettings() { | |||
| 85 | LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0)); | 85 | LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0)); |
| 86 | LogSetting("System_CurrentUser", Settings::values.current_user); | 86 | LogSetting("System_CurrentUser", Settings::values.current_user); |
| 87 | LogSetting("System_LanguageIndex", Settings::values.language_index); | 87 | LogSetting("System_LanguageIndex", Settings::values.language_index); |
| 88 | LogSetting("Core_CpuJitEnabled", Settings::values.cpu_jit_enabled); | ||
| 89 | LogSetting("Core_UseMultiCore", Settings::values.use_multi_core); | 88 | LogSetting("Core_UseMultiCore", Settings::values.use_multi_core); |
| 90 | LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); | 89 | LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); |
| 91 | LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); | 90 | LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); |
diff --git a/src/core/settings.h b/src/core/settings.h index acf18d653..6638ce8f9 100644 --- a/src/core/settings.h +++ b/src/core/settings.h | |||
| @@ -378,7 +378,6 @@ struct Values { | |||
| 378 | std::atomic_bool is_device_reload_pending{true}; | 378 | std::atomic_bool is_device_reload_pending{true}; |
| 379 | 379 | ||
| 380 | // Core | 380 | // Core |
| 381 | bool cpu_jit_enabled; | ||
| 382 | bool use_multi_core; | 381 | bool use_multi_core; |
| 383 | 382 | ||
| 384 | // Data Storage | 383 | // Data Storage |
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 98f49042a..793d102d3 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp | |||
| @@ -168,7 +168,6 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { | |||
| 168 | AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id); | 168 | AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id); |
| 169 | AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching", | 169 | AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching", |
| 170 | Settings::values.enable_audio_stretching); | 170 | Settings::values.enable_audio_stretching); |
| 171 | AddField(Telemetry::FieldType::UserConfig, "Core_UseCpuJit", Settings::values.cpu_jit_enabled); | ||
| 172 | AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore", | 171 | AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore", |
| 173 | Settings::values.use_multi_core); | 172 | Settings::values.use_multi_core); |
| 174 | AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor", | 173 | AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor", |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6839abe71..e2f85c5f1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -1,4 +1,7 @@ | |||
| 1 | add_library(video_core STATIC | 1 | add_library(video_core STATIC |
| 2 | buffer_cache/buffer_block.h | ||
| 3 | buffer_cache/buffer_cache.h | ||
| 4 | buffer_cache/map_interval.h | ||
| 2 | dma_pusher.cpp | 5 | dma_pusher.cpp |
| 3 | dma_pusher.h | 6 | dma_pusher.h |
| 4 | debug_utils/debug_utils.cpp | 7 | debug_utils/debug_utils.cpp |
| @@ -43,8 +46,6 @@ add_library(video_core STATIC | |||
| 43 | renderer_opengl/gl_device.h | 46 | renderer_opengl/gl_device.h |
| 44 | renderer_opengl/gl_framebuffer_cache.cpp | 47 | renderer_opengl/gl_framebuffer_cache.cpp |
| 45 | renderer_opengl/gl_framebuffer_cache.h | 48 | renderer_opengl/gl_framebuffer_cache.h |
| 46 | renderer_opengl/gl_global_cache.cpp | ||
| 47 | renderer_opengl/gl_global_cache.h | ||
| 48 | renderer_opengl/gl_rasterizer.cpp | 49 | renderer_opengl/gl_rasterizer.cpp |
| 49 | renderer_opengl/gl_rasterizer.h | 50 | renderer_opengl/gl_rasterizer.h |
| 50 | renderer_opengl/gl_resource_manager.cpp | 51 | renderer_opengl/gl_resource_manager.cpp |
| @@ -101,8 +102,11 @@ add_library(video_core STATIC | |||
| 101 | shader/decode/integer_set.cpp | 102 | shader/decode/integer_set.cpp |
| 102 | shader/decode/half_set.cpp | 103 | shader/decode/half_set.cpp |
| 103 | shader/decode/video.cpp | 104 | shader/decode/video.cpp |
| 105 | shader/decode/warp.cpp | ||
| 104 | shader/decode/xmad.cpp | 106 | shader/decode/xmad.cpp |
| 105 | shader/decode/other.cpp | 107 | shader/decode/other.cpp |
| 108 | shader/control_flow.cpp | ||
| 109 | shader/control_flow.h | ||
| 106 | shader/decode.cpp | 110 | shader/decode.cpp |
| 107 | shader/node_helper.cpp | 111 | shader/node_helper.cpp |
| 108 | shader/node_helper.h | 112 | shader/node_helper.h |
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h new file mode 100644 index 000000000..4b9193182 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_block.h | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <unordered_set> | ||
| 8 | #include <utility> | ||
| 9 | |||
| 10 | #include "common/alignment.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/gpu.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | class BufferBlock { | ||
| 17 | public: | ||
| 18 | bool Overlaps(const CacheAddr start, const CacheAddr end) const { | ||
| 19 | return (cache_addr < end) && (cache_addr_end > start); | ||
| 20 | } | ||
| 21 | |||
| 22 | bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { | ||
| 23 | return cache_addr <= other_start && other_end <= cache_addr_end; | ||
| 24 | } | ||
| 25 | |||
| 26 | u8* GetWritableHostPtr() const { | ||
| 27 | return FromCacheAddr(cache_addr); | ||
| 28 | } | ||
| 29 | |||
| 30 | u8* GetWritableHostPtr(std::size_t offset) const { | ||
| 31 | return FromCacheAddr(cache_addr + offset); | ||
| 32 | } | ||
| 33 | |||
| 34 | std::size_t GetOffset(const CacheAddr in_addr) { | ||
| 35 | return static_cast<std::size_t>(in_addr - cache_addr); | ||
| 36 | } | ||
| 37 | |||
| 38 | CacheAddr GetCacheAddr() const { | ||
| 39 | return cache_addr; | ||
| 40 | } | ||
| 41 | |||
| 42 | CacheAddr GetCacheAddrEnd() const { | ||
| 43 | return cache_addr_end; | ||
| 44 | } | ||
| 45 | |||
| 46 | void SetCacheAddr(const CacheAddr new_addr) { | ||
| 47 | cache_addr = new_addr; | ||
| 48 | cache_addr_end = new_addr + size; | ||
| 49 | } | ||
| 50 | |||
| 51 | std::size_t GetSize() const { | ||
| 52 | return size; | ||
| 53 | } | ||
| 54 | |||
| 55 | void SetEpoch(u64 new_epoch) { | ||
| 56 | epoch = new_epoch; | ||
| 57 | } | ||
| 58 | |||
| 59 | u64 GetEpoch() { | ||
| 60 | return epoch; | ||
| 61 | } | ||
| 62 | |||
| 63 | protected: | ||
| 64 | explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} { | ||
| 65 | SetCacheAddr(cache_addr); | ||
| 66 | } | ||
| 67 | ~BufferBlock() = default; | ||
| 68 | |||
| 69 | private: | ||
| 70 | CacheAddr cache_addr{}; | ||
| 71 | CacheAddr cache_addr_end{}; | ||
| 72 | std::size_t size{}; | ||
| 73 | u64 epoch{}; | ||
| 74 | }; | ||
| 75 | |||
| 76 | } // namespace VideoCommon | ||
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h new file mode 100644 index 000000000..2442ddfd6 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -0,0 +1,447 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <memory> | ||
| 9 | #include <mutex> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <unordered_set> | ||
| 12 | #include <utility> | ||
| 13 | #include <vector> | ||
| 14 | |||
| 15 | #include "common/alignment.h" | ||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "core/core.h" | ||
| 18 | #include "video_core/buffer_cache/buffer_block.h" | ||
| 19 | #include "video_core/buffer_cache/map_interval.h" | ||
| 20 | #include "video_core/memory_manager.h" | ||
| 21 | #include "video_core/rasterizer_interface.h" | ||
| 22 | |||
| 23 | namespace VideoCommon { | ||
| 24 | |||
| 25 | using MapInterval = std::shared_ptr<MapIntervalBase>; | ||
| 26 | |||
| 27 | template <typename TBuffer, typename TBufferType, typename StreamBuffer> | ||
| 28 | class BufferCache { | ||
| 29 | public: | ||
| 30 | using BufferInfo = std::pair<const TBufferType*, u64>; | ||
| 31 | |||
| 32 | BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | ||
| 33 | bool is_written = false) { | ||
| 34 | std::lock_guard lock{mutex}; | ||
| 35 | |||
| 36 | auto& memory_manager = system.GPU().MemoryManager(); | ||
| 37 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 38 | if (!host_ptr) { | ||
| 39 | return {GetEmptyBuffer(size), 0}; | ||
| 40 | } | ||
| 41 | const auto cache_addr = ToCacheAddr(host_ptr); | ||
| 42 | |||
| 43 | // Cache management is a big overhead, so only cache entries with a given size. | ||
| 44 | // TODO: Figure out which size is the best for given games. | ||
| 45 | constexpr std::size_t max_stream_size = 0x800; | ||
| 46 | if (size < max_stream_size) { | ||
| 47 | if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { | ||
| 48 | return StreamBufferUpload(host_ptr, size, alignment); | ||
| 49 | } | ||
| 50 | } | ||
| 51 | |||
| 52 | auto block = GetBlock(cache_addr, size); | ||
| 53 | auto map = MapAddress(block, gpu_addr, cache_addr, size); | ||
| 54 | if (is_written) { | ||
| 55 | map->MarkAsModified(true, GetModifiedTicks()); | ||
| 56 | if (!map->IsWritten()) { | ||
| 57 | map->MarkAsWritten(true); | ||
| 58 | MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); | ||
| 59 | } | ||
| 60 | } else { | ||
| 61 | if (map->IsWritten()) { | ||
| 62 | WriteBarrier(); | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | const u64 offset = static_cast<u64>(block->GetOffset(cache_addr)); | ||
| 67 | |||
| 68 | return {ToHandle(block), offset}; | ||
| 69 | } | ||
| 70 | |||
| 71 | /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. | ||
| 72 | BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, | ||
| 73 | std::size_t alignment = 4) { | ||
| 74 | std::lock_guard lock{mutex}; | ||
| 75 | return StreamBufferUpload(raw_pointer, size, alignment); | ||
| 76 | } | ||
| 77 | |||
| 78 | void Map(std::size_t max_size) { | ||
| 79 | std::lock_guard lock{mutex}; | ||
| 80 | |||
| 81 | std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); | ||
| 82 | buffer_offset = buffer_offset_base; | ||
| 83 | } | ||
| 84 | |||
| 85 | /// Finishes the upload stream, returns true on bindings invalidation. | ||
| 86 | bool Unmap() { | ||
| 87 | std::lock_guard lock{mutex}; | ||
| 88 | |||
| 89 | stream_buffer->Unmap(buffer_offset - buffer_offset_base); | ||
| 90 | return std::exchange(invalidated, false); | ||
| 91 | } | ||
| 92 | |||
| 93 | void TickFrame() { | ||
| 94 | ++epoch; | ||
| 95 | while (!pending_destruction.empty()) { | ||
| 96 | if (pending_destruction.front()->GetEpoch() + 1 > epoch) { | ||
| 97 | break; | ||
| 98 | } | ||
| 99 | pending_destruction.pop_front(); | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | /// Write any cached resources overlapping the specified region back to memory | ||
| 104 | void FlushRegion(CacheAddr addr, std::size_t size) { | ||
| 105 | std::lock_guard lock{mutex}; | ||
| 106 | |||
| 107 | std::vector<MapInterval> objects = GetMapsInRange(addr, size); | ||
| 108 | std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) { | ||
| 109 | return a->GetModificationTick() < b->GetModificationTick(); | ||
| 110 | }); | ||
| 111 | for (auto& object : objects) { | ||
| 112 | if (object->IsModified() && object->IsRegistered()) { | ||
| 113 | FlushMap(object); | ||
| 114 | } | ||
| 115 | } | ||
| 116 | } | ||
| 117 | |||
| 118 | /// Mark the specified region as being invalidated | ||
| 119 | void InvalidateRegion(CacheAddr addr, u64 size) { | ||
| 120 | std::lock_guard lock{mutex}; | ||
| 121 | |||
| 122 | std::vector<MapInterval> objects = GetMapsInRange(addr, size); | ||
| 123 | for (auto& object : objects) { | ||
| 124 | if (object->IsRegistered()) { | ||
| 125 | Unregister(object); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | } | ||
| 129 | |||
/// Backend hook taking a size and returning a pointer to a backend buffer handle.
/// NOTE(review): presumably yields a handle to an empty buffer of at least `size` bytes -
/// confirm against the backend implementations.
virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0;

protected:
    /// Stores the rasterizer and system references, takes ownership of the stream buffer and
    /// caches the handle returned by its GetHandle().
    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
                         std::unique_ptr<StreamBuffer> stream_buffer)
        : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)},
          // `this->` is required: the parameter of the same name has just been moved from.
          stream_buffer_handle{this->stream_buffer->GetHandle()} {}

    /// Protected and non-virtual: the cache cannot be destroyed through a BufferCache pointer
    /// from outside the class hierarchy.
    ~BufferCache() = default;

    /// Backend hook converting a block into a pointer to its backend handle.
    virtual const TBufferType* ToHandle(const TBuffer& storage) = 0;

    /// Backend hook; its use is not visible in this part of the file.
    /// NOTE(review): presumably issues a barrier after GPU writes - confirm.
    virtual void WriteBarrier() = 0;

    /// Backend hook creating a block of `size` bytes that starts at `cache_addr`.
    virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0;

    /// Backend hook copying `size` bytes from host memory `data` into `buffer` at `offset`.
    virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
                                 const u8* data) = 0;

    /// Backend hook copying `size` bytes from `buffer` at `offset` into host memory `data`.
    virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
                                   u8* data) = 0;

    /// Backend hook copying `size` bytes from `src` at `src_offset` into `dst` at `dst_offset`.
    virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
                           std::size_t dst_offset, std::size_t size) = 0;
| 154 | |||
| 155 | /// Register an object into the cache | ||
| 156 | void Register(const MapInterval& new_map, bool inherit_written = false) { | ||
| 157 | const CacheAddr cache_ptr = new_map->GetStart(); | ||
| 158 | const std::optional<VAddr> cpu_addr = | ||
| 159 | system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress()); | ||
| 160 | if (!cache_ptr || !cpu_addr) { | ||
| 161 | LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", | ||
| 162 | new_map->GetGpuAddress()); | ||
| 163 | return; | ||
| 164 | } | ||
| 165 | const std::size_t size = new_map->GetEnd() - new_map->GetStart(); | ||
| 166 | new_map->SetCpuAddress(*cpu_addr); | ||
| 167 | new_map->MarkAsRegistered(true); | ||
| 168 | const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; | ||
| 169 | mapped_addresses.insert({interval, new_map}); | ||
| 170 | rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); | ||
| 171 | if (inherit_written) { | ||
| 172 | MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); | ||
| 173 | new_map->MarkAsWritten(true); | ||
| 174 | } | ||
| 175 | } | ||
| 176 | |||
| 177 | /// Unregisters an object from the cache | ||
| 178 | void Unregister(MapInterval& map) { | ||
| 179 | const std::size_t size = map->GetEnd() - map->GetStart(); | ||
| 180 | rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); | ||
| 181 | map->MarkAsRegistered(false); | ||
| 182 | if (map->IsWritten()) { | ||
| 183 | UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); | ||
| 184 | } | ||
| 185 | const IntervalType delete_interval{map->GetStart(), map->GetEnd()}; | ||
| 186 | mapped_addresses.erase(delete_interval); | ||
| 187 | } | ||
| 188 | |||
| 189 | private: | ||
| 190 | MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) { | ||
| 191 | return std::make_shared<MapIntervalBase>(start, end, gpu_addr); | ||
| 192 | } | ||
| 193 | |||
| 194 | MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, | ||
| 195 | const CacheAddr cache_addr, const std::size_t size) { | ||
| 196 | |||
| 197 | std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size); | ||
| 198 | if (overlaps.empty()) { | ||
| 199 | const CacheAddr cache_addr_end = cache_addr + size; | ||
| 200 | MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr); | ||
| 201 | u8* host_ptr = FromCacheAddr(cache_addr); | ||
| 202 | UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); | ||
| 203 | Register(new_map); | ||
| 204 | return new_map; | ||
| 205 | } | ||
| 206 | |||
| 207 | const CacheAddr cache_addr_end = cache_addr + size; | ||
| 208 | if (overlaps.size() == 1) { | ||
| 209 | MapInterval& current_map = overlaps[0]; | ||
| 210 | if (current_map->IsInside(cache_addr, cache_addr_end)) { | ||
| 211 | return current_map; | ||
| 212 | } | ||
| 213 | } | ||
| 214 | CacheAddr new_start = cache_addr; | ||
| 215 | CacheAddr new_end = cache_addr_end; | ||
| 216 | bool write_inheritance = false; | ||
| 217 | bool modified_inheritance = false; | ||
| 218 | // Calculate new buffer parameters | ||
| 219 | for (auto& overlap : overlaps) { | ||
| 220 | new_start = std::min(overlap->GetStart(), new_start); | ||
| 221 | new_end = std::max(overlap->GetEnd(), new_end); | ||
| 222 | write_inheritance |= overlap->IsWritten(); | ||
| 223 | modified_inheritance |= overlap->IsModified(); | ||
| 224 | } | ||
| 225 | GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; | ||
| 226 | for (auto& overlap : overlaps) { | ||
| 227 | Unregister(overlap); | ||
| 228 | } | ||
| 229 | UpdateBlock(block, new_start, new_end, overlaps); | ||
| 230 | MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); | ||
| 231 | if (modified_inheritance) { | ||
| 232 | new_map->MarkAsModified(true, GetModifiedTicks()); | ||
| 233 | } | ||
| 234 | Register(new_map, write_inheritance); | ||
| 235 | return new_map; | ||
| 236 | } | ||
| 237 | |||
| 238 | void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, | ||
| 239 | std::vector<MapInterval>& overlaps) { | ||
| 240 | const IntervalType base_interval{start, end}; | ||
| 241 | IntervalSet interval_set{}; | ||
| 242 | interval_set.add(base_interval); | ||
| 243 | for (auto& overlap : overlaps) { | ||
| 244 | const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()}; | ||
| 245 | interval_set.subtract(subtract); | ||
| 246 | } | ||
| 247 | for (auto& interval : interval_set) { | ||
| 248 | std::size_t size = interval.upper() - interval.lower(); | ||
| 249 | if (size > 0) { | ||
| 250 | u8* host_ptr = FromCacheAddr(interval.lower()); | ||
| 251 | UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr); | ||
| 252 | } | ||
| 253 | } | ||
| 254 | } | ||
| 255 | |||
| 256 | std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) { | ||
| 257 | if (size == 0) { | ||
| 258 | return {}; | ||
| 259 | } | ||
| 260 | |||
| 261 | std::vector<MapInterval> objects{}; | ||
| 262 | const IntervalType interval{addr, addr + size}; | ||
| 263 | for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) { | ||
| 264 | objects.push_back(pair.second); | ||
| 265 | } | ||
| 266 | |||
| 267 | return objects; | ||
| 268 | } | ||
| 269 | |||
| 270 | /// Returns a ticks counter used for tracking when cached objects were last modified | ||
| 271 | u64 GetModifiedTicks() { | ||
| 272 | return ++modified_ticks; | ||
| 273 | } | ||
| 274 | |||
| 275 | void FlushMap(MapInterval map) { | ||
| 276 | std::size_t size = map->GetEnd() - map->GetStart(); | ||
| 277 | TBuffer block = blocks[map->GetStart() >> block_page_bits]; | ||
| 278 | u8* host_ptr = FromCacheAddr(map->GetStart()); | ||
| 279 | DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr); | ||
| 280 | map->MarkAsModified(false, 0); | ||
| 281 | } | ||
| 282 | |||
| 283 | BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, | ||
| 284 | std::size_t alignment) { | ||
| 285 | AlignBuffer(alignment); | ||
| 286 | const std::size_t uploaded_offset = buffer_offset; | ||
| 287 | std::memcpy(buffer_ptr, raw_pointer, size); | ||
| 288 | |||
| 289 | buffer_ptr += size; | ||
| 290 | buffer_offset += size; | ||
| 291 | return {&stream_buffer_handle, uploaded_offset}; | ||
| 292 | } | ||
| 293 | |||
| 294 | void AlignBuffer(std::size_t alignment) { | ||
| 295 | // Align the offset, not the mapped pointer | ||
| 296 | const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); | ||
| 297 | buffer_ptr += offset_aligned - buffer_offset; | ||
| 298 | buffer_offset = offset_aligned; | ||
| 299 | } | ||
| 300 | |||
| 301 | TBuffer EnlargeBlock(TBuffer buffer) { | ||
| 302 | const std::size_t old_size = buffer->GetSize(); | ||
| 303 | const std::size_t new_size = old_size + block_page_size; | ||
| 304 | const CacheAddr cache_addr = buffer->GetCacheAddr(); | ||
| 305 | TBuffer new_buffer = CreateBlock(cache_addr, new_size); | ||
| 306 | CopyBlock(buffer, new_buffer, 0, 0, old_size); | ||
| 307 | buffer->SetEpoch(epoch); | ||
| 308 | pending_destruction.push_back(buffer); | ||
| 309 | const CacheAddr cache_addr_end = cache_addr + new_size - 1; | ||
| 310 | u64 page_start = cache_addr >> block_page_bits; | ||
| 311 | const u64 page_end = cache_addr_end >> block_page_bits; | ||
| 312 | while (page_start <= page_end) { | ||
| 313 | blocks[page_start] = new_buffer; | ||
| 314 | ++page_start; | ||
| 315 | } | ||
| 316 | return new_buffer; | ||
| 317 | } | ||
| 318 | |||
| 319 | TBuffer MergeBlocks(TBuffer first, TBuffer second) { | ||
| 320 | const std::size_t size_1 = first->GetSize(); | ||
| 321 | const std::size_t size_2 = second->GetSize(); | ||
| 322 | const CacheAddr first_addr = first->GetCacheAddr(); | ||
| 323 | const CacheAddr second_addr = second->GetCacheAddr(); | ||
| 324 | const CacheAddr new_addr = std::min(first_addr, second_addr); | ||
| 325 | const std::size_t new_size = size_1 + size_2; | ||
| 326 | TBuffer new_buffer = CreateBlock(new_addr, new_size); | ||
| 327 | CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); | ||
| 328 | CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); | ||
| 329 | first->SetEpoch(epoch); | ||
| 330 | second->SetEpoch(epoch); | ||
| 331 | pending_destruction.push_back(first); | ||
| 332 | pending_destruction.push_back(second); | ||
| 333 | const CacheAddr cache_addr_end = new_addr + new_size - 1; | ||
| 334 | u64 page_start = new_addr >> block_page_bits; | ||
| 335 | const u64 page_end = cache_addr_end >> block_page_bits; | ||
| 336 | while (page_start <= page_end) { | ||
| 337 | blocks[page_start] = new_buffer; | ||
| 338 | ++page_start; | ||
| 339 | } | ||
| 340 | return new_buffer; | ||
| 341 | } | ||
| 342 | |||
| 343 | TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { | ||
| 344 | TBuffer found{}; | ||
| 345 | const CacheAddr cache_addr_end = cache_addr + size - 1; | ||
| 346 | u64 page_start = cache_addr >> block_page_bits; | ||
| 347 | const u64 page_end = cache_addr_end >> block_page_bits; | ||
| 348 | while (page_start <= page_end) { | ||
| 349 | auto it = blocks.find(page_start); | ||
| 350 | if (it == blocks.end()) { | ||
| 351 | if (found) { | ||
| 352 | found = EnlargeBlock(found); | ||
| 353 | } else { | ||
| 354 | const CacheAddr start_addr = (page_start << block_page_bits); | ||
| 355 | found = CreateBlock(start_addr, block_page_size); | ||
| 356 | blocks[page_start] = found; | ||
| 357 | } | ||
| 358 | } else { | ||
| 359 | if (found) { | ||
| 360 | if (found == it->second) { | ||
| 361 | ++page_start; | ||
| 362 | continue; | ||
| 363 | } | ||
| 364 | found = MergeBlocks(found, it->second); | ||
| 365 | } else { | ||
| 366 | found = it->second; | ||
| 367 | } | ||
| 368 | } | ||
| 369 | ++page_start; | ||
| 370 | } | ||
| 371 | return found; | ||
| 372 | } | ||
| 373 | |||
| 374 | void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { | ||
| 375 | u64 page_start = start >> write_page_bit; | ||
| 376 | const u64 page_end = end >> write_page_bit; | ||
| 377 | while (page_start <= page_end) { | ||
| 378 | auto it = written_pages.find(page_start); | ||
| 379 | if (it != written_pages.end()) { | ||
| 380 | it->second = it->second + 1; | ||
| 381 | } else { | ||
| 382 | written_pages[page_start] = 1; | ||
| 383 | } | ||
| 384 | page_start++; | ||
| 385 | } | ||
| 386 | } | ||
| 387 | |||
| 388 | void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { | ||
| 389 | u64 page_start = start >> write_page_bit; | ||
| 390 | const u64 page_end = end >> write_page_bit; | ||
| 391 | while (page_start <= page_end) { | ||
| 392 | auto it = written_pages.find(page_start); | ||
| 393 | if (it != written_pages.end()) { | ||
| 394 | if (it->second > 1) { | ||
| 395 | it->second = it->second - 1; | ||
| 396 | } else { | ||
| 397 | written_pages.erase(it); | ||
| 398 | } | ||
| 399 | } | ||
| 400 | page_start++; | ||
| 401 | } | ||
| 402 | } | ||
| 403 | |||
| 404 | bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const { | ||
| 405 | u64 page_start = start >> write_page_bit; | ||
| 406 | const u64 page_end = end >> write_page_bit; | ||
| 407 | while (page_start <= page_end) { | ||
| 408 | if (written_pages.count(page_start) > 0) { | ||
| 409 | return true; | ||
| 410 | } | ||
| 411 | page_start++; | ||
| 412 | } | ||
| 413 | return false; | ||
| 414 | } | ||
| 415 | |||
| 416 | VideoCore::RasterizerInterface& rasterizer; | ||
| 417 | Core::System& system; | ||
| 418 | std::unique_ptr<StreamBuffer> stream_buffer; | ||
| 419 | |||
| 420 | TBufferType stream_buffer_handle{}; | ||
| 421 | |||
| 422 | bool invalidated = false; | ||
| 423 | |||
| 424 | u8* buffer_ptr = nullptr; | ||
| 425 | u64 buffer_offset = 0; | ||
| 426 | u64 buffer_offset_base = 0; | ||
| 427 | |||
| 428 | using IntervalSet = boost::icl::interval_set<CacheAddr>; | ||
| 429 | using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>; | ||
| 430 | using IntervalType = typename IntervalCache::interval_type; | ||
| 431 | IntervalCache mapped_addresses{}; | ||
| 432 | |||
| 433 | static constexpr u64 write_page_bit{11}; | ||
| 434 | std::unordered_map<u64, u32> written_pages{}; | ||
| 435 | |||
| 436 | static constexpr u64 block_page_bits{21}; | ||
| 437 | static constexpr u64 block_page_size{1 << block_page_bits}; | ||
| 438 | std::unordered_map<u64, TBuffer> blocks{}; | ||
| 439 | |||
| 440 | std::list<TBuffer> pending_destruction{}; | ||
| 441 | u64 epoch{}; | ||
| 442 | u64 modified_ticks{}; | ||
| 443 | |||
| 444 | std::recursive_mutex mutex; | ||
| 445 | }; | ||
| 446 | |||
| 447 | } // namespace VideoCommon | ||
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h new file mode 100644 index 000000000..3a104d5cd --- /dev/null +++ b/src/video_core/buffer_cache/map_interval.h | |||
| @@ -0,0 +1,89 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/gpu.h" | ||
| 9 | |||
| 10 | namespace VideoCommon { | ||
| 11 | |||
| 12 | class MapIntervalBase { | ||
| 13 | public: | ||
| 14 | MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) | ||
| 15 | : start{start}, end{end}, gpu_addr{gpu_addr} {} | ||
| 16 | |||
| 17 | void SetCpuAddress(VAddr new_cpu_addr) { | ||
| 18 | cpu_addr = new_cpu_addr; | ||
| 19 | } | ||
| 20 | |||
| 21 | VAddr GetCpuAddress() const { | ||
| 22 | return cpu_addr; | ||
| 23 | } | ||
| 24 | |||
| 25 | GPUVAddr GetGpuAddress() const { | ||
| 26 | return gpu_addr; | ||
| 27 | } | ||
| 28 | |||
| 29 | bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { | ||
| 30 | return (start <= other_start && other_end <= end); | ||
| 31 | } | ||
| 32 | |||
| 33 | bool operator==(const MapIntervalBase& rhs) const { | ||
| 34 | return std::tie(start, end) == std::tie(rhs.start, rhs.end); | ||
| 35 | } | ||
| 36 | |||
| 37 | bool operator!=(const MapIntervalBase& rhs) const { | ||
| 38 | return !operator==(rhs); | ||
| 39 | } | ||
| 40 | |||
| 41 | void MarkAsRegistered(const bool registered) { | ||
| 42 | is_registered = registered; | ||
| 43 | } | ||
| 44 | |||
| 45 | bool IsRegistered() const { | ||
| 46 | return is_registered; | ||
| 47 | } | ||
| 48 | |||
| 49 | CacheAddr GetStart() const { | ||
| 50 | return start; | ||
| 51 | } | ||
| 52 | |||
| 53 | CacheAddr GetEnd() const { | ||
| 54 | return end; | ||
| 55 | } | ||
| 56 | |||
| 57 | void MarkAsModified(const bool is_modified_, const u64 tick) { | ||
| 58 | is_modified = is_modified_; | ||
| 59 | ticks = tick; | ||
| 60 | } | ||
| 61 | |||
| 62 | bool IsModified() const { | ||
| 63 | return is_modified; | ||
| 64 | } | ||
| 65 | |||
| 66 | u64 GetModificationTick() const { | ||
| 67 | return ticks; | ||
| 68 | } | ||
| 69 | |||
| 70 | void MarkAsWritten(const bool is_written_) { | ||
| 71 | is_written = is_written_; | ||
| 72 | } | ||
| 73 | |||
| 74 | bool IsWritten() const { | ||
| 75 | return is_written; | ||
| 76 | } | ||
| 77 | |||
| 78 | private: | ||
| 79 | CacheAddr start; | ||
| 80 | CacheAddr end; | ||
| 81 | GPUVAddr gpu_addr; | ||
| 82 | VAddr cpu_addr{}; | ||
| 83 | bool is_written{}; | ||
| 84 | bool is_modified{}; | ||
| 85 | bool is_registered{}; | ||
| 86 | u64 ticks{}; | ||
| 87 | }; | ||
| 88 | |||
| 89 | } // namespace VideoCommon | ||
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 3175579cc..0094fd715 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() { | |||
| 22 | MICROPROFILE_SCOPE(DispatchCalls); | 22 | MICROPROFILE_SCOPE(DispatchCalls); |
| 23 | 23 | ||
| 24 | // On entering GPU code, assume all memory may be touched by the ARM core. | 24 | // On entering GPU code, assume all memory may be touched by the ARM core. |
| 25 | gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); | 25 | gpu.Maxwell3D().dirty.OnMemoryWrite(); |
| 26 | 26 | ||
| 27 | dma_pushbuffer_subindex = 0; | 27 | dma_pushbuffer_subindex = 0; |
| 28 | 28 | ||
| @@ -31,6 +31,7 @@ void DmaPusher::DispatchCalls() { | |||
| 31 | break; | 31 | break; |
| 32 | } | 32 | } |
| 33 | } | 33 | } |
| 34 | gpu.FlushCommands(); | ||
| 34 | } | 35 | } |
| 35 | 36 | ||
| 36 | bool DmaPusher::Step() { | 37 | bool DmaPusher::Step() { |
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 0ee228e28..98a8b5337 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -10,8 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | namespace Tegra::Engines { | 11 | namespace Tegra::Engines { |
| 12 | 12 | ||
| 13 | Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) | 13 | Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} |
| 14 | : rasterizer{rasterizer}, memory_manager{memory_manager} {} | ||
| 15 | 14 | ||
| 16 | void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { | 15 | void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { |
| 17 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, | 16 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, |
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 05421d185..0901cf2fa 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -33,7 +33,7 @@ namespace Tegra::Engines { | |||
| 33 | 33 | ||
| 34 | class Fermi2D final { | 34 | class Fermi2D final { |
| 35 | public: | 35 | public: |
| 36 | explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); | 36 | explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer); |
| 37 | ~Fermi2D() = default; | 37 | ~Fermi2D() = default; |
| 38 | 38 | ||
| 39 | /// Write the value to the register identified by method. | 39 | /// Write the value to the register identified by method. |
| @@ -145,7 +145,6 @@ public: | |||
| 145 | 145 | ||
| 146 | private: | 146 | private: |
| 147 | VideoCore::RasterizerInterface& rasterizer; | 147 | VideoCore::RasterizerInterface& rasterizer; |
| 148 | MemoryManager& memory_manager; | ||
| 149 | 148 | ||
| 150 | /// Performs the copy from the source surface to the destination surface as configured in the | 149 | /// Performs the copy from the source surface to the destination surface as configured in the |
| 151 | /// registers. | 150 | /// registers. |
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 7404a8163..08586d33c 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 37 | const bool is_last_call = method_call.IsLastCall(); | 37 | const bool is_last_call = method_call.IsLastCall(); |
| 38 | upload_state.ProcessData(method_call.argument, is_last_call); | 38 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 39 | if (is_last_call) { | 39 | if (is_last_call) { |
| 40 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 40 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 41 | } | 41 | } |
| 42 | break; | 42 | break; |
| 43 | } | 43 | } |
| @@ -50,13 +50,14 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | void KeplerCompute::ProcessLaunch() { | 52 | void KeplerCompute::ProcessLaunch() { |
| 53 | |||
| 54 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | 53 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); |
| 55 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | 54 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, |
| 56 | LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); | 55 | LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); |
| 57 | 56 | ||
| 58 | const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; | 57 | const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; |
| 59 | LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); | 58 | LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); |
| 59 | |||
| 60 | rasterizer.DispatchCompute(code_addr); | ||
| 60 | } | 61 | } |
| 61 | 62 | ||
| 62 | } // namespace Tegra::Engines | 63 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 0561f676c..fa4a7c5c1 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -15,7 +15,7 @@ | |||
| 15 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| 16 | 16 | ||
| 17 | KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) | 17 | KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) |
| 18 | : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {} | 18 | : system{system}, upload_state{memory_manager, regs.upload} {} |
| 19 | 19 | ||
| 20 | KeplerMemory::~KeplerMemory() = default; | 20 | KeplerMemory::~KeplerMemory() = default; |
| 21 | 21 | ||
| @@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 34 | const bool is_last_call = method_call.IsLastCall(); | 34 | const bool is_last_call = method_call.IsLastCall(); |
| 35 | upload_state.ProcessData(method_call.argument, is_last_call); | 35 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 36 | if (is_last_call) { | 36 | if (is_last_call) { |
| 37 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 37 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 38 | } | 38 | } |
| 39 | break; | 39 | break; |
| 40 | } | 40 | } |
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index f3bc675a9..e0e25c321 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h | |||
| @@ -65,7 +65,6 @@ public: | |||
| 65 | 65 | ||
| 66 | private: | 66 | private: |
| 67 | Core::System& system; | 67 | Core::System& system; |
| 68 | MemoryManager& memory_manager; | ||
| 69 | Upload::State upload_state; | 68 | Upload::State upload_state; |
| 70 | }; | 69 | }; |
| 71 | 70 | ||
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 8755b8af4..f5158d219 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste | |||
| 22 | MemoryManager& memory_manager) | 22 | MemoryManager& memory_manager) |
| 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, | 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, |
| 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { | 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { |
| 25 | InitDirtySettings(); | ||
| 25 | InitializeRegisterDefaults(); | 26 | InitializeRegisterDefaults(); |
| 26 | } | 27 | } |
| 27 | 28 | ||
| @@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 69 | regs.stencil_back_func_mask = 0xFFFFFFFF; | 70 | regs.stencil_back_func_mask = 0xFFFFFFFF; |
| 70 | regs.stencil_back_mask = 0xFFFFFFFF; | 71 | regs.stencil_back_mask = 0xFFFFFFFF; |
| 71 | 72 | ||
| 73 | regs.depth_test_func = Regs::ComparisonOp::Always; | ||
| 74 | regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise; | ||
| 75 | regs.cull.cull_face = Regs::Cull::CullFace::Back; | ||
| 76 | |||
| 72 | // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a | 77 | // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a |
| 73 | // register carrying a default value. Assume it's OpenGL's default (1). | 78 | // register carrying a default value. Assume it's OpenGL's default (1). |
| 74 | regs.point_size = 1.0f; | 79 | regs.point_size = 1.0f; |
| @@ -86,21 +91,168 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 86 | regs.rt_separate_frag_data = 1; | 91 | regs.rt_separate_frag_data = 1; |
| 87 | } | 92 | } |
| 88 | 93 | ||
| 94 | #define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) | ||
| 95 | |||
| 96 | void Maxwell3D::InitDirtySettings() { | ||
| 97 | const auto set_block = [this](const u32 start, const u32 range, const u8 position) { | ||
| 98 | const auto start_itr = dirty_pointers.begin() + start; | ||
| 99 | const auto end_itr = start_itr + range; | ||
| 100 | std::fill(start_itr, end_itr, position); | ||
| 101 | }; | ||
| 102 | dirty.regs.fill(true); | ||
| 103 | |||
| 104 | // Init Render Targets | ||
| 105 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | ||
| 106 | constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); | ||
| 107 | constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; | ||
| 108 | u32 rt_dirty_reg = DIRTY_REGS_POS(render_target); | ||
| 109 | for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { | ||
| 110 | set_block(rt_reg, registers_per_rt, rt_dirty_reg); | ||
| 111 | rt_dirty_reg++; | ||
| 112 | } | ||
| 113 | constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); | ||
| 114 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; | ||
| 115 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag; | ||
| 116 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag; | ||
| 117 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | ||
| 118 | constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta); | ||
| 119 | set_block(zeta_reg, registers_in_zeta, depth_buffer_flag); | ||
| 120 | |||
| 121 | // Init Vertex Arrays | ||
| 122 | constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); | ||
| 123 | constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); | ||
| 124 | constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; | ||
| 125 | u32 va_reg = DIRTY_REGS_POS(vertex_array); | ||
| 126 | u32 vi_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 127 | for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; | ||
| 128 | vertex_reg += vertex_array_size) { | ||
| 129 | set_block(vertex_reg, 3, va_reg); | ||
| 130 | // The divisor concerns vertex array instances | ||
| 131 | dirty_pointers[vertex_reg + 3] = vi_reg; | ||
| 132 | va_reg++; | ||
| 133 | vi_reg++; | ||
| 134 | } | ||
| 135 | constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); | ||
| 136 | constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); | ||
| 137 | constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; | ||
| 138 | va_reg = DIRTY_REGS_POS(vertex_array); | ||
| 139 | for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; | ||
| 140 | vertex_reg += vertex_limit_size) { | ||
| 141 | set_block(vertex_reg, vertex_limit_size, va_reg); | ||
| 142 | va_reg++; | ||
| 143 | } | ||
| 144 | constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); | ||
| 145 | constexpr u32 vertex_instance_size = | ||
| 146 | sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); | ||
| 147 | constexpr u32 vertex_instance_end = | ||
| 148 | vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; | ||
| 149 | vi_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 150 | for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; | ||
| 151 | vertex_reg += vertex_instance_size) { | ||
| 152 | set_block(vertex_reg, vertex_instance_size, vi_reg); | ||
| 153 | vi_reg++; | ||
| 154 | } | ||
| 155 | set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), | ||
| 156 | DIRTY_REGS_POS(vertex_attrib_format)); | ||
| 157 | |||
| 158 | // Init Shaders | ||
| 159 | constexpr u32 shader_registers_count = | ||
| 160 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 161 | set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count, | ||
| 162 | DIRTY_REGS_POS(shaders)); | ||
| 163 | |||
| 164 | // State | ||
| 165 | |||
| 166 | // Viewport | ||
| 167 | constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport); | ||
| 168 | constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); | ||
| 169 | constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); | ||
| 170 | set_block(viewport_start, viewport_size, viewport_dirty_reg); | ||
| 171 | constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control); | ||
| 172 | constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32); | ||
| 173 | set_block(view_volume_start, view_volume_size, viewport_dirty_reg); | ||
| 174 | |||
| 175 | // Viewport transformation | ||
| 176 | constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform); | ||
| 177 | constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32); | ||
| 178 | set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform)); | ||
| 179 | |||
| 180 | // Cullmode | ||
| 181 | constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull); | ||
| 182 | constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32); | ||
| 183 | set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode)); | ||
| 184 | |||
| 185 | // Screen y control | ||
| 186 | dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control); | ||
| 187 | |||
| 188 | // Primitive Restart | ||
| 189 | constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart); | ||
| 190 | constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32); | ||
| 191 | set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); | ||
| 192 | |||
| 193 | // Depth Test | ||
| 194 | constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); | ||
| 195 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; | ||
| 196 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; | ||
| 197 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; | ||
| 198 | |||
| 199 | // Stencil Test | ||
| 200 | constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test); | ||
| 201 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg; | ||
| 202 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg; | ||
| 203 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg; | ||
| 204 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg; | ||
| 205 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg; | ||
| 206 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg; | ||
| 207 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg; | ||
| 208 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg; | ||
| 209 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg; | ||
| 210 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg; | ||
| 211 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg; | ||
| 212 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg; | ||
| 213 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg; | ||
| 214 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg; | ||
| 215 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg; | ||
| 216 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg; | ||
| 217 | |||
| 218 | // Color Mask | ||
| 219 | constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); | ||
| 220 | dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; | ||
| 221 | set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), | ||
| 222 | color_mask_dirty_reg); | ||
| 223 | // Blend State | ||
| 224 | constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); | ||
| 225 | set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), | ||
| 226 | blend_state_dirty_reg); | ||
| 227 | dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; | ||
| 228 | set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg); | ||
| 229 | set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32), | ||
| 230 | blend_state_dirty_reg); | ||
| 231 | |||
| 232 | // Scissor State | ||
| 233 | constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); | ||
| 234 | set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), | ||
| 235 | scissor_test_dirty_reg); | ||
| 236 | |||
| 237 | // Polygon Offset | ||
| 238 | constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); | ||
| 239 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; | ||
| 240 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; | ||
| 241 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; | ||
| 242 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg; | ||
| 243 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg; | ||
| 244 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; | ||
| 245 | } | ||
| 246 | |||
| 89 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { | 247 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { |
| 90 | // Reset the current macro. | 248 | // Reset the current macro. |
| 91 | executing_macro = 0; | 249 | executing_macro = 0; |
| 92 | 250 | ||
| 93 | // Lookup the macro offset | 251 | // Lookup the macro offset |
| 94 | const u32 entry{(method - MacroRegistersStart) >> 1}; | 252 | const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size(); |
| 95 | const auto& search{macro_offsets.find(entry)}; | ||
| 96 | if (search == macro_offsets.end()) { | ||
| 97 | LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method); | ||
| 98 | UNREACHABLE(); | ||
| 99 | return; | ||
| 100 | } | ||
| 101 | 253 | ||
| 102 | // Execute the current macro. | 254 | // Execute the current macro. |
| 103 | macro_interpreter.Execute(search->second, std::move(parameters)); | 255 | macro_interpreter.Execute(macro_positions[entry], std::move(parameters)); |
| 104 | } | 256 | } |
| 105 | 257 | ||
| 106 | void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | 258 | void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { |
| @@ -108,6 +260,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 108 | 260 | ||
| 109 | const u32 method = method_call.method; | 261 | const u32 method = method_call.method; |
| 110 | 262 | ||
| 263 | if (method == cb_data_state.current) { | ||
| 264 | regs.reg_array[method] = method_call.argument; | ||
| 265 | ProcessCBData(method_call.argument); | ||
| 266 | return; | ||
| 267 | } else if (cb_data_state.current != null_cb_data) { | ||
| 268 | FinishCBData(); | ||
| 269 | } | ||
| 270 | |||
| 111 | // It is an error to write to a register other than the current macro's ARG register before it | 271 | // It is an error to write to a register other than the current macro's ARG register before it |
| 112 | // has finished execution. | 272 | // has finished execution. |
| 113 | if (executing_macro != 0) { | 273 | if (executing_macro != 0) { |
| @@ -143,49 +303,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 143 | 303 | ||
| 144 | if (regs.reg_array[method] != method_call.argument) { | 304 | if (regs.reg_array[method] != method_call.argument) { |
| 145 | regs.reg_array[method] = method_call.argument; | 305 | regs.reg_array[method] = method_call.argument; |
| 146 | // Color buffers | 306 | const std::size_t dirty_reg = dirty_pointers[method]; |
| 147 | constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); | 307 | if (dirty_reg) { |
| 148 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | 308 | dirty.regs[dirty_reg] = true; |
| 149 | if (method >= first_rt_reg && | 309 | if (dirty_reg >= DIRTY_REGS_POS(vertex_array) && |
| 150 | method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { | 310 | dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) { |
| 151 | const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; | 311 | dirty.vertex_array_buffers = true; |
| 152 | dirty_flags.color_buffer.set(rt_index); | 312 | } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) && |
| 153 | } | 313 | dirty_reg < DIRTY_REGS_POS(vertex_instances)) { |
| 154 | 314 | dirty.vertex_instances = true; | |
| 155 | // Zeta buffer | 315 | } else if (dirty_reg >= DIRTY_REGS_POS(render_target) && |
| 156 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | 316 | dirty_reg < DIRTY_REGS_POS(render_settings)) { |
| 157 | if (method == MAXWELL3D_REG_INDEX(zeta_enable) || | 317 | dirty.render_settings = true; |
| 158 | method == MAXWELL3D_REG_INDEX(zeta_width) || | 318 | } |
| 159 | method == MAXWELL3D_REG_INDEX(zeta_height) || | ||
| 160 | (method >= MAXWELL3D_REG_INDEX(zeta) && | ||
| 161 | method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { | ||
| 162 | dirty_flags.zeta_buffer = true; | ||
| 163 | } | ||
| 164 | |||
| 165 | // Shader | ||
| 166 | constexpr u32 shader_registers_count = | ||
| 167 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 168 | if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) && | ||
| 169 | method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { | ||
| 170 | dirty_flags.shaders = true; | ||
| 171 | } | ||
| 172 | |||
| 173 | // Vertex format | ||
| 174 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && | ||
| 175 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { | ||
| 176 | dirty_flags.vertex_attrib_format = true; | ||
| 177 | } | ||
| 178 | |||
| 179 | // Vertex buffer | ||
| 180 | if (method >= MAXWELL3D_REG_INDEX(vertex_array) && | ||
| 181 | method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) { | ||
| 182 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); | ||
| 183 | } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && | ||
| 184 | method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) { | ||
| 185 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); | ||
| 186 | } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && | ||
| 187 | method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) { | ||
| 188 | dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays)); | ||
| 189 | } | 319 | } |
| 190 | } | 320 | } |
| 191 | 321 | ||
| @@ -214,7 +344,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 214 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): | 344 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): |
| 215 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): | 345 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): |
| 216 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { | 346 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { |
| 217 | ProcessCBData(method_call.argument); | 347 | StartCBData(method); |
| 218 | break; | 348 | break; |
| 219 | } | 349 | } |
| 220 | case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { | 350 | case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { |
| @@ -249,6 +379,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 249 | ProcessQueryGet(); | 379 | ProcessQueryGet(); |
| 250 | break; | 380 | break; |
| 251 | } | 381 | } |
| 382 | case MAXWELL3D_REG_INDEX(condition.mode): { | ||
| 383 | ProcessQueryCondition(); | ||
| 384 | break; | ||
| 385 | } | ||
| 252 | case MAXWELL3D_REG_INDEX(sync_info): { | 386 | case MAXWELL3D_REG_INDEX(sync_info): { |
| 253 | ProcessSyncPoint(); | 387 | ProcessSyncPoint(); |
| 254 | break; | 388 | break; |
| @@ -261,7 +395,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 261 | const bool is_last_call = method_call.IsLastCall(); | 395 | const bool is_last_call = method_call.IsLastCall(); |
| 262 | upload_state.ProcessData(method_call.argument, is_last_call); | 396 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 263 | if (is_last_call) { | 397 | if (is_last_call) { |
| 264 | dirty_flags.OnMemoryWrite(); | 398 | dirty.OnMemoryWrite(); |
| 265 | } | 399 | } |
| 266 | break; | 400 | break; |
| 267 | } | 401 | } |
| @@ -281,7 +415,7 @@ void Maxwell3D::ProcessMacroUpload(u32 data) { | |||
| 281 | } | 415 | } |
| 282 | 416 | ||
| 283 | void Maxwell3D::ProcessMacroBind(u32 data) { | 417 | void Maxwell3D::ProcessMacroBind(u32 data) { |
| 284 | macro_offsets[regs.macros.entry] = data; | 418 | macro_positions[regs.macros.entry++] = data; |
| 285 | } | 419 | } |
| 286 | 420 | ||
| 287 | void Maxwell3D::ProcessQueryGet() { | 421 | void Maxwell3D::ProcessQueryGet() { |
| @@ -302,6 +436,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 302 | result = regs.query.query_sequence; | 436 | result = regs.query.query_sequence; |
| 303 | break; | 437 | break; |
| 304 | default: | 438 | default: |
| 439 | result = 1; | ||
| 305 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", | 440 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", |
| 306 | static_cast<u32>(regs.query.query_get.select.Value())); | 441 | static_cast<u32>(regs.query.query_get.select.Value())); |
| 307 | } | 442 | } |
| @@ -333,7 +468,6 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 333 | query_result.timestamp = system.CoreTiming().GetTicks(); | 468 | query_result.timestamp = system.CoreTiming().GetTicks(); |
| 334 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | 469 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); |
| 335 | } | 470 | } |
| 336 | dirty_flags.OnMemoryWrite(); | ||
| 337 | break; | 471 | break; |
| 338 | } | 472 | } |
| 339 | default: | 473 | default: |
| @@ -342,12 +476,52 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 342 | } | 476 | } |
| 343 | } | 477 | } |
| 344 | 478 | ||
| 479 | void Maxwell3D::ProcessQueryCondition() { | ||
| 480 | const GPUVAddr condition_address{regs.condition.Address()}; | ||
| 481 | switch (regs.condition.mode) { | ||
| 482 | case Regs::ConditionMode::Always: { | ||
| 483 | execute_on = true; | ||
| 484 | break; | ||
| 485 | } | ||
| 486 | case Regs::ConditionMode::Never: { | ||
| 487 | execute_on = false; | ||
| 488 | break; | ||
| 489 | } | ||
| 490 | case Regs::ConditionMode::ResNonZero: { | ||
| 491 | Regs::QueryCompare cmp; | ||
| 492 | memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); | ||
| 493 | execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U; | ||
| 494 | break; | ||
| 495 | } | ||
| 496 | case Regs::ConditionMode::Equal: { | ||
| 497 | Regs::QueryCompare cmp; | ||
| 498 | memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); | ||
| 499 | execute_on = | ||
| 500 | cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode; | ||
| 501 | break; | ||
| 502 | } | ||
| 503 | case Regs::ConditionMode::NotEqual: { | ||
| 504 | Regs::QueryCompare cmp; | ||
| 505 | memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); | ||
| 506 | execute_on = | ||
| 507 | cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode; | ||
| 508 | break; | ||
| 509 | } | ||
| 510 | default: { | ||
| 511 | UNIMPLEMENTED_MSG("Uninplemented Condition Mode!"); | ||
| 512 | execute_on = true; | ||
| 513 | break; | ||
| 514 | } | ||
| 515 | } | ||
| 516 | } | ||
| 517 | |||
| 345 | void Maxwell3D::ProcessSyncPoint() { | 518 | void Maxwell3D::ProcessSyncPoint() { |
| 346 | const u32 sync_point = regs.sync_info.sync_point.Value(); | 519 | const u32 sync_point = regs.sync_info.sync_point.Value(); |
| 347 | const u32 increment = regs.sync_info.increment.Value(); | 520 | const u32 increment = regs.sync_info.increment.Value(); |
| 348 | const u32 cache_flush = regs.sync_info.unknown.Value(); | 521 | [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value(); |
| 349 | LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment, | 522 | if (increment) { |
| 350 | cache_flush); | 523 | system.GPU().IncrementSyncPoint(sync_point); |
| 524 | } | ||
| 351 | } | 525 | } |
| 352 | 526 | ||
| 353 | void Maxwell3D::DrawArrays() { | 527 | void Maxwell3D::DrawArrays() { |
| @@ -405,23 +579,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { | |||
| 405 | } | 579 | } |
| 406 | 580 | ||
| 407 | void Maxwell3D::ProcessCBData(u32 value) { | 581 | void Maxwell3D::ProcessCBData(u32 value) { |
| 582 | const u32 id = cb_data_state.id; | ||
| 583 | cb_data_state.buffer[id][cb_data_state.counter] = value; | ||
| 584 | // Increment the current buffer position. | ||
| 585 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | ||
| 586 | cb_data_state.counter++; | ||
| 587 | } | ||
| 588 | |||
| 589 | void Maxwell3D::StartCBData(u32 method) { | ||
| 590 | constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]); | ||
| 591 | cb_data_state.start_pos = regs.const_buffer.cb_pos; | ||
| 592 | cb_data_state.id = method - first_cb_data; | ||
| 593 | cb_data_state.current = method; | ||
| 594 | cb_data_state.counter = 0; | ||
| 595 | ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]); | ||
| 596 | } | ||
| 597 | |||
| 598 | void Maxwell3D::FinishCBData() { | ||
| 408 | // Write the input value to the current const buffer at the current position. | 599 | // Write the input value to the current const buffer at the current position. |
| 409 | const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); | 600 | const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); |
| 410 | ASSERT(buffer_address != 0); | 601 | ASSERT(buffer_address != 0); |
| 411 | 602 | ||
| 412 | // Don't allow writing past the end of the buffer. | 603 | // Don't allow writing past the end of the buffer. |
| 413 | ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); | 604 | ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size); |
| 414 | 605 | ||
| 415 | const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos}; | 606 | const GPUVAddr address{buffer_address + cb_data_state.start_pos}; |
| 607 | const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos; | ||
| 416 | 608 | ||
| 417 | u8* ptr{memory_manager.GetPointer(address)}; | 609 | const u32 id = cb_data_state.id; |
| 418 | rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); | 610 | memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); |
| 419 | memory_manager.Write<u32>(address, value); | 611 | dirty.OnMemoryWrite(); |
| 420 | 612 | ||
| 421 | dirty_flags.OnMemoryWrite(); | 613 | cb_data_state.id = null_cb_data; |
| 422 | 614 | cb_data_state.current = null_cb_data; | |
| 423 | // Increment the current buffer position. | ||
| 424 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | ||
| 425 | } | 615 | } |
| 426 | 616 | ||
| 427 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | 617 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { |
| @@ -430,10 +620,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | |||
| 430 | Texture::TICEntry tic_entry; | 620 | Texture::TICEntry tic_entry; |
| 431 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); | 621 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); |
| 432 | 622 | ||
| 433 | const auto r_type{tic_entry.r_type.Value()}; | 623 | [[maybe_unused]] const auto r_type{tic_entry.r_type.Value()}; |
| 434 | const auto g_type{tic_entry.g_type.Value()}; | 624 | [[maybe_unused]] const auto g_type{tic_entry.g_type.Value()}; |
| 435 | const auto b_type{tic_entry.b_type.Value()}; | 625 | [[maybe_unused]] const auto b_type{tic_entry.b_type.Value()}; |
| 436 | const auto a_type{tic_entry.a_type.Value()}; | 626 | [[maybe_unused]] const auto a_type{tic_entry.a_type.Value()}; |
| 437 | 627 | ||
| 438 | // TODO(Subv): Different data types for separate components are not supported | 628 | // TODO(Subv): Different data types for separate components are not supported |
| 439 | DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); | 629 | DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 13e314944..0184342a0 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -67,6 +67,7 @@ public: | |||
| 67 | static constexpr std::size_t MaxShaderStage = 5; | 67 | static constexpr std::size_t MaxShaderStage = 5; |
| 68 | // Maximum number of const buffers per shader stage. | 68 | // Maximum number of const buffers per shader stage. |
| 69 | static constexpr std::size_t MaxConstBuffers = 18; | 69 | static constexpr std::size_t MaxConstBuffers = 18; |
| 70 | static constexpr std::size_t MaxConstBufferSize = 0x10000; | ||
| 70 | 71 | ||
| 71 | enum class QueryMode : u32 { | 72 | enum class QueryMode : u32 { |
| 72 | Write = 0, | 73 | Write = 0, |
| @@ -89,6 +90,20 @@ public: | |||
| 89 | 90 | ||
| 90 | enum class QuerySelect : u32 { | 91 | enum class QuerySelect : u32 { |
| 91 | Zero = 0, | 92 | Zero = 0, |
| 93 | TimeElapsed = 2, | ||
| 94 | TransformFeedbackPrimitivesGenerated = 11, | ||
| 95 | PrimitivesGenerated = 18, | ||
| 96 | SamplesPassed = 21, | ||
| 97 | TransformFeedbackUnknown = 26, | ||
| 98 | }; | ||
| 99 | |||
| 100 | struct QueryCompare { | ||
| 101 | u32 initial_sequence; | ||
| 102 | u32 initial_mode; | ||
| 103 | u32 unknown1; | ||
| 104 | u32 unknown2; | ||
| 105 | u32 current_sequence; | ||
| 106 | u32 current_mode; | ||
| 92 | }; | 107 | }; |
| 93 | 108 | ||
| 94 | enum class QuerySyncCondition : u32 { | 109 | enum class QuerySyncCondition : u32 { |
| @@ -96,6 +111,14 @@ public: | |||
| 96 | GreaterThan = 1, | 111 | GreaterThan = 1, |
| 97 | }; | 112 | }; |
| 98 | 113 | ||
| 114 | enum class ConditionMode : u32 { | ||
| 115 | Never = 0, | ||
| 116 | Always = 1, | ||
| 117 | ResNonZero = 2, | ||
| 118 | Equal = 3, | ||
| 119 | NotEqual = 4, | ||
| 120 | }; | ||
| 121 | |||
| 99 | enum class ShaderProgram : u32 { | 122 | enum class ShaderProgram : u32 { |
| 100 | VertexA = 0, | 123 | VertexA = 0, |
| 101 | VertexB = 1, | 124 | VertexB = 1, |
| @@ -814,7 +837,18 @@ public: | |||
| 814 | BitField<4, 1, u32> alpha_to_one; | 837 | BitField<4, 1, u32> alpha_to_one; |
| 815 | } multisample_control; | 838 | } multisample_control; |
| 816 | 839 | ||
| 817 | INSERT_PADDING_WORDS(0x7); | 840 | INSERT_PADDING_WORDS(0x4); |
| 841 | |||
| 842 | struct { | ||
| 843 | u32 address_high; | ||
| 844 | u32 address_low; | ||
| 845 | ConditionMode mode; | ||
| 846 | |||
| 847 | GPUVAddr Address() const { | ||
| 848 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 849 | address_low); | ||
| 850 | } | ||
| 851 | } condition; | ||
| 818 | 852 | ||
| 819 | struct { | 853 | struct { |
| 820 | u32 tsc_address_high; | 854 | u32 tsc_address_high; |
| @@ -1123,23 +1157,77 @@ public: | |||
| 1123 | 1157 | ||
| 1124 | State state{}; | 1158 | State state{}; |
| 1125 | 1159 | ||
| 1126 | struct DirtyFlags { | 1160 | struct DirtyRegs { |
| 1127 | std::bitset<8> color_buffer{0xFF}; | 1161 | static constexpr std::size_t NUM_REGS = 256; |
| 1128 | std::bitset<32> vertex_array{0xFFFFFFFF}; | 1162 | union { |
| 1163 | struct { | ||
| 1164 | bool null_dirty; | ||
| 1165 | |||
| 1166 | // Vertex Attributes | ||
| 1167 | bool vertex_attrib_format; | ||
| 1168 | |||
| 1169 | // Vertex Arrays | ||
| 1170 | std::array<bool, 32> vertex_array; | ||
| 1171 | |||
| 1172 | bool vertex_array_buffers; | ||
| 1173 | |||
| 1174 | // Vertex Instances | ||
| 1175 | std::array<bool, 32> vertex_instance; | ||
| 1129 | 1176 | ||
| 1130 | bool vertex_attrib_format = true; | 1177 | bool vertex_instances; |
| 1131 | bool zeta_buffer = true; | 1178 | |
| 1132 | bool shaders = true; | 1179 | // Render Targets |
| 1180 | std::array<bool, 8> render_target; | ||
| 1181 | bool depth_buffer; | ||
| 1182 | |||
| 1183 | bool render_settings; | ||
| 1184 | |||
| 1185 | // Shaders | ||
| 1186 | bool shaders; | ||
| 1187 | |||
| 1188 | // Rasterizer State | ||
| 1189 | bool viewport; | ||
| 1190 | bool clip_coefficient; | ||
| 1191 | bool cull_mode; | ||
| 1192 | bool primitive_restart; | ||
| 1193 | bool depth_test; | ||
| 1194 | bool stencil_test; | ||
| 1195 | bool blend_state; | ||
| 1196 | bool scissor_test; | ||
| 1197 | bool transform_feedback; | ||
| 1198 | bool color_mask; | ||
| 1199 | bool polygon_offset; | ||
| 1200 | |||
| 1201 | // Complementary | ||
| 1202 | bool viewport_transform; | ||
| 1203 | bool screen_y_control; | ||
| 1204 | |||
| 1205 | bool memory_general; | ||
| 1206 | }; | ||
| 1207 | std::array<bool, NUM_REGS> regs; | ||
| 1208 | }; | ||
| 1209 | |||
| 1210 | void ResetVertexArrays() { | ||
| 1211 | vertex_array.fill(true); | ||
| 1212 | vertex_array_buffers = true; | ||
| 1213 | } | ||
| 1214 | |||
| 1215 | void ResetRenderTargets() { | ||
| 1216 | depth_buffer = true; | ||
| 1217 | render_target.fill(true); | ||
| 1218 | render_settings = true; | ||
| 1219 | } | ||
| 1133 | 1220 | ||
| 1134 | void OnMemoryWrite() { | 1221 | void OnMemoryWrite() { |
| 1135 | zeta_buffer = true; | ||
| 1136 | shaders = true; | 1222 | shaders = true; |
| 1137 | color_buffer.set(); | 1223 | memory_general = true; |
| 1138 | vertex_array.set(); | 1224 | ResetRenderTargets(); |
| 1225 | ResetVertexArrays(); | ||
| 1139 | } | 1226 | } |
| 1140 | }; | ||
| 1141 | 1227 | ||
| 1142 | DirtyFlags dirty_flags; | 1228 | } dirty{}; |
| 1229 | |||
| 1230 | std::array<u8, Regs::NUM_REGS> dirty_pointers{}; | ||
| 1143 | 1231 | ||
| 1144 | /// Reads a register value located at the input method address | 1232 | /// Reads a register value located at the input method address |
| 1145 | u32 GetRegisterValue(u32 method) const; | 1233 | u32 GetRegisterValue(u32 method) const; |
| @@ -1168,6 +1256,10 @@ public: | |||
| 1168 | return macro_memory; | 1256 | return macro_memory; |
| 1169 | } | 1257 | } |
| 1170 | 1258 | ||
| 1259 | bool ShouldExecute() const { | ||
| 1260 | return execute_on; | ||
| 1261 | } | ||
| 1262 | |||
| 1171 | private: | 1263 | private: |
| 1172 | void InitializeRegisterDefaults(); | 1264 | void InitializeRegisterDefaults(); |
| 1173 | 1265 | ||
| @@ -1178,7 +1270,7 @@ private: | |||
| 1178 | MemoryManager& memory_manager; | 1270 | MemoryManager& memory_manager; |
| 1179 | 1271 | ||
| 1180 | /// Start offsets of each macro in macro_memory | 1272 | /// Start offsets of each macro in macro_memory |
| 1181 | std::unordered_map<u32, u32> macro_offsets; | 1273 | std::array<u32, 0x80> macro_positions = {}; |
| 1182 | 1274 | ||
| 1183 | /// Memory for macro code | 1275 | /// Memory for macro code |
| 1184 | MacroMemory macro_memory; | 1276 | MacroMemory macro_memory; |
| @@ -1191,14 +1283,27 @@ private: | |||
| 1191 | /// Interpreter for the macro codes uploaded to the GPU. | 1283 | /// Interpreter for the macro codes uploaded to the GPU. |
| 1192 | MacroInterpreter macro_interpreter; | 1284 | MacroInterpreter macro_interpreter; |
| 1193 | 1285 | ||
| 1286 | static constexpr u32 null_cb_data = 0xFFFFFFFF; | ||
| 1287 | struct { | ||
| 1288 | std::array<std::array<u32, 0x4000>, 16> buffer; | ||
| 1289 | u32 current{null_cb_data}; | ||
| 1290 | u32 id{null_cb_data}; | ||
| 1291 | u32 start_pos{}; | ||
| 1292 | u32 counter{}; | ||
| 1293 | } cb_data_state; | ||
| 1294 | |||
| 1194 | Upload::State upload_state; | 1295 | Upload::State upload_state; |
| 1195 | 1296 | ||
| 1297 | bool execute_on{true}; | ||
| 1298 | |||
| 1196 | /// Retrieves information about a specific TIC entry from the TIC buffer. | 1299 | /// Retrieves information about a specific TIC entry from the TIC buffer. |
| 1197 | Texture::TICEntry GetTICEntry(u32 tic_index) const; | 1300 | Texture::TICEntry GetTICEntry(u32 tic_index) const; |
| 1198 | 1301 | ||
| 1199 | /// Retrieves information about a specific TSC entry from the TSC buffer. | 1302 | /// Retrieves information about a specific TSC entry from the TSC buffer. |
| 1200 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; | 1303 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; |
| 1201 | 1304 | ||
| 1305 | void InitDirtySettings(); | ||
| 1306 | |||
| 1202 | /** | 1307 | /** |
| 1203 | * Call a macro on this engine. | 1308 | * Call a macro on this engine. |
| 1204 | * @param method Method to call | 1309 | * @param method Method to call |
| @@ -1218,11 +1323,16 @@ private: | |||
| 1218 | /// Handles a write to the QUERY_GET register. | 1323 | /// Handles a write to the QUERY_GET register. |
| 1219 | void ProcessQueryGet(); | 1324 | void ProcessQueryGet(); |
| 1220 | 1325 | ||
| 1326 | // Handles Conditional Rendering | ||
| 1327 | void ProcessQueryCondition(); | ||
| 1328 | |||
| 1221 | /// Handles writes to syncing register. | 1329 | /// Handles writes to syncing register. |
| 1222 | void ProcessSyncPoint(); | 1330 | void ProcessSyncPoint(); |
| 1223 | 1331 | ||
| 1224 | /// Handles a write to the CB_DATA[i] register. | 1332 | /// Handles a write to the CB_DATA[i] register. |
| 1333 | void StartCBData(u32 method); | ||
| 1225 | void ProcessCBData(u32 value); | 1334 | void ProcessCBData(u32 value); |
| 1335 | void FinishCBData(); | ||
| 1226 | 1336 | ||
| 1227 | /// Handles a write to the CB_BIND register. | 1337 | /// Handles a write to the CB_BIND register. |
| 1228 | void ProcessCBBind(Regs::ShaderStage stage); | 1338 | void ProcessCBBind(Regs::ShaderStage stage); |
| @@ -1289,6 +1399,7 @@ ASSERT_REG_POSITION(clip_distance_enabled, 0x544); | |||
| 1289 | ASSERT_REG_POSITION(point_size, 0x546); | 1399 | ASSERT_REG_POSITION(point_size, 0x546); |
| 1290 | ASSERT_REG_POSITION(zeta_enable, 0x54E); | 1400 | ASSERT_REG_POSITION(zeta_enable, 0x54E); |
| 1291 | ASSERT_REG_POSITION(multisample_control, 0x54F); | 1401 | ASSERT_REG_POSITION(multisample_control, 0x54F); |
| 1402 | ASSERT_REG_POSITION(condition, 0x554); | ||
| 1292 | ASSERT_REG_POSITION(tsc, 0x557); | 1403 | ASSERT_REG_POSITION(tsc, 0x557); |
| 1293 | ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); | 1404 | ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); |
| 1294 | ASSERT_REG_POSITION(tic, 0x55D); | 1405 | ASSERT_REG_POSITION(tic, 0x55D); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index afb9578d0..ad8453c5f 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -5,18 +5,17 @@ | |||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/settings.h" | ||
| 8 | #include "video_core/engines/maxwell_3d.h" | 9 | #include "video_core/engines/maxwell_3d.h" |
| 9 | #include "video_core/engines/maxwell_dma.h" | 10 | #include "video_core/engines/maxwell_dma.h" |
| 10 | #include "video_core/memory_manager.h" | 11 | #include "video_core/memory_manager.h" |
| 11 | #include "video_core/rasterizer_interface.h" | ||
| 12 | #include "video_core/renderer_base.h" | 12 | #include "video_core/renderer_base.h" |
| 13 | #include "video_core/textures/decoders.h" | 13 | #include "video_core/textures/decoders.h" |
| 14 | 14 | ||
| 15 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| 16 | 16 | ||
| 17 | MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 17 | MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) |
| 18 | MemoryManager& memory_manager) | 18 | : system{system}, memory_manager{memory_manager} {} |
| 19 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {} | ||
| 20 | 19 | ||
| 21 | void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { | 20 | void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { |
| 22 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, | 21 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, |
| @@ -38,7 +37,7 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { | |||
| 38 | } | 37 | } |
| 39 | 38 | ||
| 40 | void MaxwellDMA::HandleCopy() { | 39 | void MaxwellDMA::HandleCopy() { |
| 41 | LOG_WARNING(HW_GPU, "Requested a DMA copy"); | 40 | LOG_TRACE(HW_GPU, "Requested a DMA copy"); |
| 42 | 41 | ||
| 43 | const GPUVAddr source = regs.src_address.Address(); | 42 | const GPUVAddr source = regs.src_address.Address(); |
| 44 | const GPUVAddr dest = regs.dst_address.Address(); | 43 | const GPUVAddr dest = regs.dst_address.Address(); |
| @@ -58,7 +57,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 58 | } | 57 | } |
| 59 | 58 | ||
| 60 | // All copies here update the main memory, so mark all rasterizer states as invalid. | 59 | // All copies here update the main memory, so mark all rasterizer states as invalid. |
| 61 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 60 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 62 | 61 | ||
| 63 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { | 62 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { |
| 64 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D | 63 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D |
| @@ -84,13 +83,17 @@ void MaxwellDMA::HandleCopy() { | |||
| 84 | ASSERT(regs.exec.enable_2d == 1); | 83 | ASSERT(regs.exec.enable_2d == 1); |
| 85 | 84 | ||
| 86 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | 85 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { |
| 87 | ASSERT(regs.src_params.size_z == 1); | 86 | ASSERT(regs.src_params.BlockDepth() == 0); |
| 88 | // If the input is tiled and the output is linear, deswizzle the input and copy it over. | 87 | // If the input is tiled and the output is linear, deswizzle the input and copy it over. |
| 89 | const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; | 88 | const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; |
| 90 | const std::size_t src_size = Texture::CalculateSize( | 89 | const std::size_t src_size = Texture::CalculateSize( |
| 91 | true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, | 90 | true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, |
| 92 | regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); | 91 | regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); |
| 93 | 92 | ||
| 93 | const std::size_t src_layer_size = Texture::CalculateSize( | ||
| 94 | true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 1, | ||
| 95 | regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); | ||
| 96 | |||
| 94 | const std::size_t dst_size = regs.dst_pitch * regs.y_count; | 97 | const std::size_t dst_size = regs.dst_pitch * regs.y_count; |
| 95 | 98 | ||
| 96 | if (read_buffer.size() < src_size) { | 99 | if (read_buffer.size() < src_size) { |
| @@ -104,23 +107,23 @@ void MaxwellDMA::HandleCopy() { | |||
| 104 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); | 107 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); |
| 105 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | 108 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); |
| 106 | 109 | ||
| 107 | Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, | 110 | Texture::UnswizzleSubrect( |
| 108 | regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(), | 111 | regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel, |
| 109 | write_buffer.data(), regs.src_params.BlockHeight(), | 112 | read_buffer.data() + src_layer_size * regs.src_params.pos_z, write_buffer.data(), |
| 110 | regs.src_params.pos_x, regs.src_params.pos_y); | 113 | regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y); |
| 111 | 114 | ||
| 112 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | 115 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); |
| 113 | } else { | 116 | } else { |
| 114 | ASSERT(regs.dst_params.BlockDepth() == 0); | 117 | ASSERT(regs.dst_params.BlockDepth() == 0); |
| 115 | 118 | ||
| 116 | const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; | 119 | const u32 bytes_per_pixel = regs.src_pitch / regs.x_count; |
| 117 | 120 | ||
| 118 | const std::size_t dst_size = Texture::CalculateSize( | 121 | const std::size_t dst_size = Texture::CalculateSize( |
| 119 | true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, | 122 | true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, |
| 120 | regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); | 123 | regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); |
| 121 | 124 | ||
| 122 | const std::size_t dst_layer_size = Texture::CalculateSize( | 125 | const std::size_t dst_layer_size = Texture::CalculateSize( |
| 123 | true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, | 126 | true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, |
| 124 | regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); | 127 | regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); |
| 125 | 128 | ||
| 126 | const std::size_t src_size = regs.src_pitch * regs.y_count; | 129 | const std::size_t src_size = regs.src_pitch * regs.y_count; |
| @@ -133,14 +136,19 @@ void MaxwellDMA::HandleCopy() { | |||
| 133 | write_buffer.resize(dst_size); | 136 | write_buffer.resize(dst_size); |
| 134 | } | 137 | } |
| 135 | 138 | ||
| 136 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); | 139 | if (Settings::values.use_accurate_gpu_emulation) { |
| 137 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | 140 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); |
| 141 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | ||
| 142 | } else { | ||
| 143 | memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size); | ||
| 144 | memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); | ||
| 145 | } | ||
| 138 | 146 | ||
| 139 | // If the input is linear and the output is tiled, swizzle the input and copy it over. | 147 | // If the input is linear and the output is tiled, swizzle the input and copy it over. |
| 140 | Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, | 148 | Texture::SwizzleSubrect( |
| 141 | src_bytes_per_pixel, | 149 | regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, bytes_per_pixel, |
| 142 | write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, | 150 | write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, read_buffer.data(), |
| 143 | read_buffer.data(), regs.dst_params.BlockHeight()); | 151 | regs.dst_params.BlockHeight(), regs.dst_params.pos_x, regs.dst_params.pos_y); |
| 144 | 152 | ||
| 145 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | 153 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); |
| 146 | } | 154 | } |
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 17b015ca7..93808a9bb 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -20,10 +20,6 @@ namespace Tegra { | |||
| 20 | class MemoryManager; | 20 | class MemoryManager; |
| 21 | } | 21 | } |
| 22 | 22 | ||
| 23 | namespace VideoCore { | ||
| 24 | class RasterizerInterface; | ||
| 25 | } | ||
| 26 | |||
| 27 | namespace Tegra::Engines { | 23 | namespace Tegra::Engines { |
| 28 | 24 | ||
| 29 | /** | 25 | /** |
| @@ -33,8 +29,7 @@ namespace Tegra::Engines { | |||
| 33 | 29 | ||
| 34 | class MaxwellDMA final { | 30 | class MaxwellDMA final { |
| 35 | public: | 31 | public: |
| 36 | explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 32 | explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); |
| 37 | MemoryManager& memory_manager); | ||
| 38 | ~MaxwellDMA() = default; | 33 | ~MaxwellDMA() = default; |
| 39 | 34 | ||
| 40 | /// Write the value to the register identified by method. | 35 | /// Write the value to the register identified by method. |
| @@ -180,8 +175,6 @@ public: | |||
| 180 | private: | 175 | private: |
| 181 | Core::System& system; | 176 | Core::System& system; |
| 182 | 177 | ||
| 183 | VideoCore::RasterizerInterface& rasterizer; | ||
| 184 | |||
| 185 | MemoryManager& memory_manager; | 178 | MemoryManager& memory_manager; |
| 186 | 179 | ||
| 187 | std::vector<u8> read_buffer; | 180 | std::vector<u8> read_buffer; |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 404d4f5aa..c3678b9ea 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -78,7 +78,7 @@ union Attribute { | |||
| 78 | constexpr explicit Attribute(u64 value) : value(value) {} | 78 | constexpr explicit Attribute(u64 value) : value(value) {} |
| 79 | 79 | ||
| 80 | enum class Index : u64 { | 80 | enum class Index : u64 { |
| 81 | PointSize = 6, | 81 | LayerViewportPointSize = 6, |
| 82 | Position = 7, | 82 | Position = 7, |
| 83 | Attribute_0 = 8, | 83 | Attribute_0 = 8, |
| 84 | Attribute_31 = 39, | 84 | Attribute_31 = 39, |
| @@ -538,6 +538,12 @@ enum class PhysicalAttributeDirection : u64 { | |||
| 538 | Output = 1, | 538 | Output = 1, |
| 539 | }; | 539 | }; |
| 540 | 540 | ||
| 541 | enum class VoteOperation : u64 { | ||
| 542 | All = 0, // allThreadsNV | ||
| 543 | Any = 1, // anyThreadNV | ||
| 544 | Eq = 2, // allThreadsEqualNV | ||
| 545 | }; | ||
| 546 | |||
| 541 | union Instruction { | 547 | union Instruction { |
| 542 | Instruction& operator=(const Instruction& instr) { | 548 | Instruction& operator=(const Instruction& instr) { |
| 543 | value = instr.value; | 549 | value = instr.value; |
| @@ -560,6 +566,18 @@ union Instruction { | |||
| 560 | BitField<48, 16, u64> opcode; | 566 | BitField<48, 16, u64> opcode; |
| 561 | 567 | ||
| 562 | union { | 568 | union { |
| 569 | BitField<8, 5, ConditionCode> cc; | ||
| 570 | BitField<13, 1, u64> trigger; | ||
| 571 | } nop; | ||
| 572 | |||
| 573 | union { | ||
| 574 | BitField<48, 2, VoteOperation> operation; | ||
| 575 | BitField<45, 3, u64> dest_pred; | ||
| 576 | BitField<39, 3, u64> value; | ||
| 577 | BitField<42, 1, u64> negate_value; | ||
| 578 | } vote; | ||
| 579 | |||
| 580 | union { | ||
| 563 | BitField<8, 8, Register> gpr; | 581 | BitField<8, 8, Register> gpr; |
| 564 | BitField<20, 24, s64> offset; | 582 | BitField<20, 24, s64> offset; |
| 565 | } gmem; | 583 | } gmem; |
| @@ -868,6 +886,7 @@ union Instruction { | |||
| 868 | union { | 886 | union { |
| 869 | BitField<0, 3, u64> pred0; | 887 | BitField<0, 3, u64> pred0; |
| 870 | BitField<3, 3, u64> pred3; | 888 | BitField<3, 3, u64> pred3; |
| 889 | BitField<6, 1, u64> neg_b; | ||
| 871 | BitField<7, 1, u64> abs_a; | 890 | BitField<7, 1, u64> abs_a; |
| 872 | BitField<39, 3, u64> pred39; | 891 | BitField<39, 3, u64> pred39; |
| 873 | BitField<42, 1, u64> neg_pred; | 892 | BitField<42, 1, u64> neg_pred; |
| @@ -931,8 +950,6 @@ union Instruction { | |||
| 931 | } csetp; | 950 | } csetp; |
| 932 | 951 | ||
| 933 | union { | 952 | union { |
| 934 | BitField<35, 4, PredCondition> cond; | ||
| 935 | BitField<49, 1, u64> h_and; | ||
| 936 | BitField<6, 1, u64> ftz; | 953 | BitField<6, 1, u64> ftz; |
| 937 | BitField<45, 2, PredOperation> op; | 954 | BitField<45, 2, PredOperation> op; |
| 938 | BitField<3, 3, u64> pred3; | 955 | BitField<3, 3, u64> pred3; |
| @@ -940,9 +957,21 @@ union Instruction { | |||
| 940 | BitField<43, 1, u64> negate_a; | 957 | BitField<43, 1, u64> negate_a; |
| 941 | BitField<44, 1, u64> abs_a; | 958 | BitField<44, 1, u64> abs_a; |
| 942 | BitField<47, 2, HalfType> type_a; | 959 | BitField<47, 2, HalfType> type_a; |
| 943 | BitField<31, 1, u64> negate_b; | 960 | union { |
| 944 | BitField<30, 1, u64> abs_b; | 961 | BitField<35, 4, PredCondition> cond; |
| 945 | BitField<28, 2, HalfType> type_b; | 962 | BitField<49, 1, u64> h_and; |
| 963 | BitField<31, 1, u64> negate_b; | ||
| 964 | BitField<30, 1, u64> abs_b; | ||
| 965 | BitField<28, 2, HalfType> type_b; | ||
| 966 | } reg; | ||
| 967 | union { | ||
| 968 | BitField<56, 1, u64> negate_b; | ||
| 969 | BitField<54, 1, u64> abs_b; | ||
| 970 | } cbuf; | ||
| 971 | union { | ||
| 972 | BitField<49, 4, PredCondition> cond; | ||
| 973 | BitField<53, 1, u64> h_and; | ||
| 974 | } cbuf_and_imm; | ||
| 946 | BitField<42, 1, u64> neg_pred; | 975 | BitField<42, 1, u64> neg_pred; |
| 947 | BitField<39, 3, u64> pred39; | 976 | BitField<39, 3, u64> pred39; |
| 948 | } hsetp2; | 977 | } hsetp2; |
| @@ -991,7 +1020,6 @@ union Instruction { | |||
| 991 | } iset; | 1020 | } iset; |
| 992 | 1021 | ||
| 993 | union { | 1022 | union { |
| 994 | BitField<41, 2, u64> selector; // i2i and i2f only | ||
| 995 | BitField<45, 1, u64> negate_a; | 1023 | BitField<45, 1, u64> negate_a; |
| 996 | BitField<49, 1, u64> abs_a; | 1024 | BitField<49, 1, u64> abs_a; |
| 997 | BitField<10, 2, Register::Size> src_size; | 1025 | BitField<10, 2, Register::Size> src_size; |
| @@ -1008,8 +1036,6 @@ union Instruction { | |||
| 1008 | } f2i; | 1036 | } f2i; |
| 1009 | 1037 | ||
| 1010 | union { | 1038 | union { |
| 1011 | BitField<8, 2, Register::Size> src_size; | ||
| 1012 | BitField<10, 2, Register::Size> dst_size; | ||
| 1013 | BitField<39, 4, u64> rounding; | 1039 | BitField<39, 4, u64> rounding; |
| 1014 | // H0, H1 extract for F16 missing | 1040 | // H0, H1 extract for F16 missing |
| 1015 | BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value | 1041 | BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value |
| @@ -1019,6 +1045,13 @@ union Instruction { | |||
| 1019 | } | 1045 | } |
| 1020 | } f2f; | 1046 | } f2f; |
| 1021 | 1047 | ||
| 1048 | union { | ||
| 1049 | BitField<41, 2, u64> selector; | ||
| 1050 | } int_src; | ||
| 1051 | |||
| 1052 | union { | ||
| 1053 | BitField<41, 1, u64> selector; | ||
| 1054 | } float_src; | ||
| 1022 | } conversion; | 1055 | } conversion; |
| 1023 | 1056 | ||
| 1024 | union { | 1057 | union { |
| @@ -1278,6 +1311,7 @@ union Instruction { | |||
| 1278 | union { | 1311 | union { |
| 1279 | BitField<49, 1, u64> nodep_flag; | 1312 | BitField<49, 1, u64> nodep_flag; |
| 1280 | BitField<53, 4, u64> texture_info; | 1313 | BitField<53, 4, u64> texture_info; |
| 1314 | BitField<59, 1, u64> fp32_flag; | ||
| 1281 | 1315 | ||
| 1282 | TextureType GetTextureType() const { | 1316 | TextureType GetTextureType() const { |
| 1283 | // The TLDS instruction has a weird encoding for the texture type. | 1317 | // The TLDS instruction has a weird encoding for the texture type. |
| @@ -1368,6 +1402,20 @@ union Instruction { | |||
| 1368 | } bra; | 1402 | } bra; |
| 1369 | 1403 | ||
| 1370 | union { | 1404 | union { |
| 1405 | BitField<20, 24, u64> target; | ||
| 1406 | BitField<5, 1, u64> constant_buffer; | ||
| 1407 | |||
| 1408 | s32 GetBranchExtend() const { | ||
| 1409 | // Sign extend the branch target offset | ||
| 1410 | u32 mask = 1U << (24 - 1); | ||
| 1411 | u32 value = static_cast<u32>(target); | ||
| 1412 | // The branch offset is relative to the next instruction and is stored in bytes, so | ||
| 1413 | // divide it by the size of an instruction and add 1 to it. | ||
| 1414 | return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1; | ||
| 1415 | } | ||
| 1416 | } brx; | ||
| 1417 | |||
| 1418 | union { | ||
| 1371 | BitField<39, 1, u64> emit; // EmitVertex | 1419 | BitField<39, 1, u64> emit; // EmitVertex |
| 1372 | BitField<40, 1, u64> cut; // EndPrimitive | 1420 | BitField<40, 1, u64> cut; // EndPrimitive |
| 1373 | } out; | 1421 | } out; |
| @@ -1459,11 +1507,13 @@ public: | |||
| 1459 | SYNC, | 1507 | SYNC, |
| 1460 | BRK, | 1508 | BRK, |
| 1461 | DEPBAR, | 1509 | DEPBAR, |
| 1510 | VOTE, | ||
| 1462 | BFE_C, | 1511 | BFE_C, |
| 1463 | BFE_R, | 1512 | BFE_R, |
| 1464 | BFE_IMM, | 1513 | BFE_IMM, |
| 1465 | BFI_IMM_R, | 1514 | BFI_IMM_R, |
| 1466 | BRA, | 1515 | BRA, |
| 1516 | BRX, | ||
| 1467 | PBK, | 1517 | PBK, |
| 1468 | LD_A, | 1518 | LD_A, |
| 1469 | LD_L, | 1519 | LD_L, |
| @@ -1490,6 +1540,7 @@ public: | |||
| 1490 | TMML, // Texture Mip Map Level | 1540 | TMML, // Texture Mip Map Level |
| 1491 | SUST, // Surface Store | 1541 | SUST, // Surface Store |
| 1492 | EXIT, | 1542 | EXIT, |
| 1543 | NOP, | ||
| 1493 | IPA, | 1544 | IPA, |
| 1494 | OUT_R, // Emit vertex/primitive | 1545 | OUT_R, // Emit vertex/primitive |
| 1495 | ISBERD, | 1546 | ISBERD, |
| @@ -1532,7 +1583,9 @@ public: | |||
| 1532 | HFMA2_RC, | 1583 | HFMA2_RC, |
| 1533 | HFMA2_RR, | 1584 | HFMA2_RR, |
| 1534 | HFMA2_IMM_R, | 1585 | HFMA2_IMM_R, |
| 1586 | HSETP2_C, | ||
| 1535 | HSETP2_R, | 1587 | HSETP2_R, |
| 1588 | HSETP2_IMM, | ||
| 1536 | HSET2_R, | 1589 | HSET2_R, |
| 1537 | POPC_C, | 1590 | POPC_C, |
| 1538 | POPC_R, | 1591 | POPC_R, |
| @@ -1617,6 +1670,7 @@ public: | |||
| 1617 | Hfma2, | 1670 | Hfma2, |
| 1618 | Flow, | 1671 | Flow, |
| 1619 | Synch, | 1672 | Synch, |
| 1673 | Warp, | ||
| 1620 | Memory, | 1674 | Memory, |
| 1621 | Texture, | 1675 | Texture, |
| 1622 | Image, | 1676 | Image, |
| @@ -1738,10 +1792,12 @@ private: | |||
| 1738 | INST("111000101001----", Id::SSY, Type::Flow, "SSY"), | 1792 | INST("111000101001----", Id::SSY, Type::Flow, "SSY"), |
| 1739 | INST("111000101010----", Id::PBK, Type::Flow, "PBK"), | 1793 | INST("111000101010----", Id::PBK, Type::Flow, "PBK"), |
| 1740 | INST("111000100100----", Id::BRA, Type::Flow, "BRA"), | 1794 | INST("111000100100----", Id::BRA, Type::Flow, "BRA"), |
| 1795 | INST("111000100101----", Id::BRX, Type::Flow, "BRX"), | ||
| 1741 | INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), | 1796 | INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), |
| 1742 | INST("111000110100---", Id::BRK, Type::Flow, "BRK"), | 1797 | INST("111000110100---", Id::BRK, Type::Flow, "BRK"), |
| 1743 | INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), | 1798 | INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), |
| 1744 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), | 1799 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), |
| 1800 | INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), | ||
| 1745 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), | 1801 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), |
| 1746 | INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), | 1802 | INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), |
| 1747 | INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), | 1803 | INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), |
| @@ -1760,12 +1816,13 @@ private: | |||
| 1760 | INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), | 1816 | INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), |
| 1761 | INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), | 1817 | INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), |
| 1762 | INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), | 1818 | INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), |
| 1763 | INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), | 1819 | INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), |
| 1764 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), | 1820 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), |
| 1765 | INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), | 1821 | INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), |
| 1766 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), | 1822 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), |
| 1767 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), | 1823 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), |
| 1768 | INST("11101011001-----", Id::SUST, Type::Image, "SUST"), | 1824 | INST("11101011001-----", Id::SUST, Type::Image, "SUST"), |
| 1825 | INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), | ||
| 1769 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), | 1826 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), |
| 1770 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), | 1827 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), |
| 1771 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), | 1828 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), |
| @@ -1814,7 +1871,9 @@ private: | |||
| 1814 | INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), | 1871 | INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), |
| 1815 | INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), | 1872 | INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), |
| 1816 | INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), | 1873 | INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), |
| 1817 | INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"), | 1874 | INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), |
| 1875 | INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), | ||
| 1876 | INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), | ||
| 1818 | INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), | 1877 | INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), |
| 1819 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), | 1878 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), |
| 1820 | INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), | 1879 | INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 52706505b..2c47541cb 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -17,26 +17,15 @@ | |||
| 17 | 17 | ||
| 18 | namespace Tegra { | 18 | namespace Tegra { |
| 19 | 19 | ||
| 20 | u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { | 20 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) |
| 21 | switch (format) { | 21 | : system{system}, renderer{renderer}, is_async{is_async} { |
| 22 | case PixelFormat::ABGR8: | ||
| 23 | case PixelFormat::BGRA8: | ||
| 24 | return 4; | ||
| 25 | default: | ||
| 26 | return 4; | ||
| 27 | } | ||
| 28 | |||
| 29 | UNREACHABLE(); | ||
| 30 | } | ||
| 31 | |||
| 32 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { | ||
| 33 | auto& rasterizer{renderer.Rasterizer()}; | 22 | auto& rasterizer{renderer.Rasterizer()}; |
| 34 | memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer); | 23 | memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); |
| 35 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); | 24 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); |
| 36 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); | 25 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); |
| 37 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); | 26 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); |
| 38 | kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); | 27 | kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); |
| 39 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); | 28 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); |
| 40 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); | 29 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); |
| 41 | } | 30 | } |
| 42 | 31 | ||
| @@ -50,6 +39,14 @@ const Engines::Maxwell3D& GPU::Maxwell3D() const { | |||
| 50 | return *maxwell_3d; | 39 | return *maxwell_3d; |
| 51 | } | 40 | } |
| 52 | 41 | ||
| 42 | Engines::KeplerCompute& GPU::KeplerCompute() { | ||
| 43 | return *kepler_compute; | ||
| 44 | } | ||
| 45 | |||
| 46 | const Engines::KeplerCompute& GPU::KeplerCompute() const { | ||
| 47 | return *kepler_compute; | ||
| 48 | } | ||
| 49 | |||
| 53 | MemoryManager& GPU::MemoryManager() { | 50 | MemoryManager& GPU::MemoryManager() { |
| 54 | return *memory_manager; | 51 | return *memory_manager; |
| 55 | } | 52 | } |
| @@ -66,6 +63,55 @@ const DmaPusher& GPU::DmaPusher() const { | |||
| 66 | return *dma_pusher; | 63 | return *dma_pusher; |
| 67 | } | 64 | } |
| 68 | 65 | ||
| 66 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { | ||
| 67 | syncpoints[syncpoint_id]++; | ||
| 68 | std::lock_guard lock{sync_mutex}; | ||
| 69 | if (!syncpt_interrupts[syncpoint_id].empty()) { | ||
| 70 | u32 value = syncpoints[syncpoint_id].load(); | ||
| 71 | auto it = syncpt_interrupts[syncpoint_id].begin(); | ||
| 72 | while (it != syncpt_interrupts[syncpoint_id].end()) { | ||
| 73 | if (value >= *it) { | ||
| 74 | TriggerCpuInterrupt(syncpoint_id, *it); | ||
| 75 | it = syncpt_interrupts[syncpoint_id].erase(it); | ||
| 76 | continue; | ||
| 77 | } | ||
| 78 | it++; | ||
| 79 | } | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 | u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { | ||
| 84 | return syncpoints[syncpoint_id].load(); | ||
| 85 | } | ||
| 86 | |||
| 87 | void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | ||
| 88 | auto& interrupt = syncpt_interrupts[syncpoint_id]; | ||
| 89 | bool contains = std::any_of(interrupt.begin(), interrupt.end(), | ||
| 90 | [value](u32 in_value) { return in_value == value; }); | ||
| 91 | if (contains) { | ||
| 92 | return; | ||
| 93 | } | ||
| 94 | syncpt_interrupts[syncpoint_id].emplace_back(value); | ||
| 95 | } | ||
| 96 | |||
| 97 | bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | ||
| 98 | std::lock_guard lock{sync_mutex}; | ||
| 99 | auto& interrupt = syncpt_interrupts[syncpoint_id]; | ||
| 100 | const auto iter = | ||
| 101 | std::find_if(interrupt.begin(), interrupt.end(), | ||
| 102 | [value](u32 interrupt_value) { return value == interrupt_value; }); | ||
| 103 | |||
| 104 | if (iter == interrupt.end()) { | ||
| 105 | return false; | ||
| 106 | } | ||
| 107 | interrupt.erase(iter); | ||
| 108 | return true; | ||
| 109 | } | ||
| 110 | |||
| 111 | void GPU::FlushCommands() { | ||
| 112 | renderer.Rasterizer().FlushCommands(); | ||
| 113 | } | ||
| 114 | |||
| 69 | u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { | 115 | u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { |
| 70 | ASSERT(format != RenderTargetFormat::NONE); | 116 | ASSERT(format != RenderTargetFormat::NONE); |
| 71 | 117 | ||
| @@ -143,12 +189,12 @@ enum class BufferMethods { | |||
| 143 | NotifyIntr = 0x8, | 189 | NotifyIntr = 0x8, |
| 144 | WrcacheFlush = 0x9, | 190 | WrcacheFlush = 0x9, |
| 145 | Unk28 = 0xA, | 191 | Unk28 = 0xA, |
| 146 | Unk2c = 0xB, | 192 | UnkCacheFlush = 0xB, |
| 147 | RefCnt = 0x14, | 193 | RefCnt = 0x14, |
| 148 | SemaphoreAcquire = 0x1A, | 194 | SemaphoreAcquire = 0x1A, |
| 149 | SemaphoreRelease = 0x1B, | 195 | SemaphoreRelease = 0x1B, |
| 150 | Unk70 = 0x1C, | 196 | FenceValue = 0x1C, |
| 151 | Unk74 = 0x1D, | 197 | FenceAction = 0x1D, |
| 152 | Unk78 = 0x1E, | 198 | Unk78 = 0x1E, |
| 153 | Unk7c = 0x1F, | 199 | Unk7c = 0x1F, |
| 154 | Yield = 0x20, | 200 | Yield = 0x20, |
| @@ -194,6 +240,10 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { | |||
| 194 | case BufferMethods::SemaphoreAddressLow: | 240 | case BufferMethods::SemaphoreAddressLow: |
| 195 | case BufferMethods::SemaphoreSequence: | 241 | case BufferMethods::SemaphoreSequence: |
| 196 | case BufferMethods::RefCnt: | 242 | case BufferMethods::RefCnt: |
| 243 | case BufferMethods::UnkCacheFlush: | ||
| 244 | case BufferMethods::WrcacheFlush: | ||
| 245 | case BufferMethods::FenceValue: | ||
| 246 | case BufferMethods::FenceAction: | ||
| 197 | break; | 247 | break; |
| 198 | case BufferMethods::SemaphoreTrigger: { | 248 | case BufferMethods::SemaphoreTrigger: { |
| 199 | ProcessSemaphoreTriggerMethod(); | 249 | ProcessSemaphoreTriggerMethod(); |
| @@ -204,21 +254,11 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { | |||
| 204 | LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); | 254 | LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); |
| 205 | break; | 255 | break; |
| 206 | } | 256 | } |
| 207 | case BufferMethods::WrcacheFlush: { | ||
| 208 | // TODO(Kmather73): Research and implement this method. | ||
| 209 | LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented"); | ||
| 210 | break; | ||
| 211 | } | ||
| 212 | case BufferMethods::Unk28: { | 257 | case BufferMethods::Unk28: { |
| 213 | // TODO(Kmather73): Research and implement this method. | 258 | // TODO(Kmather73): Research and implement this method. |
| 214 | LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); | 259 | LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); |
| 215 | break; | 260 | break; |
| 216 | } | 261 | } |
| 217 | case BufferMethods::Unk2c: { | ||
| 218 | // TODO(Kmather73): Research and implement this method. | ||
| 219 | LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented"); | ||
| 220 | break; | ||
| 221 | } | ||
| 222 | case BufferMethods::SemaphoreAcquire: { | 262 | case BufferMethods::SemaphoreAcquire: { |
| 223 | ProcessSemaphoreAcquire(); | 263 | ProcessSemaphoreAcquire(); |
| 224 | break; | 264 | break; |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index fe6628923..78bc0601a 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -5,8 +5,12 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <atomic> | ||
| 9 | #include <list> | ||
| 8 | #include <memory> | 10 | #include <memory> |
| 11 | #include <mutex> | ||
| 9 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 10 | #include "core/hle/service/nvflinger/buffer_queue.h" | 14 | #include "core/hle/service/nvflinger/buffer_queue.h" |
| 11 | #include "video_core/dma_pusher.h" | 15 | #include "video_core/dma_pusher.h" |
| 12 | 16 | ||
| @@ -15,6 +19,10 @@ inline CacheAddr ToCacheAddr(const void* host_ptr) { | |||
| 15 | return reinterpret_cast<CacheAddr>(host_ptr); | 19 | return reinterpret_cast<CacheAddr>(host_ptr); |
| 16 | } | 20 | } |
| 17 | 21 | ||
| 22 | inline u8* FromCacheAddr(CacheAddr cache_addr) { | ||
| 23 | return reinterpret_cast<u8*>(cache_addr); | ||
| 24 | } | ||
| 25 | |||
| 18 | namespace Core { | 26 | namespace Core { |
| 19 | class System; | 27 | class System; |
| 20 | } | 28 | } |
| @@ -87,14 +95,10 @@ class DebugContext; | |||
| 87 | struct FramebufferConfig { | 95 | struct FramebufferConfig { |
| 88 | enum class PixelFormat : u32 { | 96 | enum class PixelFormat : u32 { |
| 89 | ABGR8 = 1, | 97 | ABGR8 = 1, |
| 98 | RGB565 = 4, | ||
| 90 | BGRA8 = 5, | 99 | BGRA8 = 5, |
| 91 | }; | 100 | }; |
| 92 | 101 | ||
| 93 | /** | ||
| 94 | * Returns the number of bytes per pixel. | ||
| 95 | */ | ||
| 96 | static u32 BytesPerPixel(PixelFormat format); | ||
| 97 | |||
| 98 | VAddr address; | 102 | VAddr address; |
| 99 | u32 offset; | 103 | u32 offset; |
| 100 | u32 width; | 104 | u32 width; |
| @@ -127,7 +131,7 @@ class MemoryManager; | |||
| 127 | 131 | ||
| 128 | class GPU { | 132 | class GPU { |
| 129 | public: | 133 | public: |
| 130 | explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); | 134 | explicit GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async); |
| 131 | 135 | ||
| 132 | virtual ~GPU(); | 136 | virtual ~GPU(); |
| 133 | 137 | ||
| @@ -149,12 +153,20 @@ public: | |||
| 149 | /// Calls a GPU method. | 153 | /// Calls a GPU method. |
| 150 | void CallMethod(const MethodCall& method_call); | 154 | void CallMethod(const MethodCall& method_call); |
| 151 | 155 | ||
| 156 | void FlushCommands(); | ||
| 157 | |||
| 152 | /// Returns a reference to the Maxwell3D GPU engine. | 158 | /// Returns a reference to the Maxwell3D GPU engine. |
| 153 | Engines::Maxwell3D& Maxwell3D(); | 159 | Engines::Maxwell3D& Maxwell3D(); |
| 154 | 160 | ||
| 155 | /// Returns a const reference to the Maxwell3D GPU engine. | 161 | /// Returns a const reference to the Maxwell3D GPU engine. |
| 156 | const Engines::Maxwell3D& Maxwell3D() const; | 162 | const Engines::Maxwell3D& Maxwell3D() const; |
| 157 | 163 | ||
| 164 | /// Returns a reference to the KeplerCompute GPU engine. | ||
| 165 | Engines::KeplerCompute& KeplerCompute(); | ||
| 166 | |||
| 167 | /// Returns a reference to the KeplerCompute GPU engine. | ||
| 168 | const Engines::KeplerCompute& KeplerCompute() const; | ||
| 169 | |||
| 158 | /// Returns a reference to the GPU memory manager. | 170 | /// Returns a reference to the GPU memory manager. |
| 159 | Tegra::MemoryManager& MemoryManager(); | 171 | Tegra::MemoryManager& MemoryManager(); |
| 160 | 172 | ||
| @@ -164,6 +176,22 @@ public: | |||
| 164 | /// Returns a reference to the GPU DMA pusher. | 176 | /// Returns a reference to the GPU DMA pusher. |
| 165 | Tegra::DmaPusher& DmaPusher(); | 177 | Tegra::DmaPusher& DmaPusher(); |
| 166 | 178 | ||
| 179 | void IncrementSyncPoint(u32 syncpoint_id); | ||
| 180 | |||
| 181 | u32 GetSyncpointValue(u32 syncpoint_id) const; | ||
| 182 | |||
| 183 | void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value); | ||
| 184 | |||
| 185 | bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); | ||
| 186 | |||
| 187 | std::unique_lock<std::mutex> LockSync() { | ||
| 188 | return std::unique_lock{sync_mutex}; | ||
| 189 | } | ||
| 190 | |||
| 191 | bool IsAsync() const { | ||
| 192 | return is_async; | ||
| 193 | } | ||
| 194 | |||
| 167 | /// Returns a const reference to the GPU DMA pusher. | 195 | /// Returns a const reference to the GPU DMA pusher. |
| 168 | const Tegra::DmaPusher& DmaPusher() const; | 196 | const Tegra::DmaPusher& DmaPusher() const; |
| 169 | 197 | ||
| @@ -194,7 +222,12 @@ public: | |||
| 194 | 222 | ||
| 195 | u32 semaphore_acquire; | 223 | u32 semaphore_acquire; |
| 196 | u32 semaphore_release; | 224 | u32 semaphore_release; |
| 197 | INSERT_PADDING_WORDS(0xE4); | 225 | u32 fence_value; |
| 226 | union { | ||
| 227 | BitField<4, 4, u32> operation; | ||
| 228 | BitField<8, 8, u32> id; | ||
| 229 | } fence_action; | ||
| 230 | INSERT_PADDING_WORDS(0xE2); | ||
| 198 | 231 | ||
| 199 | // Puller state | 232 | // Puller state |
| 200 | u32 acquire_mode; | 233 | u32 acquire_mode; |
| @@ -216,8 +249,7 @@ public: | |||
| 216 | virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; | 249 | virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; |
| 217 | 250 | ||
| 218 | /// Swap buffers (render frame) | 251 | /// Swap buffers (render frame) |
| 219 | virtual void SwapBuffers( | 252 | virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; |
| 220 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0; | ||
| 221 | 253 | ||
| 222 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 254 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 223 | virtual void FlushRegion(CacheAddr addr, u64 size) = 0; | 255 | virtual void FlushRegion(CacheAddr addr, u64 size) = 0; |
| @@ -228,6 +260,9 @@ public: | |||
| 228 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 260 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 229 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; | 261 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; |
| 230 | 262 | ||
| 263 | protected: | ||
| 264 | virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; | ||
| 265 | |||
| 231 | private: | 266 | private: |
| 232 | void ProcessBindMethod(const MethodCall& method_call); | 267 | void ProcessBindMethod(const MethodCall& method_call); |
| 233 | void ProcessSemaphoreTriggerMethod(); | 268 | void ProcessSemaphoreTriggerMethod(); |
| @@ -245,6 +280,7 @@ private: | |||
| 245 | 280 | ||
| 246 | protected: | 281 | protected: |
| 247 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | 282 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; |
| 283 | Core::System& system; | ||
| 248 | VideoCore::RendererBase& renderer; | 284 | VideoCore::RendererBase& renderer; |
| 249 | 285 | ||
| 250 | private: | 286 | private: |
| @@ -262,6 +298,14 @@ private: | |||
| 262 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | 298 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; |
| 263 | /// Inline memory engine | 299 | /// Inline memory engine |
| 264 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; | 300 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; |
| 301 | |||
| 302 | std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; | ||
| 303 | |||
| 304 | std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts; | ||
| 305 | |||
| 306 | std::mutex sync_mutex; | ||
| 307 | |||
| 308 | const bool is_async; | ||
| 265 | }; | 309 | }; |
| 266 | 310 | ||
| 267 | #define ASSERT_REG_POSITION(field_name, position) \ | 311 | #define ASSERT_REG_POSITION(field_name, position) \ |
| @@ -274,6 +318,8 @@ ASSERT_REG_POSITION(semaphore_trigger, 0x7); | |||
| 274 | ASSERT_REG_POSITION(reference_count, 0x14); | 318 | ASSERT_REG_POSITION(reference_count, 0x14); |
| 275 | ASSERT_REG_POSITION(semaphore_acquire, 0x1A); | 319 | ASSERT_REG_POSITION(semaphore_acquire, 0x1A); |
| 276 | ASSERT_REG_POSITION(semaphore_release, 0x1B); | 320 | ASSERT_REG_POSITION(semaphore_release, 0x1B); |
| 321 | ASSERT_REG_POSITION(fence_value, 0x1C); | ||
| 322 | ASSERT_REG_POSITION(fence_action, 0x1D); | ||
| 277 | 323 | ||
| 278 | ASSERT_REG_POSITION(acquire_mode, 0x100); | 324 | ASSERT_REG_POSITION(acquire_mode, 0x100); |
| 279 | ASSERT_REG_POSITION(acquire_source, 0x101); | 325 | ASSERT_REG_POSITION(acquire_source, 0x101); |
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index d4e2553a9..f2a3a390e 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/core.h" | ||
| 6 | #include "core/hardware_interrupt_manager.h" | ||
| 5 | #include "video_core/gpu_asynch.h" | 7 | #include "video_core/gpu_asynch.h" |
| 6 | #include "video_core/gpu_thread.h" | 8 | #include "video_core/gpu_thread.h" |
| 7 | #include "video_core/renderer_base.h" | 9 | #include "video_core/renderer_base.h" |
| @@ -9,7 +11,7 @@ | |||
| 9 | namespace VideoCommon { | 11 | namespace VideoCommon { |
| 10 | 12 | ||
| 11 | GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) | 13 | GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) |
| 12 | : GPU(system, renderer), gpu_thread{system} {} | 14 | : GPU(system, renderer, true), gpu_thread{system} {} |
| 13 | 15 | ||
| 14 | GPUAsynch::~GPUAsynch() = default; | 16 | GPUAsynch::~GPUAsynch() = default; |
| 15 | 17 | ||
| @@ -21,9 +23,8 @@ void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { | |||
| 21 | gpu_thread.SubmitList(std::move(entries)); | 23 | gpu_thread.SubmitList(std::move(entries)); |
| 22 | } | 24 | } |
| 23 | 25 | ||
| 24 | void GPUAsynch::SwapBuffers( | 26 | void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| 25 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | 27 | gpu_thread.SwapBuffers(framebuffer); |
| 26 | gpu_thread.SwapBuffers(std::move(framebuffer)); | ||
| 27 | } | 28 | } |
| 28 | 29 | ||
| 29 | void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) { | 30 | void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) { |
| @@ -38,4 +39,9 @@ void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | |||
| 38 | gpu_thread.FlushAndInvalidateRegion(addr, size); | 39 | gpu_thread.FlushAndInvalidateRegion(addr, size); |
| 39 | } | 40 | } |
| 40 | 41 | ||
| 42 | void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { | ||
| 43 | auto& interrupt_manager = system.InterruptManager(); | ||
| 44 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | ||
| 45 | } | ||
| 46 | |||
| 41 | } // namespace VideoCommon | 47 | } // namespace VideoCommon |
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 30be74cba..a12f9bac4 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h | |||
| @@ -14,19 +14,21 @@ class RendererBase; | |||
| 14 | namespace VideoCommon { | 14 | namespace VideoCommon { |
| 15 | 15 | ||
| 16 | /// Implementation of GPU interface that runs the GPU asynchronously | 16 | /// Implementation of GPU interface that runs the GPU asynchronously |
| 17 | class GPUAsynch : public Tegra::GPU { | 17 | class GPUAsynch final : public Tegra::GPU { |
| 18 | public: | 18 | public: |
| 19 | explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); | 19 | explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); |
| 20 | ~GPUAsynch() override; | 20 | ~GPUAsynch() override; |
| 21 | 21 | ||
| 22 | void Start() override; | 22 | void Start() override; |
| 23 | void PushGPUEntries(Tegra::CommandList&& entries) override; | 23 | void PushGPUEntries(Tegra::CommandList&& entries) override; |
| 24 | void SwapBuffers( | 24 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; |
| 25 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; | ||
| 26 | void FlushRegion(CacheAddr addr, u64 size) override; | 25 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 27 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 26 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 28 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 27 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 29 | 28 | ||
| 29 | protected: | ||
| 30 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; | ||
| 31 | |||
| 30 | private: | 32 | private: |
| 31 | GPUThread::ThreadManager gpu_thread; | 33 | GPUThread::ThreadManager gpu_thread; |
| 32 | }; | 34 | }; |
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index 45e43b1dc..d48221077 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | namespace VideoCommon { | 8 | namespace VideoCommon { |
| 9 | 9 | ||
| 10 | GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) | 10 | GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) |
| 11 | : GPU(system, renderer) {} | 11 | : GPU(system, renderer, false) {} |
| 12 | 12 | ||
| 13 | GPUSynch::~GPUSynch() = default; | 13 | GPUSynch::~GPUSynch() = default; |
| 14 | 14 | ||
| @@ -19,9 +19,8 @@ void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { | |||
| 19 | dma_pusher->DispatchCalls(); | 19 | dma_pusher->DispatchCalls(); |
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | void GPUSynch::SwapBuffers( | 22 | void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| 23 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | 23 | renderer.SwapBuffers(framebuffer); |
| 24 | renderer.SwapBuffers(std::move(framebuffer)); | ||
| 25 | } | 24 | } |
| 26 | 25 | ||
| 27 | void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { | 26 | void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { |
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 3031fcf72..5eb1c461c 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h | |||
| @@ -13,18 +13,21 @@ class RendererBase; | |||
| 13 | namespace VideoCommon { | 13 | namespace VideoCommon { |
| 14 | 14 | ||
| 15 | /// Implementation of GPU interface that runs the GPU synchronously | 15 | /// Implementation of GPU interface that runs the GPU synchronously |
| 16 | class GPUSynch : public Tegra::GPU { | 16 | class GPUSynch final : public Tegra::GPU { |
| 17 | public: | 17 | public: |
| 18 | explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); | 18 | explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); |
| 19 | ~GPUSynch() override; | 19 | ~GPUSynch() override; |
| 20 | 20 | ||
| 21 | void Start() override; | 21 | void Start() override; |
| 22 | void PushGPUEntries(Tegra::CommandList&& entries) override; | 22 | void PushGPUEntries(Tegra::CommandList&& entries) override; |
| 23 | void SwapBuffers( | 23 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; |
| 24 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; | ||
| 25 | void FlushRegion(CacheAddr addr, u64 size) override; | 24 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 26 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 25 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 27 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 26 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 27 | |||
| 28 | protected: | ||
| 29 | void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, | ||
| 30 | [[maybe_unused]] u32 value) const override {} | ||
| 28 | }; | 31 | }; |
| 29 | 32 | ||
| 30 | } // namespace VideoCommon | 33 | } // namespace VideoCommon |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 3f0939ec9..5f039e4fd 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -21,7 +21,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p | |||
| 21 | MicroProfileOnThreadCreate("GpuThread"); | 21 | MicroProfileOnThreadCreate("GpuThread"); |
| 22 | 22 | ||
| 23 | // Wait for first GPU command before acquiring the window context | 23 | // Wait for first GPU command before acquiring the window context |
| 24 | state.WaitForCommands(); | 24 | while (state.queue.Empty()) |
| 25 | ; | ||
| 25 | 26 | ||
| 26 | // If emulation was stopped during disk shader loading, abort before trying to acquire context | 27 | // If emulation was stopped during disk shader loading, abort before trying to acquire context |
| 27 | if (!state.is_running) { | 28 | if (!state.is_running) { |
| @@ -32,14 +33,13 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p | |||
| 32 | 33 | ||
| 33 | CommandDataContainer next; | 34 | CommandDataContainer next; |
| 34 | while (state.is_running) { | 35 | while (state.is_running) { |
| 35 | state.WaitForCommands(); | ||
| 36 | while (!state.queue.Empty()) { | 36 | while (!state.queue.Empty()) { |
| 37 | state.queue.Pop(next); | 37 | state.queue.Pop(next); |
| 38 | if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { | 38 | if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { |
| 39 | dma_pusher.Push(std::move(submit_list->entries)); | 39 | dma_pusher.Push(std::move(submit_list->entries)); |
| 40 | dma_pusher.DispatchCalls(); | 40 | dma_pusher.DispatchCalls(); |
| 41 | } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { | 41 | } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { |
| 42 | renderer.SwapBuffers(std::move(data->framebuffer)); | 42 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); |
| 43 | } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { | 43 | } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { |
| 44 | renderer.Rasterizer().FlushRegion(data->addr, data->size); | 44 | renderer.Rasterizer().FlushRegion(data->addr, data->size); |
| 45 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { | 45 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { |
| @@ -49,8 +49,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p | |||
| 49 | } else { | 49 | } else { |
| 50 | UNREACHABLE(); | 50 | UNREACHABLE(); |
| 51 | } | 51 | } |
| 52 | state.signaled_fence = next.fence; | 52 | state.signaled_fence.store(next.fence); |
| 53 | state.TrySynchronize(); | ||
| 54 | } | 53 | } |
| 55 | } | 54 | } |
| 56 | } | 55 | } |
| @@ -79,9 +78,9 @@ void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | |||
| 79 | system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); | 78 | system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); |
| 80 | } | 79 | } |
| 81 | 80 | ||
| 82 | void ThreadManager::SwapBuffers( | 81 | void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| 83 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | 82 | PushCommand(SwapBuffersCommand(framebuffer ? *framebuffer |
| 84 | PushCommand(SwapBuffersCommand(std::move(framebuffer))); | 83 | : std::optional<const Tegra::FramebufferConfig>{})); |
| 85 | } | 84 | } |
| 86 | 85 | ||
| 87 | void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { | 86 | void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { |
| @@ -89,12 +88,7 @@ void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { | |||
| 89 | } | 88 | } |
| 90 | 89 | ||
| 91 | void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { | 90 | void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { |
| 92 | if (state.queue.Empty()) { | 91 | system.Renderer().Rasterizer().InvalidateRegion(addr, size); |
| 93 | // It's quicker to invalidate a single region on the CPU if the queue is already empty | ||
| 94 | system.Renderer().Rasterizer().InvalidateRegion(addr, size); | ||
| 95 | } else { | ||
| 96 | PushCommand(InvalidateRegionCommand(addr, size)); | ||
| 97 | } | ||
| 98 | } | 92 | } |
| 99 | 93 | ||
| 100 | void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | 94 | void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { |
| @@ -105,22 +99,13 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | |||
| 105 | u64 ThreadManager::PushCommand(CommandData&& command_data) { | 99 | u64 ThreadManager::PushCommand(CommandData&& command_data) { |
| 106 | const u64 fence{++state.last_fence}; | 100 | const u64 fence{++state.last_fence}; |
| 107 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | 101 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); |
| 108 | state.SignalCommands(); | ||
| 109 | return fence; | 102 | return fence; |
| 110 | } | 103 | } |
| 111 | 104 | ||
| 112 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | 105 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); |
| 113 | void SynchState::WaitForSynchronization(u64 fence) { | 106 | void SynchState::WaitForSynchronization(u64 fence) { |
| 114 | if (signaled_fence >= fence) { | 107 | while (signaled_fence.load() < fence) |
| 115 | return; | 108 | ; |
| 116 | } | ||
| 117 | |||
| 118 | // Wait for the GPU to be idle (all commands to be executed) | ||
| 119 | { | ||
| 120 | MICROPROFILE_SCOPE(GPU_wait); | ||
| 121 | std::unique_lock lock{synchronization_mutex}; | ||
| 122 | synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; }); | ||
| 123 | } | ||
| 124 | } | 109 | } |
| 125 | 110 | ||
| 126 | } // namespace VideoCommon::GPUThread | 111 | } // namespace VideoCommon::GPUThread |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 05a168a72..3ae0ec9f3 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -88,41 +88,9 @@ struct CommandDataContainer { | |||
| 88 | /// Struct used to synchronize the GPU thread | 88 | /// Struct used to synchronize the GPU thread |
| 89 | struct SynchState final { | 89 | struct SynchState final { |
| 90 | std::atomic_bool is_running{true}; | 90 | std::atomic_bool is_running{true}; |
| 91 | std::atomic_int queued_frame_count{}; | ||
| 92 | std::mutex synchronization_mutex; | ||
| 93 | std::mutex commands_mutex; | ||
| 94 | std::condition_variable commands_condition; | ||
| 95 | std::condition_variable synchronization_condition; | ||
| 96 | |||
| 97 | /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU | ||
| 98 | /// synchronized. This is entirely empirical. | ||
| 99 | bool IsSynchronized() const { | ||
| 100 | constexpr std::size_t max_queue_gap{5}; | ||
| 101 | return queue.Size() <= max_queue_gap; | ||
| 102 | } | ||
| 103 | |||
| 104 | void TrySynchronize() { | ||
| 105 | if (IsSynchronized()) { | ||
| 106 | std::lock_guard lock{synchronization_mutex}; | ||
| 107 | synchronization_condition.notify_one(); | ||
| 108 | } | ||
| 109 | } | ||
| 110 | 91 | ||
| 111 | void WaitForSynchronization(u64 fence); | 92 | void WaitForSynchronization(u64 fence); |
| 112 | 93 | ||
| 113 | void SignalCommands() { | ||
| 114 | if (queue.Empty()) { | ||
| 115 | return; | ||
| 116 | } | ||
| 117 | |||
| 118 | commands_condition.notify_one(); | ||
| 119 | } | ||
| 120 | |||
| 121 | void WaitForCommands() { | ||
| 122 | std::unique_lock lock{commands_mutex}; | ||
| 123 | commands_condition.wait(lock, [this] { return !queue.Empty(); }); | ||
| 124 | } | ||
| 125 | |||
| 126 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; | 94 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; |
| 127 | CommandQueue queue; | 95 | CommandQueue queue; |
| 128 | u64 last_fence{}; | 96 | u64 last_fence{}; |
| @@ -142,8 +110,7 @@ public: | |||
| 142 | void SubmitList(Tegra::CommandList&& entries); | 110 | void SubmitList(Tegra::CommandList&& entries); |
| 143 | 111 | ||
| 144 | /// Swap buffers (render frame) | 112 | /// Swap buffers (render frame) |
| 145 | void SwapBuffers( | 113 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); |
| 146 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer); | ||
| 147 | 114 | ||
| 148 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 115 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 149 | void FlushRegion(CacheAddr addr, u64 size); | 116 | void FlushRegion(CacheAddr addr, u64 size); |
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index c766ed692..9f59a2dc1 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp | |||
| @@ -4,14 +4,18 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "common/microprofile.h" | ||
| 7 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 8 | #include "video_core/macro_interpreter.h" | 9 | #include "video_core/macro_interpreter.h" |
| 9 | 10 | ||
| 11 | MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); | ||
| 12 | |||
| 10 | namespace Tegra { | 13 | namespace Tegra { |
| 11 | 14 | ||
| 12 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} | 15 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} |
| 13 | 16 | ||
| 14 | void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { | 17 | void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { |
| 18 | MICROPROFILE_SCOPE(MacroInterp); | ||
| 15 | Reset(); | 19 | Reset(); |
| 16 | registers[1] = parameters[0]; | 20 | registers[1] = parameters[0]; |
| 17 | this->parameters = std::move(parameters); | 21 | this->parameters = std::move(parameters); |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 322453116..bffae940c 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -5,13 +5,17 @@ | |||
| 5 | #include "common/alignment.h" | 5 | #include "common/alignment.h" |
| 6 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 7 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 8 | #include "core/core.h" | ||
| 9 | #include "core/hle/kernel/process.h" | ||
| 10 | #include "core/hle/kernel/vm_manager.h" | ||
| 8 | #include "core/memory.h" | 11 | #include "core/memory.h" |
| 9 | #include "video_core/memory_manager.h" | 12 | #include "video_core/memory_manager.h" |
| 10 | #include "video_core/rasterizer_interface.h" | 13 | #include "video_core/rasterizer_interface.h" |
| 11 | 14 | ||
| 12 | namespace Tegra { | 15 | namespace Tegra { |
| 13 | 16 | ||
| 14 | MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} { | 17 | MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) |
| 18 | : rasterizer{rasterizer}, system{system} { | ||
| 15 | std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); | 19 | std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); |
| 16 | std::fill(page_table.attributes.begin(), page_table.attributes.end(), | 20 | std::fill(page_table.attributes.begin(), page_table.attributes.end(), |
| 17 | Common::PageType::Unmapped); | 21 | Common::PageType::Unmapped); |
| @@ -49,6 +53,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { | |||
| 49 | const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; | 53 | const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; |
| 50 | 54 | ||
| 51 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); | 55 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); |
| 56 | ASSERT(system.CurrentProcess() | ||
| 57 | ->VMManager() | ||
| 58 | .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, | ||
| 59 | Kernel::MemoryAttribute::DeviceMapped) | ||
| 60 | .IsSuccess()); | ||
| 52 | 61 | ||
| 53 | return gpu_addr; | 62 | return gpu_addr; |
| 54 | } | 63 | } |
| @@ -59,7 +68,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) | |||
| 59 | const u64 aligned_size{Common::AlignUp(size, page_size)}; | 68 | const u64 aligned_size{Common::AlignUp(size, page_size)}; |
| 60 | 69 | ||
| 61 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); | 70 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); |
| 62 | 71 | ASSERT(system.CurrentProcess() | |
| 72 | ->VMManager() | ||
| 73 | .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, | ||
| 74 | Kernel::MemoryAttribute::DeviceMapped) | ||
| 75 | .IsSuccess()); | ||
| 63 | return gpu_addr; | 76 | return gpu_addr; |
| 64 | } | 77 | } |
| 65 | 78 | ||
| @@ -68,9 +81,16 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { | |||
| 68 | 81 | ||
| 69 | const u64 aligned_size{Common::AlignUp(size, page_size)}; | 82 | const u64 aligned_size{Common::AlignUp(size, page_size)}; |
| 70 | const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; | 83 | const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; |
| 84 | const auto cpu_addr = GpuToCpuAddress(gpu_addr); | ||
| 85 | ASSERT(cpu_addr); | ||
| 71 | 86 | ||
| 72 | rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); | 87 | rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); |
| 73 | UnmapRange(gpu_addr, aligned_size); | 88 | UnmapRange(gpu_addr, aligned_size); |
| 89 | ASSERT(system.CurrentProcess() | ||
| 90 | ->VMManager() | ||
| 91 | .SetMemoryAttribute(cpu_addr.value(), size, Kernel::MemoryAttribute::DeviceMapped, | ||
| 92 | Kernel::MemoryAttribute::None) | ||
| 93 | .IsSuccess()); | ||
| 74 | 94 | ||
| 75 | return gpu_addr; | 95 | return gpu_addr; |
| 76 | } | 96 | } |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 43a84bd52..aea010087 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -14,6 +14,10 @@ namespace VideoCore { | |||
| 14 | class RasterizerInterface; | 14 | class RasterizerInterface; |
| 15 | } | 15 | } |
| 16 | 16 | ||
| 17 | namespace Core { | ||
| 18 | class System; | ||
| 19 | } | ||
| 20 | |||
| 17 | namespace Tegra { | 21 | namespace Tegra { |
| 18 | 22 | ||
| 19 | /** | 23 | /** |
| @@ -47,7 +51,7 @@ struct VirtualMemoryArea { | |||
| 47 | 51 | ||
| 48 | class MemoryManager final { | 52 | class MemoryManager final { |
| 49 | public: | 53 | public: |
| 50 | explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer); | 54 | explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer); |
| 51 | ~MemoryManager(); | 55 | ~MemoryManager(); |
| 52 | 56 | ||
| 53 | GPUVAddr AllocateSpace(u64 size, u64 align); | 57 | GPUVAddr AllocateSpace(u64 size, u64 align); |
| @@ -173,6 +177,8 @@ private: | |||
| 173 | Common::PageTable page_table{page_bits}; | 177 | Common::PageTable page_table{page_bits}; |
| 174 | VMAMap vma_map; | 178 | VMAMap vma_map; |
| 175 | VideoCore::RasterizerInterface& rasterizer; | 179 | VideoCore::RasterizerInterface& rasterizer; |
| 180 | |||
| 181 | Core::System& system; | ||
| 176 | }; | 182 | }; |
| 177 | 183 | ||
| 178 | } // namespace Tegra | 184 | } // namespace Tegra |
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 3e91cbc83..084f85e67 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp | |||
| @@ -25,8 +25,8 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth | |||
| 25 | 25 | ||
| 26 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual | 26 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual |
| 27 | // pixel values. | 27 | // pixel values. |
| 28 | const u32 tile_size_x{GetDefaultBlockWidth(format)}; | 28 | constexpr u32 tile_size_x{GetDefaultBlockWidth(format)}; |
| 29 | const u32 tile_size_y{GetDefaultBlockHeight(format)}; | 29 | constexpr u32 tile_size_y{GetDefaultBlockHeight(format)}; |
| 30 | 30 | ||
| 31 | if constexpr (morton_to_linear) { | 31 | if constexpr (morton_to_linear) { |
| 32 | Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, | 32 | Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, |
| @@ -186,99 +186,6 @@ static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFor | |||
| 186 | return morton_to_linear_fns[static_cast<std::size_t>(format)]; | 186 | return morton_to_linear_fns[static_cast<std::size_t>(format)]; |
| 187 | } | 187 | } |
| 188 | 188 | ||
| 189 | static u32 MortonInterleave128(u32 x, u32 y) { | ||
| 190 | // 128x128 Z-Order coordinate from 2D coordinates | ||
| 191 | static constexpr u32 xlut[] = { | ||
| 192 | 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, | ||
| 193 | 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, | ||
| 194 | 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, | ||
| 195 | 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, | ||
| 196 | 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, | ||
| 197 | 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, | ||
| 198 | 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, | ||
| 199 | 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, | ||
| 200 | 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, | ||
| 201 | 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, | ||
| 202 | 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, | ||
| 203 | 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, | ||
| 204 | 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, | ||
| 205 | 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, | ||
| 206 | 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, | ||
| 207 | 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, | ||
| 208 | 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, | ||
| 209 | 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, | ||
| 210 | 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, | ||
| 211 | 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, | ||
| 212 | 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, | ||
| 213 | 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, | ||
| 214 | 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, | ||
| 215 | 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, | ||
| 216 | 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, | ||
| 217 | 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, | ||
| 218 | 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, | ||
| 219 | 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, | ||
| 220 | 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, | ||
| 221 | 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, | ||
| 222 | 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, | ||
| 223 | 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, | ||
| 224 | 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, | ||
| 225 | 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, | ||
| 226 | 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, | ||
| 227 | }; | ||
| 228 | static constexpr u32 ylut[] = { | ||
| 229 | 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, | ||
| 230 | 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, | ||
| 231 | 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, | ||
| 232 | 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, | ||
| 233 | 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, | ||
| 234 | 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, | ||
| 235 | 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, | ||
| 236 | 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, | ||
| 237 | 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, | ||
| 238 | 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, | ||
| 239 | 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, | ||
| 240 | 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, | ||
| 241 | 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, | ||
| 242 | 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, | ||
| 243 | 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, | ||
| 244 | 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, | ||
| 245 | 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, | ||
| 246 | 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, | ||
| 247 | 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, | ||
| 248 | 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, | ||
| 249 | 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, | ||
| 250 | 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, | ||
| 251 | 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, | ||
| 252 | 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, | ||
| 253 | 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, | ||
| 254 | 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, | ||
| 255 | 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, | ||
| 256 | 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, | ||
| 257 | 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, | ||
| 258 | 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, | ||
| 259 | 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, | ||
| 260 | 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, | ||
| 261 | 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, | ||
| 262 | 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, | ||
| 263 | 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, | ||
| 264 | }; | ||
| 265 | return xlut[x % 128] + ylut[y % 128]; | ||
| 266 | } | ||
| 267 | |||
| 268 | static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) { | ||
| 269 | // Calculates the offset of the position of the pixel in Morton order | ||
| 270 | // Framebuffer images are split into 128x128 tiles. | ||
| 271 | |||
| 272 | constexpr u32 block_height = 128; | ||
| 273 | const u32 coarse_x = x & ~127; | ||
| 274 | |||
| 275 | const u32 i = MortonInterleave128(x, y); | ||
| 276 | |||
| 277 | const u32 offset = coarse_x * block_height; | ||
| 278 | |||
| 279 | return (i + offset) * bytes_per_pixel; | ||
| 280 | } | ||
| 281 | |||
| 282 | void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, | 189 | void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, |
| 283 | u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, | 190 | u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, |
| 284 | u8* buffer, u8* addr) { | 191 | u8* buffer, u8* addr) { |
| @@ -286,23 +193,4 @@ void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stri | |||
| 286 | tile_width_spacing, buffer, addr); | 193 | tile_width_spacing, buffer, addr); |
| 287 | } | 194 | } |
| 288 | 195 | ||
| 289 | void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel, | ||
| 290 | u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data) { | ||
| 291 | const bool morton_to_linear = mode == MortonSwizzleMode::MortonToLinear; | ||
| 292 | u8* data_ptrs[2]; | ||
| 293 | for (u32 y = 0; y < height; ++y) { | ||
| 294 | for (u32 x = 0; x < width; ++x) { | ||
| 295 | const u32 coarse_y = y & ~127; | ||
| 296 | const u32 morton_offset = | ||
| 297 | GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; | ||
| 298 | const u32 linear_pixel_index = (x + y * width) * linear_bytes_per_pixel; | ||
| 299 | |||
| 300 | data_ptrs[morton_to_linear ? 1 : 0] = morton_data + morton_offset; | ||
| 301 | data_ptrs[morton_to_linear ? 0 : 1] = &linear_data[linear_pixel_index]; | ||
| 302 | |||
| 303 | std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); | ||
| 304 | } | ||
| 305 | } | ||
| 306 | } | ||
| 307 | |||
| 308 | } // namespace VideoCore | 196 | } // namespace VideoCore |
diff --git a/src/video_core/morton.h b/src/video_core/morton.h index ee5b45555..b714a7e3f 100644 --- a/src/video_core/morton.h +++ b/src/video_core/morton.h | |||
| @@ -15,7 +15,4 @@ void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat forma | |||
| 15 | u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, | 15 | u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, |
| 16 | u8* buffer, u8* addr); | 16 | u8* buffer, u8* addr); |
| 17 | 17 | ||
| 18 | void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel, | ||
| 19 | u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data); | ||
| 20 | |||
| 21 | } // namespace VideoCore | 18 | } // namespace VideoCore |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 5ee4f8e8e..6b3f2d50a 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -34,6 +34,9 @@ public: | |||
| 34 | /// Clear the current framebuffer | 34 | /// Clear the current framebuffer |
| 35 | virtual void Clear() = 0; | 35 | virtual void Clear() = 0; |
| 36 | 36 | ||
| 37 | /// Dispatches a compute shader invocation | ||
| 38 | virtual void DispatchCompute(GPUVAddr code_addr) = 0; | ||
| 39 | |||
| 37 | /// Notify rasterizer that all caches should be flushed to Switch memory | 40 | /// Notify rasterizer that all caches should be flushed to Switch memory |
| 38 | virtual void FlushAll() = 0; | 41 | virtual void FlushAll() = 0; |
| 39 | 42 | ||
| @@ -47,6 +50,12 @@ public: | |||
| 47 | /// and invalidated | 50 | /// and invalidated |
| 48 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; | 51 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; |
| 49 | 52 | ||
| 53 | /// Notify the rasterizer to send all written commands to the host GPU. | ||
| 54 | virtual void FlushCommands() = 0; | ||
| 55 | |||
| 56 | /// Notify rasterizer that a frame is about to finish | ||
| 57 | virtual void TickFrame() = 0; | ||
| 58 | |||
| 50 | /// Attempt to use a faster method to perform a surface copy | 59 | /// Attempt to use a faster method to perform a surface copy |
| 51 | virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 60 | virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 52 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 61 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 1d54c3723..af1bebc4f 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h | |||
| @@ -36,8 +36,7 @@ public: | |||
| 36 | virtual ~RendererBase(); | 36 | virtual ~RendererBase(); |
| 37 | 37 | ||
| 38 | /// Swap buffers (render frame) | 38 | /// Swap buffers (render frame) |
| 39 | virtual void SwapBuffers( | 39 | virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; |
| 40 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0; | ||
| 41 | 40 | ||
| 42 | /// Initialize the renderer | 41 | /// Initialize the renderer |
| 43 | virtual bool Init() = 0; | 42 | virtual bool Init() = 0; |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2b9bd142e..f8a807c84 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -2,103 +2,71 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | ||
| 6 | #include <memory> | 5 | #include <memory> |
| 7 | 6 | ||
| 8 | #include "common/alignment.h" | 7 | #include <glad/glad.h> |
| 9 | #include "core/core.h" | 8 | |
| 10 | #include "video_core/memory_manager.h" | 9 | #include "common/assert.h" |
| 10 | #include "common/microprofile.h" | ||
| 11 | #include "video_core/rasterizer_interface.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 12 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 12 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 13 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 13 | 15 | ||
| 14 | namespace OpenGL { | 16 | namespace OpenGL { |
| 15 | 17 | ||
| 16 | CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, | 18 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); |
| 17 | std::size_t alignment, u8* host_ptr) | 19 | |
| 18 | : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, | 20 | CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) |
| 19 | alignment{alignment} {} | 21 | : VideoCommon::BufferBlock{cache_addr, size} { |
| 20 | 22 | gl_buffer.Create(); | |
| 21 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) | 23 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); |
| 22 | : RasterizerCache{rasterizer}, stream_buffer(size, true) {} | ||
| 23 | |||
| 24 | GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment, | ||
| 25 | bool cache) { | ||
| 26 | std::lock_guard lock{mutex}; | ||
| 27 | auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | ||
| 28 | |||
| 29 | // Cache management is a big overhead, so only cache entries with a given size. | ||
| 30 | // TODO: Figure out which size is the best for given games. | ||
| 31 | cache &= size >= 2048; | ||
| 32 | |||
| 33 | const auto& host_ptr{memory_manager.GetPointer(gpu_addr)}; | ||
| 34 | if (cache) { | ||
| 35 | auto entry = TryGet(host_ptr); | ||
| 36 | if (entry) { | ||
| 37 | if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { | ||
| 38 | return entry->GetOffset(); | ||
| 39 | } | ||
| 40 | Unregister(entry); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | AlignBuffer(alignment); | ||
| 45 | const GLintptr uploaded_offset = buffer_offset; | ||
| 46 | |||
| 47 | if (!host_ptr) { | ||
| 48 | return uploaded_offset; | ||
| 49 | } | ||
| 50 | |||
| 51 | std::memcpy(buffer_ptr, host_ptr, size); | ||
| 52 | buffer_ptr += size; | ||
| 53 | buffer_offset += size; | ||
| 54 | |||
| 55 | if (cache) { | ||
| 56 | auto entry = std::make_shared<CachedBufferEntry>( | ||
| 57 | *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr); | ||
| 58 | Register(entry); | ||
| 59 | } | ||
| 60 | |||
| 61 | return uploaded_offset; | ||
| 62 | } | 24 | } |
| 63 | 25 | ||
| 64 | GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, | 26 | CachedBufferBlock::~CachedBufferBlock() = default; |
| 65 | std::size_t alignment) { | 27 | |
| 66 | std::lock_guard lock{mutex}; | 28 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 67 | AlignBuffer(alignment); | 29 | std::size_t stream_size) |
| 68 | std::memcpy(buffer_ptr, raw_pointer, size); | 30 | : VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{ |
| 69 | const GLintptr uploaded_offset = buffer_offset; | 31 | rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {} |
| 32 | |||
| 33 | OGLBufferCache::~OGLBufferCache() = default; | ||
| 70 | 34 | ||
| 71 | buffer_ptr += size; | 35 | Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { |
| 72 | buffer_offset += size; | 36 | return std::make_shared<CachedBufferBlock>(cache_addr, size); |
| 73 | return uploaded_offset; | ||
| 74 | } | 37 | } |
| 75 | 38 | ||
| 76 | bool OGLBufferCache::Map(std::size_t max_size) { | 39 | void OGLBufferCache::WriteBarrier() { |
| 77 | bool invalidate; | 40 | glMemoryBarrier(GL_ALL_BARRIER_BITS); |
| 78 | std::tie(buffer_ptr, buffer_offset_base, invalidate) = | 41 | } |
| 79 | stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); | 42 | |
| 80 | buffer_offset = buffer_offset_base; | 43 | const GLuint* OGLBufferCache::ToHandle(const Buffer& buffer) { |
| 44 | return buffer->GetHandle(); | ||
| 45 | } | ||
| 81 | 46 | ||
| 82 | if (invalidate) { | 47 | const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { |
| 83 | InvalidateAll(); | 48 | static const GLuint null_buffer = 0; |
| 84 | } | 49 | return &null_buffer; |
| 85 | return invalidate; | ||
| 86 | } | 50 | } |
| 87 | 51 | ||
| 88 | void OGLBufferCache::Unmap() { | 52 | void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 89 | stream_buffer.Unmap(buffer_offset - buffer_offset_base); | 53 | const u8* data) { |
| 54 | glNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset), | ||
| 55 | static_cast<GLsizeiptr>(size), data); | ||
| 90 | } | 56 | } |
| 91 | 57 | ||
| 92 | GLuint OGLBufferCache::GetHandle() const { | 58 | void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 93 | return stream_buffer.GetHandle(); | 59 | u8* data) { |
| 60 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); | ||
| 61 | glGetNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset), | ||
| 62 | static_cast<GLsizeiptr>(size), data); | ||
| 94 | } | 63 | } |
| 95 | 64 | ||
| 96 | void OGLBufferCache::AlignBuffer(std::size_t alignment) { | 65 | void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, |
| 97 | // Align the offset, not the mapped pointer | 66 | std::size_t dst_offset, std::size_t size) { |
| 98 | const GLintptr offset_aligned = | 67 | glCopyNamedBufferSubData(*src->GetHandle(), *dst->GetHandle(), |
| 99 | static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); | 68 | static_cast<GLintptr>(src_offset), static_cast<GLintptr>(dst_offset), |
| 100 | buffer_ptr += offset_aligned - buffer_offset; | 69 | static_cast<GLsizeiptr>(size)); |
| 101 | buffer_offset = offset_aligned; | ||
| 102 | } | 70 | } |
| 103 | 71 | ||
| 104 | } // namespace OpenGL | 72 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index f2347581b..022e7bfa9 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -4,80 +4,63 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstddef> | ||
| 8 | #include <memory> | 7 | #include <memory> |
| 9 | #include <tuple> | ||
| 10 | 8 | ||
| 11 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | ||
| 12 | #include "video_core/rasterizer_cache.h" | 11 | #include "video_core/rasterizer_cache.h" |
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 14 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 13 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 15 | 14 | ||
| 15 | namespace Core { | ||
| 16 | class System; | ||
| 17 | } | ||
| 18 | |||
| 16 | namespace OpenGL { | 19 | namespace OpenGL { |
| 17 | 20 | ||
| 21 | class OGLStreamBuffer; | ||
| 18 | class RasterizerOpenGL; | 22 | class RasterizerOpenGL; |
| 19 | 23 | ||
| 20 | class CachedBufferEntry final : public RasterizerCacheObject { | 24 | class CachedBufferBlock; |
| 21 | public: | ||
| 22 | explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, | ||
| 23 | std::size_t alignment, u8* host_ptr); | ||
| 24 | |||
| 25 | VAddr GetCpuAddr() const override { | ||
| 26 | return cpu_addr; | ||
| 27 | } | ||
| 28 | 25 | ||
| 29 | std::size_t GetSizeInBytes() const override { | 26 | using Buffer = std::shared_ptr<CachedBufferBlock>; |
| 30 | return size; | ||
| 31 | } | ||
| 32 | |||
| 33 | std::size_t GetSize() const { | ||
| 34 | return size; | ||
| 35 | } | ||
| 36 | 27 | ||
| 37 | GLintptr GetOffset() const { | 28 | class CachedBufferBlock : public VideoCommon::BufferBlock { |
| 38 | return offset; | 29 | public: |
| 39 | } | 30 | explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size); |
| 31 | ~CachedBufferBlock(); | ||
| 40 | 32 | ||
| 41 | std::size_t GetAlignment() const { | 33 | const GLuint* GetHandle() const { |
| 42 | return alignment; | 34 | return &gl_buffer.handle; |
| 43 | } | 35 | } |
| 44 | 36 | ||
| 45 | private: | 37 | private: |
| 46 | VAddr cpu_addr{}; | 38 | OGLBuffer gl_buffer{}; |
| 47 | std::size_t size{}; | ||
| 48 | GLintptr offset{}; | ||
| 49 | std::size_t alignment{}; | ||
| 50 | }; | 39 | }; |
| 51 | 40 | ||
| 52 | class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | 41 | class OGLBufferCache final : public VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> { |
| 53 | public: | 42 | public: |
| 54 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); | 43 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 55 | 44 | std::size_t stream_size); | |
| 56 | /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been | 45 | ~OGLBufferCache(); |
| 57 | /// allocated. | ||
| 58 | GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | ||
| 59 | bool cache = true); | ||
| 60 | 46 | ||
| 61 | /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. | 47 | const GLuint* GetEmptyBuffer(std::size_t) override; |
| 62 | GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); | ||
| 63 | 48 | ||
| 64 | bool Map(std::size_t max_size); | 49 | protected: |
| 65 | void Unmap(); | 50 | Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; |
| 66 | 51 | ||
| 67 | GLuint GetHandle() const; | 52 | void WriteBarrier() override; |
| 68 | 53 | ||
| 69 | protected: | 54 | const GLuint* ToHandle(const Buffer& buffer) override; |
| 70 | void AlignBuffer(std::size_t alignment); | ||
| 71 | 55 | ||
| 72 | // We do not have to flush this cache as things in it are never modified by us. | 56 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 73 | void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} | 57 | const u8* data) override; |
| 74 | 58 | ||
| 75 | private: | 59 | void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 76 | OGLStreamBuffer stream_buffer; | 60 | u8* data) override; |
| 77 | 61 | ||
| 78 | u8* buffer_ptr = nullptr; | 62 | void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, |
| 79 | GLintptr buffer_offset = 0; | 63 | std::size_t dst_offset, std::size_t size) override; |
| 80 | GLintptr buffer_offset_base = 0; | ||
| 81 | }; | 64 | }; |
| 82 | 65 | ||
| 83 | } // namespace OpenGL | 66 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a48e14d2e..03d434b28 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -24,8 +24,12 @@ T GetInteger(GLenum pname) { | |||
| 24 | 24 | ||
| 25 | Device::Device() { | 25 | Device::Device() { |
| 26 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 26 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 27 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | ||
| 27 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 28 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
| 28 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); | 29 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); |
| 30 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && | ||
| 31 | GLAD_GL_NV_shader_thread_shuffle; | ||
| 32 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; | ||
| 29 | has_variable_aoffi = TestVariableAoffi(); | 33 | has_variable_aoffi = TestVariableAoffi(); |
| 30 | has_component_indexing_bug = TestComponentIndexingBug(); | 34 | has_component_indexing_bug = TestComponentIndexingBug(); |
| 31 | } | 35 | } |
| @@ -34,6 +38,8 @@ Device::Device(std::nullptr_t) { | |||
| 34 | uniform_buffer_alignment = 0; | 38 | uniform_buffer_alignment = 0; |
| 35 | max_vertex_attributes = 16; | 39 | max_vertex_attributes = 16; |
| 36 | max_varyings = 15; | 40 | max_varyings = 15; |
| 41 | has_warp_intrinsics = true; | ||
| 42 | has_vertex_viewport_layer = true; | ||
| 37 | has_variable_aoffi = true; | 43 | has_variable_aoffi = true; |
| 38 | has_component_indexing_bug = false; | 44 | has_component_indexing_bug = false; |
| 39 | } | 45 | } |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8c8c93760..3ef7c6dd8 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -18,6 +18,10 @@ public: | |||
| 18 | return uniform_buffer_alignment; | 18 | return uniform_buffer_alignment; |
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | std::size_t GetShaderStorageBufferAlignment() const { | ||
| 22 | return shader_storage_alignment; | ||
| 23 | } | ||
| 24 | |||
| 21 | u32 GetMaxVertexAttributes() const { | 25 | u32 GetMaxVertexAttributes() const { |
| 22 | return max_vertex_attributes; | 26 | return max_vertex_attributes; |
| 23 | } | 27 | } |
| @@ -26,6 +30,14 @@ public: | |||
| 26 | return max_varyings; | 30 | return max_varyings; |
| 27 | } | 31 | } |
| 28 | 32 | ||
| 33 | bool HasWarpIntrinsics() const { | ||
| 34 | return has_warp_intrinsics; | ||
| 35 | } | ||
| 36 | |||
| 37 | bool HasVertexViewportLayer() const { | ||
| 38 | return has_vertex_viewport_layer; | ||
| 39 | } | ||
| 40 | |||
| 29 | bool HasVariableAoffi() const { | 41 | bool HasVariableAoffi() const { |
| 30 | return has_variable_aoffi; | 42 | return has_variable_aoffi; |
| 31 | } | 43 | } |
| @@ -39,8 +51,11 @@ private: | |||
| 39 | static bool TestComponentIndexingBug(); | 51 | static bool TestComponentIndexingBug(); |
| 40 | 52 | ||
| 41 | std::size_t uniform_buffer_alignment{}; | 53 | std::size_t uniform_buffer_alignment{}; |
| 54 | std::size_t shader_storage_alignment{}; | ||
| 42 | u32 max_vertex_attributes{}; | 55 | u32 max_vertex_attributes{}; |
| 43 | u32 max_varyings{}; | 56 | u32 max_varyings{}; |
| 57 | bool has_warp_intrinsics{}; | ||
| 58 | bool has_vertex_viewport_layer{}; | ||
| 44 | bool has_variable_aoffi{}; | 59 | bool has_variable_aoffi{}; |
| 45 | bool has_component_indexing_bug{}; | 60 | bool has_component_indexing_bug{}; |
| 46 | }; | 61 | }; |
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp deleted file mode 100644 index d5e385151..000000000 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ /dev/null | |||
| @@ -1,102 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <glad/glad.h> | ||
| 6 | |||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "core/core.h" | ||
| 9 | #include "video_core/memory_manager.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_global_cache.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 13 | #include "video_core/renderer_opengl/utils.h" | ||
| 14 | |||
| 15 | namespace OpenGL { | ||
| 16 | |||
| 17 | CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size) | ||
| 18 | : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size}, | ||
| 19 | max_size{max_size} { | ||
| 20 | buffer.Create(); | ||
| 21 | LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); | ||
| 22 | } | ||
| 23 | |||
| 24 | CachedGlobalRegion::~CachedGlobalRegion() = default; | ||
| 25 | |||
| 26 | void CachedGlobalRegion::Reload(u32 size_) { | ||
| 27 | size = size_; | ||
| 28 | if (size > max_size) { | ||
| 29 | size = max_size; | ||
| 30 | LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_, | ||
| 31 | max_size); | ||
| 32 | } | ||
| 33 | glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW); | ||
| 34 | } | ||
| 35 | |||
| 36 | void CachedGlobalRegion::Flush() { | ||
| 37 | LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr); | ||
| 38 | glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr); | ||
| 39 | } | ||
| 40 | |||
| 41 | GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { | ||
| 42 | const auto search{reserve.find(addr)}; | ||
| 43 | if (search == reserve.end()) { | ||
| 44 | return {}; | ||
| 45 | } | ||
| 46 | return search->second; | ||
| 47 | } | ||
| 48 | |||
| 49 | GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, | ||
| 50 | u32 size) { | ||
| 51 | GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; | ||
| 52 | if (!region) { | ||
| 53 | // No reserved surface available, create a new one and reserve it | ||
| 54 | auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; | ||
| 55 | const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)}; | ||
| 56 | ASSERT(cpu_addr); | ||
| 57 | |||
| 58 | region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size); | ||
| 59 | ReserveGlobalRegion(region); | ||
| 60 | } | ||
| 61 | region->Reload(size); | ||
| 62 | return region; | ||
| 63 | } | ||
| 64 | |||
| 65 | void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { | ||
| 66 | reserve.insert_or_assign(region->GetCacheAddr(), std::move(region)); | ||
| 67 | } | ||
| 68 | |||
| 69 | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) | ||
| 70 | : RasterizerCache{rasterizer} { | ||
| 71 | GLint max_ssbo_size_; | ||
| 72 | glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_); | ||
| 73 | max_ssbo_size = static_cast<u32>(max_ssbo_size_); | ||
| 74 | } | ||
| 75 | |||
| 76 | GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( | ||
| 77 | const GLShader::GlobalMemoryEntry& global_region, | ||
| 78 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { | ||
| 79 | std::lock_guard lock{mutex}; | ||
| 80 | |||
| 81 | auto& gpu{Core::System::GetInstance().GPU()}; | ||
| 82 | auto& memory_manager{gpu.MemoryManager()}; | ||
| 83 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; | ||
| 84 | const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + | ||
| 85 | global_region.GetCbufOffset()}; | ||
| 86 | const auto actual_addr{memory_manager.Read<u64>(addr)}; | ||
| 87 | const auto size{memory_manager.Read<u32>(addr + 8)}; | ||
| 88 | |||
| 89 | // Look up global region in the cache based on address | ||
| 90 | const auto& host_ptr{memory_manager.GetPointer(actual_addr)}; | ||
| 91 | GlobalRegion region{TryGet(host_ptr)}; | ||
| 92 | |||
| 93 | if (!region) { | ||
| 94 | // No global region found - create a new one | ||
| 95 | region = GetUncachedGlobalRegion(actual_addr, host_ptr, size); | ||
| 96 | Register(region); | ||
| 97 | } | ||
| 98 | |||
| 99 | return region; | ||
| 100 | } | ||
| 101 | |||
| 102 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h deleted file mode 100644 index 2d467a240..000000000 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ /dev/null | |||
| @@ -1,82 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <unordered_map> | ||
| 9 | |||
| 10 | #include <glad/glad.h> | ||
| 11 | |||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/rasterizer_cache.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 17 | |||
| 18 | namespace OpenGL { | ||
| 19 | |||
| 20 | namespace GLShader { | ||
| 21 | class GlobalMemoryEntry; | ||
| 22 | } | ||
| 23 | |||
| 24 | class RasterizerOpenGL; | ||
| 25 | class CachedGlobalRegion; | ||
| 26 | using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; | ||
| 27 | |||
| 28 | class CachedGlobalRegion final : public RasterizerCacheObject { | ||
| 29 | public: | ||
| 30 | explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size); | ||
| 31 | ~CachedGlobalRegion(); | ||
| 32 | |||
| 33 | VAddr GetCpuAddr() const override { | ||
| 34 | return cpu_addr; | ||
| 35 | } | ||
| 36 | |||
| 37 | std::size_t GetSizeInBytes() const override { | ||
| 38 | return size; | ||
| 39 | } | ||
| 40 | |||
| 41 | /// Gets the GL program handle for the buffer | ||
| 42 | GLuint GetBufferHandle() const { | ||
| 43 | return buffer.handle; | ||
| 44 | } | ||
| 45 | |||
| 46 | /// Reloads the global region from guest memory | ||
| 47 | void Reload(u32 size_); | ||
| 48 | |||
| 49 | void Flush(); | ||
| 50 | |||
| 51 | private: | ||
| 52 | VAddr cpu_addr{}; | ||
| 53 | u8* host_ptr{}; | ||
| 54 | u32 size{}; | ||
| 55 | u32 max_size{}; | ||
| 56 | |||
| 57 | OGLBuffer buffer; | ||
| 58 | }; | ||
| 59 | |||
| 60 | class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { | ||
| 61 | public: | ||
| 62 | explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); | ||
| 63 | |||
| 64 | /// Gets the current specified shader stage program | ||
| 65 | GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor, | ||
| 66 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); | ||
| 67 | |||
| 68 | protected: | ||
| 69 | void FlushObjectInner(const GlobalRegion& object) override { | ||
| 70 | object->Flush(); | ||
| 71 | } | ||
| 72 | |||
| 73 | private: | ||
| 74 | GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; | ||
| 75 | GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size); | ||
| 76 | void ReserveGlobalRegion(GlobalRegion region); | ||
| 77 | |||
| 78 | std::unordered_map<CacheAddr, GlobalRegion> reserve; | ||
| 79 | u32 max_ssbo_size{}; | ||
| 80 | }; | ||
| 81 | |||
| 82 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f45a3c5ef..bb09ecd52 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <bitset> | ||
| 7 | #include <memory> | 8 | #include <memory> |
| 8 | #include <string> | 9 | #include <string> |
| 9 | #include <string_view> | 10 | #include <string_view> |
| @@ -19,7 +20,9 @@ | |||
| 19 | #include "core/core.h" | 20 | #include "core/core.h" |
| 20 | #include "core/hle/kernel/process.h" | 21 | #include "core/hle/kernel/process.h" |
| 21 | #include "core/settings.h" | 22 | #include "core/settings.h" |
| 23 | #include "video_core/engines/kepler_compute.h" | ||
| 22 | #include "video_core/engines/maxwell_3d.h" | 24 | #include "video_core/engines/maxwell_3d.h" |
| 25 | #include "video_core/memory_manager.h" | ||
| 23 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 26 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 24 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 27 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 28 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| @@ -80,16 +83,31 @@ struct DrawParameters { | |||
| 80 | } | 83 | } |
| 81 | }; | 84 | }; |
| 82 | 85 | ||
| 86 | static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | ||
| 87 | const GLShader::ConstBufferEntry& entry) { | ||
| 88 | if (!entry.IsIndirect()) { | ||
| 89 | return entry.GetSize(); | ||
| 90 | } | ||
| 91 | |||
| 92 | if (buffer.size > Maxwell::MaxConstBufferSize) { | ||
| 93 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, | ||
| 94 | Maxwell::MaxConstBufferSize); | ||
| 95 | return Maxwell::MaxConstBufferSize; | ||
| 96 | } | ||
| 97 | |||
| 98 | return buffer.size; | ||
| 99 | } | ||
| 100 | |||
| 83 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 101 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| 84 | ScreenInfo& info) | 102 | ScreenInfo& info) |
| 85 | : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, | 103 | : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, |
| 86 | global_cache{*this}, system{system}, screen_info{info}, | 104 | system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} { |
| 87 | buffer_cache(*this, STREAM_BUFFER_SIZE) { | ||
| 88 | OpenGLState::ApplyDefaultState(); | 105 | OpenGLState::ApplyDefaultState(); |
| 89 | 106 | ||
| 90 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); | 107 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); |
| 91 | state.draw.shader_program = 0; | 108 | state.draw.shader_program = 0; |
| 92 | state.Apply(); | 109 | state.Apply(); |
| 110 | clear_framebuffer.Create(); | ||
| 93 | 111 | ||
| 94 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); | 112 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); |
| 95 | CheckExtensions(); | 113 | CheckExtensions(); |
| @@ -109,10 +127,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 109 | auto& gpu = system.GPU().Maxwell3D(); | 127 | auto& gpu = system.GPU().Maxwell3D(); |
| 110 | const auto& regs = gpu.regs; | 128 | const auto& regs = gpu.regs; |
| 111 | 129 | ||
| 112 | if (!gpu.dirty_flags.vertex_attrib_format) { | 130 | if (!gpu.dirty.vertex_attrib_format) { |
| 113 | return state.draw.vertex_array; | 131 | return state.draw.vertex_array; |
| 114 | } | 132 | } |
| 115 | gpu.dirty_flags.vertex_attrib_format = false; | 133 | gpu.dirty.vertex_attrib_format = false; |
| 116 | 134 | ||
| 117 | MICROPROFILE_SCOPE(OpenGL_VAO); | 135 | MICROPROFILE_SCOPE(OpenGL_VAO); |
| 118 | 136 | ||
| @@ -129,8 +147,6 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 129 | state.draw.vertex_array = vao; | 147 | state.draw.vertex_array = vao; |
| 130 | state.ApplyVertexArrayState(); | 148 | state.ApplyVertexArrayState(); |
| 131 | 149 | ||
| 132 | glVertexArrayElementBuffer(vao, buffer_cache.GetHandle()); | ||
| 133 | |||
| 134 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. | 150 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. |
| 135 | // Enables the first 16 vertex attributes always, as we don't know which ones are actually | 151 | // Enables the first 16 vertex attributes always, as we don't know which ones are actually |
| 136 | // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 | 152 | // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 |
| @@ -168,7 +184,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 168 | } | 184 | } |
| 169 | 185 | ||
| 170 | // Rebinding the VAO invalidates the vertex buffer bindings. | 186 | // Rebinding the VAO invalidates the vertex buffer bindings. |
| 171 | gpu.dirty_flags.vertex_array.set(); | 187 | gpu.dirty.ResetVertexArrays(); |
| 172 | 188 | ||
| 173 | state.draw.vertex_array = vao_entry.handle; | 189 | state.draw.vertex_array = vao_entry.handle; |
| 174 | return vao_entry.handle; | 190 | return vao_entry.handle; |
| @@ -176,17 +192,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 176 | 192 | ||
| 177 | void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | 193 | void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { |
| 178 | auto& gpu = system.GPU().Maxwell3D(); | 194 | auto& gpu = system.GPU().Maxwell3D(); |
| 179 | const auto& regs = gpu.regs; | 195 | if (!gpu.dirty.vertex_array_buffers) |
| 180 | |||
| 181 | if (gpu.dirty_flags.vertex_array.none()) | ||
| 182 | return; | 196 | return; |
| 197 | gpu.dirty.vertex_array_buffers = false; | ||
| 198 | |||
| 199 | const auto& regs = gpu.regs; | ||
| 183 | 200 | ||
| 184 | MICROPROFILE_SCOPE(OpenGL_VB); | 201 | MICROPROFILE_SCOPE(OpenGL_VB); |
| 185 | 202 | ||
| 186 | // Upload all guest vertex arrays sequentially to our buffer | 203 | // Upload all guest vertex arrays sequentially to our buffer |
| 187 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 204 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 188 | if (!gpu.dirty_flags.vertex_array[index]) | 205 | if (!gpu.dirty.vertex_array[index]) |
| 189 | continue; | 206 | continue; |
| 207 | gpu.dirty.vertex_array[index] = false; | ||
| 208 | gpu.dirty.vertex_instance[index] = false; | ||
| 190 | 209 | ||
| 191 | const auto& vertex_array = regs.vertex_array[index]; | 210 | const auto& vertex_array = regs.vertex_array[index]; |
| 192 | if (!vertex_array.IsEnabled()) | 211 | if (!vertex_array.IsEnabled()) |
| @@ -197,11 +216,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 197 | 216 | ||
| 198 | ASSERT(end > start); | 217 | ASSERT(end > start); |
| 199 | const u64 size = end - start + 1; | 218 | const u64 size = end - start + 1; |
| 200 | const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size); | 219 | const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); |
| 201 | 220 | ||
| 202 | // Bind the vertex array to the buffer at the current offset. | 221 | // Bind the vertex array to the buffer at the current offset. |
| 203 | glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset, | 222 | vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset, |
| 204 | vertex_array.stride); | 223 | vertex_array.stride); |
| 205 | 224 | ||
| 206 | if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { | 225 | if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { |
| 207 | // Enable vertex buffer instancing with the specified divisor. | 226 | // Enable vertex buffer instancing with the specified divisor. |
| @@ -211,11 +230,47 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 211 | glVertexArrayBindingDivisor(vao, index, 0); | 230 | glVertexArrayBindingDivisor(vao, index, 0); |
| 212 | } | 231 | } |
| 213 | } | 232 | } |
| 233 | } | ||
| 214 | 234 | ||
| 215 | gpu.dirty_flags.vertex_array.reset(); | 235 | void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { |
| 236 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 237 | |||
| 238 | if (!gpu.dirty.vertex_instances) | ||
| 239 | return; | ||
| 240 | gpu.dirty.vertex_instances = false; | ||
| 241 | |||
| 242 | const auto& regs = gpu.regs; | ||
| 243 | // Upload all guest vertex arrays sequentially to our buffer | ||
| 244 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 245 | if (!gpu.dirty.vertex_instance[index]) | ||
| 246 | continue; | ||
| 247 | |||
| 248 | gpu.dirty.vertex_instance[index] = false; | ||
| 249 | |||
| 250 | if (regs.instanced_arrays.IsInstancingEnabled(index) && | ||
| 251 | regs.vertex_array[index].divisor != 0) { | ||
| 252 | // Enable vertex buffer instancing with the specified divisor. | ||
| 253 | glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor); | ||
| 254 | } else { | ||
| 255 | // Disable the vertex buffer instancing. | ||
| 256 | glVertexArrayBindingDivisor(vao, index, 0); | ||
| 257 | } | ||
| 258 | } | ||
| 216 | } | 259 | } |
| 217 | 260 | ||
| 218 | DrawParameters RasterizerOpenGL::SetupDraw() { | 261 | GLintptr RasterizerOpenGL::SetupIndexBuffer() { |
| 262 | if (accelerate_draw != AccelDraw::Indexed) { | ||
| 263 | return 0; | ||
| 264 | } | ||
| 265 | MICROPROFILE_SCOPE(OpenGL_Index); | ||
| 266 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 267 | const std::size_t size = CalculateIndexBufferSize(); | ||
| 268 | const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); | ||
| 269 | vertex_array_pushbuffer.SetIndexBuffer(buffer); | ||
| 270 | return offset; | ||
| 271 | } | ||
| 272 | |||
| 273 | DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) { | ||
| 219 | const auto& gpu = system.GPU().Maxwell3D(); | 274 | const auto& gpu = system.GPU().Maxwell3D(); |
| 220 | const auto& regs = gpu.regs; | 275 | const auto& regs = gpu.regs; |
| 221 | const bool is_indexed = accelerate_draw == AccelDraw::Indexed; | 276 | const bool is_indexed = accelerate_draw == AccelDraw::Indexed; |
| @@ -227,11 +282,9 @@ DrawParameters RasterizerOpenGL::SetupDraw() { | |||
| 227 | params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); | 282 | params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); |
| 228 | 283 | ||
| 229 | if (is_indexed) { | 284 | if (is_indexed) { |
| 230 | MICROPROFILE_SCOPE(OpenGL_Index); | ||
| 231 | params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); | 285 | params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); |
| 232 | params.count = regs.index_array.count; | 286 | params.count = regs.index_array.count; |
| 233 | params.index_buffer_offset = | 287 | params.index_buffer_offset = index_buffer_offset; |
| 234 | buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize()); | ||
| 235 | params.base_vertex = static_cast<GLint>(regs.vb_element_base); | 288 | params.base_vertex = static_cast<GLint>(regs.vb_element_base); |
| 236 | } else { | 289 | } else { |
| 237 | params.count = regs.vertex_buffer.count; | 290 | params.count = regs.vertex_buffer.count; |
| @@ -247,10 +300,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 247 | BaseBindings base_bindings; | 300 | BaseBindings base_bindings; |
| 248 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 301 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 249 | 302 | ||
| 250 | // Prepare packed bindings | ||
| 251 | bind_ubo_pushbuffer.Setup(base_bindings.cbuf); | ||
| 252 | bind_ssbo_pushbuffer.Setup(base_bindings.gmem); | ||
| 253 | |||
| 254 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 303 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 255 | const auto& shader_config = gpu.regs.shader_config[index]; | 304 | const auto& shader_config = gpu.regs.shader_config[index]; |
| 256 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; | 305 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; |
| @@ -271,18 +320,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 271 | 320 | ||
| 272 | GLShader::MaxwellUniformData ubo{}; | 321 | GLShader::MaxwellUniformData ubo{}; |
| 273 | ubo.SetFromRegs(gpu, stage); | 322 | ubo.SetFromRegs(gpu, stage); |
| 274 | const GLintptr offset = | 323 | const auto [buffer, offset] = |
| 275 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); | 324 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); |
| 276 | 325 | ||
| 277 | // Bind the emulation info buffer | 326 | // Bind the emulation info buffer |
| 278 | bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, | 327 | bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo))); |
| 279 | static_cast<GLsizeiptr>(sizeof(ubo))); | ||
| 280 | 328 | ||
| 281 | Shader shader{shader_cache.GetStageProgram(program)}; | 329 | Shader shader{shader_cache.GetStageProgram(program)}; |
| 282 | 330 | ||
| 283 | const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)}; | 331 | const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); |
| 284 | SetupDrawConstBuffers(stage_enum, shader); | 332 | SetupDrawConstBuffers(stage_enum, shader); |
| 285 | SetupGlobalRegions(stage_enum, shader); | 333 | SetupDrawGlobalMemory(stage_enum, shader); |
| 286 | const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; | 334 | const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; |
| 287 | 335 | ||
| 288 | const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; | 336 | const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; |
| @@ -321,12 +369,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 321 | base_bindings = next_bindings; | 369 | base_bindings = next_bindings; |
| 322 | } | 370 | } |
| 323 | 371 | ||
| 324 | bind_ubo_pushbuffer.Bind(); | ||
| 325 | bind_ssbo_pushbuffer.Bind(); | ||
| 326 | |||
| 327 | SyncClipEnabled(clip_distances); | 372 | SyncClipEnabled(clip_distances); |
| 328 | 373 | ||
| 329 | gpu.dirty_flags.shaders = false; | 374 | gpu.dirty.shaders = false; |
| 330 | } | 375 | } |
| 331 | 376 | ||
| 332 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | 377 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { |
| @@ -409,13 +454,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 409 | 454 | ||
| 410 | const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, | 455 | const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, |
| 411 | single_color_target}; | 456 | single_color_target}; |
| 412 | if (fb_config_state == current_framebuffer_config_state && | 457 | if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) { |
| 413 | gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { | ||
| 414 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or | 458 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or |
| 415 | // single color targets). This is done because the guest registers may not change but the | 459 | // single color targets). This is done because the guest registers may not change but the |
| 416 | // host framebuffer may contain different attachments | 460 | // host framebuffer may contain different attachments |
| 417 | return current_depth_stencil_usage; | 461 | return current_depth_stencil_usage; |
| 418 | } | 462 | } |
| 463 | gpu.dirty.render_settings = false; | ||
| 419 | current_framebuffer_config_state = fb_config_state; | 464 | current_framebuffer_config_state = fb_config_state; |
| 420 | 465 | ||
| 421 | texture_cache.GuardRenderTargets(true); | 466 | texture_cache.GuardRenderTargets(true); |
| @@ -504,13 +549,71 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 504 | return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; | 549 | return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; |
| 505 | } | 550 | } |
| 506 | 551 | ||
| 552 | void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, | ||
| 553 | bool using_depth_fb, bool using_stencil_fb) { | ||
| 554 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 555 | const auto& regs = gpu.regs; | ||
| 556 | |||
| 557 | texture_cache.GuardRenderTargets(true); | ||
| 558 | View color_surface{}; | ||
| 559 | if (using_color_fb) { | ||
| 560 | color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false); | ||
| 561 | } | ||
| 562 | View depth_surface{}; | ||
| 563 | if (using_depth_fb || using_stencil_fb) { | ||
| 564 | depth_surface = texture_cache.GetDepthBufferSurface(false); | ||
| 565 | } | ||
| 566 | texture_cache.GuardRenderTargets(false); | ||
| 567 | |||
| 568 | current_state.draw.draw_framebuffer = clear_framebuffer.handle; | ||
| 569 | current_state.ApplyFramebufferState(); | ||
| 570 | |||
| 571 | if (color_surface) { | ||
| 572 | color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); | ||
| 573 | } else { | ||
| 574 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 575 | } | ||
| 576 | |||
| 577 | if (depth_surface) { | ||
| 578 | const auto& params = depth_surface->GetSurfaceParams(); | ||
| 579 | switch (params.type) { | ||
| 580 | case VideoCore::Surface::SurfaceType::Depth: { | ||
| 581 | depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | ||
| 582 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 583 | break; | ||
| 584 | } | ||
| 585 | case VideoCore::Surface::SurfaceType::DepthStencil: { | ||
| 586 | depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | ||
| 587 | break; | ||
| 588 | } | ||
| 589 | default: { UNIMPLEMENTED(); } | ||
| 590 | } | ||
| 591 | } else { | ||
| 592 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 593 | 0); | ||
| 594 | } | ||
| 595 | } | ||
| 596 | |||
| 507 | void RasterizerOpenGL::Clear() { | 597 | void RasterizerOpenGL::Clear() { |
| 508 | const auto& regs = system.GPU().Maxwell3D().regs; | 598 | const auto& maxwell3d = system.GPU().Maxwell3D(); |
| 599 | |||
| 600 | if (!maxwell3d.ShouldExecute()) { | ||
| 601 | return; | ||
| 602 | } | ||
| 603 | |||
| 604 | const auto& regs = maxwell3d.regs; | ||
| 509 | bool use_color{}; | 605 | bool use_color{}; |
| 510 | bool use_depth{}; | 606 | bool use_depth{}; |
| 511 | bool use_stencil{}; | 607 | bool use_stencil{}; |
| 512 | 608 | ||
| 513 | OpenGLState clear_state; | 609 | OpenGLState prev_state{OpenGLState::GetCurState()}; |
| 610 | SCOPE_EXIT({ | ||
| 611 | prev_state.AllDirty(); | ||
| 612 | prev_state.Apply(); | ||
| 613 | }); | ||
| 614 | |||
| 615 | OpenGLState clear_state{OpenGLState::GetCurState()}; | ||
| 616 | clear_state.SetDefaultViewports(); | ||
| 514 | if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || | 617 | if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || |
| 515 | regs.clear_buffers.A) { | 618 | regs.clear_buffers.A) { |
| 516 | use_color = true; | 619 | use_color = true; |
| @@ -530,6 +633,7 @@ void RasterizerOpenGL::Clear() { | |||
| 530 | // true. | 633 | // true. |
| 531 | clear_state.depth.test_enabled = true; | 634 | clear_state.depth.test_enabled = true; |
| 532 | clear_state.depth.test_func = GL_ALWAYS; | 635 | clear_state.depth.test_func = GL_ALWAYS; |
| 636 | clear_state.depth.write_mask = GL_TRUE; | ||
| 533 | } | 637 | } |
| 534 | if (regs.clear_buffers.S) { | 638 | if (regs.clear_buffers.S) { |
| 535 | ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); | 639 | ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); |
| @@ -566,8 +670,9 @@ void RasterizerOpenGL::Clear() { | |||
| 566 | return; | 670 | return; |
| 567 | } | 671 | } |
| 568 | 672 | ||
| 569 | const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( | 673 | ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil); |
| 570 | clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); | 674 | |
| 675 | SyncViewport(clear_state); | ||
| 571 | if (regs.clear_flags.scissor) { | 676 | if (regs.clear_flags.scissor) { |
| 572 | SyncScissorTest(clear_state); | 677 | SyncScissorTest(clear_state); |
| 573 | } | 678 | } |
| @@ -576,21 +681,18 @@ void RasterizerOpenGL::Clear() { | |||
| 576 | clear_state.EmulateViewportWithScissor(); | 681 | clear_state.EmulateViewportWithScissor(); |
| 577 | } | 682 | } |
| 578 | 683 | ||
| 579 | clear_state.ApplyColorMask(); | 684 | clear_state.AllDirty(); |
| 580 | clear_state.ApplyDepth(); | 685 | clear_state.Apply(); |
| 581 | clear_state.ApplyStencilTest(); | ||
| 582 | clear_state.ApplyViewport(); | ||
| 583 | clear_state.ApplyFramebufferState(); | ||
| 584 | 686 | ||
| 585 | if (use_color) { | 687 | if (use_color) { |
| 586 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); | 688 | glClearBufferfv(GL_COLOR, 0, regs.clear_color); |
| 587 | } | 689 | } |
| 588 | 690 | ||
| 589 | if (clear_depth && clear_stencil) { | 691 | if (use_depth && use_stencil) { |
| 590 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); | 692 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); |
| 591 | } else if (clear_depth) { | 693 | } else if (use_depth) { |
| 593 | glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth); | 695 | glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth); |
| 593 | } else if (clear_stencil) { | 695 | } else if (use_stencil) { |
| 595 | glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil); | 697 | glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil); |
| 595 | } | 697 | } |
| 596 | } | 698 | } |
| @@ -601,7 +703,10 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 601 | 703 | ||
| 602 | MICROPROFILE_SCOPE(OpenGL_Drawing); | 704 | MICROPROFILE_SCOPE(OpenGL_Drawing); |
| 603 | auto& gpu = system.GPU().Maxwell3D(); | 705 | auto& gpu = system.GPU().Maxwell3D(); |
| 604 | const auto& regs = gpu.regs; | 706 | |
| 707 | if (!gpu.ShouldExecute()) { | ||
| 708 | return; | ||
| 709 | } | ||
| 605 | 710 | ||
| 606 | SyncColorMask(); | 711 | SyncColorMask(); |
| 607 | SyncFragmentColorClampState(); | 712 | SyncFragmentColorClampState(); |
| @@ -634,26 +739,47 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 634 | Maxwell::MaxShaderStage; | 739 | Maxwell::MaxShaderStage; |
| 635 | 740 | ||
| 636 | // Add space for at least 18 constant buffers | 741 | // Add space for at least 18 constant buffers |
| 637 | buffer_size += | 742 | buffer_size += Maxwell::MaxConstBuffers * |
| 638 | Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); | 743 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); |
| 639 | 744 | ||
| 640 | const bool invalidate = buffer_cache.Map(buffer_size); | 745 | // Prepare the vertex array. |
| 641 | if (invalidate) { | 746 | buffer_cache.Map(buffer_size); |
| 642 | // As all cached buffers are invalidated, we need to recheck their state. | ||
| 643 | gpu.dirty_flags.vertex_array.set(); | ||
| 644 | } | ||
| 645 | 747 | ||
| 748 | // Prepare vertex array format. | ||
| 646 | const GLuint vao = SetupVertexFormat(); | 749 | const GLuint vao = SetupVertexFormat(); |
| 750 | vertex_array_pushbuffer.Setup(vao); | ||
| 751 | |||
| 752 | // Upload vertex and index data. | ||
| 647 | SetupVertexBuffer(vao); | 753 | SetupVertexBuffer(vao); |
| 754 | SetupVertexInstances(vao); | ||
| 755 | const GLintptr index_buffer_offset = SetupIndexBuffer(); | ||
| 756 | |||
| 757 | // Setup draw parameters. It will automatically choose what glDraw* method to use. | ||
| 758 | const DrawParameters params = SetupDraw(index_buffer_offset); | ||
| 648 | 759 | ||
| 649 | DrawParameters params = SetupDraw(); | 760 | // Prepare packed bindings. |
| 761 | bind_ubo_pushbuffer.Setup(0); | ||
| 762 | bind_ssbo_pushbuffer.Setup(0); | ||
| 763 | |||
| 764 | // Setup shaders and their used resources. | ||
| 650 | texture_cache.GuardSamplers(true); | 765 | texture_cache.GuardSamplers(true); |
| 651 | SetupShaders(params.primitive_mode); | 766 | SetupShaders(params.primitive_mode); |
| 652 | texture_cache.GuardSamplers(false); | 767 | texture_cache.GuardSamplers(false); |
| 653 | 768 | ||
| 654 | ConfigureFramebuffers(state); | 769 | ConfigureFramebuffers(state); |
| 655 | 770 | ||
| 656 | buffer_cache.Unmap(); | 771 | // Signal the buffer cache that we are not going to upload more things. |
| 772 | const bool invalidate = buffer_cache.Unmap(); | ||
| 773 | |||
| 774 | // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. | ||
| 775 | vertex_array_pushbuffer.Bind(); | ||
| 776 | bind_ubo_pushbuffer.Bind(); | ||
| 777 | bind_ssbo_pushbuffer.Bind(); | ||
| 778 | |||
| 779 | if (invalidate) { | ||
| 780 | // As all cached buffers are invalidated, we need to recheck their state. | ||
| 781 | gpu.dirty.ResetVertexArrays(); | ||
| 782 | } | ||
| 657 | 783 | ||
| 658 | shader_program_manager->ApplyTo(state); | 784 | shader_program_manager->ApplyTo(state); |
| 659 | state.Apply(); | 785 | state.Apply(); |
| @@ -665,6 +791,46 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 665 | params.DispatchDraw(); | 791 | params.DispatchDraw(); |
| 666 | 792 | ||
| 667 | accelerate_draw = AccelDraw::Disabled; | 793 | accelerate_draw = AccelDraw::Disabled; |
| 794 | gpu.dirty.memory_general = false; | ||
| 795 | } | ||
| 796 | |||
| 797 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | ||
| 798 | if (!GLAD_GL_ARB_compute_variable_group_size) { | ||
| 799 | LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the " | ||
| 800 | "lack of GL_ARB_compute_variable_group_size"); | ||
| 801 | return; | ||
| 802 | } | ||
| 803 | |||
| 804 | auto kernel = shader_cache.GetComputeKernel(code_addr); | ||
| 805 | const auto [program, next_bindings] = kernel->GetProgramHandle({}); | ||
| 806 | state.draw.shader_program = program; | ||
| 807 | state.draw.program_pipeline = 0; | ||
| 808 | |||
| 809 | const std::size_t buffer_size = | ||
| 810 | Tegra::Engines::KeplerCompute::NumConstBuffers * | ||
| 811 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||
| 812 | buffer_cache.Map(buffer_size); | ||
| 813 | |||
| 814 | bind_ubo_pushbuffer.Setup(0); | ||
| 815 | bind_ssbo_pushbuffer.Setup(0); | ||
| 816 | |||
| 817 | SetupComputeConstBuffers(kernel); | ||
| 818 | SetupComputeGlobalMemory(kernel); | ||
| 819 | |||
| 820 | // TODO(Rodrigo): Bind images and samplers | ||
| 821 | |||
| 822 | buffer_cache.Unmap(); | ||
| 823 | |||
| 824 | bind_ubo_pushbuffer.Bind(); | ||
| 825 | bind_ssbo_pushbuffer.Bind(); | ||
| 826 | |||
| 827 | state.ApplyShaderProgram(); | ||
| 828 | state.ApplyProgramPipeline(); | ||
| 829 | |||
| 830 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 831 | glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y, | ||
| 832 | launch_desc.grid_dim_z, launch_desc.block_dim_x, | ||
| 833 | launch_desc.block_dim_y, launch_desc.block_dim_z); | ||
| 668 | } | 834 | } |
| 669 | 835 | ||
| 670 | void RasterizerOpenGL::FlushAll() {} | 836 | void RasterizerOpenGL::FlushAll() {} |
| @@ -675,7 +841,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | |||
| 675 | return; | 841 | return; |
| 676 | } | 842 | } |
| 677 | texture_cache.FlushRegion(addr, size); | 843 | texture_cache.FlushRegion(addr, size); |
| 678 | global_cache.FlushRegion(addr, size); | 844 | buffer_cache.FlushRegion(addr, size); |
| 679 | } | 845 | } |
| 680 | 846 | ||
| 681 | void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | 847 | void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { |
| @@ -685,7 +851,6 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | |||
| 685 | } | 851 | } |
| 686 | texture_cache.InvalidateRegion(addr, size); | 852 | texture_cache.InvalidateRegion(addr, size); |
| 687 | shader_cache.InvalidateRegion(addr, size); | 853 | shader_cache.InvalidateRegion(addr, size); |
| 688 | global_cache.InvalidateRegion(addr, size); | ||
| 689 | buffer_cache.InvalidateRegion(addr, size); | 854 | buffer_cache.InvalidateRegion(addr, size); |
| 690 | } | 855 | } |
| 691 | 856 | ||
| @@ -696,6 +861,14 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | |||
| 696 | InvalidateRegion(addr, size); | 861 | InvalidateRegion(addr, size); |
| 697 | } | 862 | } |
| 698 | 863 | ||
| 864 | void RasterizerOpenGL::FlushCommands() { | ||
| 865 | glFlush(); | ||
| 866 | } | ||
| 867 | |||
| 868 | void RasterizerOpenGL::TickFrame() { | ||
| 869 | buffer_cache.TickFrame(); | ||
| 870 | } | ||
| 871 | |||
| 699 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 872 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 700 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 873 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
| 701 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 874 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| @@ -737,14 +910,25 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 737 | void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 910 | void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 738 | const Shader& shader) { | 911 | const Shader& shader) { |
| 739 | MICROPROFILE_SCOPE(OpenGL_UBO); | 912 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 740 | const auto stage_index = static_cast<std::size_t>(stage); | 913 | const auto& stages = system.GPU().Maxwell3D().state.shader_stages; |
| 741 | const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; | 914 | const auto& shader_stage = stages[static_cast<std::size_t>(stage)]; |
| 742 | const auto& entries = shader->GetShaderEntries().const_buffers; | 915 | for (const auto& entry : shader->GetShaderEntries().const_buffers) { |
| 916 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; | ||
| 917 | SetupConstBuffer(buffer, entry); | ||
| 918 | } | ||
| 919 | } | ||
| 743 | 920 | ||
| 744 | // Upload only the enabled buffers from the 16 constbuffers of each shader stage | 921 | void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { |
| 745 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 922 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 746 | const auto& entry = entries[bindpoint]; | 923 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 747 | SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); | 924 | for (const auto& entry : kernel->GetShaderEntries().const_buffers) { |
| 925 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||
| 926 | const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value(); | ||
| 927 | Tegra::Engines::ConstBufferInfo buffer; | ||
| 928 | buffer.address = config.Address(); | ||
| 929 | buffer.size = config.size; | ||
| 930 | buffer.enabled = mask[entry.GetIndex()]; | ||
| 931 | SetupConstBuffer(buffer, entry); | ||
| 748 | } | 932 | } |
| 749 | } | 933 | } |
| 750 | 934 | ||
| @@ -752,49 +936,52 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b | |||
| 752 | const GLShader::ConstBufferEntry& entry) { | 936 | const GLShader::ConstBufferEntry& entry) { |
| 753 | if (!buffer.enabled) { | 937 | if (!buffer.enabled) { |
| 754 | // Set values to zero to unbind buffers | 938 | // Set values to zero to unbind buffers |
| 755 | bind_ubo_pushbuffer.Push(0, 0, 0); | 939 | bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); |
| 756 | return; | 940 | return; |
| 757 | } | 941 | } |
| 758 | 942 | ||
| 759 | std::size_t size; | ||
| 760 | if (entry.IsIndirect()) { | ||
| 761 | // Buffer is accessed indirectly, so upload the entire thing | ||
| 762 | size = buffer.size; | ||
| 763 | |||
| 764 | if (size > MaxConstbufferSize) { | ||
| 765 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, | ||
| 766 | MaxConstbufferSize); | ||
| 767 | size = MaxConstbufferSize; | ||
| 768 | } | ||
| 769 | } else { | ||
| 770 | // Buffer is accessed directly, upload just what we use | ||
| 771 | size = entry.GetSize(); | ||
| 772 | } | ||
| 773 | |||
| 774 | // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 | 943 | // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 |
| 775 | // UBO alignment requirements. | 944 | // UBO alignment requirements. |
| 776 | size = Common::AlignUp(size, sizeof(GLvec4)); | 945 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); |
| 777 | ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big"); | ||
| 778 | 946 | ||
| 779 | const std::size_t alignment = device.GetUniformBufferAlignment(); | 947 | const auto alignment = device.GetUniformBufferAlignment(); |
| 780 | const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment); | 948 | const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment); |
| 781 | bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size); | 949 | bind_ubo_pushbuffer.Push(cbuf, offset, size); |
| 782 | } | 950 | } |
| 783 | 951 | ||
| 784 | void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 952 | void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 785 | const Shader& shader) { | 953 | const Shader& shader) { |
| 786 | const auto& entries = shader->GetShaderEntries().global_memory_entries; | 954 | auto& gpu{system.GPU()}; |
| 787 | for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 955 | auto& memory_manager{gpu.MemoryManager()}; |
| 788 | const auto& entry{entries[bindpoint]}; | 956 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; |
| 789 | const auto& region{global_cache.GetGlobalRegion(entry, stage)}; | 957 | for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { |
| 790 | if (entry.IsWritten()) { | 958 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; |
| 791 | region->MarkAsModified(true, global_cache); | 959 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; |
| 792 | } | 960 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| 793 | bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, | 961 | SetupGlobalMemory(entry, gpu_addr, size); |
| 794 | static_cast<GLsizeiptr>(region->GetSizeInBytes())); | 962 | } |
| 963 | } | ||
| 964 | |||
| 965 | void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { | ||
| 966 | auto& gpu{system.GPU()}; | ||
| 967 | auto& memory_manager{gpu.MemoryManager()}; | ||
| 968 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; | ||
| 969 | for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { | ||
| 970 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | ||
| 971 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; | ||
| 972 | const auto size{memory_manager.Read<u32>(addr + 8)}; | ||
| 973 | SetupGlobalMemory(entry, gpu_addr, size); | ||
| 795 | } | 974 | } |
| 796 | } | 975 | } |
| 797 | 976 | ||
| 977 | void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, | ||
| 978 | GPUVAddr gpu_addr, std::size_t size) { | ||
| 979 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | ||
| 980 | const auto [ssbo, buffer_offset] = | ||
| 981 | buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten()); | ||
| 982 | bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); | ||
| 983 | } | ||
| 984 | |||
| 798 | TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, | 985 | TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, |
| 799 | BaseBindings base_bindings) { | 986 | BaseBindings base_bindings) { |
| 800 | MICROPROFILE_SCOPE(OpenGL_Texture); | 987 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| @@ -883,10 +1070,11 @@ void RasterizerOpenGL::SyncClipCoef() { | |||
| 883 | } | 1070 | } |
| 884 | 1071 | ||
| 885 | void RasterizerOpenGL::SyncCullMode() { | 1072 | void RasterizerOpenGL::SyncCullMode() { |
| 886 | const auto& regs = system.GPU().Maxwell3D().regs; | 1073 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 887 | 1074 | ||
| 888 | state.cull.enabled = regs.cull.enabled != 0; | 1075 | const auto& regs = maxwell3d.regs; |
| 889 | 1076 | ||
| 1077 | state.cull.enabled = regs.cull.enabled != 0; | ||
| 890 | if (state.cull.enabled) { | 1078 | if (state.cull.enabled) { |
| 891 | state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); | 1079 | state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); |
| 892 | state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); | 1080 | state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); |
| @@ -919,16 +1107,21 @@ void RasterizerOpenGL::SyncDepthTestState() { | |||
| 919 | state.depth.test_enabled = regs.depth_test_enable != 0; | 1107 | state.depth.test_enabled = regs.depth_test_enable != 0; |
| 920 | state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; | 1108 | state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; |
| 921 | 1109 | ||
| 922 | if (!state.depth.test_enabled) | 1110 | if (!state.depth.test_enabled) { |
| 923 | return; | 1111 | return; |
| 1112 | } | ||
| 924 | 1113 | ||
| 925 | state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); | 1114 | state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); |
| 926 | } | 1115 | } |
| 927 | 1116 | ||
| 928 | void RasterizerOpenGL::SyncStencilTestState() { | 1117 | void RasterizerOpenGL::SyncStencilTestState() { |
| 929 | const auto& regs = system.GPU().Maxwell3D().regs; | 1118 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 930 | state.stencil.test_enabled = regs.stencil_enable != 0; | 1119 | if (!maxwell3d.dirty.stencil_test) { |
| 1120 | return; | ||
| 1121 | } | ||
| 1122 | const auto& regs = maxwell3d.regs; | ||
| 931 | 1123 | ||
| 1124 | state.stencil.test_enabled = regs.stencil_enable != 0; | ||
| 932 | if (!regs.stencil_enable) { | 1125 | if (!regs.stencil_enable) { |
| 933 | return; | 1126 | return; |
| 934 | } | 1127 | } |
| @@ -957,10 +1150,17 @@ void RasterizerOpenGL::SyncStencilTestState() { | |||
| 957 | state.stencil.back.action_depth_fail = GL_KEEP; | 1150 | state.stencil.back.action_depth_fail = GL_KEEP; |
| 958 | state.stencil.back.action_depth_pass = GL_KEEP; | 1151 | state.stencil.back.action_depth_pass = GL_KEEP; |
| 959 | } | 1152 | } |
| 1153 | state.MarkDirtyStencilState(); | ||
| 1154 | maxwell3d.dirty.stencil_test = false; | ||
| 960 | } | 1155 | } |
| 961 | 1156 | ||
| 962 | void RasterizerOpenGL::SyncColorMask() { | 1157 | void RasterizerOpenGL::SyncColorMask() { |
| 963 | const auto& regs = system.GPU().Maxwell3D().regs; | 1158 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1159 | if (!maxwell3d.dirty.color_mask) { | ||
| 1160 | return; | ||
| 1161 | } | ||
| 1162 | const auto& regs = maxwell3d.regs; | ||
| 1163 | |||
| 964 | const std::size_t count = | 1164 | const std::size_t count = |
| 965 | regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; | 1165 | regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; |
| 966 | for (std::size_t i = 0; i < count; i++) { | 1166 | for (std::size_t i = 0; i < count; i++) { |
| @@ -971,6 +1171,9 @@ void RasterizerOpenGL::SyncColorMask() { | |||
| 971 | dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; | 1171 | dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; |
| 972 | dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; | 1172 | dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; |
| 973 | } | 1173 | } |
| 1174 | |||
| 1175 | state.MarkDirtyColorMask(); | ||
| 1176 | maxwell3d.dirty.color_mask = false; | ||
| 974 | } | 1177 | } |
| 975 | 1178 | ||
| 976 | void RasterizerOpenGL::SyncMultiSampleState() { | 1179 | void RasterizerOpenGL::SyncMultiSampleState() { |
| @@ -985,7 +1188,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() { | |||
| 985 | } | 1188 | } |
| 986 | 1189 | ||
| 987 | void RasterizerOpenGL::SyncBlendState() { | 1190 | void RasterizerOpenGL::SyncBlendState() { |
| 988 | const auto& regs = system.GPU().Maxwell3D().regs; | 1191 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1192 | if (!maxwell3d.dirty.blend_state) { | ||
| 1193 | return; | ||
| 1194 | } | ||
| 1195 | const auto& regs = maxwell3d.regs; | ||
| 989 | 1196 | ||
| 990 | state.blend_color.red = regs.blend_color.r; | 1197 | state.blend_color.red = regs.blend_color.r; |
| 991 | state.blend_color.green = regs.blend_color.g; | 1198 | state.blend_color.green = regs.blend_color.g; |
| @@ -1008,6 +1215,8 @@ void RasterizerOpenGL::SyncBlendState() { | |||
| 1008 | for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { | 1215 | for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { |
| 1009 | state.blend[i].enabled = false; | 1216 | state.blend[i].enabled = false; |
| 1010 | } | 1217 | } |
| 1218 | maxwell3d.dirty.blend_state = false; | ||
| 1219 | state.MarkDirtyBlendState(); | ||
| 1011 | return; | 1220 | return; |
| 1012 | } | 1221 | } |
| 1013 | 1222 | ||
| @@ -1024,6 +1233,9 @@ void RasterizerOpenGL::SyncBlendState() { | |||
| 1024 | blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); | 1233 | blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); |
| 1025 | blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); | 1234 | blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); |
| 1026 | } | 1235 | } |
| 1236 | |||
| 1237 | state.MarkDirtyBlendState(); | ||
| 1238 | maxwell3d.dirty.blend_state = false; | ||
| 1027 | } | 1239 | } |
| 1028 | 1240 | ||
| 1029 | void RasterizerOpenGL::SyncLogicOpState() { | 1241 | void RasterizerOpenGL::SyncLogicOpState() { |
| @@ -1075,13 +1287,21 @@ void RasterizerOpenGL::SyncPointState() { | |||
| 1075 | } | 1287 | } |
| 1076 | 1288 | ||
| 1077 | void RasterizerOpenGL::SyncPolygonOffset() { | 1289 | void RasterizerOpenGL::SyncPolygonOffset() { |
| 1078 | const auto& regs = system.GPU().Maxwell3D().regs; | 1290 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1291 | if (!maxwell3d.dirty.polygon_offset) { | ||
| 1292 | return; | ||
| 1293 | } | ||
| 1294 | const auto& regs = maxwell3d.regs; | ||
| 1295 | |||
| 1079 | state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; | 1296 | state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; |
| 1080 | state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; | 1297 | state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; |
| 1081 | state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; | 1298 | state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; |
| 1082 | state.polygon_offset.units = regs.polygon_offset_units; | 1299 | state.polygon_offset.units = regs.polygon_offset_units; |
| 1083 | state.polygon_offset.factor = regs.polygon_offset_factor; | 1300 | state.polygon_offset.factor = regs.polygon_offset_factor; |
| 1084 | state.polygon_offset.clamp = regs.polygon_offset_clamp; | 1301 | state.polygon_offset.clamp = regs.polygon_offset_clamp; |
| 1302 | |||
| 1303 | state.MarkDirtyPolygonOffset(); | ||
| 1304 | maxwell3d.dirty.polygon_offset = false; | ||
| 1085 | } | 1305 | } |
| 1086 | 1306 | ||
| 1087 | void RasterizerOpenGL::SyncAlphaTest() { | 1307 | void RasterizerOpenGL::SyncAlphaTest() { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index bf67e3a70..9d20a4fbf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -24,7 +24,6 @@ | |||
| 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_device.h" | 25 | #include "video_core/renderer_opengl/gl_device.h" |
| 26 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" | 26 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" |
| 27 | #include "video_core/renderer_opengl/gl_global_cache.h" | ||
| 28 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 27 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 29 | #include "video_core/renderer_opengl/gl_sampler_cache.h" | 28 | #include "video_core/renderer_opengl/gl_sampler_cache.h" |
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 29 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| @@ -59,10 +58,13 @@ public: | |||
| 59 | 58 | ||
| 60 | void DrawArrays() override; | 59 | void DrawArrays() override; |
| 61 | void Clear() override; | 60 | void Clear() override; |
| 61 | void DispatchCompute(GPUVAddr code_addr) override; | ||
| 62 | void FlushAll() override; | 62 | void FlushAll() override; |
| 63 | void FlushRegion(CacheAddr addr, u64 size) override; | 63 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 64 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 64 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 65 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 65 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 66 | void FlushCommands() override; | ||
| 67 | void TickFrame() override; | ||
| 66 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 68 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 67 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 69 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
| 68 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 70 | const Tegra::Engines::Fermi2D::Config& copy_config) override; |
| @@ -73,11 +75,6 @@ public: | |||
| 73 | void LoadDiskResources(const std::atomic_bool& stop_loading, | 75 | void LoadDiskResources(const std::atomic_bool& stop_loading, |
| 74 | const VideoCore::DiskResourceLoadCallback& callback) override; | 76 | const VideoCore::DiskResourceLoadCallback& callback) override; |
| 75 | 77 | ||
| 76 | /// Maximum supported size that a constbuffer can have in bytes. | ||
| 77 | static constexpr std::size_t MaxConstbufferSize = 0x10000; | ||
| 78 | static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, | ||
| 79 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | ||
| 80 | |||
| 81 | private: | 78 | private: |
| 82 | struct FramebufferConfigState { | 79 | struct FramebufferConfigState { |
| 83 | bool using_color_fb{}; | 80 | bool using_color_fb{}; |
| @@ -98,30 +95,45 @@ private: | |||
| 98 | 95 | ||
| 99 | /** | 96 | /** |
| 100 | * Configures the color and depth framebuffer states. | 97 | * Configures the color and depth framebuffer states. |
| 101 | * @param must_reconfigure If true, tells the framebuffer to skip the cache and reconfigure | 98 | * |
| 102 | * again. Used by the texture cache to solve texception conflicts | 99 | * @param current_state The current OpenGL state. |
| 103 | * @param use_color_fb If true, configure color framebuffers. | 100 | * @param using_color_fb If true, configure color framebuffers. |
| 104 | * @param using_depth_fb If true, configure the depth/stencil framebuffer. | 101 | * @param using_depth_fb If true, configure the depth/stencil framebuffer. |
| 105 | * @param preserve_contents If true, tries to preserve data from a previously used framebuffer. | 102 | * @param preserve_contents If true, tries to preserve data from a previously used |
| 103 | * framebuffer. | ||
| 106 | * @param single_color_target Specifies if a single color buffer target should be used. | 104 | * @param single_color_target Specifies if a single color buffer target should be used. |
| 105 | * | ||
| 107 | * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture | 106 | * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture |
| 108 | * (requires using_depth_fb to be true) | 107 | * (requires using_depth_fb to be true) |
| 109 | */ | 108 | */ |
| 110 | std::pair<bool, bool> ConfigureFramebuffers( | 109 | std::pair<bool, bool> ConfigureFramebuffers( |
| 111 | OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true, | 110 | OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true, |
| 112 | bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); | 111 | bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); |
| 113 | 112 | ||
| 113 | void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, | ||
| 114 | bool using_depth_fb, bool using_stencil_fb); | ||
| 115 | |||
| 114 | /// Configures the current constbuffers to use for the draw command. | 116 | /// Configures the current constbuffers to use for the draw command. |
| 115 | void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 117 | void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 116 | const Shader& shader); | 118 | const Shader& shader); |
| 117 | 119 | ||
| 120 | /// Configures the current constbuffers to use for the kernel invocation. | ||
| 121 | void SetupComputeConstBuffers(const Shader& kernel); | ||
| 122 | |||
| 118 | /// Configures a constant buffer. | 123 | /// Configures a constant buffer. |
| 119 | void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, | 124 | void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, |
| 120 | const GLShader::ConstBufferEntry& entry); | 125 | const GLShader::ConstBufferEntry& entry); |
| 121 | 126 | ||
| 122 | /// Configures the current global memory entries to use for the draw command. | 127 | /// Configures the current global memory entries to use for the draw command. |
| 123 | void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 128 | void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 124 | const Shader& shader); | 129 | const Shader& shader); |
| 130 | |||
| 131 | /// Configures the current global memory entries to use for the kernel invocation. | ||
| 132 | void SetupComputeGlobalMemory(const Shader& kernel); | ||
| 133 | |||
| 134 | /// Configures a constant buffer. | ||
| 135 | void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | ||
| 136 | std::size_t size); | ||
| 125 | 137 | ||
| 126 | /// Configures the current textures to use for the draw command. Returns shaders texture buffer | 138 | /// Configures the current textures to use for the draw command. Returns shaders texture buffer |
| 127 | /// usage. | 139 | /// usage. |
| @@ -189,7 +201,6 @@ private: | |||
| 189 | 201 | ||
| 190 | TextureCacheOpenGL texture_cache; | 202 | TextureCacheOpenGL texture_cache; |
| 191 | ShaderCacheOpenGL shader_cache; | 203 | ShaderCacheOpenGL shader_cache; |
| 192 | GlobalRegionCacheOpenGL global_cache; | ||
| 193 | SamplerCacheOpenGL sampler_cache; | 204 | SamplerCacheOpenGL sampler_cache; |
| 194 | FramebufferCacheOpenGL framebuffer_cache; | 205 | FramebufferCacheOpenGL framebuffer_cache; |
| 195 | 206 | ||
| @@ -208,6 +219,7 @@ private: | |||
| 208 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 219 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; |
| 209 | OGLBufferCache buffer_cache; | 220 | OGLBufferCache buffer_cache; |
| 210 | 221 | ||
| 222 | VertexArrayPushBuffer vertex_array_pushbuffer; | ||
| 211 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; | 223 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; |
| 212 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; | 224 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; |
| 213 | 225 | ||
| @@ -219,14 +231,19 @@ private: | |||
| 219 | GLuint SetupVertexFormat(); | 231 | GLuint SetupVertexFormat(); |
| 220 | 232 | ||
| 221 | void SetupVertexBuffer(GLuint vao); | 233 | void SetupVertexBuffer(GLuint vao); |
| 234 | void SetupVertexInstances(GLuint vao); | ||
| 222 | 235 | ||
| 223 | DrawParameters SetupDraw(); | 236 | GLintptr SetupIndexBuffer(); |
| 237 | |||
| 238 | DrawParameters SetupDraw(GLintptr index_buffer_offset); | ||
| 224 | 239 | ||
| 225 | void SetupShaders(GLenum primitive_mode); | 240 | void SetupShaders(GLenum primitive_mode); |
| 226 | 241 | ||
| 227 | enum class AccelDraw { Disabled, Arrays, Indexed }; | 242 | enum class AccelDraw { Disabled, Arrays, Indexed }; |
| 228 | AccelDraw accelerate_draw = AccelDraw::Disabled; | 243 | AccelDraw accelerate_draw = AccelDraw::Disabled; |
| 229 | 244 | ||
| 245 | OGLFramebuffer clear_framebuffer; | ||
| 246 | |||
| 230 | using CachedPageMap = boost::icl::interval_map<u64, int>; | 247 | using CachedPageMap = boost::icl::interval_map<u64, int>; |
| 231 | CachedPageMap cached_pages; | 248 | CachedPageMap cached_pages; |
| 232 | }; | 249 | }; |
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h index defbc2d81..34ee37f00 100644 --- a/src/video_core/renderer_opengl/gl_sampler_cache.h +++ b/src/video_core/renderer_opengl/gl_sampler_cache.h | |||
| @@ -17,9 +17,9 @@ public: | |||
| 17 | ~SamplerCacheOpenGL(); | 17 | ~SamplerCacheOpenGL(); |
| 18 | 18 | ||
| 19 | protected: | 19 | protected: |
| 20 | OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; | 20 | OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; |
| 21 | 21 | ||
| 22 | GLuint ToSamplerType(const OGLSampler& sampler) const; | 22 | GLuint ToSamplerType(const OGLSampler& sampler) const override; |
| 23 | }; | 23 | }; |
| 24 | 24 | ||
| 25 | } // namespace OpenGL | 25 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f9b2b03a0..cf6a5cddf 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -23,13 +23,13 @@ namespace OpenGL { | |||
| 23 | 23 | ||
| 24 | using VideoCommon::Shader::ProgramCode; | 24 | using VideoCommon::Shader::ProgramCode; |
| 25 | 25 | ||
| 26 | // One UBO is always reserved for emulation values | 26 | // One UBO is always reserved for emulation values on staged shaders |
| 27 | constexpr u32 RESERVED_UBOS = 1; | 27 | constexpr u32 STAGE_RESERVED_UBOS = 1; |
| 28 | 28 | ||
| 29 | struct UnspecializedShader { | 29 | struct UnspecializedShader { |
| 30 | std::string code; | 30 | std::string code; |
| 31 | GLShader::ShaderEntries entries; | 31 | GLShader::ShaderEntries entries; |
| 32 | Maxwell::ShaderProgram program_type; | 32 | ProgramType program_type; |
| 33 | }; | 33 | }; |
| 34 | 34 | ||
| 35 | namespace { | 35 | namespace { |
| @@ -55,15 +55,17 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g | |||
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | /// Gets the shader type from a Maxwell program type | 57 | /// Gets the shader type from a Maxwell program type |
| 58 | constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) { | 58 | constexpr GLenum GetShaderType(ProgramType program_type) { |
| 59 | switch (program_type) { | 59 | switch (program_type) { |
| 60 | case Maxwell::ShaderProgram::VertexA: | 60 | case ProgramType::VertexA: |
| 61 | case Maxwell::ShaderProgram::VertexB: | 61 | case ProgramType::VertexB: |
| 62 | return GL_VERTEX_SHADER; | 62 | return GL_VERTEX_SHADER; |
| 63 | case Maxwell::ShaderProgram::Geometry: | 63 | case ProgramType::Geometry: |
| 64 | return GL_GEOMETRY_SHADER; | 64 | return GL_GEOMETRY_SHADER; |
| 65 | case Maxwell::ShaderProgram::Fragment: | 65 | case ProgramType::Fragment: |
| 66 | return GL_FRAGMENT_SHADER; | 66 | return GL_FRAGMENT_SHADER; |
| 67 | case ProgramType::Compute: | ||
| 68 | return GL_COMPUTE_SHADER; | ||
| 67 | default: | 69 | default: |
| 68 | return GL_NONE; | 70 | return GL_NONE; |
| 69 | } | 71 | } |
| @@ -100,6 +102,25 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen | |||
| 100 | } | 102 | } |
| 101 | } | 103 | } |
| 102 | 104 | ||
| 105 | ProgramType GetProgramType(Maxwell::ShaderProgram program) { | ||
| 106 | switch (program) { | ||
| 107 | case Maxwell::ShaderProgram::VertexA: | ||
| 108 | return ProgramType::VertexA; | ||
| 109 | case Maxwell::ShaderProgram::VertexB: | ||
| 110 | return ProgramType::VertexB; | ||
| 111 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 112 | return ProgramType::TessellationControl; | ||
| 113 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 114 | return ProgramType::TessellationEval; | ||
| 115 | case Maxwell::ShaderProgram::Geometry: | ||
| 116 | return ProgramType::Geometry; | ||
| 117 | case Maxwell::ShaderProgram::Fragment: | ||
| 118 | return ProgramType::Fragment; | ||
| 119 | } | ||
| 120 | UNREACHABLE(); | ||
| 121 | return {}; | ||
| 122 | } | ||
| 123 | |||
| 103 | /// Calculates the size of a program stream | 124 | /// Calculates the size of a program stream |
| 104 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | 125 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { |
| 105 | constexpr std::size_t start_offset = 10; | 126 | constexpr std::size_t start_offset = 10; |
| @@ -128,11 +149,13 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | |||
| 128 | } | 149 | } |
| 129 | 150 | ||
| 130 | /// Hashes one (or two) program streams | 151 | /// Hashes one (or two) program streams |
| 131 | u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, | 152 | u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, |
| 132 | const ProgramCode& code_b) { | 153 | const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { |
| 133 | u64 unique_identifier = | 154 | if (size_a == 0) { |
| 134 | Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code)); | 155 | size_a = CalculateProgramSize(code); |
| 135 | if (program_type != Maxwell::ShaderProgram::VertexA) { | 156 | } |
| 157 | u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); | ||
| 158 | if (program_type != ProgramType::VertexA) { | ||
| 136 | return unique_identifier; | 159 | return unique_identifier; |
| 137 | } | 160 | } |
| 138 | // VertexA programs include two programs | 161 | // VertexA programs include two programs |
| @@ -140,50 +163,69 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& | |||
| 140 | std::size_t seed = 0; | 163 | std::size_t seed = 0; |
| 141 | boost::hash_combine(seed, unique_identifier); | 164 | boost::hash_combine(seed, unique_identifier); |
| 142 | 165 | ||
| 143 | const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), | 166 | if (size_b == 0) { |
| 144 | CalculateProgramSize(code_b)); | 167 | size_b = CalculateProgramSize(code_b); |
| 168 | } | ||
| 169 | const u64 identifier_b = | ||
| 170 | Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b); | ||
| 145 | boost::hash_combine(seed, identifier_b); | 171 | boost::hash_combine(seed, identifier_b); |
| 146 | return static_cast<u64>(seed); | 172 | return static_cast<u64>(seed); |
| 147 | } | 173 | } |
| 148 | 174 | ||
| 149 | /// Creates an unspecialized program from code streams | 175 | /// Creates an unspecialized program from code streams |
| 150 | GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, | 176 | GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type, |
| 151 | ProgramCode program_code, ProgramCode program_code_b) { | 177 | ProgramCode program_code, ProgramCode program_code_b) { |
| 152 | GLShader::ShaderSetup setup(program_code); | 178 | GLShader::ShaderSetup setup(program_code); |
| 153 | if (program_type == Maxwell::ShaderProgram::VertexA) { | 179 | setup.program.size_a = CalculateProgramSize(program_code); |
| 180 | setup.program.size_b = 0; | ||
| 181 | if (program_type == ProgramType::VertexA) { | ||
| 154 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. | 182 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. |
| 155 | // Conventional HW does not support this, so we combine VertexA and VertexB into one | 183 | // Conventional HW does not support this, so we combine VertexA and VertexB into one |
| 156 | // stage here. | 184 | // stage here. |
| 157 | setup.SetProgramB(program_code_b); | 185 | setup.SetProgramB(program_code_b); |
| 186 | setup.program.size_b = CalculateProgramSize(program_code_b); | ||
| 158 | } | 187 | } |
| 159 | setup.program.unique_identifier = | 188 | setup.program.unique_identifier = GetUniqueIdentifier( |
| 160 | GetUniqueIdentifier(program_type, program_code, program_code_b); | 189 | program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); |
| 161 | 190 | ||
| 162 | switch (program_type) { | 191 | switch (program_type) { |
| 163 | case Maxwell::ShaderProgram::VertexA: | 192 | case ProgramType::VertexA: |
| 164 | case Maxwell::ShaderProgram::VertexB: | 193 | case ProgramType::VertexB: |
| 165 | return GLShader::GenerateVertexShader(device, setup); | 194 | return GLShader::GenerateVertexShader(device, setup); |
| 166 | case Maxwell::ShaderProgram::Geometry: | 195 | case ProgramType::Geometry: |
| 167 | return GLShader::GenerateGeometryShader(device, setup); | 196 | return GLShader::GenerateGeometryShader(device, setup); |
| 168 | case Maxwell::ShaderProgram::Fragment: | 197 | case ProgramType::Fragment: |
| 169 | return GLShader::GenerateFragmentShader(device, setup); | 198 | return GLShader::GenerateFragmentShader(device, setup); |
| 199 | case ProgramType::Compute: | ||
| 200 | return GLShader::GenerateComputeShader(device, setup); | ||
| 170 | default: | 201 | default: |
| 171 | LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); | 202 | UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); |
| 172 | UNREACHABLE(); | ||
| 173 | return {}; | 203 | return {}; |
| 174 | } | 204 | } |
| 175 | } | 205 | } |
| 176 | 206 | ||
| 177 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, | 207 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, |
| 178 | Maxwell::ShaderProgram program_type, const ProgramVariant& variant, | 208 | ProgramType program_type, const ProgramVariant& variant, |
| 179 | bool hint_retrievable = false) { | 209 | bool hint_retrievable = false) { |
| 180 | auto base_bindings{variant.base_bindings}; | 210 | auto base_bindings{variant.base_bindings}; |
| 181 | const auto primitive_mode{variant.primitive_mode}; | 211 | const auto primitive_mode{variant.primitive_mode}; |
| 182 | const auto texture_buffer_usage{variant.texture_buffer_usage}; | 212 | const auto texture_buffer_usage{variant.texture_buffer_usage}; |
| 183 | 213 | ||
| 184 | std::string source = "#version 430 core\n" | 214 | std::string source = "#version 430 core\n" |
| 185 | "#extension GL_ARB_separate_shader_objects : enable\n\n"; | 215 | "#extension GL_ARB_separate_shader_objects : enable\n" |
| 186 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | 216 | "#extension GL_NV_gpu_shader5 : enable\n" |
| 217 | "#extension GL_NV_shader_thread_group : enable\n"; | ||
| 218 | if (entries.shader_viewport_layer_array) { | ||
| 219 | source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; | ||
| 220 | } | ||
| 221 | if (program_type == ProgramType::Compute) { | ||
| 222 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; | ||
| 223 | } | ||
| 224 | source += '\n'; | ||
| 225 | |||
| 226 | if (program_type != ProgramType::Compute) { | ||
| 227 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | ||
| 228 | } | ||
| 187 | 229 | ||
| 188 | for (const auto& cbuf : entries.const_buffers) { | 230 | for (const auto& cbuf : entries.const_buffers) { |
| 189 | source += | 231 | source += |
| @@ -207,17 +249,24 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 207 | if (!texture_buffer_usage.test(i)) { | 249 | if (!texture_buffer_usage.test(i)) { |
| 208 | continue; | 250 | continue; |
| 209 | } | 251 | } |
| 210 | source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i); | 252 | source += fmt::format("#define SAMPLER_{}_IS_BUFFER\n", i); |
| 253 | } | ||
| 254 | if (texture_buffer_usage.any()) { | ||
| 255 | source += '\n'; | ||
| 211 | } | 256 | } |
| 212 | 257 | ||
| 213 | if (program_type == Maxwell::ShaderProgram::Geometry) { | 258 | if (program_type == ProgramType::Geometry) { |
| 214 | const auto [glsl_topology, debug_name, max_vertices] = | 259 | const auto [glsl_topology, debug_name, max_vertices] = |
| 215 | GetPrimitiveDescription(primitive_mode); | 260 | GetPrimitiveDescription(primitive_mode); |
| 216 | 261 | ||
| 217 | source += "layout (" + std::string(glsl_topology) + ") in;\n"; | 262 | source += "layout (" + std::string(glsl_topology) + ") in;\n\n"; |
| 218 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; | 263 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; |
| 219 | } | 264 | } |
| 265 | if (program_type == ProgramType::Compute) { | ||
| 266 | source += "layout (local_size_variable) in;\n"; | ||
| 267 | } | ||
| 220 | 268 | ||
| 269 | source += '\n'; | ||
| 221 | source += code; | 270 | source += code; |
| 222 | 271 | ||
| 223 | OGLShader shader; | 272 | OGLShader shader; |
| @@ -244,9 +293,9 @@ std::set<GLenum> GetSupportedFormats() { | |||
| 244 | 293 | ||
| 245 | } // Anonymous namespace | 294 | } // Anonymous namespace |
| 246 | 295 | ||
| 247 | CachedShader::CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, | 296 | CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 248 | GLShader::ProgramResult result) | 297 | GLShader::ProgramResult result) |
| 249 | : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr}, | 298 | : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr}, |
| 250 | unique_identifier{params.unique_identifier}, program_type{program_type}, | 299 | unique_identifier{params.unique_identifier}, program_type{program_type}, |
| 251 | disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, | 300 | disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, |
| 252 | entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} | 301 | entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} |
| @@ -257,29 +306,50 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | |||
| 257 | ProgramCode&& program_code_b) { | 306 | ProgramCode&& program_code_b) { |
| 258 | const auto code_size{CalculateProgramSize(program_code)}; | 307 | const auto code_size{CalculateProgramSize(program_code)}; |
| 259 | const auto code_size_b{CalculateProgramSize(program_code_b)}; | 308 | const auto code_size_b{CalculateProgramSize(program_code_b)}; |
| 260 | auto result{CreateProgram(params.device, program_type, program_code, program_code_b)}; | 309 | auto result{ |
| 310 | CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)}; | ||
| 261 | if (result.first.empty()) { | 311 | if (result.first.empty()) { |
| 262 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now | 312 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now |
| 263 | return {}; | 313 | return {}; |
| 264 | } | 314 | } |
| 265 | 315 | ||
| 266 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw( | 316 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw( |
| 267 | params.unique_identifier, program_type, static_cast<u32>(code_size / sizeof(u64)), | 317 | params.unique_identifier, GetProgramType(program_type), |
| 268 | static_cast<u32>(code_size_b / sizeof(u64)), std::move(program_code), | 318 | static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), |
| 269 | std::move(program_code_b))); | 319 | std::move(program_code), std::move(program_code_b))); |
| 270 | 320 | ||
| 271 | return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); | 321 | return std::shared_ptr<CachedShader>( |
| 322 | new CachedShader(params, GetProgramType(program_type), std::move(result))); | ||
| 272 | } | 323 | } |
| 273 | 324 | ||
| 274 | Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, | 325 | Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, |
| 275 | Maxwell::ShaderProgram program_type, | 326 | Maxwell::ShaderProgram program_type, |
| 276 | GLShader::ProgramResult result) { | 327 | GLShader::ProgramResult result) { |
| 277 | return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); | 328 | return std::shared_ptr<CachedShader>( |
| 329 | new CachedShader(params, GetProgramType(program_type), std::move(result))); | ||
| 330 | } | ||
| 331 | |||
| 332 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { | ||
| 333 | auto result{CreateProgram(params.device, ProgramType::Compute, code, {})}; | ||
| 334 | |||
| 335 | const auto code_size{CalculateProgramSize(code)}; | ||
| 336 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, | ||
| 337 | static_cast<u32>(code_size / sizeof(u64)), 0, | ||
| 338 | std::move(code), {})); | ||
| 339 | |||
| 340 | return std::shared_ptr<CachedShader>( | ||
| 341 | new CachedShader(params, ProgramType::Compute, std::move(result))); | ||
| 342 | } | ||
| 343 | |||
| 344 | Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, | ||
| 345 | GLShader::ProgramResult result) { | ||
| 346 | return std::shared_ptr<CachedShader>( | ||
| 347 | new CachedShader(params, ProgramType::Compute, std::move(result))); | ||
| 278 | } | 348 | } |
| 279 | 349 | ||
| 280 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { | 350 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { |
| 281 | GLuint handle{}; | 351 | GLuint handle{}; |
| 282 | if (program_type == Maxwell::ShaderProgram::Geometry) { | 352 | if (program_type == ProgramType::Geometry) { |
| 283 | handle = GetGeometryShader(variant); | 353 | handle = GetGeometryShader(variant); |
| 284 | } else { | 354 | } else { |
| 285 | const auto [entry, is_cache_miss] = programs.try_emplace(variant); | 355 | const auto [entry, is_cache_miss] = programs.try_emplace(variant); |
| @@ -297,8 +367,11 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar | |||
| 297 | handle = program->handle; | 367 | handle = program->handle; |
| 298 | } | 368 | } |
| 299 | 369 | ||
| 300 | auto base_bindings{variant.base_bindings}; | 370 | auto base_bindings = variant.base_bindings; |
| 301 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS; | 371 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()); |
| 372 | if (program_type != ProgramType::Compute) { | ||
| 373 | base_bindings.cbuf += STAGE_RESERVED_UBOS; | ||
| 374 | } | ||
| 302 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); | 375 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); |
| 303 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); | 376 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); |
| 304 | 377 | ||
| @@ -561,7 +634,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia | |||
| 561 | } | 634 | } |
| 562 | 635 | ||
| 563 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 636 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| 564 | if (!system.GPU().Maxwell3D().dirty_flags.shaders) { | 637 | if (!system.GPU().Maxwell3D().dirty.shaders) { |
| 565 | return last_shaders[static_cast<std::size_t>(program)]; | 638 | return last_shaders[static_cast<std::size_t>(program)]; |
| 566 | } | 639 | } |
| 567 | 640 | ||
| @@ -578,13 +651,15 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 578 | // No shader found - create a new one | 651 | // No shader found - create a new one |
| 579 | ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; | 652 | ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; |
| 580 | ProgramCode program_code_b; | 653 | ProgramCode program_code_b; |
| 581 | if (program == Maxwell::ShaderProgram::VertexA) { | 654 | const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; |
| 655 | if (is_program_a) { | ||
| 582 | const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; | 656 | const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; |
| 583 | program_code_b = GetShaderCode(memory_manager, program_addr_b, | 657 | program_code_b = GetShaderCode(memory_manager, program_addr_b, |
| 584 | memory_manager.GetPointer(program_addr_b)); | 658 | memory_manager.GetPointer(program_addr_b)); |
| 585 | } | 659 | } |
| 586 | 660 | ||
| 587 | const auto unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); | 661 | const auto unique_identifier = |
| 662 | GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); | ||
| 588 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; | 663 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; |
| 589 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, | 664 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, |
| 590 | host_ptr, unique_identifier}; | 665 | host_ptr, unique_identifier}; |
| @@ -601,4 +676,30 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 601 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 676 | return last_shaders[static_cast<std::size_t>(program)] = shader; |
| 602 | } | 677 | } |
| 603 | 678 | ||
| 679 | Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | ||
| 680 | auto& memory_manager{system.GPU().MemoryManager()}; | ||
| 681 | const auto host_ptr{memory_manager.GetPointer(code_addr)}; | ||
| 682 | auto kernel = TryGet(host_ptr); | ||
| 683 | if (kernel) { | ||
| 684 | return kernel; | ||
| 685 | } | ||
| 686 | |||
| 687 | // No kernel found - create a new one | ||
| 688 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | ||
| 689 | const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; | ||
| 690 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | ||
| 691 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, | ||
| 692 | host_ptr, unique_identifier}; | ||
| 693 | |||
| 694 | const auto found = precompiled_shaders.find(unique_identifier); | ||
| 695 | if (found == precompiled_shaders.end()) { | ||
| 696 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); | ||
| 697 | } else { | ||
| 698 | kernel = CachedShader::CreateKernelFromCache(params, found->second); | ||
| 699 | } | ||
| 700 | |||
| 701 | Register(kernel); | ||
| 702 | return kernel; | ||
| 703 | } | ||
| 704 | |||
| 604 | } // namespace OpenGL | 705 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index bbb53cdf4..2c8faf855 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -61,6 +61,11 @@ public: | |||
| 61 | Maxwell::ShaderProgram program_type, | 61 | Maxwell::ShaderProgram program_type, |
| 62 | GLShader::ProgramResult result); | 62 | GLShader::ProgramResult result); |
| 63 | 63 | ||
| 64 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code); | ||
| 65 | |||
| 66 | static Shader CreateKernelFromCache(const ShaderParameters& params, | ||
| 67 | GLShader::ProgramResult result); | ||
| 68 | |||
| 64 | VAddr GetCpuAddr() const override { | 69 | VAddr GetCpuAddr() const override { |
| 65 | return cpu_addr; | 70 | return cpu_addr; |
| 66 | } | 71 | } |
| @@ -78,7 +83,7 @@ public: | |||
| 78 | std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); | 83 | std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); |
| 79 | 84 | ||
| 80 | private: | 85 | private: |
| 81 | explicit CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, | 86 | explicit CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 82 | GLShader::ProgramResult result); | 87 | GLShader::ProgramResult result); |
| 83 | 88 | ||
| 84 | // Geometry programs. These are needed because GLSL needs an input topology but it's not | 89 | // Geometry programs. These are needed because GLSL needs an input topology but it's not |
| @@ -101,10 +106,9 @@ private: | |||
| 101 | 106 | ||
| 102 | ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; | 107 | ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; |
| 103 | 108 | ||
| 104 | u8* host_ptr{}; | ||
| 105 | VAddr cpu_addr{}; | 109 | VAddr cpu_addr{}; |
| 106 | u64 unique_identifier{}; | 110 | u64 unique_identifier{}; |
| 107 | Maxwell::ShaderProgram program_type{}; | 111 | ProgramType program_type{}; |
| 108 | ShaderDiskCacheOpenGL& disk_cache; | 112 | ShaderDiskCacheOpenGL& disk_cache; |
| 109 | const PrecompiledPrograms& precompiled_programs; | 113 | const PrecompiledPrograms& precompiled_programs; |
| 110 | 114 | ||
| @@ -132,6 +136,9 @@ public: | |||
| 132 | /// Gets the current specified shader stage program | 136 | /// Gets the current specified shader stage program |
| 133 | Shader GetStageProgram(Maxwell::ShaderProgram program); | 137 | Shader GetStageProgram(Maxwell::ShaderProgram program); |
| 134 | 138 | ||
| 139 | /// Gets a compute kernel in the passed address | ||
| 140 | Shader GetComputeKernel(GPUVAddr code_addr); | ||
| 141 | |||
| 135 | protected: | 142 | protected: |
| 136 | // We do not have to flush this cache as things in it are never modified by us. | 143 | // We do not have to flush this cache as things in it are never modified by us. |
| 137 | void FlushObjectInner(const Shader& object) override {} | 144 | void FlushObjectInner(const Shader& object) override {} |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 5f2f1510c..359d58cbe 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "common/alignment.h" | 14 | #include "common/alignment.h" |
| 15 | #include "common/assert.h" | 15 | #include "common/assert.h" |
| 16 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 17 | #include "common/logging/log.h" | ||
| 17 | #include "video_core/engines/maxwell_3d.h" | 18 | #include "video_core/engines/maxwell_3d.h" |
| 18 | #include "video_core/renderer_opengl/gl_device.h" | 19 | #include "video_core/renderer_opengl/gl_device.h" |
| 19 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 20 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| @@ -36,7 +37,6 @@ using namespace std::string_literals; | |||
| 36 | using namespace VideoCommon::Shader; | 37 | using namespace VideoCommon::Shader; |
| 37 | 38 | ||
| 38 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 39 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 39 | using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | ||
| 40 | using Operation = const OperationNode&; | 40 | using Operation = const OperationNode&; |
| 41 | 41 | ||
| 42 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | 42 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| @@ -46,7 +46,7 @@ using TextureArgument = std::pair<Type, Node>; | |||
| 46 | using TextureIR = std::variant<TextureAoffi, TextureArgument>; | 46 | using TextureIR = std::variant<TextureAoffi, TextureArgument>; |
| 47 | 47 | ||
| 48 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | 48 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = |
| 49 | static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); | 49 | static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); |
| 50 | 50 | ||
| 51 | class ShaderWriter { | 51 | class ShaderWriter { |
| 52 | public: | 52 | public: |
| @@ -161,9 +161,13 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 161 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | 161 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); |
| 162 | } | 162 | } |
| 163 | 163 | ||
| 164 | constexpr bool IsVertexShader(ProgramType stage) { | ||
| 165 | return stage == ProgramType::VertexA || stage == ProgramType::VertexB; | ||
| 166 | } | ||
| 167 | |||
| 164 | class GLSLDecompiler final { | 168 | class GLSLDecompiler final { |
| 165 | public: | 169 | public: |
| 166 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, | 170 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage, |
| 167 | std::string suffix) | 171 | std::string suffix) |
| 168 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} | 172 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} |
| 169 | 173 | ||
| @@ -191,10 +195,12 @@ public: | |||
| 191 | 195 | ||
| 192 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems | 196 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems |
| 193 | // unlikely that shaders will use 20 nested SSYs and PBKs. | 197 | // unlikely that shaders will use 20 nested SSYs and PBKs. |
| 194 | constexpr u32 FLOW_STACK_SIZE = 20; | 198 | if (!ir.IsFlowStackDisabled()) { |
| 195 | for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { | 199 | constexpr u32 FLOW_STACK_SIZE = 20; |
| 196 | code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); | 200 | for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { |
| 197 | code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); | 201 | code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); |
| 202 | code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); | ||
| 203 | } | ||
| 198 | } | 204 | } |
| 199 | 205 | ||
| 200 | code.AddLine("while (true) {{"); | 206 | code.AddLine("while (true) {{"); |
| @@ -244,24 +250,22 @@ public: | |||
| 244 | usage.is_read, usage.is_written); | 250 | usage.is_read, usage.is_written); |
| 245 | } | 251 | } |
| 246 | entries.clip_distances = ir.GetClipDistances(); | 252 | entries.clip_distances = ir.GetClipDistances(); |
| 253 | entries.shader_viewport_layer_array = | ||
| 254 | IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex()); | ||
| 247 | entries.shader_length = ir.GetLength(); | 255 | entries.shader_length = ir.GetLength(); |
| 248 | return entries; | 256 | return entries; |
| 249 | } | 257 | } |
| 250 | 258 | ||
| 251 | private: | 259 | private: |
| 252 | using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation); | ||
| 253 | using OperationDecompilersArray = | ||
| 254 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; | ||
| 255 | |||
| 256 | void DeclareVertex() { | 260 | void DeclareVertex() { |
| 257 | if (stage != ShaderStage::Vertex) | 261 | if (!IsVertexShader(stage)) |
| 258 | return; | 262 | return; |
| 259 | 263 | ||
| 260 | DeclareVertexRedeclarations(); | 264 | DeclareVertexRedeclarations(); |
| 261 | } | 265 | } |
| 262 | 266 | ||
| 263 | void DeclareGeometry() { | 267 | void DeclareGeometry() { |
| 264 | if (stage != ShaderStage::Geometry) { | 268 | if (stage != ProgramType::Geometry) { |
| 265 | return; | 269 | return; |
| 266 | } | 270 | } |
| 267 | 271 | ||
| @@ -280,22 +284,35 @@ private: | |||
| 280 | } | 284 | } |
| 281 | 285 | ||
| 282 | void DeclareVertexRedeclarations() { | 286 | void DeclareVertexRedeclarations() { |
| 283 | bool clip_distances_declared = false; | ||
| 284 | |||
| 285 | code.AddLine("out gl_PerVertex {{"); | 287 | code.AddLine("out gl_PerVertex {{"); |
| 286 | ++code.scope; | 288 | ++code.scope; |
| 287 | 289 | ||
| 288 | code.AddLine("vec4 gl_Position;"); | 290 | code.AddLine("vec4 gl_Position;"); |
| 289 | 291 | ||
| 290 | for (const auto o : ir.GetOutputAttributes()) { | 292 | for (const auto attribute : ir.GetOutputAttributes()) { |
| 291 | if (o == Attribute::Index::PointSize) | 293 | if (attribute == Attribute::Index::ClipDistances0123 || |
| 292 | code.AddLine("float gl_PointSize;"); | 294 | attribute == Attribute::Index::ClipDistances4567) { |
| 293 | if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 || | ||
| 294 | o == Attribute::Index::ClipDistances4567)) { | ||
| 295 | code.AddLine("float gl_ClipDistance[];"); | 295 | code.AddLine("float gl_ClipDistance[];"); |
| 296 | clip_distances_declared = true; | 296 | break; |
| 297 | } | 297 | } |
| 298 | } | 298 | } |
| 299 | if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) { | ||
| 300 | if (ir.UsesLayer()) { | ||
| 301 | code.AddLine("int gl_Layer;"); | ||
| 302 | } | ||
| 303 | if (ir.UsesViewportIndex()) { | ||
| 304 | code.AddLine("int gl_ViewportIndex;"); | ||
| 305 | } | ||
| 306 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) && | ||
| 307 | !device.HasVertexViewportLayer()) { | ||
| 308 | LOG_ERROR( | ||
| 309 | Render_OpenGL, | ||
| 310 | "GL_ARB_shader_viewport_layer_array is not available and its required by a shader"); | ||
| 311 | } | ||
| 312 | |||
| 313 | if (ir.UsesPointSize()) { | ||
| 314 | code.AddLine("float gl_PointSize;"); | ||
| 315 | } | ||
| 299 | 316 | ||
| 300 | --code.scope; | 317 | --code.scope; |
| 301 | code.AddLine("}};"); | 318 | code.AddLine("}};"); |
| @@ -323,11 +340,16 @@ private: | |||
| 323 | } | 340 | } |
| 324 | 341 | ||
| 325 | void DeclareLocalMemory() { | 342 | void DeclareLocalMemory() { |
| 326 | if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { | 343 | // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at |
| 327 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; | 344 | // specialization time. |
| 328 | code.AddLine("float {}[{}];", GetLocalMemory(), element_count); | 345 | const u64 local_memory_size = |
| 329 | code.AddNewLine(); | 346 | stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize(); |
| 347 | if (local_memory_size == 0) { | ||
| 348 | return; | ||
| 330 | } | 349 | } |
| 350 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; | ||
| 351 | code.AddLine("float {}[{}];", GetLocalMemory(), element_count); | ||
| 352 | code.AddNewLine(); | ||
| 331 | } | 353 | } |
| 332 | 354 | ||
| 333 | void DeclareInternalFlags() { | 355 | void DeclareInternalFlags() { |
| @@ -381,12 +403,12 @@ private: | |||
| 381 | const u32 location{GetGenericAttributeIndex(index)}; | 403 | const u32 location{GetGenericAttributeIndex(index)}; |
| 382 | 404 | ||
| 383 | std::string name{GetInputAttribute(index)}; | 405 | std::string name{GetInputAttribute(index)}; |
| 384 | if (stage == ShaderStage::Geometry) { | 406 | if (stage == ProgramType::Geometry) { |
| 385 | name = "gs_" + name + "[]"; | 407 | name = "gs_" + name + "[]"; |
| 386 | } | 408 | } |
| 387 | 409 | ||
| 388 | std::string suffix; | 410 | std::string suffix; |
| 389 | if (stage == ShaderStage::Fragment) { | 411 | if (stage == ProgramType::Fragment) { |
| 390 | const auto input_mode{header.ps.GetAttributeUse(location)}; | 412 | const auto input_mode{header.ps.GetAttributeUse(location)}; |
| 391 | if (skip_unused && input_mode == AttributeUse::Unused) { | 413 | if (skip_unused && input_mode == AttributeUse::Unused) { |
| 392 | return; | 414 | return; |
| @@ -398,7 +420,7 @@ private: | |||
| 398 | } | 420 | } |
| 399 | 421 | ||
| 400 | void DeclareOutputAttributes() { | 422 | void DeclareOutputAttributes() { |
| 401 | if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) { | 423 | if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) { |
| 402 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { | 424 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { |
| 403 | DeclareOutputAttribute(ToGenericAttribute(i)); | 425 | DeclareOutputAttribute(ToGenericAttribute(i)); |
| 404 | } | 426 | } |
| @@ -520,7 +542,7 @@ private: | |||
| 520 | constexpr u32 element_stride{4}; | 542 | constexpr u32 element_stride{4}; |
| 521 | const u32 address{generic_base + index * generic_stride + element * element_stride}; | 543 | const u32 address{generic_base + index * generic_stride + element * element_stride}; |
| 522 | 544 | ||
| 523 | const bool declared{stage != ShaderStage::Fragment || | 545 | const bool declared{stage != ProgramType::Fragment || |
| 524 | header.ps.GetAttributeUse(index) != AttributeUse::Unused}; | 546 | header.ps.GetAttributeUse(index) != AttributeUse::Unused}; |
| 525 | const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; | 547 | const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; |
| 526 | code.AddLine("case 0x{:x}: return {};", address, value); | 548 | code.AddLine("case 0x{:x}: return {};", address, value); |
| @@ -543,7 +565,7 @@ private: | |||
| 543 | case Tegra::Shader::ImageType::Texture1D: | 565 | case Tegra::Shader::ImageType::Texture1D: |
| 544 | return "image1D"; | 566 | return "image1D"; |
| 545 | case Tegra::Shader::ImageType::TextureBuffer: | 567 | case Tegra::Shader::ImageType::TextureBuffer: |
| 546 | return "bufferImage"; | 568 | return "imageBuffer"; |
| 547 | case Tegra::Shader::ImageType::Texture1DArray: | 569 | case Tegra::Shader::ImageType::Texture1DArray: |
| 548 | return "image1DArray"; | 570 | return "image1DArray"; |
| 549 | case Tegra::Shader::ImageType::Texture2D: | 571 | case Tegra::Shader::ImageType::Texture2D: |
| @@ -624,7 +646,7 @@ private: | |||
| 624 | } | 646 | } |
| 625 | 647 | ||
| 626 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | 648 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { |
| 627 | UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, | 649 | UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry, |
| 628 | "Physical attributes in geometry shaders are not implemented"); | 650 | "Physical attributes in geometry shaders are not implemented"); |
| 629 | if (abuf->IsPhysicalBuffer()) { | 651 | if (abuf->IsPhysicalBuffer()) { |
| 630 | return fmt::format("readPhysicalAttribute(ftou({}))", | 652 | return fmt::format("readPhysicalAttribute(ftou({}))", |
| @@ -679,6 +701,9 @@ private: | |||
| 679 | } | 701 | } |
| 680 | 702 | ||
| 681 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | 703 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { |
| 704 | if (stage == ProgramType::Compute) { | ||
| 705 | LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); | ||
| 706 | } | ||
| 682 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | 707 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); |
| 683 | } | 708 | } |
| 684 | 709 | ||
| @@ -708,7 +733,7 @@ private: | |||
| 708 | 733 | ||
| 709 | std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { | 734 | std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { |
| 710 | const auto GeometryPass = [&](std::string_view name) { | 735 | const auto GeometryPass = [&](std::string_view name) { |
| 711 | if (stage == ShaderStage::Geometry && buffer) { | 736 | if (stage == ProgramType::Geometry && buffer) { |
| 712 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games | 737 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games |
| 713 | // set an 0x80000000 index for those and the shader fails to build. Find out why | 738 | // set an 0x80000000 index for those and the shader fails to build. Find out why |
| 714 | // this happens and what's its intent. | 739 | // this happens and what's its intent. |
| @@ -720,10 +745,10 @@ private: | |||
| 720 | switch (attribute) { | 745 | switch (attribute) { |
| 721 | case Attribute::Index::Position: | 746 | case Attribute::Index::Position: |
| 722 | switch (stage) { | 747 | switch (stage) { |
| 723 | case ShaderStage::Geometry: | 748 | case ProgramType::Geometry: |
| 724 | return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), | 749 | return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), |
| 725 | GetSwizzle(element)); | 750 | GetSwizzle(element)); |
| 726 | case ShaderStage::Fragment: | 751 | case ProgramType::Fragment: |
| 727 | return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); | 752 | return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); |
| 728 | default: | 753 | default: |
| 729 | UNREACHABLE(); | 754 | UNREACHABLE(); |
| @@ -744,7 +769,7 @@ private: | |||
| 744 | // TODO(Subv): Find out what the values are for the first two elements when inside a | 769 | // TODO(Subv): Find out what the values are for the first two elements when inside a |
| 745 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | 770 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval |
| 746 | // shader. | 771 | // shader. |
| 747 | ASSERT(stage == ShaderStage::Vertex); | 772 | ASSERT(IsVertexShader(stage)); |
| 748 | switch (element) { | 773 | switch (element) { |
| 749 | case 2: | 774 | case 2: |
| 750 | // Config pack's first value is instance_id. | 775 | // Config pack's first value is instance_id. |
| @@ -756,7 +781,7 @@ private: | |||
| 756 | return "0"; | 781 | return "0"; |
| 757 | case Attribute::Index::FrontFacing: | 782 | case Attribute::Index::FrontFacing: |
| 758 | // TODO(Subv): Find out what the values are for the other elements. | 783 | // TODO(Subv): Find out what the values are for the other elements. |
| 759 | ASSERT(stage == ShaderStage::Fragment); | 784 | ASSERT(stage == ProgramType::Fragment); |
| 760 | switch (element) { | 785 | switch (element) { |
| 761 | case 3: | 786 | case 3: |
| 762 | return "itof(gl_FrontFacing ? -1 : 0)"; | 787 | return "itof(gl_FrontFacing ? -1 : 0)"; |
| @@ -778,7 +803,7 @@ private: | |||
| 778 | return value; | 803 | return value; |
| 779 | } | 804 | } |
| 780 | // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders | 805 | // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders |
| 781 | const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; | 806 | const std::string precise = stage != ProgramType::Fragment ? "precise " : ""; |
| 782 | 807 | ||
| 783 | const std::string temporary = code.GenerateTemporary(); | 808 | const std::string temporary = code.GenerateTemporary(); |
| 784 | code.AddLine("{}float {} = {};", precise, temporary, value); | 809 | code.AddLine("{}float {} = {};", precise, temporary, value); |
| @@ -803,6 +828,45 @@ private: | |||
| 803 | return CastOperand(VisitOperand(operation, operand_index), type); | 828 | return CastOperand(VisitOperand(operation, operand_index), type); |
| 804 | } | 829 | } |
| 805 | 830 | ||
| 831 | std::optional<std::pair<std::string, bool>> GetOutputAttribute(const AbufNode* abuf) { | ||
| 832 | switch (const auto attribute = abuf->GetIndex()) { | ||
| 833 | case Attribute::Index::Position: | ||
| 834 | return std::make_pair("gl_Position"s + GetSwizzle(abuf->GetElement()), false); | ||
| 835 | case Attribute::Index::LayerViewportPointSize: | ||
| 836 | switch (abuf->GetElement()) { | ||
| 837 | case 0: | ||
| 838 | UNIMPLEMENTED(); | ||
| 839 | return {}; | ||
| 840 | case 1: | ||
| 841 | if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { | ||
| 842 | return {}; | ||
| 843 | } | ||
| 844 | return std::make_pair("gl_Layer", true); | ||
| 845 | case 2: | ||
| 846 | if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { | ||
| 847 | return {}; | ||
| 848 | } | ||
| 849 | return std::make_pair("gl_ViewportIndex", true); | ||
| 850 | case 3: | ||
| 851 | UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader"); | ||
| 852 | return std::make_pair("gl_PointSize", false); | ||
| 853 | } | ||
| 854 | return {}; | ||
| 855 | case Attribute::Index::ClipDistances0123: | ||
| 856 | return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), false); | ||
| 857 | case Attribute::Index::ClipDistances4567: | ||
| 858 | return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), | ||
| 859 | false); | ||
| 860 | default: | ||
| 861 | if (IsGenericAttribute(attribute)) { | ||
| 862 | return std::make_pair( | ||
| 863 | GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), false); | ||
| 864 | } | ||
| 865 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); | ||
| 866 | return {}; | ||
| 867 | } | ||
| 868 | } | ||
| 869 | |||
| 806 | std::string CastOperand(const std::string& value, Type type) const { | 870 | std::string CastOperand(const std::string& value, Type type) const { |
| 807 | switch (type) { | 871 | switch (type) { |
| 808 | case Type::Bool: | 872 | case Type::Bool: |
| @@ -999,6 +1063,8 @@ private: | |||
| 999 | const Node& src = operation[1]; | 1063 | const Node& src = operation[1]; |
| 1000 | 1064 | ||
| 1001 | std::string target; | 1065 | std::string target; |
| 1066 | bool is_integer = false; | ||
| 1067 | |||
| 1002 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | 1068 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { |
| 1003 | if (gpr->GetIndex() == Register::ZeroIndex) { | 1069 | if (gpr->GetIndex() == Register::ZeroIndex) { |
| 1004 | // Writing to Register::ZeroIndex is a no op | 1070 | // Writing to Register::ZeroIndex is a no op |
| @@ -1007,27 +1073,16 @@ private: | |||
| 1007 | target = GetRegister(gpr->GetIndex()); | 1073 | target = GetRegister(gpr->GetIndex()); |
| 1008 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { | 1074 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { |
| 1009 | UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); | 1075 | UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); |
| 1010 | 1076 | const auto result = GetOutputAttribute(abuf); | |
| 1011 | target = [&]() -> std::string { | 1077 | if (!result) { |
| 1012 | switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { | 1078 | return {}; |
| 1013 | case Attribute::Index::Position: | 1079 | } |
| 1014 | return "gl_Position"s + GetSwizzle(abuf->GetElement()); | 1080 | target = result->first; |
| 1015 | case Attribute::Index::PointSize: | 1081 | is_integer = result->second; |
| 1016 | return "gl_PointSize"; | ||
| 1017 | case Attribute::Index::ClipDistances0123: | ||
| 1018 | return fmt::format("gl_ClipDistance[{}]", abuf->GetElement()); | ||
| 1019 | case Attribute::Index::ClipDistances4567: | ||
| 1020 | return fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4); | ||
| 1021 | default: | ||
| 1022 | if (IsGenericAttribute(attribute)) { | ||
| 1023 | return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()); | ||
| 1024 | } | ||
| 1025 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", | ||
| 1026 | static_cast<u32>(attribute)); | ||
| 1027 | return "0"; | ||
| 1028 | } | ||
| 1029 | }(); | ||
| 1030 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | 1082 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { |
| 1083 | if (stage == ProgramType::Compute) { | ||
| 1084 | LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); | ||
| 1085 | } | ||
| 1031 | target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | 1086 | target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); |
| 1032 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | 1087 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |
| 1033 | const std::string real = Visit(gmem->GetRealAddress()); | 1088 | const std::string real = Visit(gmem->GetRealAddress()); |
| @@ -1038,7 +1093,11 @@ private: | |||
| 1038 | UNREACHABLE_MSG("Assign called without a proper target"); | 1093 | UNREACHABLE_MSG("Assign called without a proper target"); |
| 1039 | } | 1094 | } |
| 1040 | 1095 | ||
| 1041 | code.AddLine("{} = {};", target, Visit(src)); | 1096 | if (is_integer) { |
| 1097 | code.AddLine("{} = ftoi({});", target, Visit(src)); | ||
| 1098 | } else { | ||
| 1099 | code.AddLine("{} = {};", target, Visit(src)); | ||
| 1100 | } | ||
| 1042 | return {}; | 1101 | return {}; |
| 1043 | } | 1102 | } |
| 1044 | 1103 | ||
| @@ -1077,6 +1136,16 @@ private: | |||
| 1077 | Type::Float); | 1136 | Type::Float); |
| 1078 | } | 1137 | } |
| 1079 | 1138 | ||
| 1139 | std::string FCastHalf0(Operation operation) { | ||
| 1140 | const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat); | ||
| 1141 | return fmt::format("({})[0]", op_a); | ||
| 1142 | } | ||
| 1143 | |||
| 1144 | std::string FCastHalf1(Operation operation) { | ||
| 1145 | const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat); | ||
| 1146 | return fmt::format("({})[1]", op_a); | ||
| 1147 | } | ||
| 1148 | |||
| 1080 | template <Type type> | 1149 | template <Type type> |
| 1081 | std::string Min(Operation operation) { | 1150 | std::string Min(Operation operation) { |
| 1082 | return GenerateBinaryCall(operation, "min", type, type, type); | 1151 | return GenerateBinaryCall(operation, "min", type, type, type); |
| @@ -1233,6 +1302,11 @@ private: | |||
| 1233 | return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); | 1302 | return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); |
| 1234 | } | 1303 | } |
| 1235 | 1304 | ||
| 1305 | std::string HCastFloat(Operation operation) { | ||
| 1306 | const std::string op_a = VisitOperand(operation, 0, Type::Float); | ||
| 1307 | return fmt::format("fromHalf2(vec2({}, 0.0f))", op_a); | ||
| 1308 | } | ||
| 1309 | |||
| 1236 | std::string HUnpack(Operation operation) { | 1310 | std::string HUnpack(Operation operation) { |
| 1237 | const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; | 1311 | const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; |
| 1238 | const auto value = [&]() -> std::string { | 1312 | const auto value = [&]() -> std::string { |
| @@ -1351,14 +1425,10 @@ private: | |||
| 1351 | return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); | 1425 | return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); |
| 1352 | } | 1426 | } |
| 1353 | 1427 | ||
| 1354 | std::string LogicalAll2(Operation operation) { | 1428 | std::string LogicalAnd2(Operation operation) { |
| 1355 | return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); | 1429 | return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); |
| 1356 | } | 1430 | } |
| 1357 | 1431 | ||
| 1358 | std::string LogicalAny2(Operation operation) { | ||
| 1359 | return GenerateUnary(operation, "any", Type::Bool, Type::Bool2); | ||
| 1360 | } | ||
| 1361 | |||
| 1362 | template <bool with_nan> | 1432 | template <bool with_nan> |
| 1363 | std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { | 1433 | std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { |
| 1364 | const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, | 1434 | const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, |
| @@ -1555,6 +1625,14 @@ private: | |||
| 1555 | return {}; | 1625 | return {}; |
| 1556 | } | 1626 | } |
| 1557 | 1627 | ||
| 1628 | std::string BranchIndirect(Operation operation) { | ||
| 1629 | const std::string op_a = VisitOperand(operation, 0, Type::Uint); | ||
| 1630 | |||
| 1631 | code.AddLine("jmp_to = {};", op_a); | ||
| 1632 | code.AddLine("break;"); | ||
| 1633 | return {}; | ||
| 1634 | } | ||
| 1635 | |||
| 1558 | std::string PushFlowStack(Operation operation) { | 1636 | std::string PushFlowStack(Operation operation) { |
| 1559 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | 1637 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); |
| 1560 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | 1638 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); |
| @@ -1573,7 +1651,7 @@ private: | |||
| 1573 | } | 1651 | } |
| 1574 | 1652 | ||
| 1575 | std::string Exit(Operation operation) { | 1653 | std::string Exit(Operation operation) { |
| 1576 | if (stage != ShaderStage::Fragment) { | 1654 | if (stage != ProgramType::Fragment) { |
| 1577 | code.AddLine("return;"); | 1655 | code.AddLine("return;"); |
| 1578 | return {}; | 1656 | return {}; |
| 1579 | } | 1657 | } |
| @@ -1624,7 +1702,7 @@ private: | |||
| 1624 | } | 1702 | } |
| 1625 | 1703 | ||
| 1626 | std::string EmitVertex(Operation operation) { | 1704 | std::string EmitVertex(Operation operation) { |
| 1627 | ASSERT_MSG(stage == ShaderStage::Geometry, | 1705 | ASSERT_MSG(stage == ProgramType::Geometry, |
| 1628 | "EmitVertex is expected to be used in a geometry shader."); | 1706 | "EmitVertex is expected to be used in a geometry shader."); |
| 1629 | 1707 | ||
| 1630 | // If a geometry shader is attached, it will always flip (it's the last stage before | 1708 | // If a geometry shader is attached, it will always flip (it's the last stage before |
| @@ -1635,7 +1713,7 @@ private: | |||
| 1635 | } | 1713 | } |
| 1636 | 1714 | ||
| 1637 | std::string EndPrimitive(Operation operation) { | 1715 | std::string EndPrimitive(Operation operation) { |
| 1638 | ASSERT_MSG(stage == ShaderStage::Geometry, | 1716 | ASSERT_MSG(stage == ProgramType::Geometry, |
| 1639 | "EndPrimitive is expected to be used in a geometry shader."); | 1717 | "EndPrimitive is expected to be used in a geometry shader."); |
| 1640 | 1718 | ||
| 1641 | code.AddLine("EndPrimitive();"); | 1719 | code.AddLine("EndPrimitive();"); |
| @@ -1657,7 +1735,49 @@ private: | |||
| 1657 | return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; | 1735 | return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; |
| 1658 | } | 1736 | } |
| 1659 | 1737 | ||
| 1660 | static constexpr OperationDecompilersArray operation_decompilers = { | 1738 | std::string BallotThread(Operation operation) { |
| 1739 | const std::string value = VisitOperand(operation, 0, Type::Bool); | ||
| 1740 | if (!device.HasWarpIntrinsics()) { | ||
| 1741 | LOG_ERROR(Render_OpenGL, | ||
| 1742 | "Nvidia warp intrinsics are not available and its required by a shader"); | ||
| 1743 | // Stub on non-Nvidia devices by simulating all threads voting the same as the active | ||
| 1744 | // one. | ||
| 1745 | return fmt::format("utof({} ? 0xFFFFFFFFU : 0U)", value); | ||
| 1746 | } | ||
| 1747 | return fmt::format("utof(ballotThreadNV({}))", value); | ||
| 1748 | } | ||
| 1749 | |||
| 1750 | std::string Vote(Operation operation, const char* func) { | ||
| 1751 | const std::string value = VisitOperand(operation, 0, Type::Bool); | ||
| 1752 | if (!device.HasWarpIntrinsics()) { | ||
| 1753 | LOG_ERROR(Render_OpenGL, | ||
| 1754 | "Nvidia vote intrinsics are not available and its required by a shader"); | ||
| 1755 | // Stub with a warp size of one. | ||
| 1756 | return value; | ||
| 1757 | } | ||
| 1758 | return fmt::format("{}({})", func, value); | ||
| 1759 | } | ||
| 1760 | |||
| 1761 | std::string VoteAll(Operation operation) { | ||
| 1762 | return Vote(operation, "allThreadsNV"); | ||
| 1763 | } | ||
| 1764 | |||
| 1765 | std::string VoteAny(Operation operation) { | ||
| 1766 | return Vote(operation, "anyThreadNV"); | ||
| 1767 | } | ||
| 1768 | |||
| 1769 | std::string VoteEqual(Operation operation) { | ||
| 1770 | if (!device.HasWarpIntrinsics()) { | ||
| 1771 | LOG_ERROR(Render_OpenGL, | ||
| 1772 | "Nvidia vote intrinsics are not available and its required by a shader"); | ||
| 1773 | // We must return true here since a stub for a theoretical warp size of 1 will always | ||
| 1774 | // return an equal result for all its votes. | ||
| 1775 | return "true"; | ||
| 1776 | } | ||
| 1777 | return Vote(operation, "allThreadsEqualNV"); | ||
| 1778 | } | ||
| 1779 | |||
| 1780 | static constexpr std::array operation_decompilers = { | ||
| 1661 | &GLSLDecompiler::Assign, | 1781 | &GLSLDecompiler::Assign, |
| 1662 | 1782 | ||
| 1663 | &GLSLDecompiler::Select, | 1783 | &GLSLDecompiler::Select, |
| @@ -1669,6 +1789,8 @@ private: | |||
| 1669 | &GLSLDecompiler::Negate<Type::Float>, | 1789 | &GLSLDecompiler::Negate<Type::Float>, |
| 1670 | &GLSLDecompiler::Absolute<Type::Float>, | 1790 | &GLSLDecompiler::Absolute<Type::Float>, |
| 1671 | &GLSLDecompiler::FClamp, | 1791 | &GLSLDecompiler::FClamp, |
| 1792 | &GLSLDecompiler::FCastHalf0, | ||
| 1793 | &GLSLDecompiler::FCastHalf1, | ||
| 1672 | &GLSLDecompiler::Min<Type::Float>, | 1794 | &GLSLDecompiler::Min<Type::Float>, |
| 1673 | &GLSLDecompiler::Max<Type::Float>, | 1795 | &GLSLDecompiler::Max<Type::Float>, |
| 1674 | &GLSLDecompiler::FCos, | 1796 | &GLSLDecompiler::FCos, |
| @@ -1729,6 +1851,7 @@ private: | |||
| 1729 | &GLSLDecompiler::Absolute<Type::HalfFloat>, | 1851 | &GLSLDecompiler::Absolute<Type::HalfFloat>, |
| 1730 | &GLSLDecompiler::HNegate, | 1852 | &GLSLDecompiler::HNegate, |
| 1731 | &GLSLDecompiler::HClamp, | 1853 | &GLSLDecompiler::HClamp, |
| 1854 | &GLSLDecompiler::HCastFloat, | ||
| 1732 | &GLSLDecompiler::HUnpack, | 1855 | &GLSLDecompiler::HUnpack, |
| 1733 | &GLSLDecompiler::HMergeF32, | 1856 | &GLSLDecompiler::HMergeF32, |
| 1734 | &GLSLDecompiler::HMergeH0, | 1857 | &GLSLDecompiler::HMergeH0, |
| @@ -1741,8 +1864,7 @@ private: | |||
| 1741 | &GLSLDecompiler::LogicalXor, | 1864 | &GLSLDecompiler::LogicalXor, |
| 1742 | &GLSLDecompiler::LogicalNegate, | 1865 | &GLSLDecompiler::LogicalNegate, |
| 1743 | &GLSLDecompiler::LogicalPick2, | 1866 | &GLSLDecompiler::LogicalPick2, |
| 1744 | &GLSLDecompiler::LogicalAll2, | 1867 | &GLSLDecompiler::LogicalAnd2, |
| 1745 | &GLSLDecompiler::LogicalAny2, | ||
| 1746 | 1868 | ||
| 1747 | &GLSLDecompiler::LogicalLessThan<Type::Float>, | 1869 | &GLSLDecompiler::LogicalLessThan<Type::Float>, |
| 1748 | &GLSLDecompiler::LogicalEqual<Type::Float>, | 1870 | &GLSLDecompiler::LogicalEqual<Type::Float>, |
| @@ -1789,6 +1911,7 @@ private: | |||
| 1789 | &GLSLDecompiler::ImageStore, | 1911 | &GLSLDecompiler::ImageStore, |
| 1790 | 1912 | ||
| 1791 | &GLSLDecompiler::Branch, | 1913 | &GLSLDecompiler::Branch, |
| 1914 | &GLSLDecompiler::BranchIndirect, | ||
| 1792 | &GLSLDecompiler::PushFlowStack, | 1915 | &GLSLDecompiler::PushFlowStack, |
| 1793 | &GLSLDecompiler::PopFlowStack, | 1916 | &GLSLDecompiler::PopFlowStack, |
| 1794 | &GLSLDecompiler::Exit, | 1917 | &GLSLDecompiler::Exit, |
| @@ -1804,7 +1927,13 @@ private: | |||
| 1804 | &GLSLDecompiler::WorkGroupId<0>, | 1927 | &GLSLDecompiler::WorkGroupId<0>, |
| 1805 | &GLSLDecompiler::WorkGroupId<1>, | 1928 | &GLSLDecompiler::WorkGroupId<1>, |
| 1806 | &GLSLDecompiler::WorkGroupId<2>, | 1929 | &GLSLDecompiler::WorkGroupId<2>, |
| 1930 | |||
| 1931 | &GLSLDecompiler::BallotThread, | ||
| 1932 | &GLSLDecompiler::VoteAll, | ||
| 1933 | &GLSLDecompiler::VoteAny, | ||
| 1934 | &GLSLDecompiler::VoteEqual, | ||
| 1807 | }; | 1935 | }; |
| 1936 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | ||
| 1808 | 1937 | ||
| 1809 | std::string GetRegister(u32 index) const { | 1938 | std::string GetRegister(u32 index) const { |
| 1810 | return GetDeclarationWithSuffix(index, "gpr"); | 1939 | return GetDeclarationWithSuffix(index, "gpr"); |
| @@ -1869,7 +1998,7 @@ private: | |||
| 1869 | } | 1998 | } |
| 1870 | 1999 | ||
| 1871 | u32 GetNumPhysicalInputAttributes() const { | 2000 | u32 GetNumPhysicalInputAttributes() const { |
| 1872 | return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); | 2001 | return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); |
| 1873 | } | 2002 | } |
| 1874 | 2003 | ||
| 1875 | u32 GetNumPhysicalAttributes() const { | 2004 | u32 GetNumPhysicalAttributes() const { |
| @@ -1882,7 +2011,7 @@ private: | |||
| 1882 | 2011 | ||
| 1883 | const Device& device; | 2012 | const Device& device; |
| 1884 | const ShaderIR& ir; | 2013 | const ShaderIR& ir; |
| 1885 | const ShaderStage stage; | 2014 | const ProgramType stage; |
| 1886 | const std::string suffix; | 2015 | const std::string suffix; |
| 1887 | const Header header; | 2016 | const Header header; |
| 1888 | 2017 | ||
| @@ -1913,7 +2042,7 @@ std::string GetCommonDeclarations() { | |||
| 1913 | MAX_CONSTBUFFER_ELEMENTS); | 2042 | MAX_CONSTBUFFER_ELEMENTS); |
| 1914 | } | 2043 | } |
| 1915 | 2044 | ||
| 1916 | ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage, | 2045 | ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, |
| 1917 | const std::string& suffix) { | 2046 | const std::string& suffix) { |
| 1918 | GLSLDecompiler decompiler(device, ir, stage, suffix); | 2047 | GLSLDecompiler decompiler(device, ir, stage, suffix); |
| 1919 | decompiler.Decompile(); | 2048 | decompiler.Decompile(); |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 14d11c7fc..2ea02f5bf 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -12,14 +12,26 @@ | |||
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/shader/shader_ir.h" | 13 | #include "video_core/shader/shader_ir.h" |
| 14 | 14 | ||
| 15 | namespace OpenGL { | ||
| 16 | class Device; | ||
| 17 | } | ||
| 18 | |||
| 19 | namespace VideoCommon::Shader { | 15 | namespace VideoCommon::Shader { |
| 20 | class ShaderIR; | 16 | class ShaderIR; |
| 21 | } | 17 | } |
| 22 | 18 | ||
| 19 | namespace OpenGL { | ||
| 20 | |||
| 21 | class Device; | ||
| 22 | |||
| 23 | enum class ProgramType : u32 { | ||
| 24 | VertexA = 0, | ||
| 25 | VertexB = 1, | ||
| 26 | TessellationControl = 2, | ||
| 27 | TessellationEval = 3, | ||
| 28 | Geometry = 4, | ||
| 29 | Fragment = 5, | ||
| 30 | Compute = 6 | ||
| 31 | }; | ||
| 32 | |||
| 33 | } // namespace OpenGL | ||
| 34 | |||
| 23 | namespace OpenGL::GLShader { | 35 | namespace OpenGL::GLShader { |
| 24 | 36 | ||
| 25 | struct ShaderEntries; | 37 | struct ShaderEntries; |
| @@ -78,12 +90,13 @@ struct ShaderEntries { | |||
| 78 | std::vector<ImageEntry> images; | 90 | std::vector<ImageEntry> images; |
| 79 | std::vector<GlobalMemoryEntry> global_memory_entries; | 91 | std::vector<GlobalMemoryEntry> global_memory_entries; |
| 80 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 92 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 93 | bool shader_viewport_layer_array{}; | ||
| 81 | std::size_t shader_length{}; | 94 | std::size_t shader_length{}; |
| 82 | }; | 95 | }; |
| 83 | 96 | ||
| 84 | std::string GetCommonDeclarations(); | 97 | std::string GetCommonDeclarations(); |
| 85 | 98 | ||
| 86 | ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 99 | ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 87 | Maxwell::ShaderStage stage, const std::string& suffix); | 100 | ProgramType stage, const std::string& suffix); |
| 88 | 101 | ||
| 89 | } // namespace OpenGL::GLShader | 102 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 10688397b..969fe9ced 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -51,7 +51,7 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { | |||
| 51 | 51 | ||
| 52 | } // namespace | 52 | } // namespace |
| 53 | 53 | ||
| 54 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, | 54 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |
| 55 | u32 program_code_size, u32 program_code_size_b, | 55 | u32 program_code_size, u32 program_code_size_b, |
| 56 | ProgramCode program_code, ProgramCode program_code_b) | 56 | ProgramCode program_code, ProgramCode program_code_b) |
| 57 | : unique_identifier{unique_identifier}, program_type{program_type}, | 57 | : unique_identifier{unique_identifier}, program_type{program_type}, |
| @@ -373,6 +373,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn | |||
| 373 | } | 373 | } |
| 374 | } | 374 | } |
| 375 | 375 | ||
| 376 | bool shader_viewport_layer_array{}; | ||
| 377 | if (!LoadObjectFromPrecompiled(shader_viewport_layer_array)) { | ||
| 378 | return {}; | ||
| 379 | } | ||
| 380 | entry.entries.shader_viewport_layer_array = shader_viewport_layer_array; | ||
| 381 | |||
| 376 | u64 shader_length{}; | 382 | u64 shader_length{}; |
| 377 | if (!LoadObjectFromPrecompiled(shader_length)) { | 383 | if (!LoadObjectFromPrecompiled(shader_length)) { |
| 378 | return {}; | 384 | return {}; |
| @@ -445,6 +451,10 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: | |||
| 445 | } | 451 | } |
| 446 | } | 452 | } |
| 447 | 453 | ||
| 454 | if (!SaveObjectToPrecompiled(entries.shader_viewport_layer_array)) { | ||
| 455 | return false; | ||
| 456 | } | ||
| 457 | |||
| 448 | if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { | 458 | if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { |
| 449 | return false; | 459 | return false; |
| 450 | } | 460 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 4f296dda6..cc8bbd61e 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #include "common/assert.h" | 18 | #include "common/assert.h" |
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "core/file_sys/vfs_vector.h" | 20 | #include "core/file_sys/vfs_vector.h" |
| 21 | #include "video_core/engines/maxwell_3d.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 21 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 23 | 22 | ||
| 24 | namespace Core { | 23 | namespace Core { |
| @@ -34,14 +33,11 @@ namespace OpenGL { | |||
| 34 | struct ShaderDiskCacheUsage; | 33 | struct ShaderDiskCacheUsage; |
| 35 | struct ShaderDiskCacheDump; | 34 | struct ShaderDiskCacheDump; |
| 36 | 35 | ||
| 37 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; | ||
| 38 | |||
| 39 | using ProgramCode = std::vector<u64>; | 36 | using ProgramCode = std::vector<u64>; |
| 40 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 37 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; |
| 41 | |||
| 42 | using TextureBufferUsage = std::bitset<64>; | 38 | using TextureBufferUsage = std::bitset<64>; |
| 43 | 39 | ||
| 44 | /// Allocated bindings used by an OpenGL shader program. | 40 | /// Allocated bindings used by an OpenGL shader program |
| 45 | struct BaseBindings { | 41 | struct BaseBindings { |
| 46 | u32 cbuf{}; | 42 | u32 cbuf{}; |
| 47 | u32 gmem{}; | 43 | u32 gmem{}; |
| @@ -126,7 +122,7 @@ namespace OpenGL { | |||
| 126 | /// Describes a shader how it's used by the guest GPU | 122 | /// Describes a shader how it's used by the guest GPU |
| 127 | class ShaderDiskCacheRaw { | 123 | class ShaderDiskCacheRaw { |
| 128 | public: | 124 | public: |
| 129 | explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, | 125 | explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |
| 130 | u32 program_code_size, u32 program_code_size_b, | 126 | u32 program_code_size, u32 program_code_size_b, |
| 131 | ProgramCode program_code, ProgramCode program_code_b); | 127 | ProgramCode program_code, ProgramCode program_code_b); |
| 132 | ShaderDiskCacheRaw(); | 128 | ShaderDiskCacheRaw(); |
| @@ -141,30 +137,13 @@ public: | |||
| 141 | } | 137 | } |
| 142 | 138 | ||
| 143 | bool HasProgramA() const { | 139 | bool HasProgramA() const { |
| 144 | return program_type == Maxwell::ShaderProgram::VertexA; | 140 | return program_type == ProgramType::VertexA; |
| 145 | } | 141 | } |
| 146 | 142 | ||
| 147 | Maxwell::ShaderProgram GetProgramType() const { | 143 | ProgramType GetProgramType() const { |
| 148 | return program_type; | 144 | return program_type; |
| 149 | } | 145 | } |
| 150 | 146 | ||
| 151 | Maxwell::ShaderStage GetProgramStage() const { | ||
| 152 | switch (program_type) { | ||
| 153 | case Maxwell::ShaderProgram::VertexA: | ||
| 154 | case Maxwell::ShaderProgram::VertexB: | ||
| 155 | return Maxwell::ShaderStage::Vertex; | ||
| 156 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 157 | return Maxwell::ShaderStage::TesselationControl; | ||
| 158 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 159 | return Maxwell::ShaderStage::TesselationEval; | ||
| 160 | case Maxwell::ShaderProgram::Geometry: | ||
| 161 | return Maxwell::ShaderStage::Geometry; | ||
| 162 | case Maxwell::ShaderProgram::Fragment: | ||
| 163 | return Maxwell::ShaderStage::Fragment; | ||
| 164 | } | ||
| 165 | UNREACHABLE(); | ||
| 166 | } | ||
| 167 | |||
| 168 | const ProgramCode& GetProgramCode() const { | 147 | const ProgramCode& GetProgramCode() const { |
| 169 | return program_code; | 148 | return program_code; |
| 170 | } | 149 | } |
| @@ -175,7 +154,7 @@ public: | |||
| 175 | 154 | ||
| 176 | private: | 155 | private: |
| 177 | u64 unique_identifier{}; | 156 | u64 unique_identifier{}; |
| 178 | Maxwell::ShaderProgram program_type{}; | 157 | ProgramType program_type{}; |
| 179 | u32 program_code_size{}; | 158 | u32 program_code_size{}; |
| 180 | u32 program_code_size_b{}; | 159 | u32 program_code_size_b{}; |
| 181 | 160 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 9148629ec..3a8d9e1da 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -14,7 +14,8 @@ using Tegra::Engines::Maxwell3D; | |||
| 14 | using VideoCommon::Shader::ProgramCode; | 14 | using VideoCommon::Shader::ProgramCode; |
| 15 | using VideoCommon::Shader::ShaderIR; | 15 | using VideoCommon::Shader::ShaderIR; |
| 16 | 16 | ||
| 17 | static constexpr u32 PROGRAM_OFFSET{10}; | 17 | static constexpr u32 PROGRAM_OFFSET = 10; |
| 18 | static constexpr u32 COMPUTE_OFFSET = 0; | ||
| 18 | 19 | ||
| 19 | ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { | 20 | ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { |
| 20 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 21 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| @@ -29,17 +30,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | |||
| 29 | }; | 30 | }; |
| 30 | 31 | ||
| 31 | )"; | 32 | )"; |
| 32 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | ||
| 33 | ProgramResult program = | ||
| 34 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); | ||
| 35 | 33 | ||
| 34 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | ||
| 35 | const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; | ||
| 36 | ProgramResult program = Decompile(device, program_ir, stage, "vertex"); | ||
| 36 | out += program.first; | 37 | out += program.first; |
| 37 | 38 | ||
| 38 | if (setup.IsDualProgram()) { | 39 | if (setup.IsDualProgram()) { |
| 39 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); | 40 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); |
| 40 | ProgramResult program_b = | 41 | ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); |
| 41 | Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); | ||
| 42 | |||
| 43 | out += program_b.first; | 42 | out += program_b.first; |
| 44 | } | 43 | } |
| 45 | 44 | ||
| @@ -80,9 +79,9 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | |||
| 80 | }; | 79 | }; |
| 81 | 80 | ||
| 82 | )"; | 81 | )"; |
| 83 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | 82 | |
| 84 | ProgramResult program = | 83 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); |
| 85 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); | 84 | ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); |
| 86 | out += program.first; | 85 | out += program.first; |
| 87 | 86 | ||
| 88 | out += R"( | 87 | out += R"( |
| @@ -115,10 +114,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | |||
| 115 | }; | 114 | }; |
| 116 | 115 | ||
| 117 | )"; | 116 | )"; |
| 118 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | 117 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); |
| 119 | ProgramResult program = | 118 | ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); |
| 120 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); | ||
| 121 | |||
| 122 | out += program.first; | 119 | out += program.first; |
| 123 | 120 | ||
| 124 | out += R"( | 121 | out += R"( |
| @@ -130,4 +127,22 @@ void main() { | |||
| 130 | return {std::move(out), std::move(program.second)}; | 127 | return {std::move(out), std::move(program.second)}; |
| 131 | } | 128 | } |
| 132 | 129 | ||
| 130 | ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) { | ||
| 131 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||
| 132 | |||
| 133 | std::string out = "// Shader Unique Id: CS" + id + "\n\n"; | ||
| 134 | out += GetCommonDeclarations(); | ||
| 135 | |||
| 136 | const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a); | ||
| 137 | ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); | ||
| 138 | out += program.first; | ||
| 139 | |||
| 140 | out += R"( | ||
| 141 | void main() { | ||
| 142 | execute_compute(); | ||
| 143 | } | ||
| 144 | )"; | ||
| 145 | return {std::move(out), std::move(program.second)}; | ||
| 146 | } | ||
| 147 | |||
| 133 | } // namespace OpenGL::GLShader | 148 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 0536c8a03..3833e88ab 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -27,6 +27,8 @@ struct ShaderSetup { | |||
| 27 | ProgramCode code; | 27 | ProgramCode code; |
| 28 | ProgramCode code_b; // Used for dual vertex shaders | 28 | ProgramCode code_b; // Used for dual vertex shaders |
| 29 | u64 unique_identifier; | 29 | u64 unique_identifier; |
| 30 | std::size_t size_a; | ||
| 31 | std::size_t size_b; | ||
| 30 | } program; | 32 | } program; |
| 31 | 33 | ||
| 32 | /// Used in scenarios where we have a dual vertex shaders | 34 | /// Used in scenarios where we have a dual vertex shaders |
| @@ -52,4 +54,7 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se | |||
| 52 | /// Generates the GLSL fragment shader program source code for the given FS program | 54 | /// Generates the GLSL fragment shader program source code for the given FS program |
| 53 | ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); | 55 | ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); |
| 54 | 56 | ||
| 57 | /// Generates the GLSL compute shader program source code for the given CS program | ||
| 58 | ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup); | ||
| 59 | |||
| 55 | } // namespace OpenGL::GLShader | 60 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 5f3fe067e..9e74eda0d 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp | |||
| @@ -10,21 +10,25 @@ | |||
| 10 | 10 | ||
| 11 | namespace OpenGL::GLShader { | 11 | namespace OpenGL::GLShader { |
| 12 | 12 | ||
| 13 | GLuint LoadShader(const char* source, GLenum type) { | 13 | namespace { |
| 14 | const char* debug_type; | 14 | const char* GetStageDebugName(GLenum type) { |
| 15 | switch (type) { | 15 | switch (type) { |
| 16 | case GL_VERTEX_SHADER: | 16 | case GL_VERTEX_SHADER: |
| 17 | debug_type = "vertex"; | 17 | return "vertex"; |
| 18 | break; | ||
| 19 | case GL_GEOMETRY_SHADER: | 18 | case GL_GEOMETRY_SHADER: |
| 20 | debug_type = "geometry"; | 19 | return "geometry"; |
| 21 | break; | ||
| 22 | case GL_FRAGMENT_SHADER: | 20 | case GL_FRAGMENT_SHADER: |
| 23 | debug_type = "fragment"; | 21 | return "fragment"; |
| 24 | break; | 22 | case GL_COMPUTE_SHADER: |
| 25 | default: | 23 | return "compute"; |
| 26 | UNREACHABLE(); | ||
| 27 | } | 24 | } |
| 25 | UNIMPLEMENTED(); | ||
| 26 | return "unknown"; | ||
| 27 | } | ||
| 28 | } // Anonymous namespace | ||
| 29 | |||
| 30 | GLuint LoadShader(const char* source, GLenum type) { | ||
| 31 | const char* debug_type = GetStageDebugName(type); | ||
| 28 | const GLuint shader_id = glCreateShader(type); | 32 | const GLuint shader_id = glCreateShader(type); |
| 29 | glShaderSource(shader_id, 1, &source, nullptr); | 33 | glShaderSource(shader_id, 1, &source, nullptr); |
| 30 | LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); | 34 | LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); |
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index d86e137ac..f4777d0b0 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -6,8 +6,11 @@ | |||
| 6 | #include <glad/glad.h> | 6 | #include <glad/glad.h> |
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 9 | #include "common/microprofile.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_state.h" | 10 | #include "video_core/renderer_opengl/gl_state.h" |
| 10 | 11 | ||
| 12 | MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128)); | ||
| 13 | |||
| 11 | namespace OpenGL { | 14 | namespace OpenGL { |
| 12 | 15 | ||
| 13 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 16 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| @@ -162,6 +165,25 @@ OpenGLState::OpenGLState() { | |||
| 162 | alpha_test.ref = 0.0f; | 165 | alpha_test.ref = 0.0f; |
| 163 | } | 166 | } |
| 164 | 167 | ||
| 168 | void OpenGLState::SetDefaultViewports() { | ||
| 169 | for (auto& item : viewports) { | ||
| 170 | item.x = 0; | ||
| 171 | item.y = 0; | ||
| 172 | item.width = 0; | ||
| 173 | item.height = 0; | ||
| 174 | item.depth_range_near = 0.0f; | ||
| 175 | item.depth_range_far = 1.0f; | ||
| 176 | item.scissor.enabled = false; | ||
| 177 | item.scissor.x = 0; | ||
| 178 | item.scissor.y = 0; | ||
| 179 | item.scissor.width = 0; | ||
| 180 | item.scissor.height = 0; | ||
| 181 | } | ||
| 182 | |||
| 183 | depth_clamp.far_plane = false; | ||
| 184 | depth_clamp.near_plane = false; | ||
| 185 | } | ||
| 186 | |||
| 165 | void OpenGLState::ApplyDefaultState() { | 187 | void OpenGLState::ApplyDefaultState() { |
| 166 | glEnable(GL_BLEND); | 188 | glEnable(GL_BLEND); |
| 167 | glDisable(GL_FRAMEBUFFER_SRGB); | 189 | glDisable(GL_FRAMEBUFFER_SRGB); |
| @@ -523,7 +545,8 @@ void OpenGLState::ApplySamplers() const { | |||
| 523 | } | 545 | } |
| 524 | } | 546 | } |
| 525 | 547 | ||
| 526 | void OpenGLState::Apply() const { | 548 | void OpenGLState::Apply() { |
| 549 | MICROPROFILE_SCOPE(OpenGL_State); | ||
| 527 | ApplyFramebufferState(); | 550 | ApplyFramebufferState(); |
| 528 | ApplyVertexArrayState(); | 551 | ApplyVertexArrayState(); |
| 529 | ApplyShaderProgram(); | 552 | ApplyShaderProgram(); |
| @@ -532,19 +555,31 @@ void OpenGLState::Apply() const { | |||
| 532 | ApplyPointSize(); | 555 | ApplyPointSize(); |
| 533 | ApplyFragmentColorClamp(); | 556 | ApplyFragmentColorClamp(); |
| 534 | ApplyMultisample(); | 557 | ApplyMultisample(); |
| 558 | if (dirty.color_mask) { | ||
| 559 | ApplyColorMask(); | ||
| 560 | dirty.color_mask = false; | ||
| 561 | } | ||
| 535 | ApplyDepthClamp(); | 562 | ApplyDepthClamp(); |
| 536 | ApplyColorMask(); | ||
| 537 | ApplyViewport(); | 563 | ApplyViewport(); |
| 538 | ApplyStencilTest(); | 564 | if (dirty.stencil_state) { |
| 565 | ApplyStencilTest(); | ||
| 566 | dirty.stencil_state = false; | ||
| 567 | } | ||
| 539 | ApplySRgb(); | 568 | ApplySRgb(); |
| 540 | ApplyCulling(); | 569 | ApplyCulling(); |
| 541 | ApplyDepth(); | 570 | ApplyDepth(); |
| 542 | ApplyPrimitiveRestart(); | 571 | ApplyPrimitiveRestart(); |
| 543 | ApplyBlending(); | 572 | if (dirty.blend_state) { |
| 573 | ApplyBlending(); | ||
| 574 | dirty.blend_state = false; | ||
| 575 | } | ||
| 544 | ApplyLogicOp(); | 576 | ApplyLogicOp(); |
| 545 | ApplyTextures(); | 577 | ApplyTextures(); |
| 546 | ApplySamplers(); | 578 | ApplySamplers(); |
| 547 | ApplyPolygonOffset(); | 579 | if (dirty.polygon_offset) { |
| 580 | ApplyPolygonOffset(); | ||
| 581 | dirty.polygon_offset = false; | ||
| 582 | } | ||
| 548 | ApplyAlphaTest(); | 583 | ApplyAlphaTest(); |
| 549 | } | 584 | } |
| 550 | 585 | ||
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index b0140495d..fdf9a8a12 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -195,8 +195,9 @@ public: | |||
| 195 | s_rgb_used = false; | 195 | s_rgb_used = false; |
| 196 | } | 196 | } |
| 197 | 197 | ||
| 198 | void SetDefaultViewports(); | ||
| 198 | /// Apply this state as the current OpenGL state | 199 | /// Apply this state as the current OpenGL state |
| 199 | void Apply() const; | 200 | void Apply(); |
| 200 | 201 | ||
| 201 | void ApplyFramebufferState() const; | 202 | void ApplyFramebufferState() const; |
| 202 | void ApplyVertexArrayState() const; | 203 | void ApplyVertexArrayState() const; |
| @@ -237,11 +238,41 @@ public: | |||
| 237 | /// Viewport does not affects glClearBuffer so emulate viewport using scissor test | 238 | /// Viewport does not affects glClearBuffer so emulate viewport using scissor test |
| 238 | void EmulateViewportWithScissor(); | 239 | void EmulateViewportWithScissor(); |
| 239 | 240 | ||
| 241 | void MarkDirtyBlendState() { | ||
| 242 | dirty.blend_state = true; | ||
| 243 | } | ||
| 244 | |||
| 245 | void MarkDirtyStencilState() { | ||
| 246 | dirty.stencil_state = true; | ||
| 247 | } | ||
| 248 | |||
| 249 | void MarkDirtyPolygonOffset() { | ||
| 250 | dirty.polygon_offset = true; | ||
| 251 | } | ||
| 252 | |||
| 253 | void MarkDirtyColorMask() { | ||
| 254 | dirty.color_mask = true; | ||
| 255 | } | ||
| 256 | |||
| 257 | void AllDirty() { | ||
| 258 | dirty.blend_state = true; | ||
| 259 | dirty.stencil_state = true; | ||
| 260 | dirty.polygon_offset = true; | ||
| 261 | dirty.color_mask = true; | ||
| 262 | } | ||
| 263 | |||
| 240 | private: | 264 | private: |
| 241 | static OpenGLState cur_state; | 265 | static OpenGLState cur_state; |
| 242 | 266 | ||
| 243 | // Workaround for sRGB problems caused by QT not supporting srgb output | 267 | // Workaround for sRGB problems caused by QT not supporting srgb output |
| 244 | static bool s_rgb_used; | 268 | static bool s_rgb_used; |
| 269 | struct { | ||
| 270 | bool blend_state; | ||
| 271 | bool stencil_state; | ||
| 272 | bool viewport_state; | ||
| 273 | bool polygon_offset; | ||
| 274 | bool color_mask; | ||
| 275 | } dirty{}; | ||
| 245 | }; | 276 | }; |
| 246 | 277 | ||
| 247 | } // namespace OpenGL | 278 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 08ae1a429..4f135fe03 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -31,6 +31,8 @@ using VideoCore::Surface::SurfaceType; | |||
| 31 | 31 | ||
| 32 | MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); | 32 | MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); |
| 33 | MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); | 33 | MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); |
| 34 | MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", | ||
| 35 | MP_RGB(128, 192, 128)); | ||
| 34 | 36 | ||
| 35 | namespace { | 37 | namespace { |
| 36 | 38 | ||
| @@ -135,7 +137,6 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format | |||
| 135 | const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { | 137 | const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { |
| 136 | ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); | 138 | ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); |
| 137 | const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]}; | 139 | const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]}; |
| 138 | ASSERT(component_type == format.component_type); | ||
| 139 | return format; | 140 | return format; |
| 140 | } | 141 | } |
| 141 | 142 | ||
| @@ -183,6 +184,9 @@ GLint GetSwizzleSource(SwizzleSource source) { | |||
| 183 | } | 184 | } |
| 184 | 185 | ||
| 185 | void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { | 186 | void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { |
| 187 | if (params.IsBuffer()) { | ||
| 188 | return; | ||
| 189 | } | ||
| 186 | glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | 190 | glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); |
| 187 | glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | 191 | glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); |
| 188 | glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); | 192 | glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); |
| @@ -207,6 +211,7 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte | |||
| 207 | glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), | 211 | glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), |
| 208 | nullptr, GL_DYNAMIC_STORAGE_BIT); | 212 | nullptr, GL_DYNAMIC_STORAGE_BIT); |
| 209 | glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); | 213 | glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); |
| 214 | break; | ||
| 210 | case SurfaceTarget::Texture2D: | 215 | case SurfaceTarget::Texture2D: |
| 211 | case SurfaceTarget::TextureCubemap: | 216 | case SurfaceTarget::TextureCubemap: |
| 212 | glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, | 217 | glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, |
| @@ -483,11 +488,15 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, | |||
| 483 | const auto& dst_params{dst_view->GetSurfaceParams()}; | 488 | const auto& dst_params{dst_view->GetSurfaceParams()}; |
| 484 | 489 | ||
| 485 | OpenGLState prev_state{OpenGLState::GetCurState()}; | 490 | OpenGLState prev_state{OpenGLState::GetCurState()}; |
| 486 | SCOPE_EXIT({ prev_state.Apply(); }); | 491 | SCOPE_EXIT({ |
| 492 | prev_state.AllDirty(); | ||
| 493 | prev_state.Apply(); | ||
| 494 | }); | ||
| 487 | 495 | ||
| 488 | OpenGLState state; | 496 | OpenGLState state; |
| 489 | state.draw.read_framebuffer = src_framebuffer.handle; | 497 | state.draw.read_framebuffer = src_framebuffer.handle; |
| 490 | state.draw.draw_framebuffer = dst_framebuffer.handle; | 498 | state.draw.draw_framebuffer = dst_framebuffer.handle; |
| 499 | state.AllDirty(); | ||
| 491 | state.Apply(); | 500 | state.Apply(); |
| 492 | 501 | ||
| 493 | u32 buffers{}; | 502 | u32 buffers{}; |
| @@ -535,6 +544,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, | |||
| 535 | } | 544 | } |
| 536 | 545 | ||
| 537 | void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { | 546 | void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { |
| 547 | MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy); | ||
| 538 | const auto& src_params = src_surface->GetSurfaceParams(); | 548 | const auto& src_params = src_surface->GetSurfaceParams(); |
| 539 | const auto& dst_params = dst_surface->GetSurfaceParams(); | 549 | const auto& dst_params = dst_surface->GetSurfaceParams(); |
| 540 | UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); | 550 | UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index ff6ab6988..21324488a 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -51,7 +51,7 @@ public: | |||
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | protected: | 53 | protected: |
| 54 | void DecorateSurfaceName(); | 54 | void DecorateSurfaceName() override; |
| 55 | 55 | ||
| 56 | View CreateView(const ViewParams& view_key) override; | 56 | View CreateView(const ViewParams& view_key) override; |
| 57 | View CreateViewInner(const ViewParams& view_key, bool is_proxy); | 57 | View CreateViewInner(const ViewParams& view_key, bool is_proxy); |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b142521ec..af9684839 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -101,21 +101,19 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst | |||
| 101 | 101 | ||
| 102 | RendererOpenGL::~RendererOpenGL() = default; | 102 | RendererOpenGL::~RendererOpenGL() = default; |
| 103 | 103 | ||
| 104 | /// Swap buffers (render frame) | 104 | void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| 105 | void RendererOpenGL::SwapBuffers( | ||
| 106 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | ||
| 107 | |||
| 108 | system.GetPerfStats().EndSystemFrame(); | 105 | system.GetPerfStats().EndSystemFrame(); |
| 109 | 106 | ||
| 110 | // Maintain the rasterizer's state as a priority | 107 | // Maintain the rasterizer's state as a priority |
| 111 | OpenGLState prev_state = OpenGLState::GetCurState(); | 108 | OpenGLState prev_state = OpenGLState::GetCurState(); |
| 109 | state.AllDirty(); | ||
| 112 | state.Apply(); | 110 | state.Apply(); |
| 113 | 111 | ||
| 114 | if (framebuffer) { | 112 | if (framebuffer) { |
| 115 | // If framebuffer is provided, reload it from memory to a texture | 113 | // If framebuffer is provided, reload it from memory to a texture |
| 116 | if (screen_info.texture.width != (GLsizei)framebuffer->get().width || | 114 | if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) || |
| 117 | screen_info.texture.height != (GLsizei)framebuffer->get().height || | 115 | screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) || |
| 118 | screen_info.texture.pixel_format != framebuffer->get().pixel_format) { | 116 | screen_info.texture.pixel_format != framebuffer->pixel_format) { |
| 119 | // Reallocate texture if the framebuffer size has changed. | 117 | // Reallocate texture if the framebuffer size has changed. |
| 120 | // This is expected to not happen very often and hence should not be a | 118 | // This is expected to not happen very often and hence should not be a |
| 121 | // performance problem. | 119 | // performance problem. |
| @@ -130,6 +128,8 @@ void RendererOpenGL::SwapBuffers( | |||
| 130 | 128 | ||
| 131 | DrawScreen(render_window.GetFramebufferLayout()); | 129 | DrawScreen(render_window.GetFramebufferLayout()); |
| 132 | 130 | ||
| 131 | rasterizer->TickFrame(); | ||
| 132 | |||
| 133 | render_window.SwapBuffers(); | 133 | render_window.SwapBuffers(); |
| 134 | } | 134 | } |
| 135 | 135 | ||
| @@ -139,6 +139,7 @@ void RendererOpenGL::SwapBuffers( | |||
| 139 | system.GetPerfStats().BeginSystemFrame(); | 139 | system.GetPerfStats().BeginSystemFrame(); |
| 140 | 140 | ||
| 141 | // Restore the rasterizer state | 141 | // Restore the rasterizer state |
| 142 | prev_state.AllDirty(); | ||
| 142 | prev_state.Apply(); | 143 | prev_state.Apply(); |
| 143 | } | 144 | } |
| 144 | 145 | ||
| @@ -146,43 +147,43 @@ void RendererOpenGL::SwapBuffers( | |||
| 146 | * Loads framebuffer from emulated memory into the active OpenGL texture. | 147 | * Loads framebuffer from emulated memory into the active OpenGL texture. |
| 147 | */ | 148 | */ |
| 148 | void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { | 149 | void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { |
| 149 | const u32 bytes_per_pixel{Tegra::FramebufferConfig::BytesPerPixel(framebuffer.pixel_format)}; | ||
| 150 | const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; | ||
| 151 | const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; | ||
| 152 | |||
| 153 | // Framebuffer orientation handling | 150 | // Framebuffer orientation handling |
| 154 | framebuffer_transform_flags = framebuffer.transform_flags; | 151 | framebuffer_transform_flags = framebuffer.transform_flags; |
| 155 | framebuffer_crop_rect = framebuffer.crop_rect; | 152 | framebuffer_crop_rect = framebuffer.crop_rect; |
| 156 | 153 | ||
| 157 | // Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default | 154 | const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; |
| 158 | // only allows rows to have a memory alignement of 4. | 155 | if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { |
| 159 | ASSERT(framebuffer.stride % 4 == 0); | 156 | return; |
| 160 | 157 | } | |
| 161 | if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { | ||
| 162 | // Reset the screen info's display texture to its own permanent texture | ||
| 163 | screen_info.display_texture = screen_info.texture.resource.handle; | ||
| 164 | |||
| 165 | rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes); | ||
| 166 | |||
| 167 | constexpr u32 linear_bpp = 4; | ||
| 168 | VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear, | ||
| 169 | framebuffer.width, framebuffer.height, bytes_per_pixel, | ||
| 170 | linear_bpp, Memory::GetPointer(framebuffer_addr), | ||
| 171 | gl_framebuffer_data.data()); | ||
| 172 | |||
| 173 | glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); | ||
| 174 | 158 | ||
| 175 | // Update existing texture | 159 | // Reset the screen info's display texture to its own permanent texture |
| 176 | // TODO: Test what happens on hardware when you change the framebuffer dimensions so that | 160 | screen_info.display_texture = screen_info.texture.resource.handle; |
| 177 | // they differ from the LCD resolution. | ||
| 178 | // TODO: Applications could theoretically crash yuzu here by specifying too large | ||
| 179 | // framebuffer sizes. We should make sure that this cannot happen. | ||
| 180 | glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width, | ||
| 181 | framebuffer.height, screen_info.texture.gl_format, | ||
| 182 | screen_info.texture.gl_type, gl_framebuffer_data.data()); | ||
| 183 | 161 | ||
| 184 | glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); | 162 | const auto pixel_format{ |
| 185 | } | 163 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; |
| 164 | const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; | ||
| 165 | const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; | ||
| 166 | const auto host_ptr{Memory::GetPointer(framebuffer_addr)}; | ||
| 167 | rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes); | ||
| 168 | |||
| 169 | // TODO(Rodrigo): Read this from HLE | ||
| 170 | constexpr u32 block_height_log2 = 4; | ||
| 171 | VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, | ||
| 172 | framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, | ||
| 173 | gl_framebuffer_data.data(), host_ptr); | ||
| 174 | |||
| 175 | glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); | ||
| 176 | |||
| 177 | // Update existing texture | ||
| 178 | // TODO: Test what happens on hardware when you change the framebuffer dimensions so that | ||
| 179 | // they differ from the LCD resolution. | ||
| 180 | // TODO: Applications could theoretically crash yuzu here by specifying too large | ||
| 181 | // framebuffer sizes. We should make sure that this cannot happen. | ||
| 182 | glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width, | ||
| 183 | framebuffer.height, screen_info.texture.gl_format, | ||
| 184 | screen_info.texture.gl_type, gl_framebuffer_data.data()); | ||
| 185 | |||
| 186 | glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); | ||
| 186 | } | 187 | } |
| 187 | 188 | ||
| 188 | /** | 189 | /** |
| @@ -205,6 +206,7 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 205 | // Link shaders and get variable locations | 206 | // Link shaders and get variable locations |
| 206 | shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); | 207 | shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); |
| 207 | state.draw.shader_program = shader.handle; | 208 | state.draw.shader_program = shader.handle; |
| 209 | state.AllDirty(); | ||
| 208 | state.Apply(); | 210 | state.Apply(); |
| 209 | uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); | 211 | uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); |
| 210 | uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); | 212 | uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); |
| @@ -262,7 +264,6 @@ void RendererOpenGL::CreateRasterizer() { | |||
| 262 | if (rasterizer) { | 264 | if (rasterizer) { |
| 263 | return; | 265 | return; |
| 264 | } | 266 | } |
| 265 | // Initialize sRGB Usage | ||
| 266 | OpenGLState::ClearsRGBUsed(); | 267 | OpenGLState::ClearsRGBUsed(); |
| 267 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); | 268 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); |
| 268 | } | 269 | } |
| @@ -273,22 +274,29 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, | |||
| 273 | texture.height = framebuffer.height; | 274 | texture.height = framebuffer.height; |
| 274 | texture.pixel_format = framebuffer.pixel_format; | 275 | texture.pixel_format = framebuffer.pixel_format; |
| 275 | 276 | ||
| 277 | const auto pixel_format{ | ||
| 278 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; | ||
| 279 | const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; | ||
| 280 | gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel); | ||
| 281 | |||
| 276 | GLint internal_format; | 282 | GLint internal_format; |
| 277 | switch (framebuffer.pixel_format) { | 283 | switch (framebuffer.pixel_format) { |
| 278 | case Tegra::FramebufferConfig::PixelFormat::ABGR8: | 284 | case Tegra::FramebufferConfig::PixelFormat::ABGR8: |
| 279 | internal_format = GL_RGBA8; | 285 | internal_format = GL_RGBA8; |
| 280 | texture.gl_format = GL_RGBA; | 286 | texture.gl_format = GL_RGBA; |
| 281 | texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; | 287 | texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; |
| 282 | gl_framebuffer_data.resize(texture.width * texture.height * 4); | 288 | break; |
| 289 | case Tegra::FramebufferConfig::PixelFormat::RGB565: | ||
| 290 | internal_format = GL_RGB565; | ||
| 291 | texture.gl_format = GL_RGB; | ||
| 292 | texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; | ||
| 283 | break; | 293 | break; |
| 284 | default: | 294 | default: |
| 285 | internal_format = GL_RGBA8; | 295 | internal_format = GL_RGBA8; |
| 286 | texture.gl_format = GL_RGBA; | 296 | texture.gl_format = GL_RGBA; |
| 287 | texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; | 297 | texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; |
| 288 | gl_framebuffer_data.resize(texture.width * texture.height * 4); | 298 | UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", |
| 289 | LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer pixel format: {}", | 299 | static_cast<u32>(framebuffer.pixel_format)); |
| 290 | static_cast<u32>(framebuffer.pixel_format)); | ||
| 291 | UNREACHABLE(); | ||
| 292 | } | 300 | } |
| 293 | 301 | ||
| 294 | texture.resource.Release(); | 302 | texture.resource.Release(); |
| @@ -338,12 +346,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, | |||
| 338 | // Workaround brigthness problems in SMO by enabling sRGB in the final output | 346 | // Workaround brigthness problems in SMO by enabling sRGB in the final output |
| 339 | // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 | 347 | // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 |
| 340 | state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); | 348 | state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); |
| 349 | state.AllDirty(); | ||
| 341 | state.Apply(); | 350 | state.Apply(); |
| 342 | glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); | 351 | glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); |
| 343 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | 352 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); |
| 344 | // Restore default state | 353 | // Restore default state |
| 345 | state.framebuffer_srgb.enabled = false; | 354 | state.framebuffer_srgb.enabled = false; |
| 346 | state.texture_units[0].texture = 0; | 355 | state.texture_units[0].texture = 0; |
| 356 | state.AllDirty(); | ||
| 347 | state.Apply(); | 357 | state.Apply(); |
| 348 | // Clear sRGB state for the next frame | 358 | // Clear sRGB state for the next frame |
| 349 | OpenGLState::ClearsRGBUsed(); | 359 | OpenGLState::ClearsRGBUsed(); |
| @@ -388,6 +398,7 @@ void RendererOpenGL::CaptureScreenshot() { | |||
| 388 | GLuint old_read_fb = state.draw.read_framebuffer; | 398 | GLuint old_read_fb = state.draw.read_framebuffer; |
| 389 | GLuint old_draw_fb = state.draw.draw_framebuffer; | 399 | GLuint old_draw_fb = state.draw.draw_framebuffer; |
| 390 | state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; | 400 | state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; |
| 401 | state.AllDirty(); | ||
| 391 | state.Apply(); | 402 | state.Apply(); |
| 392 | 403 | ||
| 393 | Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; | 404 | Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; |
| @@ -407,6 +418,7 @@ void RendererOpenGL::CaptureScreenshot() { | |||
| 407 | screenshot_framebuffer.Release(); | 418 | screenshot_framebuffer.Release(); |
| 408 | state.draw.read_framebuffer = old_read_fb; | 419 | state.draw.read_framebuffer = old_read_fb; |
| 409 | state.draw.draw_framebuffer = old_draw_fb; | 420 | state.draw.draw_framebuffer = old_draw_fb; |
| 421 | state.AllDirty(); | ||
| 410 | state.Apply(); | 422 | state.Apply(); |
| 411 | glDeleteRenderbuffers(1, &renderbuffer); | 423 | glDeleteRenderbuffers(1, &renderbuffer); |
| 412 | 424 | ||
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 4aebf2321..9bd086368 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -43,14 +43,13 @@ struct ScreenInfo { | |||
| 43 | TextureInfo texture; | 43 | TextureInfo texture; |
| 44 | }; | 44 | }; |
| 45 | 45 | ||
| 46 | class RendererOpenGL : public VideoCore::RendererBase { | 46 | class RendererOpenGL final : public VideoCore::RendererBase { |
| 47 | public: | 47 | public: |
| 48 | explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); | 48 | explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); |
| 49 | ~RendererOpenGL() override; | 49 | ~RendererOpenGL() override; |
| 50 | 50 | ||
| 51 | /// Swap buffers (render frame) | 51 | /// Swap buffers (render frame) |
| 52 | void SwapBuffers( | 52 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; |
| 53 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; | ||
| 54 | 53 | ||
| 55 | /// Initialize the renderer | 54 | /// Initialize the renderer |
| 56 | bool Init() override; | 55 | bool Init() override; |
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index 68c36988d..c504a2c1a 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp | |||
| @@ -13,29 +13,67 @@ | |||
| 13 | 13 | ||
| 14 | namespace OpenGL { | 14 | namespace OpenGL { |
| 15 | 15 | ||
| 16 | VertexArrayPushBuffer::VertexArrayPushBuffer() = default; | ||
| 17 | |||
| 18 | VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; | ||
| 19 | |||
| 20 | void VertexArrayPushBuffer::Setup(GLuint vao_) { | ||
| 21 | vao = vao_; | ||
| 22 | index_buffer = nullptr; | ||
| 23 | vertex_buffers.clear(); | ||
| 24 | } | ||
| 25 | |||
| 26 | void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) { | ||
| 27 | index_buffer = buffer; | ||
| 28 | } | ||
| 29 | |||
| 30 | void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer, | ||
| 31 | GLintptr offset, GLsizei stride) { | ||
| 32 | vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride}); | ||
| 33 | } | ||
| 34 | |||
| 35 | void VertexArrayPushBuffer::Bind() { | ||
| 36 | if (index_buffer) { | ||
| 37 | glVertexArrayElementBuffer(vao, *index_buffer); | ||
| 38 | } | ||
| 39 | |||
| 40 | // TODO(Rodrigo): Find a way to ARB_multi_bind this | ||
| 41 | for (const auto& entry : vertex_buffers) { | ||
| 42 | glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset, | ||
| 43 | entry.stride); | ||
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 16 | BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} | 47 | BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} |
| 17 | 48 | ||
| 18 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; | 49 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; |
| 19 | 50 | ||
| 20 | void BindBuffersRangePushBuffer::Setup(GLuint first_) { | 51 | void BindBuffersRangePushBuffer::Setup(GLuint first_) { |
| 21 | first = first_; | 52 | first = first_; |
| 22 | buffers.clear(); | 53 | buffer_pointers.clear(); |
| 23 | offsets.clear(); | 54 | offsets.clear(); |
| 24 | sizes.clear(); | 55 | sizes.clear(); |
| 25 | } | 56 | } |
| 26 | 57 | ||
| 27 | void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) { | 58 | void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) { |
| 28 | buffers.push_back(buffer); | 59 | buffer_pointers.push_back(buffer); |
| 29 | offsets.push_back(offset); | 60 | offsets.push_back(offset); |
| 30 | sizes.push_back(size); | 61 | sizes.push_back(size); |
| 31 | } | 62 | } |
| 32 | 63 | ||
| 33 | void BindBuffersRangePushBuffer::Bind() const { | 64 | void BindBuffersRangePushBuffer::Bind() { |
| 34 | const std::size_t count{buffers.size()}; | 65 | // Ensure sizes are valid. |
| 66 | const std::size_t count{buffer_pointers.size()}; | ||
| 35 | DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); | 67 | DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); |
| 36 | if (count == 0) { | 68 | if (count == 0) { |
| 37 | return; | 69 | return; |
| 38 | } | 70 | } |
| 71 | |||
| 72 | // Dereference buffers. | ||
| 73 | buffers.resize(count); | ||
| 74 | std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(), | ||
| 75 | [](const GLuint* pointer) { return *pointer; }); | ||
| 76 | |||
| 39 | glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), | 77 | glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), |
| 40 | sizes.data()); | 78 | sizes.data()); |
| 41 | } | 79 | } |
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 4a752f3b4..6c2b45546 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h | |||
| @@ -11,20 +11,49 @@ | |||
| 11 | 11 | ||
| 12 | namespace OpenGL { | 12 | namespace OpenGL { |
| 13 | 13 | ||
| 14 | class BindBuffersRangePushBuffer { | 14 | class VertexArrayPushBuffer final { |
| 15 | public: | 15 | public: |
| 16 | BindBuffersRangePushBuffer(GLenum target); | 16 | explicit VertexArrayPushBuffer(); |
| 17 | ~VertexArrayPushBuffer(); | ||
| 18 | |||
| 19 | void Setup(GLuint vao_); | ||
| 20 | |||
| 21 | void SetIndexBuffer(const GLuint* buffer); | ||
| 22 | |||
| 23 | void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset, | ||
| 24 | GLsizei stride); | ||
| 25 | |||
| 26 | void Bind(); | ||
| 27 | |||
| 28 | private: | ||
| 29 | struct Entry { | ||
| 30 | GLuint binding_index{}; | ||
| 31 | const GLuint* buffer{}; | ||
| 32 | GLintptr offset{}; | ||
| 33 | GLsizei stride{}; | ||
| 34 | }; | ||
| 35 | |||
| 36 | GLuint vao{}; | ||
| 37 | const GLuint* index_buffer{}; | ||
| 38 | std::vector<Entry> vertex_buffers; | ||
| 39 | }; | ||
| 40 | |||
| 41 | class BindBuffersRangePushBuffer final { | ||
| 42 | public: | ||
| 43 | explicit BindBuffersRangePushBuffer(GLenum target); | ||
| 17 | ~BindBuffersRangePushBuffer(); | 44 | ~BindBuffersRangePushBuffer(); |
| 18 | 45 | ||
| 19 | void Setup(GLuint first_); | 46 | void Setup(GLuint first_); |
| 20 | 47 | ||
| 21 | void Push(GLuint buffer, GLintptr offset, GLsizeiptr size); | 48 | void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size); |
| 22 | 49 | ||
| 23 | void Bind() const; | 50 | void Bind(); |
| 24 | 51 | ||
| 25 | private: | 52 | private: |
| 26 | GLenum target; | 53 | GLenum target{}; |
| 27 | GLuint first; | 54 | GLuint first{}; |
| 55 | std::vector<const GLuint*> buffer_pointers; | ||
| 56 | |||
| 28 | std::vector<GLuint> buffers; | 57 | std::vector<GLuint> buffers; |
| 29 | std::vector<GLintptr> offsets; | 58 | std::vector<GLintptr> offsets; |
| 30 | std::vector<GLsizeiptr> sizes; | 59 | std::vector<GLsizeiptr> sizes; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 02a9f5ecb..d2e9f4031 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -109,8 +109,8 @@ void VKBufferCache::Reserve(std::size_t max_size) { | |||
| 109 | } | 109 | } |
| 110 | } | 110 | } |
| 111 | 111 | ||
| 112 | VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) { | 112 | void VKBufferCache::Send() { |
| 113 | return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base); | 113 | stream_buffer->Send(buffer_offset - buffer_offset_base); |
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | void VKBufferCache::AlignBuffer(std::size_t alignment) { | 116 | void VKBufferCache::AlignBuffer(std::size_t alignment) { |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3edf460df..49f13bcdc 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -77,7 +77,7 @@ public: | |||
| 77 | void Reserve(std::size_t max_size); | 77 | void Reserve(std::size_t max_size); |
| 78 | 78 | ||
| 79 | /// Ensures that the set data is sent to the device. | 79 | /// Ensures that the set data is sent to the device. |
| 80 | [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx); | 80 | void Send(); |
| 81 | 81 | ||
| 82 | /// Returns the buffer cache handle. | 82 | /// Returns the buffer cache handle. |
| 83 | vk::Buffer GetBuffer() const { | 83 | vk::Buffer GetBuffer() const { |
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h index 771b05c73..1f73b716b 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.h +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h | |||
| @@ -4,9 +4,6 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/renderer_vulkan/declarations.h" | 7 | #include "video_core/renderer_vulkan/declarations.h" |
| 11 | #include "video_core/sampler_cache.h" | 8 | #include "video_core/sampler_cache.h" |
| 12 | #include "video_core/textures/texture.h" | 9 | #include "video_core/textures/texture.h" |
| @@ -21,9 +18,9 @@ public: | |||
| 21 | ~VKSamplerCache(); | 18 | ~VKSamplerCache(); |
| 22 | 19 | ||
| 23 | protected: | 20 | protected: |
| 24 | UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; | 21 | UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; |
| 25 | 22 | ||
| 26 | vk::Sampler ToSamplerType(const UniqueSampler& sampler) const; | 23 | vk::Sampler ToSamplerType(const UniqueSampler& sampler) const override; |
| 27 | 24 | ||
| 28 | private: | 25 | private: |
| 29 | const VKDevice& device; | 26 | const VKDevice& device; |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index f1fea1871..0f8116458 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -19,23 +19,19 @@ VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_man | |||
| 19 | 19 | ||
| 20 | VKScheduler::~VKScheduler() = default; | 20 | VKScheduler::~VKScheduler() = default; |
| 21 | 21 | ||
| 22 | VKExecutionContext VKScheduler::GetExecutionContext() const { | 22 | void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) { |
| 23 | return VKExecutionContext(current_fence, current_cmdbuf); | ||
| 24 | } | ||
| 25 | |||
| 26 | VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) { | ||
| 27 | SubmitExecution(semaphore); | 23 | SubmitExecution(semaphore); |
| 28 | current_fence->Release(); | 24 | if (release_fence) |
| 25 | current_fence->Release(); | ||
| 29 | AllocateNewContext(); | 26 | AllocateNewContext(); |
| 30 | return GetExecutionContext(); | ||
| 31 | } | 27 | } |
| 32 | 28 | ||
| 33 | VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) { | 29 | void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) { |
| 34 | SubmitExecution(semaphore); | 30 | SubmitExecution(semaphore); |
| 35 | current_fence->Wait(); | 31 | current_fence->Wait(); |
| 36 | current_fence->Release(); | 32 | if (release_fence) |
| 33 | current_fence->Release(); | ||
| 37 | AllocateNewContext(); | 34 | AllocateNewContext(); |
| 38 | return GetExecutionContext(); | ||
| 39 | } | 35 | } |
| 40 | 36 | ||
| 41 | void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { | 37 | void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index cfaf5376f..0e5b49c7f 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -10,10 +10,43 @@ | |||
| 10 | namespace Vulkan { | 10 | namespace Vulkan { |
| 11 | 11 | ||
| 12 | class VKDevice; | 12 | class VKDevice; |
| 13 | class VKExecutionContext; | ||
| 14 | class VKFence; | 13 | class VKFence; |
| 15 | class VKResourceManager; | 14 | class VKResourceManager; |
| 16 | 15 | ||
| 16 | class VKFenceView { | ||
| 17 | public: | ||
| 18 | VKFenceView() = default; | ||
| 19 | VKFenceView(VKFence* const& fence) : fence{fence} {} | ||
| 20 | |||
| 21 | VKFence* operator->() const noexcept { | ||
| 22 | return fence; | ||
| 23 | } | ||
| 24 | |||
| 25 | operator VKFence&() const noexcept { | ||
| 26 | return *fence; | ||
| 27 | } | ||
| 28 | |||
| 29 | private: | ||
| 30 | VKFence* const& fence; | ||
| 31 | }; | ||
| 32 | |||
| 33 | class VKCommandBufferView { | ||
| 34 | public: | ||
| 35 | VKCommandBufferView() = default; | ||
| 36 | VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {} | ||
| 37 | |||
| 38 | const vk::CommandBuffer* operator->() const noexcept { | ||
| 39 | return &cmdbuf; | ||
| 40 | } | ||
| 41 | |||
| 42 | operator vk::CommandBuffer() const noexcept { | ||
| 43 | return cmdbuf; | ||
| 44 | } | ||
| 45 | |||
| 46 | private: | ||
| 47 | const vk::CommandBuffer& cmdbuf; | ||
| 48 | }; | ||
| 49 | |||
| 17 | /// The scheduler abstracts command buffer and fence management with an interface that's able to do | 50 | /// The scheduler abstracts command buffer and fence management with an interface that's able to do |
| 18 | /// OpenGL-like operations on Vulkan command buffers. | 51 | /// OpenGL-like operations on Vulkan command buffers. |
| 19 | class VKScheduler { | 52 | class VKScheduler { |
| @@ -21,16 +54,21 @@ public: | |||
| 21 | explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); | 54 | explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); |
| 22 | ~VKScheduler(); | 55 | ~VKScheduler(); |
| 23 | 56 | ||
| 24 | /// Gets the current execution context. | 57 | /// Gets a reference to the current fence. |
| 25 | [[nodiscard]] VKExecutionContext GetExecutionContext() const; | 58 | VKFenceView GetFence() const { |
| 59 | return current_fence; | ||
| 60 | } | ||
| 61 | |||
| 62 | /// Gets a reference to the current command buffer. | ||
| 63 | VKCommandBufferView GetCommandBuffer() const { | ||
| 64 | return current_cmdbuf; | ||
| 65 | } | ||
| 26 | 66 | ||
| 27 | /// Sends the current execution context to the GPU. It invalidates the current execution context | 67 | /// Sends the current execution context to the GPU. |
| 28 | /// and returns a new one. | 68 | void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr); |
| 29 | VKExecutionContext Flush(vk::Semaphore semaphore = nullptr); | ||
| 30 | 69 | ||
| 31 | /// Sends the current execution context to the GPU and waits for it to complete. It invalidates | 70 | /// Sends the current execution context to the GPU and waits for it to complete. |
| 32 | /// the current execution context and returns a new one. | 71 | void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr); |
| 33 | VKExecutionContext Finish(vk::Semaphore semaphore = nullptr); | ||
| 34 | 72 | ||
| 35 | private: | 73 | private: |
| 36 | void SubmitExecution(vk::Semaphore semaphore); | 74 | void SubmitExecution(vk::Semaphore semaphore); |
| @@ -44,26 +82,4 @@ private: | |||
| 44 | VKFence* next_fence = nullptr; | 82 | VKFence* next_fence = nullptr; |
| 45 | }; | 83 | }; |
| 46 | 84 | ||
| 47 | class VKExecutionContext { | ||
| 48 | friend class VKScheduler; | ||
| 49 | |||
| 50 | public: | ||
| 51 | VKExecutionContext() = default; | ||
| 52 | |||
| 53 | VKFence& GetFence() const { | ||
| 54 | return *fence; | ||
| 55 | } | ||
| 56 | |||
| 57 | vk::CommandBuffer GetCommandBuffer() const { | ||
| 58 | return cmdbuf; | ||
| 59 | } | ||
| 60 | |||
| 61 | private: | ||
| 62 | explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf) | ||
| 63 | : fence{fence}, cmdbuf{cmdbuf} {} | ||
| 64 | |||
| 65 | VKFence* fence{}; | ||
| 66 | vk::CommandBuffer cmdbuf; | ||
| 67 | }; | ||
| 68 | |||
| 69 | } // namespace Vulkan | 85 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 97ce214b1..a35b45c9c 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -205,10 +205,6 @@ public: | |||
| 205 | } | 205 | } |
| 206 | 206 | ||
| 207 | private: | 207 | private: |
| 208 | using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation); | ||
| 209 | using OperationDecompilersArray = | ||
| 210 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; | ||
| 211 | |||
| 212 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); | 208 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); |
| 213 | 209 | ||
| 214 | void AllocateBindings() { | 210 | void AllocateBindings() { |
| @@ -430,20 +426,17 @@ private: | |||
| 430 | instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input, | 426 | instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input, |
| 431 | t_in_uint, "instance_index"); | 427 | t_in_uint, "instance_index"); |
| 432 | 428 | ||
| 433 | bool is_point_size_declared = false; | ||
| 434 | bool is_clip_distances_declared = false; | 429 | bool is_clip_distances_declared = false; |
| 435 | for (const auto index : ir.GetOutputAttributes()) { | 430 | for (const auto index : ir.GetOutputAttributes()) { |
| 436 | if (index == Attribute::Index::PointSize) { | 431 | if (index == Attribute::Index::ClipDistances0123 || |
| 437 | is_point_size_declared = true; | 432 | index == Attribute::Index::ClipDistances4567) { |
| 438 | } else if (index == Attribute::Index::ClipDistances0123 || | ||
| 439 | index == Attribute::Index::ClipDistances4567) { | ||
| 440 | is_clip_distances_declared = true; | 433 | is_clip_distances_declared = true; |
| 441 | } | 434 | } |
| 442 | } | 435 | } |
| 443 | 436 | ||
| 444 | std::vector<Id> members; | 437 | std::vector<Id> members; |
| 445 | members.push_back(t_float4); | 438 | members.push_back(t_float4); |
| 446 | if (is_point_size_declared) { | 439 | if (ir.UsesPointSize()) { |
| 447 | members.push_back(t_float); | 440 | members.push_back(t_float); |
| 448 | } | 441 | } |
| 449 | if (is_clip_distances_declared) { | 442 | if (is_clip_distances_declared) { |
| @@ -466,7 +459,7 @@ private: | |||
| 466 | 459 | ||
| 467 | position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true); | 460 | position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true); |
| 468 | point_size_index = | 461 | point_size_index = |
| 469 | MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", is_point_size_declared); | 462 | MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", ir.UsesPointSize()); |
| 470 | clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances", | 463 | clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances", |
| 471 | is_clip_distances_declared); | 464 | is_clip_distances_declared); |
| 472 | 465 | ||
| @@ -712,7 +705,8 @@ private: | |||
| 712 | case Attribute::Index::Position: | 705 | case Attribute::Index::Position: |
| 713 | return AccessElement(t_out_float, per_vertex, position_index, | 706 | return AccessElement(t_out_float, per_vertex, position_index, |
| 714 | abuf->GetElement()); | 707 | abuf->GetElement()); |
| 715 | case Attribute::Index::PointSize: | 708 | case Attribute::Index::LayerViewportPointSize: |
| 709 | UNIMPLEMENTED_IF(abuf->GetElement() != 3); | ||
| 716 | return AccessElement(t_out_float, per_vertex, point_size_index); | 710 | return AccessElement(t_out_float, per_vertex, point_size_index); |
| 717 | case Attribute::Index::ClipDistances0123: | 711 | case Attribute::Index::ClipDistances0123: |
| 718 | return AccessElement(t_out_float, per_vertex, clip_distances_index, | 712 | return AccessElement(t_out_float, per_vertex, clip_distances_index, |
| @@ -741,6 +735,16 @@ private: | |||
| 741 | return {}; | 735 | return {}; |
| 742 | } | 736 | } |
| 743 | 737 | ||
| 738 | Id FCastHalf0(Operation operation) { | ||
| 739 | UNIMPLEMENTED(); | ||
| 740 | return {}; | ||
| 741 | } | ||
| 742 | |||
| 743 | Id FCastHalf1(Operation operation) { | ||
| 744 | UNIMPLEMENTED(); | ||
| 745 | return {}; | ||
| 746 | } | ||
| 747 | |||
| 744 | Id HNegate(Operation operation) { | 748 | Id HNegate(Operation operation) { |
| 745 | UNIMPLEMENTED(); | 749 | UNIMPLEMENTED(); |
| 746 | return {}; | 750 | return {}; |
| @@ -751,6 +755,11 @@ private: | |||
| 751 | return {}; | 755 | return {}; |
| 752 | } | 756 | } |
| 753 | 757 | ||
| 758 | Id HCastFloat(Operation operation) { | ||
| 759 | UNIMPLEMENTED(); | ||
| 760 | return {}; | ||
| 761 | } | ||
| 762 | |||
| 754 | Id HUnpack(Operation operation) { | 763 | Id HUnpack(Operation operation) { |
| 755 | UNIMPLEMENTED(); | 764 | UNIMPLEMENTED(); |
| 756 | return {}; | 765 | return {}; |
| @@ -806,12 +815,7 @@ private: | |||
| 806 | return {}; | 815 | return {}; |
| 807 | } | 816 | } |
| 808 | 817 | ||
| 809 | Id LogicalAll2(Operation operation) { | 818 | Id LogicalAnd2(Operation operation) { |
| 810 | UNIMPLEMENTED(); | ||
| 811 | return {}; | ||
| 812 | } | ||
| 813 | |||
| 814 | Id LogicalAny2(Operation operation) { | ||
| 815 | UNIMPLEMENTED(); | 819 | UNIMPLEMENTED(); |
| 816 | return {}; | 820 | return {}; |
| 817 | } | 821 | } |
| @@ -949,6 +953,14 @@ private: | |||
| 949 | return {}; | 953 | return {}; |
| 950 | } | 954 | } |
| 951 | 955 | ||
| 956 | Id BranchIndirect(Operation operation) { | ||
| 957 | const Id op_a = VisitOperand<Type::Uint>(operation, 0); | ||
| 958 | |||
| 959 | Emit(OpStore(jmp_to, op_a)); | ||
| 960 | BranchingOp([&]() { Emit(OpBranch(continue_label)); }); | ||
| 961 | return {}; | ||
| 962 | } | ||
| 963 | |||
| 952 | Id PushFlowStack(Operation operation) { | 964 | Id PushFlowStack(Operation operation) { |
| 953 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | 965 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); |
| 954 | ASSERT(target); | 966 | ASSERT(target); |
| @@ -1060,6 +1072,26 @@ private: | |||
| 1060 | return {}; | 1072 | return {}; |
| 1061 | } | 1073 | } |
| 1062 | 1074 | ||
| 1075 | Id BallotThread(Operation) { | ||
| 1076 | UNIMPLEMENTED(); | ||
| 1077 | return {}; | ||
| 1078 | } | ||
| 1079 | |||
| 1080 | Id VoteAll(Operation) { | ||
| 1081 | UNIMPLEMENTED(); | ||
| 1082 | return {}; | ||
| 1083 | } | ||
| 1084 | |||
| 1085 | Id VoteAny(Operation) { | ||
| 1086 | UNIMPLEMENTED(); | ||
| 1087 | return {}; | ||
| 1088 | } | ||
| 1089 | |||
| 1090 | Id VoteEqual(Operation) { | ||
| 1091 | UNIMPLEMENTED(); | ||
| 1092 | return {}; | ||
| 1093 | } | ||
| 1094 | |||
| 1063 | Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, | 1095 | Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, |
| 1064 | const std::string& name) { | 1096 | const std::string& name) { |
| 1065 | const Id id = OpVariable(type, storage); | 1097 | const Id id = OpVariable(type, storage); |
| @@ -1200,7 +1232,7 @@ private: | |||
| 1200 | return {}; | 1232 | return {}; |
| 1201 | } | 1233 | } |
| 1202 | 1234 | ||
| 1203 | static constexpr OperationDecompilersArray operation_decompilers = { | 1235 | static constexpr std::array operation_decompilers = { |
| 1204 | &SPIRVDecompiler::Assign, | 1236 | &SPIRVDecompiler::Assign, |
| 1205 | 1237 | ||
| 1206 | &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, | 1238 | &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, |
| @@ -1213,6 +1245,8 @@ private: | |||
| 1213 | &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, | 1245 | &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, |
| 1214 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, | 1246 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, |
| 1215 | &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, | 1247 | &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, |
| 1248 | &SPIRVDecompiler::FCastHalf0, | ||
| 1249 | &SPIRVDecompiler::FCastHalf1, | ||
| 1216 | &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, | 1250 | &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, |
| 1217 | &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, | 1251 | &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, |
| 1218 | &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, | 1252 | &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, |
| @@ -1273,6 +1307,7 @@ private: | |||
| 1273 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, | 1307 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, |
| 1274 | &SPIRVDecompiler::HNegate, | 1308 | &SPIRVDecompiler::HNegate, |
| 1275 | &SPIRVDecompiler::HClamp, | 1309 | &SPIRVDecompiler::HClamp, |
| 1310 | &SPIRVDecompiler::HCastFloat, | ||
| 1276 | &SPIRVDecompiler::HUnpack, | 1311 | &SPIRVDecompiler::HUnpack, |
| 1277 | &SPIRVDecompiler::HMergeF32, | 1312 | &SPIRVDecompiler::HMergeF32, |
| 1278 | &SPIRVDecompiler::HMergeH0, | 1313 | &SPIRVDecompiler::HMergeH0, |
| @@ -1285,8 +1320,7 @@ private: | |||
| 1285 | &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, | 1320 | &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, |
| 1286 | &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, | 1321 | &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, |
| 1287 | &SPIRVDecompiler::LogicalPick2, | 1322 | &SPIRVDecompiler::LogicalPick2, |
| 1288 | &SPIRVDecompiler::LogicalAll2, | 1323 | &SPIRVDecompiler::LogicalAnd2, |
| 1289 | &SPIRVDecompiler::LogicalAny2, | ||
| 1290 | 1324 | ||
| 1291 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, | 1325 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, |
| 1292 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, | 1326 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, |
| @@ -1334,6 +1368,7 @@ private: | |||
| 1334 | &SPIRVDecompiler::ImageStore, | 1368 | &SPIRVDecompiler::ImageStore, |
| 1335 | 1369 | ||
| 1336 | &SPIRVDecompiler::Branch, | 1370 | &SPIRVDecompiler::Branch, |
| 1371 | &SPIRVDecompiler::BranchIndirect, | ||
| 1337 | &SPIRVDecompiler::PushFlowStack, | 1372 | &SPIRVDecompiler::PushFlowStack, |
| 1338 | &SPIRVDecompiler::PopFlowStack, | 1373 | &SPIRVDecompiler::PopFlowStack, |
| 1339 | &SPIRVDecompiler::Exit, | 1374 | &SPIRVDecompiler::Exit, |
| @@ -1349,7 +1384,13 @@ private: | |||
| 1349 | &SPIRVDecompiler::WorkGroupId<0>, | 1384 | &SPIRVDecompiler::WorkGroupId<0>, |
| 1350 | &SPIRVDecompiler::WorkGroupId<1>, | 1385 | &SPIRVDecompiler::WorkGroupId<1>, |
| 1351 | &SPIRVDecompiler::WorkGroupId<2>, | 1386 | &SPIRVDecompiler::WorkGroupId<2>, |
| 1387 | |||
| 1388 | &SPIRVDecompiler::BallotThread, | ||
| 1389 | &SPIRVDecompiler::VoteAll, | ||
| 1390 | &SPIRVDecompiler::VoteAny, | ||
| 1391 | &SPIRVDecompiler::VoteEqual, | ||
| 1352 | }; | 1392 | }; |
| 1393 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | ||
| 1353 | 1394 | ||
| 1354 | const VKDevice& device; | 1395 | const VKDevice& device; |
| 1355 | const ShaderIR& ir; | 1396 | const ShaderIR& ir; |
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 58ffa42f2..62f1427f5 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp | |||
| @@ -46,12 +46,12 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) { | |||
| 46 | return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; | 46 | return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) { | 49 | void VKStreamBuffer::Send(u64 size) { |
| 50 | ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); | 50 | ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); |
| 51 | 51 | ||
| 52 | if (invalidation_mark) { | 52 | if (invalidation_mark) { |
| 53 | // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. | 53 | // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. |
| 54 | exctx = scheduler.Flush(); | 54 | scheduler.Flush(); |
| 55 | std::for_each(watches.begin(), watches.begin() + *invalidation_mark, | 55 | std::for_each(watches.begin(), watches.begin() + *invalidation_mark, |
| 56 | [&](auto& resource) { resource->Wait(); }); | 56 | [&](auto& resource) { resource->Wait(); }); |
| 57 | invalidation_mark = std::nullopt; | 57 | invalidation_mark = std::nullopt; |
| @@ -62,11 +62,9 @@ VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) { | |||
| 62 | ReserveWatches(WATCHES_RESERVE_CHUNK); | 62 | ReserveWatches(WATCHES_RESERVE_CHUNK); |
| 63 | } | 63 | } |
| 64 | // Add a watch for this allocation. | 64 | // Add a watch for this allocation. |
| 65 | watches[used_watches++]->Watch(exctx.GetFence()); | 65 | watches[used_watches++]->Watch(scheduler.GetFence()); |
| 66 | 66 | ||
| 67 | offset += size; | 67 | offset += size; |
| 68 | |||
| 69 | return exctx; | ||
| 70 | } | 68 | } |
| 71 | 69 | ||
| 72 | void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { | 70 | void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { |
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 69d036ccd..842e54162 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h | |||
| @@ -37,7 +37,7 @@ public: | |||
| 37 | std::tuple<u8*, u64, bool> Reserve(u64 size); | 37 | std::tuple<u8*, u64, bool> Reserve(u64 size); |
| 38 | 38 | ||
| 39 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. | 39 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. |
| 40 | [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size); | 40 | void Send(u64 size); |
| 41 | 41 | ||
| 42 | vk::Buffer GetBuffer() const { | 42 | vk::Buffer GetBuffer() const { |
| 43 | return *buffer; | 43 | return *buffer; |
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp new file mode 100644 index 000000000..ec3a76690 --- /dev/null +++ b/src/video_core/shader/control_flow.cpp | |||
| @@ -0,0 +1,481 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <list> | ||
| 6 | #include <map> | ||
| 7 | #include <stack> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include <unordered_set> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/shader/control_flow.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | |||
| 17 | namespace VideoCommon::Shader { | ||
| 18 | namespace { | ||
| 19 | using Tegra::Shader::Instruction; | ||
| 20 | using Tegra::Shader::OpCode; | ||
| 21 | |||
| 22 | constexpr s32 unassigned_branch = -2; | ||
| 23 | |||
| 24 | struct Query { | ||
| 25 | u32 address{}; | ||
| 26 | std::stack<u32> ssy_stack{}; | ||
| 27 | std::stack<u32> pbk_stack{}; | ||
| 28 | }; | ||
| 29 | |||
| 30 | struct BlockStack { | ||
| 31 | BlockStack() = default; | ||
| 32 | explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} | ||
| 33 | std::stack<u32> ssy_stack{}; | ||
| 34 | std::stack<u32> pbk_stack{}; | ||
| 35 | }; | ||
| 36 | |||
| 37 | struct BlockBranchInfo { | ||
| 38 | Condition condition{}; | ||
| 39 | s32 address{exit_branch}; | ||
| 40 | bool kill{}; | ||
| 41 | bool is_sync{}; | ||
| 42 | bool is_brk{}; | ||
| 43 | bool ignore{}; | ||
| 44 | }; | ||
| 45 | |||
| 46 | struct BlockInfo { | ||
| 47 | u32 start{}; | ||
| 48 | u32 end{}; | ||
| 49 | bool visited{}; | ||
| 50 | BlockBranchInfo branch{}; | ||
| 51 | |||
| 52 | bool IsInside(const u32 address) const { | ||
| 53 | return start <= address && address <= end; | ||
| 54 | } | ||
| 55 | }; | ||
| 56 | |||
| 57 | struct CFGRebuildState { | ||
| 58 | explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, | ||
| 59 | const u32 start) | ||
| 60 | : start{start}, program_code{program_code}, program_size{program_size} {} | ||
| 61 | |||
| 62 | u32 start{}; | ||
| 63 | std::vector<BlockInfo> block_info{}; | ||
| 64 | std::list<u32> inspect_queries{}; | ||
| 65 | std::list<Query> queries{}; | ||
| 66 | std::unordered_map<u32, u32> registered{}; | ||
| 67 | std::unordered_set<u32> labels{}; | ||
| 68 | std::map<u32, u32> ssy_labels{}; | ||
| 69 | std::map<u32, u32> pbk_labels{}; | ||
| 70 | std::unordered_map<u32, BlockStack> stacks{}; | ||
| 71 | const ProgramCode& program_code; | ||
| 72 | const std::size_t program_size; | ||
| 73 | }; | ||
| 74 | |||
| 75 | enum class BlockCollision : u32 { None, Found, Inside }; | ||
| 76 | |||
| 77 | std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) { | ||
| 78 | const auto& blocks = state.block_info; | ||
| 79 | for (u32 index = 0; index < blocks.size(); index++) { | ||
| 80 | if (blocks[index].start == address) { | ||
| 81 | return {BlockCollision::Found, index}; | ||
| 82 | } | ||
| 83 | if (blocks[index].IsInside(address)) { | ||
| 84 | return {BlockCollision::Inside, index}; | ||
| 85 | } | ||
| 86 | } | ||
| 87 | return {BlockCollision::None, 0xFFFFFFFF}; | ||
| 88 | } | ||
| 89 | |||
| 90 | struct ParseInfo { | ||
| 91 | BlockBranchInfo branch_info{}; | ||
| 92 | u32 end_address{}; | ||
| 93 | }; | ||
| 94 | |||
| 95 | BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { | ||
| 96 | auto& it = state.block_info.emplace_back(); | ||
| 97 | it.start = start; | ||
| 98 | it.end = end; | ||
| 99 | const u32 index = static_cast<u32>(state.block_info.size() - 1); | ||
| 100 | state.registered.insert({start, index}); | ||
| 101 | return it; | ||
| 102 | } | ||
| 103 | |||
| 104 | Pred GetPredicate(u32 index, bool negated) { | ||
| 105 | return static_cast<Pred>(index + (negated ? 8 : 0)); | ||
| 106 | } | ||
| 107 | |||
| 108 | /** | ||
| 109 | * Returns whether the instruction at the specified offset is a 'sched' instruction. | ||
| 110 | * Sched instructions always appear before a sequence of 3 instructions. | ||
| 111 | */ | ||
| 112 | constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | ||
| 113 | constexpr u32 SchedPeriod = 4; | ||
| 114 | u32 absolute_offset = offset - main_offset; | ||
| 115 | |||
| 116 | return (absolute_offset % SchedPeriod) == 0; | ||
| 117 | } | ||
| 118 | |||
| 119 | enum class ParseResult : u32 { | ||
| 120 | ControlCaught, | ||
| 121 | BlockEnd, | ||
| 122 | AbnormalFlow, | ||
| 123 | }; | ||
| 124 | |||
| 125 | std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { | ||
| 126 | u32 offset = static_cast<u32>(address); | ||
| 127 | const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction)); | ||
| 128 | ParseInfo parse_info{}; | ||
| 129 | |||
| 130 | const auto insert_label = [](CFGRebuildState& state, u32 address) { | ||
| 131 | const auto pair = state.labels.emplace(address); | ||
| 132 | if (pair.second) { | ||
| 133 | state.inspect_queries.push_back(address); | ||
| 134 | } | ||
| 135 | }; | ||
| 136 | |||
| 137 | while (true) { | ||
| 138 | if (offset >= end_address) { | ||
| 139 | // ASSERT_OR_EXECUTE can't be used, as it ignores the break | ||
| 140 | ASSERT_MSG(false, "Shader passed the current limit!"); | ||
| 141 | parse_info.branch_info.address = exit_branch; | ||
| 142 | parse_info.branch_info.ignore = false; | ||
| 143 | break; | ||
| 144 | } | ||
| 145 | if (state.registered.count(offset) != 0) { | ||
| 146 | parse_info.branch_info.address = offset; | ||
| 147 | parse_info.branch_info.ignore = true; | ||
| 148 | break; | ||
| 149 | } | ||
| 150 | if (IsSchedInstruction(offset, state.start)) { | ||
| 151 | offset++; | ||
| 152 | continue; | ||
| 153 | } | ||
| 154 | const Instruction instr = {state.program_code[offset]}; | ||
| 155 | const auto opcode = OpCode::Decode(instr); | ||
| 156 | if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { | ||
| 157 | offset++; | ||
| 158 | continue; | ||
| 159 | } | ||
| 160 | |||
| 161 | switch (opcode->get().GetId()) { | ||
| 162 | case OpCode::Id::EXIT: { | ||
| 163 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 164 | parse_info.branch_info.condition.predicate = | ||
| 165 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 166 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 167 | offset++; | ||
| 168 | continue; | ||
| 169 | } | ||
| 170 | const ConditionCode cc = instr.flow_condition_code; | ||
| 171 | parse_info.branch_info.condition.cc = cc; | ||
| 172 | if (cc == ConditionCode::F) { | ||
| 173 | offset++; | ||
| 174 | continue; | ||
| 175 | } | ||
| 176 | parse_info.branch_info.address = exit_branch; | ||
| 177 | parse_info.branch_info.kill = false; | ||
| 178 | parse_info.branch_info.is_sync = false; | ||
| 179 | parse_info.branch_info.is_brk = false; | ||
| 180 | parse_info.branch_info.ignore = false; | ||
| 181 | parse_info.end_address = offset; | ||
| 182 | |||
| 183 | return {ParseResult::ControlCaught, parse_info}; | ||
| 184 | } | ||
| 185 | case OpCode::Id::BRA: { | ||
| 186 | if (instr.bra.constant_buffer != 0) { | ||
| 187 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 188 | } | ||
| 189 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 190 | parse_info.branch_info.condition.predicate = | ||
| 191 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 192 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 193 | offset++; | ||
| 194 | continue; | ||
| 195 | } | ||
| 196 | const ConditionCode cc = instr.flow_condition_code; | ||
| 197 | parse_info.branch_info.condition.cc = cc; | ||
| 198 | if (cc == ConditionCode::F) { | ||
| 199 | offset++; | ||
| 200 | continue; | ||
| 201 | } | ||
| 202 | const u32 branch_offset = offset + instr.bra.GetBranchTarget(); | ||
| 203 | if (branch_offset == 0) { | ||
| 204 | parse_info.branch_info.address = exit_branch; | ||
| 205 | } else { | ||
| 206 | parse_info.branch_info.address = branch_offset; | ||
| 207 | } | ||
| 208 | insert_label(state, branch_offset); | ||
| 209 | parse_info.branch_info.kill = false; | ||
| 210 | parse_info.branch_info.is_sync = false; | ||
| 211 | parse_info.branch_info.is_brk = false; | ||
| 212 | parse_info.branch_info.ignore = false; | ||
| 213 | parse_info.end_address = offset; | ||
| 214 | |||
| 215 | return {ParseResult::ControlCaught, parse_info}; | ||
| 216 | } | ||
| 217 | case OpCode::Id::SYNC: { | ||
| 218 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 219 | parse_info.branch_info.condition.predicate = | ||
| 220 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 221 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 222 | offset++; | ||
| 223 | continue; | ||
| 224 | } | ||
| 225 | const ConditionCode cc = instr.flow_condition_code; | ||
| 226 | parse_info.branch_info.condition.cc = cc; | ||
| 227 | if (cc == ConditionCode::F) { | ||
| 228 | offset++; | ||
| 229 | continue; | ||
| 230 | } | ||
| 231 | parse_info.branch_info.address = unassigned_branch; | ||
| 232 | parse_info.branch_info.kill = false; | ||
| 233 | parse_info.branch_info.is_sync = true; | ||
| 234 | parse_info.branch_info.is_brk = false; | ||
| 235 | parse_info.branch_info.ignore = false; | ||
| 236 | parse_info.end_address = offset; | ||
| 237 | |||
| 238 | return {ParseResult::ControlCaught, parse_info}; | ||
| 239 | } | ||
| 240 | case OpCode::Id::BRK: { | ||
| 241 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 242 | parse_info.branch_info.condition.predicate = | ||
| 243 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 244 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 245 | offset++; | ||
| 246 | continue; | ||
| 247 | } | ||
| 248 | const ConditionCode cc = instr.flow_condition_code; | ||
| 249 | parse_info.branch_info.condition.cc = cc; | ||
| 250 | if (cc == ConditionCode::F) { | ||
| 251 | offset++; | ||
| 252 | continue; | ||
| 253 | } | ||
| 254 | parse_info.branch_info.address = unassigned_branch; | ||
| 255 | parse_info.branch_info.kill = false; | ||
| 256 | parse_info.branch_info.is_sync = false; | ||
| 257 | parse_info.branch_info.is_brk = true; | ||
| 258 | parse_info.branch_info.ignore = false; | ||
| 259 | parse_info.end_address = offset; | ||
| 260 | |||
| 261 | return {ParseResult::ControlCaught, parse_info}; | ||
| 262 | } | ||
| 263 | case OpCode::Id::KIL: { | ||
| 264 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 265 | parse_info.branch_info.condition.predicate = | ||
| 266 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 267 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 268 | offset++; | ||
| 269 | continue; | ||
| 270 | } | ||
| 271 | const ConditionCode cc = instr.flow_condition_code; | ||
| 272 | parse_info.branch_info.condition.cc = cc; | ||
| 273 | if (cc == ConditionCode::F) { | ||
| 274 | offset++; | ||
| 275 | continue; | ||
| 276 | } | ||
| 277 | parse_info.branch_info.address = exit_branch; | ||
| 278 | parse_info.branch_info.kill = true; | ||
| 279 | parse_info.branch_info.is_sync = false; | ||
| 280 | parse_info.branch_info.is_brk = false; | ||
| 281 | parse_info.branch_info.ignore = false; | ||
| 282 | parse_info.end_address = offset; | ||
| 283 | |||
| 284 | return {ParseResult::ControlCaught, parse_info}; | ||
| 285 | } | ||
| 286 | case OpCode::Id::SSY: { | ||
| 287 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 288 | insert_label(state, target); | ||
| 289 | state.ssy_labels.emplace(offset, target); | ||
| 290 | break; | ||
| 291 | } | ||
| 292 | case OpCode::Id::PBK: { | ||
| 293 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 294 | insert_label(state, target); | ||
| 295 | state.pbk_labels.emplace(offset, target); | ||
| 296 | break; | ||
| 297 | } | ||
| 298 | case OpCode::Id::BRX: { | ||
| 299 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 300 | } | ||
| 301 | default: | ||
| 302 | break; | ||
| 303 | } | ||
| 304 | |||
| 305 | offset++; | ||
| 306 | } | ||
| 307 | parse_info.branch_info.kill = false; | ||
| 308 | parse_info.branch_info.is_sync = false; | ||
| 309 | parse_info.branch_info.is_brk = false; | ||
| 310 | parse_info.end_address = offset - 1; | ||
| 311 | return {ParseResult::BlockEnd, parse_info}; | ||
| 312 | } | ||
| 313 | |||
| 314 | bool TryInspectAddress(CFGRebuildState& state) { | ||
| 315 | if (state.inspect_queries.empty()) { | ||
| 316 | return false; | ||
| 317 | } | ||
| 318 | |||
| 319 | const u32 address = state.inspect_queries.front(); | ||
| 320 | state.inspect_queries.pop_front(); | ||
| 321 | const auto [result, block_index] = TryGetBlock(state, address); | ||
| 322 | switch (result) { | ||
| 323 | case BlockCollision::Found: { | ||
| 324 | return true; | ||
| 325 | } | ||
| 326 | case BlockCollision::Inside: { | ||
| 327 | // This case is the tricky one: | ||
| 328 | // We need to Split the block in 2 sepparate blocks | ||
| 329 | const u32 end = state.block_info[block_index].end; | ||
| 330 | BlockInfo& new_block = CreateBlockInfo(state, address, end); | ||
| 331 | BlockInfo& current_block = state.block_info[block_index]; | ||
| 332 | current_block.end = address - 1; | ||
| 333 | new_block.branch = current_block.branch; | ||
| 334 | BlockBranchInfo forward_branch{}; | ||
| 335 | forward_branch.address = address; | ||
| 336 | forward_branch.ignore = true; | ||
| 337 | current_block.branch = forward_branch; | ||
| 338 | return true; | ||
| 339 | } | ||
| 340 | default: | ||
| 341 | break; | ||
| 342 | } | ||
| 343 | const auto [parse_result, parse_info] = ParseCode(state, address); | ||
| 344 | if (parse_result == ParseResult::AbnormalFlow) { | ||
| 345 | // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction | ||
| 346 | return false; | ||
| 347 | } | ||
| 348 | |||
| 349 | BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); | ||
| 350 | block_info.branch = parse_info.branch_info; | ||
| 351 | if (parse_info.branch_info.condition.IsUnconditional()) { | ||
| 352 | return true; | ||
| 353 | } | ||
| 354 | |||
| 355 | const u32 fallthrough_address = parse_info.end_address + 1; | ||
| 356 | state.inspect_queries.push_front(fallthrough_address); | ||
| 357 | return true; | ||
| 358 | } | ||
| 359 | |||
| 360 | bool TryQuery(CFGRebuildState& state) { | ||
| 361 | const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels, | ||
| 362 | BlockInfo& block) { | ||
| 363 | auto gather_start = labels.lower_bound(block.start); | ||
| 364 | const auto gather_end = labels.upper_bound(block.end); | ||
| 365 | while (gather_start != gather_end) { | ||
| 366 | cc.push(gather_start->second); | ||
| 367 | ++gather_start; | ||
| 368 | } | ||
| 369 | }; | ||
| 370 | if (state.queries.empty()) { | ||
| 371 | return false; | ||
| 372 | } | ||
| 373 | |||
| 374 | Query& q = state.queries.front(); | ||
| 375 | const u32 block_index = state.registered[q.address]; | ||
| 376 | BlockInfo& block = state.block_info[block_index]; | ||
| 377 | // If the block is visited, check if the stacks match, else gather the ssy/pbk | ||
| 378 | // labels into the current stack and look if the branch at the end of the block | ||
| 379 | // consumes a label. Schedule new queries accordingly | ||
| 380 | if (block.visited) { | ||
| 381 | BlockStack& stack = state.stacks[q.address]; | ||
| 382 | const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && | ||
| 383 | (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); | ||
| 384 | state.queries.pop_front(); | ||
| 385 | return all_okay; | ||
| 386 | } | ||
| 387 | block.visited = true; | ||
| 388 | state.stacks.insert_or_assign(q.address, BlockStack{q}); | ||
| 389 | |||
| 390 | Query q2(q); | ||
| 391 | state.queries.pop_front(); | ||
| 392 | gather_labels(q2.ssy_stack, state.ssy_labels, block); | ||
| 393 | gather_labels(q2.pbk_stack, state.pbk_labels, block); | ||
| 394 | if (!block.branch.condition.IsUnconditional()) { | ||
| 395 | q2.address = block.end + 1; | ||
| 396 | state.queries.push_back(q2); | ||
| 397 | } | ||
| 398 | |||
| 399 | Query conditional_query{q2}; | ||
| 400 | if (block.branch.is_sync) { | ||
| 401 | if (block.branch.address == unassigned_branch) { | ||
| 402 | block.branch.address = conditional_query.ssy_stack.top(); | ||
| 403 | } | ||
| 404 | conditional_query.ssy_stack.pop(); | ||
| 405 | } | ||
| 406 | if (block.branch.is_brk) { | ||
| 407 | if (block.branch.address == unassigned_branch) { | ||
| 408 | block.branch.address = conditional_query.pbk_stack.top(); | ||
| 409 | } | ||
| 410 | conditional_query.pbk_stack.pop(); | ||
| 411 | } | ||
| 412 | conditional_query.address = block.branch.address; | ||
| 413 | state.queries.push_back(std::move(conditional_query)); | ||
| 414 | return true; | ||
| 415 | } | ||
| 416 | } // Anonymous namespace | ||
| 417 | |||
| 418 | std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, | ||
| 419 | std::size_t program_size, u32 start_address) { | ||
| 420 | CFGRebuildState state{program_code, program_size, start_address}; | ||
| 421 | |||
| 422 | // Inspect Code and generate blocks | ||
| 423 | state.labels.clear(); | ||
| 424 | state.labels.emplace(start_address); | ||
| 425 | state.inspect_queries.push_back(state.start); | ||
| 426 | while (!state.inspect_queries.empty()) { | ||
| 427 | if (!TryInspectAddress(state)) { | ||
| 428 | return {}; | ||
| 429 | } | ||
| 430 | } | ||
| 431 | |||
| 432 | // Decompile Stacks | ||
| 433 | state.queries.push_back(Query{state.start, {}, {}}); | ||
| 434 | bool decompiled = true; | ||
| 435 | while (!state.queries.empty()) { | ||
| 436 | if (!TryQuery(state)) { | ||
| 437 | decompiled = false; | ||
| 438 | break; | ||
| 439 | } | ||
| 440 | } | ||
| 441 | |||
| 442 | // Sort and organize results | ||
| 443 | std::sort(state.block_info.begin(), state.block_info.end(), | ||
| 444 | [](const BlockInfo& a, const BlockInfo& b) { return a.start < b.start; }); | ||
| 445 | ShaderCharacteristics result_out{}; | ||
| 446 | result_out.decompilable = decompiled; | ||
| 447 | result_out.start = start_address; | ||
| 448 | result_out.end = start_address; | ||
| 449 | for (const auto& block : state.block_info) { | ||
| 450 | ShaderBlock new_block{}; | ||
| 451 | new_block.start = block.start; | ||
| 452 | new_block.end = block.end; | ||
| 453 | new_block.ignore_branch = block.branch.ignore; | ||
| 454 | if (!new_block.ignore_branch) { | ||
| 455 | new_block.branch.cond = block.branch.condition; | ||
| 456 | new_block.branch.kills = block.branch.kill; | ||
| 457 | new_block.branch.address = block.branch.address; | ||
| 458 | } | ||
| 459 | result_out.end = std::max(result_out.end, block.end); | ||
| 460 | result_out.blocks.push_back(new_block); | ||
| 461 | } | ||
| 462 | if (result_out.decompilable) { | ||
| 463 | result_out.labels = std::move(state.labels); | ||
| 464 | return {std::move(result_out)}; | ||
| 465 | } | ||
| 466 | |||
| 467 | // If it's not decompilable, merge the unlabelled blocks together | ||
| 468 | auto back = result_out.blocks.begin(); | ||
| 469 | auto next = std::next(back); | ||
| 470 | while (next != result_out.blocks.end()) { | ||
| 471 | if (state.labels.count(next->start) == 0 && next->start == back->end + 1) { | ||
| 472 | back->end = next->end; | ||
| 473 | next = result_out.blocks.erase(next); | ||
| 474 | continue; | ||
| 475 | } | ||
| 476 | back = next; | ||
| 477 | ++next; | ||
| 478 | } | ||
| 479 | return {std::move(result_out)}; | ||
| 480 | } | ||
| 481 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h new file mode 100644 index 000000000..b0a5e4f8c --- /dev/null +++ b/src/video_core/shader/control_flow.h | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <list> | ||
| 8 | #include <optional> | ||
| 9 | #include <unordered_set> | ||
| 10 | |||
| 11 | #include "video_core/engines/shader_bytecode.h" | ||
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | using Tegra::Shader::ConditionCode; | ||
| 17 | using Tegra::Shader::Pred; | ||
| 18 | |||
| 19 | constexpr s32 exit_branch = -1; | ||
| 20 | |||
| 21 | struct Condition { | ||
| 22 | Pred predicate{Pred::UnusedIndex}; | ||
| 23 | ConditionCode cc{ConditionCode::T}; | ||
| 24 | |||
| 25 | bool IsUnconditional() const { | ||
| 26 | return predicate == Pred::UnusedIndex && cc == ConditionCode::T; | ||
| 27 | } | ||
| 28 | |||
| 29 | bool operator==(const Condition& other) const { | ||
| 30 | return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); | ||
| 31 | } | ||
| 32 | |||
| 33 | bool operator!=(const Condition& other) const { | ||
| 34 | return !operator==(other); | ||
| 35 | } | ||
| 36 | }; | ||
| 37 | |||
| 38 | struct ShaderBlock { | ||
| 39 | struct Branch { | ||
| 40 | Condition cond{}; | ||
| 41 | bool kills{}; | ||
| 42 | s32 address{}; | ||
| 43 | |||
| 44 | bool operator==(const Branch& b) const { | ||
| 45 | return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); | ||
| 46 | } | ||
| 47 | |||
| 48 | bool operator!=(const Branch& b) const { | ||
| 49 | return !operator==(b); | ||
| 50 | } | ||
| 51 | }; | ||
| 52 | |||
| 53 | u32 start{}; | ||
| 54 | u32 end{}; | ||
| 55 | bool ignore_branch{}; | ||
| 56 | Branch branch{}; | ||
| 57 | |||
| 58 | bool operator==(const ShaderBlock& sb) const { | ||
| 59 | return std::tie(start, end, ignore_branch, branch) == | ||
| 60 | std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); | ||
| 61 | } | ||
| 62 | |||
| 63 | bool operator!=(const ShaderBlock& sb) const { | ||
| 64 | return !operator==(sb); | ||
| 65 | } | ||
| 66 | }; | ||
| 67 | |||
| 68 | struct ShaderCharacteristics { | ||
| 69 | std::list<ShaderBlock> blocks{}; | ||
| 70 | bool decompilable{}; | ||
| 71 | u32 start{}; | ||
| 72 | u32 end{}; | ||
| 73 | std::unordered_set<u32> labels{}; | ||
| 74 | }; | ||
| 75 | |||
| 76 | std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, | ||
| 77 | std::size_t program_size, u32 start_address); | ||
| 78 | |||
| 79 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2c9ff28f2..47a9fd961 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/engines/shader_header.h" | 13 | #include "video_core/engines/shader_header.h" |
| 14 | #include "video_core/shader/control_flow.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | 15 | #include "video_core/shader/node_helper.h" |
| 15 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 16 | 17 | ||
| @@ -21,20 +22,6 @@ using Tegra::Shader::OpCode; | |||
| 21 | 22 | ||
| 22 | namespace { | 23 | namespace { |
| 23 | 24 | ||
| 24 | /// Merges exit method of two parallel branches. | ||
| 25 | constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { | ||
| 26 | if (a == ExitMethod::Undetermined) { | ||
| 27 | return b; | ||
| 28 | } | ||
| 29 | if (b == ExitMethod::Undetermined) { | ||
| 30 | return a; | ||
| 31 | } | ||
| 32 | if (a == b) { | ||
| 33 | return a; | ||
| 34 | } | ||
| 35 | return ExitMethod::Conditional; | ||
| 36 | } | ||
| 37 | |||
| 38 | /** | 25 | /** |
| 39 | * Returns whether the instruction at the specified offset is a 'sched' instruction. | 26 | * Returns whether the instruction at the specified offset is a 'sched' instruction. |
| 40 | * Sched instructions always appear before a sequence of 3 instructions. | 27 | * Sched instructions always appear before a sequence of 3 instructions. |
| @@ -51,85 +38,104 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||
| 51 | void ShaderIR::Decode() { | 38 | void ShaderIR::Decode() { |
| 52 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | 39 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
| 53 | 40 | ||
| 54 | std::set<u32> labels; | 41 | disable_flow_stack = false; |
| 55 | const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); | 42 | const auto info = ScanFlow(program_code, program_size, main_offset); |
| 56 | if (exit_method != ExitMethod::AlwaysEnd) { | 43 | if (info) { |
| 57 | UNREACHABLE_MSG("Program does not always end"); | 44 | const auto& shader_info = *info; |
| 58 | } | 45 | coverage_begin = shader_info.start; |
| 59 | 46 | coverage_end = shader_info.end; | |
| 60 | if (labels.empty()) { | 47 | if (shader_info.decompilable) { |
| 61 | basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); | 48 | disable_flow_stack = true; |
| 49 | const auto insert_block = [this](NodeBlock& nodes, u32 label) { | ||
| 50 | if (label == static_cast<u32>(exit_branch)) { | ||
| 51 | return; | ||
| 52 | } | ||
| 53 | basic_blocks.insert({label, nodes}); | ||
| 54 | }; | ||
| 55 | const auto& blocks = shader_info.blocks; | ||
| 56 | NodeBlock current_block; | ||
| 57 | u32 current_label = static_cast<u32>(exit_branch); | ||
| 58 | for (auto& block : blocks) { | ||
| 59 | if (shader_info.labels.count(block.start) != 0) { | ||
| 60 | insert_block(current_block, current_label); | ||
| 61 | current_block.clear(); | ||
| 62 | current_label = block.start; | ||
| 63 | } | ||
| 64 | if (!block.ignore_branch) { | ||
| 65 | DecodeRangeInner(current_block, block.start, block.end); | ||
| 66 | InsertControlFlow(current_block, block); | ||
| 67 | } else { | ||
| 68 | DecodeRangeInner(current_block, block.start, block.end + 1); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | insert_block(current_block, current_label); | ||
| 72 | return; | ||
| 73 | } | ||
| 74 | LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method"); | ||
| 75 | // we can't decompile it, fallback to standard method | ||
| 76 | for (const auto& block : shader_info.blocks) { | ||
| 77 | basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); | ||
| 78 | } | ||
| 62 | return; | 79 | return; |
| 63 | } | 80 | } |
| 81 | LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling"); | ||
| 82 | |||
| 83 | // Now we need to deal with an undecompilable shader. We need to brute force | ||
| 84 | // a shader that captures every position. | ||
| 85 | coverage_begin = main_offset; | ||
| 86 | const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); | ||
| 87 | coverage_end = shader_end; | ||
| 88 | for (u32 label = main_offset; label < shader_end; label++) { | ||
| 89 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); | ||
| 90 | } | ||
| 91 | } | ||
| 64 | 92 | ||
| 65 | labels.insert(main_offset); | 93 | NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { |
| 66 | 94 | NodeBlock basic_block; | |
| 67 | for (const u32 label : labels) { | 95 | DecodeRangeInner(basic_block, begin, end); |
| 68 | const auto next_it = labels.lower_bound(label + 1); | 96 | return basic_block; |
| 69 | const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; | 97 | } |
| 70 | 98 | ||
| 71 | basic_blocks.insert({label, DecodeRange(label, next_label)}); | 99 | void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { |
| 100 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 101 | pc = DecodeInstr(bb, pc); | ||
| 72 | } | 102 | } |
| 73 | } | 103 | } |
| 74 | 104 | ||
| 75 | ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { | 105 | void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { |
| 76 | const auto [iter, inserted] = | 106 | const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { |
| 77 | exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); | 107 | Node result = n; |
| 78 | ExitMethod& exit_method = iter->second; | 108 | if (cond.cc != ConditionCode::T) { |
| 79 | if (!inserted) | 109 | result = Conditional(GetConditionCode(cond.cc), {result}); |
| 80 | return exit_method; | ||
| 81 | |||
| 82 | for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) { | ||
| 83 | coverage_begin = std::min(coverage_begin, offset); | ||
| 84 | coverage_end = std::max(coverage_end, offset + 1); | ||
| 85 | |||
| 86 | const Instruction instr = {program_code[offset]}; | ||
| 87 | const auto opcode = OpCode::Decode(instr); | ||
| 88 | if (!opcode) | ||
| 89 | continue; | ||
| 90 | switch (opcode->get().GetId()) { | ||
| 91 | case OpCode::Id::EXIT: { | ||
| 92 | // The EXIT instruction can be predicated, which means that the shader can conditionally | ||
| 93 | // end on this instruction. We have to consider the case where the condition is not met | ||
| 94 | // and check the exit method of that other basic block. | ||
| 95 | using Tegra::Shader::Pred; | ||
| 96 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 97 | return exit_method = ExitMethod::AlwaysEnd; | ||
| 98 | } else { | ||
| 99 | const ExitMethod not_met = Scan(offset + 1, end, labels); | ||
| 100 | return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met); | ||
| 101 | } | ||
| 102 | } | 110 | } |
| 103 | case OpCode::Id::BRA: { | 111 | if (cond.predicate != Pred::UnusedIndex) { |
| 104 | const u32 target = offset + instr.bra.GetBranchTarget(); | 112 | u32 pred = static_cast<u32>(cond.predicate); |
| 105 | labels.insert(target); | 113 | const bool is_neg = pred > 7; |
| 106 | const ExitMethod no_jmp = Scan(offset + 1, end, labels); | 114 | if (is_neg) { |
| 107 | const ExitMethod jmp = Scan(target, end, labels); | 115 | pred -= 8; |
| 108 | return exit_method = ParallelExit(no_jmp, jmp); | 116 | } |
| 109 | } | 117 | result = Conditional(GetPredicate(pred, is_neg), {result}); |
| 110 | case OpCode::Id::SSY: | ||
| 111 | case OpCode::Id::PBK: { | ||
| 112 | // The SSY and PBK use a similar encoding as the BRA instruction. | ||
| 113 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 114 | "Constant buffer branching is not supported"); | ||
| 115 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 116 | labels.insert(target); | ||
| 117 | // Continue scanning for an exit method. | ||
| 118 | break; | ||
| 119 | } | 118 | } |
| 120 | default: | 119 | return result; |
| 121 | break; | 120 | }; |
| 121 | if (block.branch.address < 0) { | ||
| 122 | if (block.branch.kills) { | ||
| 123 | Node n = Operation(OperationCode::Discard); | ||
| 124 | n = apply_conditions(block.branch.cond, n); | ||
| 125 | bb.push_back(n); | ||
| 126 | global_code.push_back(n); | ||
| 127 | return; | ||
| 122 | } | 128 | } |
| 129 | Node n = Operation(OperationCode::Exit); | ||
| 130 | n = apply_conditions(block.branch.cond, n); | ||
| 131 | bb.push_back(n); | ||
| 132 | global_code.push_back(n); | ||
| 133 | return; | ||
| 123 | } | 134 | } |
| 124 | return exit_method = ExitMethod::AlwaysReturn; | 135 | Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); |
| 125 | } | 136 | n = apply_conditions(block.branch.cond, n); |
| 126 | 137 | bb.push_back(n); | |
| 127 | NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { | 138 | global_code.push_back(n); |
| 128 | NodeBlock basic_block; | ||
| 129 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 130 | pc = DecodeInstr(basic_block, pc); | ||
| 131 | } | ||
| 132 | return basic_block; | ||
| 133 | } | 139 | } |
| 134 | 140 | ||
| 135 | u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | 141 | u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { |
| @@ -140,15 +146,18 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 140 | 146 | ||
| 141 | const Instruction instr = {program_code[pc]}; | 147 | const Instruction instr = {program_code[pc]}; |
| 142 | const auto opcode = OpCode::Decode(instr); | 148 | const auto opcode = OpCode::Decode(instr); |
| 149 | const u32 nv_address = ConvertAddressToNvidiaSpace(pc); | ||
| 143 | 150 | ||
| 144 | // Decoding failure | 151 | // Decoding failure |
| 145 | if (!opcode) { | 152 | if (!opcode) { |
| 146 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); | 153 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); |
| 154 | bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", | ||
| 155 | nv_address, instr.value))); | ||
| 147 | return pc + 1; | 156 | return pc + 1; |
| 148 | } | 157 | } |
| 149 | 158 | ||
| 150 | bb.push_back( | 159 | bb.push_back(Comment( |
| 151 | Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value))); | 160 | fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); |
| 152 | 161 | ||
| 153 | using Tegra::Shader::Pred; | 162 | using Tegra::Shader::Pred; |
| 154 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, | 163 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, |
| @@ -167,6 +176,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 167 | {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, | 176 | {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, |
| 168 | {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, | 177 | {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, |
| 169 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, | 178 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, |
| 179 | {OpCode::Type::Warp, &ShaderIR::DecodeWarp}, | ||
| 170 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, | 180 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, |
| 171 | {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, | 181 | {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, |
| 172 | {OpCode::Type::Image, &ShaderIR::DecodeImage}, | 182 | {OpCode::Type::Image, &ShaderIR::DecodeImage}, |
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 87d8fecaa..1473c282a 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp | |||
| @@ -42,11 +42,14 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { | |||
| 42 | case OpCode::Id::FMUL_R: | 42 | case OpCode::Id::FMUL_R: |
| 43 | case OpCode::Id::FMUL_IMM: { | 43 | case OpCode::Id::FMUL_IMM: { |
| 44 | // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. | 44 | // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. |
| 45 | UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", | 45 | if (instr.fmul.tab5cb8_2 != 0) { |
| 46 | instr.fmul.tab5cb8_2.Value()); | 46 | LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", |
| 47 | UNIMPLEMENTED_IF_MSG( | 47 | instr.fmul.tab5cb8_2.Value()); |
| 48 | instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", | 48 | } |
| 49 | instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default | 49 | if (instr.fmul.tab5c68_0 != 1) { |
| 50 | LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", | ||
| 51 | instr.fmul.tab5c68_0.Value()); | ||
| 52 | } | ||
| 50 | 53 | ||
| 51 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); | 54 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); |
| 52 | 55 | ||
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 7bcf38f23..6466fc011 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp | |||
| @@ -23,7 +23,9 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { | |||
| 23 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); | 23 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); |
| 24 | } | 24 | } |
| 25 | } else { | 25 | } else { |
| 26 | UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); | 26 | if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) { |
| 27 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); | ||
| 28 | } | ||
| 27 | } | 29 | } |
| 28 | 30 | ||
| 29 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); | 31 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); |
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 4221f0c58..32facd6ba 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp | |||
| @@ -14,6 +14,12 @@ using Tegra::Shader::Instruction; | |||
| 14 | using Tegra::Shader::OpCode; | 14 | using Tegra::Shader::OpCode; |
| 15 | using Tegra::Shader::Register; | 15 | using Tegra::Shader::Register; |
| 16 | 16 | ||
| 17 | namespace { | ||
| 18 | constexpr OperationCode GetFloatSelector(u64 selector) { | ||
| 19 | return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; | ||
| 20 | } | ||
| 21 | } // Anonymous namespace | ||
| 22 | |||
| 17 | u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | 23 | u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { |
| 18 | const Instruction instr = {program_code[pc]}; | 24 | const Instruction instr = {program_code[pc]}; |
| 19 | const auto opcode = OpCode::Decode(instr); | 25 | const auto opcode = OpCode::Decode(instr); |
| @@ -22,7 +28,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 22 | case OpCode::Id::I2I_R: | 28 | case OpCode::Id::I2I_R: |
| 23 | case OpCode::Id::I2I_C: | 29 | case OpCode::Id::I2I_C: |
| 24 | case OpCode::Id::I2I_IMM: { | 30 | case OpCode::Id::I2I_IMM: { |
| 25 | UNIMPLEMENTED_IF(instr.conversion.selector); | 31 | UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); |
| 26 | UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); | 32 | UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); |
| 27 | UNIMPLEMENTED_IF(instr.alu.saturate_d); | 33 | UNIMPLEMENTED_IF(instr.alu.saturate_d); |
| 28 | 34 | ||
| @@ -57,8 +63,8 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 57 | case OpCode::Id::I2F_R: | 63 | case OpCode::Id::I2F_R: |
| 58 | case OpCode::Id::I2F_C: | 64 | case OpCode::Id::I2F_C: |
| 59 | case OpCode::Id::I2F_IMM: { | 65 | case OpCode::Id::I2F_IMM: { |
| 60 | UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); | 66 | UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); |
| 61 | UNIMPLEMENTED_IF(instr.conversion.selector); | 67 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); |
| 62 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 68 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 63 | "Condition codes generation in I2F is not implemented"); | 69 | "Condition codes generation in I2F is not implemented"); |
| 64 | 70 | ||
| @@ -82,14 +88,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 82 | value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); | 88 | value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); |
| 83 | 89 | ||
| 84 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | 90 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |
| 91 | |||
| 92 | if (instr.conversion.dst_size == Register::Size::Short) { | ||
| 93 | value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||
| 94 | } | ||
| 95 | |||
| 85 | SetRegister(bb, instr.gpr0, value); | 96 | SetRegister(bb, instr.gpr0, value); |
| 86 | break; | 97 | break; |
| 87 | } | 98 | } |
| 88 | case OpCode::Id::F2F_R: | 99 | case OpCode::Id::F2F_R: |
| 89 | case OpCode::Id::F2F_C: | 100 | case OpCode::Id::F2F_C: |
| 90 | case OpCode::Id::F2F_IMM: { | 101 | case OpCode::Id::F2F_IMM: { |
| 91 | UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word); | 102 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); |
| 92 | UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word); | 103 | UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); |
| 93 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 104 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 94 | "Condition codes generation in F2F is not implemented"); | 105 | "Condition codes generation in F2F is not implemented"); |
| 95 | 106 | ||
| @@ -107,6 +118,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 107 | } | 118 | } |
| 108 | }(); | 119 | }(); |
| 109 | 120 | ||
| 121 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 122 | value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, | ||
| 123 | std::move(value)); | ||
| 124 | } else { | ||
| 125 | ASSERT(instr.conversion.float_src.selector == 0); | ||
| 126 | } | ||
| 127 | |||
| 110 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | 128 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); |
| 111 | 129 | ||
| 112 | value = [&]() { | 130 | value = [&]() { |
| @@ -124,19 +142,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 124 | default: | 142 | default: |
| 125 | UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", | 143 | UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", |
| 126 | static_cast<u32>(instr.conversion.f2f.rounding.Value())); | 144 | static_cast<u32>(instr.conversion.f2f.rounding.Value())); |
| 127 | return Immediate(0); | 145 | return value; |
| 128 | } | 146 | } |
| 129 | }(); | 147 | }(); |
| 130 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | 148 | value = GetSaturatedFloat(value, instr.alu.saturate_d); |
| 131 | 149 | ||
| 132 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | 150 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |
| 151 | |||
| 152 | if (instr.conversion.dst_size == Register::Size::Short) { | ||
| 153 | value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||
| 154 | } | ||
| 155 | |||
| 133 | SetRegister(bb, instr.gpr0, value); | 156 | SetRegister(bb, instr.gpr0, value); |
| 134 | break; | 157 | break; |
| 135 | } | 158 | } |
| 136 | case OpCode::Id::F2I_R: | 159 | case OpCode::Id::F2I_R: |
| 137 | case OpCode::Id::F2I_C: | 160 | case OpCode::Id::F2I_C: |
| 138 | case OpCode::Id::F2I_IMM: { | 161 | case OpCode::Id::F2I_IMM: { |
| 139 | UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); | 162 | UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); |
| 140 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 163 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 141 | "Condition codes generation in F2I is not implemented"); | 164 | "Condition codes generation in F2I is not implemented"); |
| 142 | Node value = [&]() { | 165 | Node value = [&]() { |
| @@ -153,6 +176,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 153 | } | 176 | } |
| 154 | }(); | 177 | }(); |
| 155 | 178 | ||
| 179 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 180 | value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, | ||
| 181 | std::move(value)); | ||
| 182 | } else { | ||
| 183 | ASSERT(instr.conversion.float_src.selector == 0); | ||
| 184 | } | ||
| 185 | |||
| 156 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | 186 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); |
| 157 | 187 | ||
| 158 | value = [&]() { | 188 | value = [&]() { |
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index 29be25ca3..ca2f39e8d 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp | |||
| @@ -18,10 +18,12 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { | |||
| 18 | const auto opcode = OpCode::Decode(instr); | 18 | const auto opcode = OpCode::Decode(instr); |
| 19 | 19 | ||
| 20 | UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); | 20 | UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); |
| 21 | UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", | 21 | if (instr.ffma.tab5980_0 != 1) { |
| 22 | instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO | 22 | LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); |
| 23 | UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", | 23 | } |
| 24 | instr.ffma.tab5980_1.Value()); | 24 | if (instr.ffma.tab5980_1 != 0) { |
| 25 | LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); | ||
| 26 | } | ||
| 25 | 27 | ||
| 26 | const Node op_a = GetRegister(instr.gpr8); | 28 | const Node op_a = GetRegister(instr.gpr8); |
| 27 | 29 | ||
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index f5013e44a..5614e8a0d 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp | |||
| @@ -15,7 +15,6 @@ using Tegra::Shader::OpCode; | |||
| 15 | 15 | ||
| 16 | u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { | 16 | u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { |
| 17 | const Instruction instr = {program_code[pc]}; | 17 | const Instruction instr = {program_code[pc]}; |
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | 18 | ||
| 20 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, | 19 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, |
| 21 | instr.fset.neg_a != 0); | 20 | instr.fset.neg_a != 0); |
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp index 2323052b0..200c2c983 100644 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ b/src/video_core/shader/decode/float_set_predicate.cpp | |||
| @@ -16,10 +16,9 @@ using Tegra::Shader::Pred; | |||
| 16 | 16 | ||
| 17 | u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { | 17 | u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { |
| 18 | const Instruction instr = {program_code[pc]}; | 18 | const Instruction instr = {program_code[pc]}; |
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | 19 | ||
| 21 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, | 20 | Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, |
| 22 | instr.fsetp.neg_a != 0); | 21 | instr.fsetp.neg_a != 0); |
| 23 | Node op_b = [&]() { | 22 | Node op_b = [&]() { |
| 24 | if (instr.is_b_imm) { | 23 | if (instr.is_b_imm) { |
| 25 | return GetImmediate19(instr); | 24 | return GetImmediate19(instr); |
| @@ -29,12 +28,13 @@ u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { | |||
| 29 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |
| 30 | } | 29 | } |
| 31 | }(); | 30 | }(); |
| 32 | op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false); | 31 | op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b); |
| 33 | 32 | ||
| 34 | // We can't use the constant predicate as destination. | 33 | // We can't use the constant predicate as destination. |
| 35 | ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | 34 | ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); |
| 36 | 35 | ||
| 37 | const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b); | 36 | const Node predicate = |
| 37 | GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b)); | ||
| 38 | const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); | 38 | const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); |
| 39 | 39 | ||
| 40 | const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); | 40 | const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); |
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index d59d15bd8..afea33e5f 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp | |||
| @@ -18,43 +18,56 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { | |||
| 18 | const Instruction instr = {program_code[pc]}; | 18 | const Instruction instr = {program_code[pc]}; |
| 19 | const auto opcode = OpCode::Decode(instr); | 19 | const auto opcode = OpCode::Decode(instr); |
| 20 | 20 | ||
| 21 | UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); | 21 | DEBUG_ASSERT(instr.hsetp2.ftz == 0); |
| 22 | 22 | ||
| 23 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); | 23 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); |
| 24 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); | 24 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); |
| 25 | 25 | ||
| 26 | Node op_b = [&]() { | 26 | Tegra::Shader::PredCondition cond{}; |
| 27 | switch (opcode->get().GetId()) { | 27 | bool h_and{}; |
| 28 | case OpCode::Id::HSETP2_R: | 28 | Node op_b{}; |
| 29 | return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, | 29 | switch (opcode->get().GetId()) { |
| 30 | instr.hsetp2.negate_b); | 30 | case OpCode::Id::HSETP2_C: |
| 31 | default: | 31 | cond = instr.hsetp2.cbuf_and_imm.cond; |
| 32 | UNREACHABLE(); | 32 | h_and = instr.hsetp2.cbuf_and_imm.h_and; |
| 33 | return Immediate(0); | 33 | op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), |
| 34 | } | 34 | instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); |
| 35 | }(); | 35 | break; |
| 36 | op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); | 36 | case OpCode::Id::HSETP2_IMM: |
| 37 | 37 | cond = instr.hsetp2.cbuf_and_imm.cond; | |
| 38 | // We can't use the constant predicate as destination. | 38 | h_and = instr.hsetp2.cbuf_and_imm.h_and; |
| 39 | ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); | 39 | op_b = UnpackHalfImmediate(instr, true); |
| 40 | 40 | break; | |
| 41 | const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); | 41 | case OpCode::Id::HSETP2_R: |
| 42 | cond = instr.hsetp2.reg.cond; | ||
| 43 | h_and = instr.hsetp2.reg.h_and; | ||
| 44 | op_b = | ||
| 45 | UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b, | ||
| 46 | instr.hsetp2.reg.negate_b), | ||
| 47 | instr.hsetp2.reg.type_b); | ||
| 48 | break; | ||
| 49 | default: | ||
| 50 | UNREACHABLE(); | ||
| 51 | op_b = Immediate(0); | ||
| 52 | } | ||
| 42 | 53 | ||
| 43 | const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); | 54 | const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); |
| 44 | const OperationCode pair_combiner = | 55 | const Node combined_pred = GetPredicate(instr.hsetp2.pred3, instr.hsetp2.neg_pred); |
| 45 | instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; | ||
| 46 | |||
| 47 | const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b); | ||
| 48 | const Node first_pred = Operation(pair_combiner, comparison); | ||
| 49 | 56 | ||
| 50 | // Set the primary predicate to the result of Predicate OP SecondPredicate | 57 | const auto Write = [&](u64 dest, Node src) { |
| 51 | const Node value = Operation(combiner, first_pred, second_pred); | 58 | SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); |
| 52 | SetPredicate(bb, instr.hsetp2.pred3, value); | 59 | }; |
| 53 | 60 | ||
| 54 | if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | 61 | const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); |
| 55 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled | 62 | const u64 first = instr.hsetp2.pred0; |
| 56 | const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred); | 63 | const u64 second = instr.hsetp2.pred39; |
| 57 | SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred)); | 64 | if (h_and) { |
| 65 | const Node joined = Operation(OperationCode::LogicalAnd2, comparison); | ||
| 66 | Write(first, joined); | ||
| 67 | Write(second, Operation(OperationCode::LogicalNegate, joined)); | ||
| 68 | } else { | ||
| 69 | Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u))); | ||
| 70 | Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u))); | ||
| 58 | } | 71 | } |
| 59 | 72 | ||
| 60 | return pc; | 73 | return pc; |
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index c3bcf1ae9..5b44cb79c 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp | |||
| @@ -22,9 +22,9 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { | |||
| 22 | const auto opcode = OpCode::Decode(instr); | 22 | const auto opcode = OpCode::Decode(instr); |
| 23 | 23 | ||
| 24 | if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { | 24 | if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { |
| 25 | UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None); | 25 | DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); |
| 26 | } else { | 26 | } else { |
| 27 | UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None); | 27 | DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); |
| 28 | } | 28 | } |
| 29 | 29 | ||
| 30 | constexpr auto identity = HalfType::H0_H1; | 30 | constexpr auto identity = HalfType::H0_H1; |
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 24f022cc0..77151a24b 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp | |||
| @@ -95,12 +95,8 @@ const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::Image | |||
| 95 | const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, | 95 | const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, |
| 96 | Tegra::Shader::ImageType type) { | 96 | Tegra::Shader::ImageType type) { |
| 97 | const Node image_register{GetRegister(reg)}; | 97 | const Node image_register{GetRegister(reg)}; |
| 98 | const Node base_image{ | 98 | const auto [base_image, cbuf_index, cbuf_offset]{ |
| 99 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; | 99 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; |
| 100 | const auto cbuf{std::get_if<CbufNode>(&*base_image)}; | ||
| 101 | const auto cbuf_offset_imm{std::get_if<ImmediateNode>(&*cbuf->GetOffset())}; | ||
| 102 | const auto cbuf_offset{cbuf_offset_imm->GetValue()}; | ||
| 103 | const auto cbuf_index{cbuf->GetIndex()}; | ||
| 104 | const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; | 100 | const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; |
| 105 | 101 | ||
| 106 | // If this image has already been used, return the existing mapping. | 102 | // If this image has already been used, return the existing mapping. |
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp index 46e3d5905..59809bcd8 100644 --- a/src/video_core/shader/decode/integer_set.cpp +++ b/src/video_core/shader/decode/integer_set.cpp | |||
| @@ -14,7 +14,6 @@ using Tegra::Shader::OpCode; | |||
| 14 | 14 | ||
| 15 | u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { | 15 | u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { |
| 16 | const Instruction instr = {program_code[pc]}; | 16 | const Instruction instr = {program_code[pc]}; |
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | 17 | ||
| 19 | const Node op_a = GetRegister(instr.gpr8); | 18 | const Node op_a = GetRegister(instr.gpr8); |
| 20 | const Node op_b = [&]() { | 19 | const Node op_b = [&]() { |
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp index dd20775d7..25e48fef8 100644 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ b/src/video_core/shader/decode/integer_set_predicate.cpp | |||
| @@ -16,7 +16,6 @@ using Tegra::Shader::Pred; | |||
| 16 | 16 | ||
| 17 | u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { | 17 | u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { |
| 18 | const Instruction instr = {program_code[pc]}; | 18 | const Instruction instr = {program_code[pc]}; |
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | 19 | ||
| 21 | const Node op_a = GetRegister(instr.gpr8); | 20 | const Node op_a = GetRegister(instr.gpr8); |
| 22 | 21 | ||
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 80fc0ccfc..ed108bea8 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -95,10 +95,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 95 | const Node op_b = | 95 | const Node op_b = |
| 96 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); | 96 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); |
| 97 | 97 | ||
| 98 | SetTemporal(bb, 0, op_a); | 98 | SetTemporary(bb, 0, op_a); |
| 99 | SetTemporal(bb, 1, op_b); | 99 | SetTemporary(bb, 1, op_b); |
| 100 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | 100 | SetRegister(bb, instr.gpr0, GetTemporary(0)); |
| 101 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); | 101 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); |
| 102 | break; | 102 | break; |
| 103 | } | 103 | } |
| 104 | default: | 104 | default: |
| @@ -136,9 +136,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 136 | } | 136 | } |
| 137 | }(); | 137 | }(); |
| 138 | for (u32 i = 0; i < count; ++i) | 138 | for (u32 i = 0; i < count; ++i) |
| 139 | SetTemporal(bb, i, GetLmem(i * 4)); | 139 | SetTemporary(bb, i, GetLmem(i * 4)); |
| 140 | for (u32 i = 0; i < count; ++i) | 140 | for (u32 i = 0; i < count; ++i) |
| 141 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 141 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 142 | break; | 142 | break; |
| 143 | } | 143 | } |
| 144 | default: | 144 | default: |
| @@ -172,10 +172,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 172 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | 172 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); |
| 173 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 173 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 174 | 174 | ||
| 175 | SetTemporal(bb, i, gmem); | 175 | SetTemporary(bb, i, gmem); |
| 176 | } | 176 | } |
| 177 | for (u32 i = 0; i < count; ++i) { | 177 | for (u32 i = 0; i < count; ++i) { |
| 178 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 178 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 179 | } | 179 | } |
| 180 | break; | 180 | break; |
| 181 | } | 181 | } |
| @@ -253,11 +253,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 253 | TrackAndGetGlobalMemory(bb, instr, true); | 253 | TrackAndGetGlobalMemory(bb, instr, true); |
| 254 | 254 | ||
| 255 | // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} | 255 | // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} |
| 256 | SetTemporal(bb, 0, real_address_base); | 256 | SetTemporary(bb, 0, real_address_base); |
| 257 | 257 | ||
| 258 | const u32 count = GetUniformTypeElementsCount(type); | 258 | const u32 count = GetUniformTypeElementsCount(type); |
| 259 | for (u32 i = 0; i < count; ++i) { | 259 | for (u32 i = 0; i < count; ++i) { |
| 260 | SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); | 260 | SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); |
| 261 | } | 261 | } |
| 262 | for (u32 i = 0; i < count; ++i) { | 262 | for (u32 i = 0; i < count; ++i) { |
| 263 | const Node it_offset = Immediate(i * 4); | 263 | const Node it_offset = Immediate(i * 4); |
| @@ -265,7 +265,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 265 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | 265 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); |
| 266 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 266 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 267 | 267 | ||
| 268 | bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); | 268 | bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1))); |
| 269 | } | 269 | } |
| 270 | break; | 270 | break; |
| 271 | } | 271 | } |
| @@ -297,18 +297,13 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB | |||
| 297 | const auto addr_register{GetRegister(instr.gmem.gpr)}; | 297 | const auto addr_register{GetRegister(instr.gmem.gpr)}; |
| 298 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; | 298 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; |
| 299 | 299 | ||
| 300 | const Node base_address{ | 300 | const auto [base_address, index, offset] = |
| 301 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; | 301 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); |
| 302 | const auto cbuf = std::get_if<CbufNode>(&*base_address); | 302 | ASSERT(base_address != nullptr); |
| 303 | ASSERT(cbuf != nullptr); | ||
| 304 | const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); | ||
| 305 | ASSERT(cbuf_offset_imm != nullptr); | ||
| 306 | const auto cbuf_offset = cbuf_offset_imm->GetValue(); | ||
| 307 | 303 | ||
| 308 | bb.push_back( | 304 | bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); |
| 309 | Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); | ||
| 310 | 305 | ||
| 311 | const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; | 306 | const GlobalMemoryBase descriptor{index, offset}; |
| 312 | const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); | 307 | const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); |
| 313 | auto& usage = entry->second; | 308 | auto& usage = entry->second; |
| 314 | if (is_write) { | 309 | if (is_write) { |
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d46a8ab82..d46e0f823 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -22,6 +22,12 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 22 | const auto opcode = OpCode::Decode(instr); | 22 | const auto opcode = OpCode::Decode(instr); |
| 23 | 23 | ||
| 24 | switch (opcode->get().GetId()) { | 24 | switch (opcode->get().GetId()) { |
| 25 | case OpCode::Id::NOP: { | ||
| 26 | UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); | ||
| 27 | UNIMPLEMENTED_IF(instr.nop.trigger != 0); | ||
| 28 | // With the previous preconditions, this instruction is a no-operation. | ||
| 29 | break; | ||
| 30 | } | ||
| 25 | case OpCode::Id::EXIT: { | 31 | case OpCode::Id::EXIT: { |
| 26 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | 32 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 27 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}", | 33 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}", |
| @@ -68,6 +74,13 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 68 | case SystemVariable::InvocationInfo: | 74 | case SystemVariable::InvocationInfo: |
| 69 | LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); | 75 | LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); |
| 70 | return Immediate(0u); | 76 | return Immediate(0u); |
| 77 | case SystemVariable::Tid: { | ||
| 78 | Node value = Immediate(0); | ||
| 79 | value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9); | ||
| 80 | value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdY), 16, 9); | ||
| 81 | value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdZ), 26, 5); | ||
| 82 | return value; | ||
| 83 | } | ||
| 71 | case SystemVariable::TidX: | 84 | case SystemVariable::TidX: |
| 72 | return Operation(OperationCode::LocalInvocationIdX); | 85 | return Operation(OperationCode::LocalInvocationIdX); |
| 73 | case SystemVariable::TidY: | 86 | case SystemVariable::TidY: |
| @@ -91,11 +104,46 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 91 | break; | 104 | break; |
| 92 | } | 105 | } |
| 93 | case OpCode::Id::BRA: { | 106 | case OpCode::Id::BRA: { |
| 94 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | 107 | Node branch; |
| 95 | "BRA with constant buffers are not implemented"); | 108 | if (instr.bra.constant_buffer == 0) { |
| 109 | const u32 target = pc + instr.bra.GetBranchTarget(); | ||
| 110 | branch = Operation(OperationCode::Branch, Immediate(target)); | ||
| 111 | } else { | ||
| 112 | const u32 target = pc + 1; | ||
| 113 | const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); | ||
| 114 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 115 | PRECISE, op_a, Immediate(3)); | ||
| 116 | const Node operand = | ||
| 117 | Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 118 | branch = Operation(OperationCode::BranchIndirect, operand); | ||
| 119 | } | ||
| 96 | 120 | ||
| 97 | const u32 target = pc + instr.bra.GetBranchTarget(); | 121 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 98 | const Node branch = Operation(OperationCode::Branch, Immediate(target)); | 122 | if (cc != Tegra::Shader::ConditionCode::T) { |
| 123 | bb.push_back(Conditional(GetConditionCode(cc), {branch})); | ||
| 124 | } else { | ||
| 125 | bb.push_back(branch); | ||
| 126 | } | ||
| 127 | break; | ||
| 128 | } | ||
| 129 | case OpCode::Id::BRX: { | ||
| 130 | Node operand; | ||
| 131 | if (instr.brx.constant_buffer != 0) { | ||
| 132 | const s32 target = pc + 1; | ||
| 133 | const Node index = GetRegister(instr.gpr8); | ||
| 134 | const Node op_a = | ||
| 135 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); | ||
| 136 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 137 | PRECISE, op_a, Immediate(3)); | ||
| 138 | operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 139 | } else { | ||
| 140 | const s32 target = pc + instr.brx.GetBranchExtend(); | ||
| 141 | const Node op_a = GetRegister(instr.gpr8); | ||
| 142 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 143 | PRECISE, op_a, Immediate(3)); | ||
| 144 | operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 145 | } | ||
| 146 | const Node branch = Operation(OperationCode::BranchIndirect, operand); | ||
| 99 | 147 | ||
| 100 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | 148 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 101 | if (cc != Tegra::Shader::ConditionCode::T) { | 149 | if (cc != Tegra::Shader::ConditionCode::T) { |
| @@ -109,6 +157,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 109 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | 157 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |
| 110 | "Constant buffer flow is not supported"); | 158 | "Constant buffer flow is not supported"); |
| 111 | 159 | ||
| 160 | if (disable_flow_stack) { | ||
| 161 | break; | ||
| 162 | } | ||
| 163 | |||
| 112 | // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. | 164 | // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. |
| 113 | const u32 target = pc + instr.bra.GetBranchTarget(); | 165 | const u32 target = pc + instr.bra.GetBranchTarget(); |
| 114 | bb.push_back( | 166 | bb.push_back( |
| @@ -119,6 +171,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 119 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | 171 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |
| 120 | "Constant buffer PBK is not supported"); | 172 | "Constant buffer PBK is not supported"); |
| 121 | 173 | ||
| 174 | if (disable_flow_stack) { | ||
| 175 | break; | ||
| 176 | } | ||
| 177 | |||
| 122 | // PBK pushes to a stack the address where BRK will jump to. | 178 | // PBK pushes to a stack the address where BRK will jump to. |
| 123 | const u32 target = pc + instr.bra.GetBranchTarget(); | 179 | const u32 target = pc + instr.bra.GetBranchTarget(); |
| 124 | bb.push_back( | 180 | bb.push_back( |
| @@ -130,6 +186,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 130 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", | 186 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", |
| 131 | static_cast<u32>(cc)); | 187 | static_cast<u32>(cc)); |
| 132 | 188 | ||
| 189 | if (disable_flow_stack) { | ||
| 190 | break; | ||
| 191 | } | ||
| 192 | |||
| 133 | // The SYNC opcode jumps to the address previously set by the SSY opcode | 193 | // The SYNC opcode jumps to the address previously set by the SSY opcode |
| 134 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); | 194 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); |
| 135 | break; | 195 | break; |
| @@ -138,6 +198,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 138 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | 198 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 139 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", | 199 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", |
| 140 | static_cast<u32>(cc)); | 200 | static_cast<u32>(cc)); |
| 201 | if (disable_flow_stack) { | ||
| 202 | break; | ||
| 203 | } | ||
| 141 | 204 | ||
| 142 | // The BRK opcode jumps to the address previously set by the PBK opcode | 205 | // The BRK opcode jumps to the address previously set by the PBK opcode |
| 143 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); | 206 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); |
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index febbfeb50..84dbc50fe 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp | |||
| @@ -15,7 +15,6 @@ using Tegra::Shader::OpCode; | |||
| 15 | 15 | ||
| 16 | u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { | 16 | u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { |
| 17 | const Instruction instr = {program_code[pc]}; | 17 | const Instruction instr = {program_code[pc]}; |
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | 18 | ||
| 20 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 19 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 21 | "Condition codes generation in PSET is not implemented"); | 20 | "Condition codes generation in PSET is not implemented"); |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index cb480be9b..0b934a069 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -181,10 +181,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 181 | const Node value = | 181 | const Node value = |
| 182 | Operation(OperationCode::TextureQueryDimensions, meta, | 182 | Operation(OperationCode::TextureQueryDimensions, meta, |
| 183 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); | 183 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); |
| 184 | SetTemporal(bb, indexer++, value); | 184 | SetTemporary(bb, indexer++, value); |
| 185 | } | 185 | } |
| 186 | for (u32 i = 0; i < indexer; ++i) { | 186 | for (u32 i = 0; i < indexer; ++i) { |
| 187 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 187 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 188 | } | 188 | } |
| 189 | break; | 189 | break; |
| 190 | } | 190 | } |
| @@ -238,10 +238,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 238 | auto params = coords; | 238 | auto params = coords; |
| 239 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; | 239 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; |
| 240 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | 240 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); |
| 241 | SetTemporal(bb, indexer++, value); | 241 | SetTemporary(bb, indexer++, value); |
| 242 | } | 242 | } |
| 243 | for (u32 i = 0; i < indexer; ++i) { | 243 | for (u32 i = 0; i < indexer; ++i) { |
| 244 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 244 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 245 | } | 245 | } |
| 246 | break; | 246 | break; |
| 247 | } | 247 | } |
| @@ -269,7 +269,13 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 269 | LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); | 269 | LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); |
| 270 | } | 270 | } |
| 271 | 271 | ||
| 272 | WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); | 272 | const Node4 components = GetTldsCode(instr, texture_type, is_array); |
| 273 | |||
| 274 | if (instr.tlds.fp32_flag) { | ||
| 275 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 276 | } else { | ||
| 277 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 278 | } | ||
| 273 | break; | 279 | break; |
| 274 | } | 280 | } |
| 275 | default: | 281 | default: |
| @@ -302,13 +308,9 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu | |||
| 302 | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, | 308 | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, |
| 303 | bool is_array, bool is_shadow) { | 309 | bool is_array, bool is_shadow) { |
| 304 | const Node sampler_register = GetRegister(reg); | 310 | const Node sampler_register = GetRegister(reg); |
| 305 | const Node base_sampler = | 311 | const auto [base_sampler, cbuf_index, cbuf_offset] = |
| 306 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); | 312 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); |
| 307 | const auto cbuf = std::get_if<CbufNode>(&*base_sampler); | 313 | ASSERT(base_sampler != nullptr); |
| 308 | const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); | ||
| 309 | ASSERT(cbuf_offset_imm != nullptr); | ||
| 310 | const auto cbuf_offset = cbuf_offset_imm->GetValue(); | ||
| 311 | const auto cbuf_index = cbuf->GetIndex(); | ||
| 312 | const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); | 314 | const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); |
| 313 | 315 | ||
| 314 | // If this sampler has already been used, return the existing mapping. | 316 | // If this sampler has already been used, return the existing mapping. |
| @@ -334,11 +336,11 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const | |||
| 334 | // Skip disabled components | 336 | // Skip disabled components |
| 335 | continue; | 337 | continue; |
| 336 | } | 338 | } |
| 337 | SetTemporal(bb, dest_elem++, components[elem]); | 339 | SetTemporary(bb, dest_elem++, components[elem]); |
| 338 | } | 340 | } |
| 339 | // After writing values in temporals, move them to the real registers | 341 | // After writing values in temporals, move them to the real registers |
| 340 | for (u32 i = 0; i < dest_elem; ++i) { | 342 | for (u32 i = 0; i < dest_elem; ++i) { |
| 341 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 343 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 342 | } | 344 | } |
| 343 | } | 345 | } |
| 344 | 346 | ||
| @@ -351,17 +353,17 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, | |||
| 351 | for (u32 component = 0; component < 4; ++component) { | 353 | for (u32 component = 0; component < 4; ++component) { |
| 352 | if (!instr.texs.IsComponentEnabled(component)) | 354 | if (!instr.texs.IsComponentEnabled(component)) |
| 353 | continue; | 355 | continue; |
| 354 | SetTemporal(bb, dest_elem++, components[component]); | 356 | SetTemporary(bb, dest_elem++, components[component]); |
| 355 | } | 357 | } |
| 356 | 358 | ||
| 357 | for (u32 i = 0; i < dest_elem; ++i) { | 359 | for (u32 i = 0; i < dest_elem; ++i) { |
| 358 | if (i < 2) { | 360 | if (i < 2) { |
| 359 | // Write the first two swizzle components to gpr0 and gpr0+1 | 361 | // Write the first two swizzle components to gpr0 and gpr0+1 |
| 360 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); | 362 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); |
| 361 | } else { | 363 | } else { |
| 362 | ASSERT(instr.texs.HasTwoDestinations()); | 364 | ASSERT(instr.texs.HasTwoDestinations()); |
| 363 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | 365 | // Write the rest of the swizzle components to gpr28 and gpr28+1 |
| 364 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); | 366 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); |
| 365 | } | 367 | } |
| 366 | } | 368 | } |
| 367 | } | 369 | } |
| @@ -389,11 +391,11 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | |||
| 389 | return; | 391 | return; |
| 390 | } | 392 | } |
| 391 | 393 | ||
| 392 | SetTemporal(bb, 0, first_value); | 394 | SetTemporary(bb, 0, first_value); |
| 393 | SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | 395 | SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); |
| 394 | 396 | ||
| 395 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | 397 | SetRegister(bb, instr.gpr0, GetTemporary(0)); |
| 396 | SetRegister(bb, instr.gpr28, GetTemporal(1)); | 398 | SetRegister(bb, instr.gpr28, GetTemporary(1)); |
| 397 | } | 399 | } |
| 398 | 400 | ||
| 399 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | 401 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, |
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp new file mode 100644 index 000000000..04ca74f46 --- /dev/null +++ b/src/video_core/shader/decode/warp.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | using Tegra::Shader::VoteOperation; | ||
| 17 | |||
| 18 | namespace { | ||
| 19 | OperationCode GetOperationCode(VoteOperation vote_op) { | ||
| 20 | switch (vote_op) { | ||
| 21 | case VoteOperation::All: | ||
| 22 | return OperationCode::VoteAll; | ||
| 23 | case VoteOperation::Any: | ||
| 24 | return OperationCode::VoteAny; | ||
| 25 | case VoteOperation::Eq: | ||
| 26 | return OperationCode::VoteEqual; | ||
| 27 | default: | ||
| 28 | UNREACHABLE_MSG("Invalid vote operation={}", static_cast<u64>(vote_op)); | ||
| 29 | return OperationCode::VoteAll; | ||
| 30 | } | ||
| 31 | } | ||
| 32 | } // Anonymous namespace | ||
| 33 | |||
| 34 | u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | ||
| 35 | const Instruction instr = {program_code[pc]}; | ||
| 36 | const auto opcode = OpCode::Decode(instr); | ||
| 37 | |||
| 38 | switch (opcode->get().GetId()) { | ||
| 39 | case OpCode::Id::VOTE: { | ||
| 40 | const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); | ||
| 41 | const Node active = Operation(OperationCode::BallotThread, value); | ||
| 42 | const Node vote = Operation(GetOperationCode(instr.vote.operation), value); | ||
| 43 | SetRegister(bb, instr.gpr0, active); | ||
| 44 | SetPredicate(bb, instr.vote.dest_pred, vote); | ||
| 45 | break; | ||
| 46 | } | ||
| 47 | default: | ||
| 48 | UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); | ||
| 49 | break; | ||
| 50 | } | ||
| 51 | |||
| 52 | return pc; | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 93dee77d1..206961909 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp | |||
| @@ -73,8 +73,8 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | |||
| 73 | if (is_psl) { | 73 | if (is_psl) { |
| 74 | product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); | 74 | product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); |
| 75 | } | 75 | } |
| 76 | SetTemporal(bb, 0, product); | 76 | SetTemporary(bb, 0, product); |
| 77 | product = GetTemporal(0); | 77 | product = GetTemporary(0); |
| 78 | 78 | ||
| 79 | const Node original_c = op_c; | 79 | const Node original_c = op_c; |
| 80 | const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error | 80 | const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error |
| @@ -98,13 +98,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | |||
| 98 | } | 98 | } |
| 99 | }(); | 99 | }(); |
| 100 | 100 | ||
| 101 | SetTemporal(bb, 1, op_c); | 101 | SetTemporary(bb, 1, op_c); |
| 102 | op_c = GetTemporal(1); | 102 | op_c = GetTemporary(1); |
| 103 | 103 | ||
| 104 | // TODO(Rodrigo): Use an appropiate sign for this operation | 104 | // TODO(Rodrigo): Use an appropiate sign for this operation |
| 105 | Node sum = Operation(OperationCode::IAdd, product, op_c); | 105 | Node sum = Operation(OperationCode::IAdd, product, op_c); |
| 106 | SetTemporal(bb, 2, sum); | 106 | SetTemporary(bb, 2, sum); |
| 107 | sum = GetTemporal(2); | 107 | sum = GetTemporary(2); |
| 108 | if (is_merge) { | 108 | if (is_merge) { |
| 109 | const Node a = BitfieldExtract(sum, 0, 16); | 109 | const Node a = BitfieldExtract(sum, 0, 16); |
| 110 | const Node b = | 110 | const Node b = |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 0ac83fcf0..5db9313c4 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -30,6 +30,8 @@ enum class OperationCode { | |||
| 30 | FNegate, /// (MetaArithmetic, float a) -> float | 30 | FNegate, /// (MetaArithmetic, float a) -> float |
| 31 | FAbsolute, /// (MetaArithmetic, float a) -> float | 31 | FAbsolute, /// (MetaArithmetic, float a) -> float |
| 32 | FClamp, /// (MetaArithmetic, float value, float min, float max) -> float | 32 | FClamp, /// (MetaArithmetic, float value, float min, float max) -> float |
| 33 | FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float | ||
| 34 | FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float | ||
| 33 | FMin, /// (MetaArithmetic, float a, float b) -> float | 35 | FMin, /// (MetaArithmetic, float a, float b) -> float |
| 34 | FMax, /// (MetaArithmetic, float a, float b) -> float | 36 | FMax, /// (MetaArithmetic, float a, float b) -> float |
| 35 | FCos, /// (MetaArithmetic, float a) -> float | 37 | FCos, /// (MetaArithmetic, float a) -> float |
| @@ -83,17 +85,18 @@ enum class OperationCode { | |||
| 83 | UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint | 85 | UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint |
| 84 | UBitCount, /// (MetaArithmetic, uint) -> uint | 86 | UBitCount, /// (MetaArithmetic, uint) -> uint |
| 85 | 87 | ||
| 86 | HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | 88 | HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 |
| 87 | HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | 89 | HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 |
| 88 | HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 | 90 | HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 |
| 89 | HAbsolute, /// (f16vec2 a) -> f16vec2 | 91 | HAbsolute, /// (f16vec2 a) -> f16vec2 |
| 90 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 | 92 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 |
| 91 | HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 | 93 | HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 |
| 92 | HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 | 94 | HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 |
| 93 | HMergeF32, /// (f16vec2 src) -> float | 95 | HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 |
| 94 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | 96 | HMergeF32, /// (f16vec2 src) -> float |
| 95 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | 97 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 |
| 96 | HPack2, /// (float a, float b) -> f16vec2 | 98 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 |
| 99 | HPack2, /// (float a, float b) -> f16vec2 | ||
| 97 | 100 | ||
| 98 | LogicalAssign, /// (bool& dst, bool src) -> void | 101 | LogicalAssign, /// (bool& dst, bool src) -> void |
| 99 | LogicalAnd, /// (bool a, bool b) -> bool | 102 | LogicalAnd, /// (bool a, bool b) -> bool |
| @@ -101,8 +104,7 @@ enum class OperationCode { | |||
| 101 | LogicalXor, /// (bool a, bool b) -> bool | 104 | LogicalXor, /// (bool a, bool b) -> bool |
| 102 | LogicalNegate, /// (bool a) -> bool | 105 | LogicalNegate, /// (bool a) -> bool |
| 103 | LogicalPick2, /// (bool2 pair, uint index) -> bool | 106 | LogicalPick2, /// (bool2 pair, uint index) -> bool |
| 104 | LogicalAll2, /// (bool2 a) -> bool | 107 | LogicalAnd2, /// (bool2 a) -> bool |
| 105 | LogicalAny2, /// (bool2 a) -> bool | ||
| 106 | 108 | ||
| 107 | LogicalFLessThan, /// (float a, float b) -> bool | 109 | LogicalFLessThan, /// (float a, float b) -> bool |
| 108 | LogicalFEqual, /// (float a, float b) -> bool | 110 | LogicalFEqual, /// (float a, float b) -> bool |
| @@ -148,11 +150,12 @@ enum class OperationCode { | |||
| 148 | 150 | ||
| 149 | ImageStore, /// (MetaImage, float[N] coords) -> void | 151 | ImageStore, /// (MetaImage, float[N] coords) -> void |
| 150 | 152 | ||
| 151 | Branch, /// (uint branch_target) -> void | 153 | Branch, /// (uint branch_target) -> void |
| 152 | PushFlowStack, /// (uint branch_target) -> void | 154 | BranchIndirect, /// (uint branch_target) -> void |
| 153 | PopFlowStack, /// () -> void | 155 | PushFlowStack, /// (uint branch_target) -> void |
| 154 | Exit, /// () -> void | 156 | PopFlowStack, /// () -> void |
| 155 | Discard, /// () -> void | 157 | Exit, /// () -> void |
| 158 | Discard, /// () -> void | ||
| 156 | 159 | ||
| 157 | EmitVertex, /// () -> void | 160 | EmitVertex, /// () -> void |
| 158 | EndPrimitive, /// () -> void | 161 | EndPrimitive, /// () -> void |
| @@ -165,6 +168,11 @@ enum class OperationCode { | |||
| 165 | WorkGroupIdY, /// () -> uint | 168 | WorkGroupIdY, /// () -> uint |
| 166 | WorkGroupIdZ, /// () -> uint | 169 | WorkGroupIdZ, /// () -> uint |
| 167 | 170 | ||
| 171 | BallotThread, /// (bool) -> uint | ||
| 172 | VoteAll, /// (bool) -> bool | ||
| 173 | VoteAny, /// (bool) -> bool | ||
| 174 | VoteEqual, /// (bool) -> bool | ||
| 175 | |||
| 168 | Amount, | 176 | Amount, |
| 169 | }; | 177 | }; |
| 170 | 178 | ||
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp index 6fccbbba3..b3dcd291c 100644 --- a/src/video_core/shader/node_helper.cpp +++ b/src/video_core/shader/node_helper.cpp | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | namespace VideoCommon::Shader { | 12 | namespace VideoCommon::Shader { |
| 13 | 13 | ||
| 14 | Node Conditional(Node condition, std::vector<Node> code) { | 14 | Node Conditional(Node condition, std::vector<Node> code) { |
| 15 | return MakeNode<ConditionalNode>(condition, std::move(code)); | 15 | return MakeNode<ConditionalNode>(std::move(condition), std::move(code)); |
| 16 | } | 16 | } |
| 17 | 17 | ||
| 18 | Node Comment(std::string text) { | 18 | Node Comment(std::string text) { |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 11b545cca..1e5c7f660 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -22,8 +22,8 @@ using Tegra::Shader::PredCondition; | |||
| 22 | using Tegra::Shader::PredOperation; | 22 | using Tegra::Shader::PredOperation; |
| 23 | using Tegra::Shader::Register; | 23 | using Tegra::Shader::Register; |
| 24 | 24 | ||
| 25 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset) | 25 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size) |
| 26 | : program_code{program_code}, main_offset{main_offset} { | 26 | : program_code{program_code}, main_offset{main_offset}, program_size{size} { |
| 27 | Decode(); | 27 | Decode(); |
| 28 | } | 28 | } |
| 29 | 29 | ||
| @@ -61,8 +61,17 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { | |||
| 61 | const auto [entry, is_new] = used_cbufs.try_emplace(index); | 61 | const auto [entry, is_new] = used_cbufs.try_emplace(index); |
| 62 | entry->second.MarkAsUsedIndirect(); | 62 | entry->second.MarkAsUsedIndirect(); |
| 63 | 63 | ||
| 64 | const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); | 64 | Node final_offset = [&] { |
| 65 | return MakeNode<CbufNode>(index, final_offset); | 65 | // Attempt to inline constant buffer without a variable offset. This is done to allow |
| 66 | // tracking LDC calls. | ||
| 67 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 68 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 69 | return Immediate(offset); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset)); | ||
| 73 | }(); | ||
| 74 | return MakeNode<CbufNode>(index, std::move(final_offset)); | ||
| 66 | } | 75 | } |
| 67 | 76 | ||
| 68 | Node ShaderIR::GetPredicate(u64 pred_, bool negated) { | 77 | Node ShaderIR::GetPredicate(u64 pred_, bool negated) { |
| @@ -80,7 +89,7 @@ Node ShaderIR::GetPredicate(bool immediate) { | |||
| 80 | 89 | ||
| 81 | Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { | 90 | Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { |
| 82 | used_input_attributes.emplace(index); | 91 | used_input_attributes.emplace(index); |
| 83 | return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); | 92 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); |
| 84 | } | 93 | } |
| 85 | 94 | ||
| 86 | Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { | 95 | Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { |
| @@ -89,6 +98,22 @@ Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_addres | |||
| 89 | } | 98 | } |
| 90 | 99 | ||
| 91 | Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { | 100 | Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { |
| 101 | if (index == Attribute::Index::LayerViewportPointSize) { | ||
| 102 | switch (element) { | ||
| 103 | case 0: | ||
| 104 | UNIMPLEMENTED(); | ||
| 105 | break; | ||
| 106 | case 1: | ||
| 107 | uses_layer = true; | ||
| 108 | break; | ||
| 109 | case 2: | ||
| 110 | uses_viewport_index = true; | ||
| 111 | break; | ||
| 112 | case 3: | ||
| 113 | uses_point_size = true; | ||
| 114 | break; | ||
| 115 | } | ||
| 116 | } | ||
| 92 | if (index == Attribute::Index::ClipDistances0123 || | 117 | if (index == Attribute::Index::ClipDistances0123 || |
| 93 | index == Attribute::Index::ClipDistances4567) { | 118 | index == Attribute::Index::ClipDistances4567) { |
| 94 | const auto clip_index = | 119 | const auto clip_index = |
| @@ -97,7 +122,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff | |||
| 97 | } | 122 | } |
| 98 | used_output_attributes.insert(index); | 123 | used_output_attributes.insert(index); |
| 99 | 124 | ||
| 100 | return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); | 125 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); |
| 101 | } | 126 | } |
| 102 | 127 | ||
| 103 | Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { | 128 | Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { |
| @@ -109,19 +134,19 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { | |||
| 109 | } | 134 | } |
| 110 | 135 | ||
| 111 | Node ShaderIR::GetLocalMemory(Node address) { | 136 | Node ShaderIR::GetLocalMemory(Node address) { |
| 112 | return MakeNode<LmemNode>(address); | 137 | return MakeNode<LmemNode>(std::move(address)); |
| 113 | } | 138 | } |
| 114 | 139 | ||
| 115 | Node ShaderIR::GetTemporal(u32 id) { | 140 | Node ShaderIR::GetTemporary(u32 id) { |
| 116 | return GetRegister(Register::ZeroIndex + 1 + id); | 141 | return GetRegister(Register::ZeroIndex + 1 + id); |
| 117 | } | 142 | } |
| 118 | 143 | ||
| 119 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { | 144 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { |
| 120 | if (absolute) { | 145 | if (absolute) { |
| 121 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, value); | 146 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value)); |
| 122 | } | 147 | } |
| 123 | if (negate) { | 148 | if (negate) { |
| 124 | value = Operation(OperationCode::FNegate, NO_PRECISE, value); | 149 | value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value)); |
| 125 | } | 150 | } |
| 126 | return value; | 151 | return value; |
| 127 | } | 152 | } |
| @@ -130,24 +155,26 @@ Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { | |||
| 130 | if (!saturate) { | 155 | if (!saturate) { |
| 131 | return value; | 156 | return value; |
| 132 | } | 157 | } |
| 133 | const Node positive_zero = Immediate(std::copysignf(0, 1)); | 158 | |
| 134 | const Node positive_one = Immediate(1.0f); | 159 | Node positive_zero = Immediate(std::copysignf(0, 1)); |
| 135 | return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one); | 160 | Node positive_one = Immediate(1.0f); |
| 161 | return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero), | ||
| 162 | std::move(positive_one)); | ||
| 136 | } | 163 | } |
| 137 | 164 | ||
| 138 | Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) { | 165 | Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) { |
| 139 | switch (size) { | 166 | switch (size) { |
| 140 | case Register::Size::Byte: | 167 | case Register::Size::Byte: |
| 141 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, | 168 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, |
| 142 | Immediate(24)); | 169 | std::move(value), Immediate(24)); |
| 143 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, | 170 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, |
| 144 | Immediate(24)); | 171 | std::move(value), Immediate(24)); |
| 145 | return value; | 172 | return value; |
| 146 | case Register::Size::Short: | 173 | case Register::Size::Short: |
| 147 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, | 174 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, |
| 148 | Immediate(16)); | 175 | std::move(value), Immediate(16)); |
| 149 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, | 176 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, |
| 150 | Immediate(16)); | 177 | std::move(value), Immediate(16)); |
| 151 | case Register::Size::Word: | 178 | case Register::Size::Word: |
| 152 | // Default - do nothing | 179 | // Default - do nothing |
| 153 | return value; | 180 | return value; |
| @@ -163,27 +190,29 @@ Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, b | |||
| 163 | return value; | 190 | return value; |
| 164 | } | 191 | } |
| 165 | if (absolute) { | 192 | if (absolute) { |
| 166 | value = Operation(OperationCode::IAbsolute, NO_PRECISE, value); | 193 | value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value)); |
| 167 | } | 194 | } |
| 168 | if (negate) { | 195 | if (negate) { |
| 169 | value = Operation(OperationCode::INegate, NO_PRECISE, value); | 196 | value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value)); |
| 170 | } | 197 | } |
| 171 | return value; | 198 | return value; |
| 172 | } | 199 | } |
| 173 | 200 | ||
| 174 | Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { | 201 | Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { |
| 175 | const Node value = Immediate(instr.half_imm.PackImmediates()); | 202 | Node value = Immediate(instr.half_imm.PackImmediates()); |
| 176 | if (!has_negation) { | 203 | if (!has_negation) { |
| 177 | return value; | 204 | return value; |
| 178 | } | 205 | } |
| 179 | const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); | ||
| 180 | const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); | ||
| 181 | 206 | ||
| 182 | return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate); | 207 | Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); |
| 208 | Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); | ||
| 209 | |||
| 210 | return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate), | ||
| 211 | std::move(second_negate)); | ||
| 183 | } | 212 | } |
| 184 | 213 | ||
| 185 | Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { | 214 | Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { |
| 186 | return Operation(OperationCode::HUnpack, type, value); | 215 | return Operation(OperationCode::HUnpack, type, std::move(value)); |
| 187 | } | 216 | } |
| 188 | 217 | ||
| 189 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | 218 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { |
| @@ -191,11 +220,11 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | |||
| 191 | case Tegra::Shader::HalfMerge::H0_H1: | 220 | case Tegra::Shader::HalfMerge::H0_H1: |
| 192 | return src; | 221 | return src; |
| 193 | case Tegra::Shader::HalfMerge::F32: | 222 | case Tegra::Shader::HalfMerge::F32: |
| 194 | return Operation(OperationCode::HMergeF32, src); | 223 | return Operation(OperationCode::HMergeF32, std::move(src)); |
| 195 | case Tegra::Shader::HalfMerge::Mrg_H0: | 224 | case Tegra::Shader::HalfMerge::Mrg_H0: |
| 196 | return Operation(OperationCode::HMergeH0, dest, src); | 225 | return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src)); |
| 197 | case Tegra::Shader::HalfMerge::Mrg_H1: | 226 | case Tegra::Shader::HalfMerge::Mrg_H1: |
| 198 | return Operation(OperationCode::HMergeH1, dest, src); | 227 | return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src)); |
| 199 | } | 228 | } |
| 200 | UNREACHABLE(); | 229 | UNREACHABLE(); |
| 201 | return src; | 230 | return src; |
| @@ -203,10 +232,10 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | |||
| 203 | 232 | ||
| 204 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { | 233 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { |
| 205 | if (absolute) { | 234 | if (absolute) { |
| 206 | value = Operation(OperationCode::HAbsolute, NO_PRECISE, value); | 235 | value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value)); |
| 207 | } | 236 | } |
| 208 | if (negate) { | 237 | if (negate) { |
| 209 | value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true), | 238 | value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true), |
| 210 | GetPredicate(true)); | 239 | GetPredicate(true)); |
| 211 | } | 240 | } |
| 212 | return value; | 241 | return value; |
| @@ -216,9 +245,11 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { | |||
| 216 | if (!saturate) { | 245 | if (!saturate) { |
| 217 | return value; | 246 | return value; |
| 218 | } | 247 | } |
| 219 | const Node positive_zero = Immediate(std::copysignf(0, 1)); | 248 | |
| 220 | const Node positive_one = Immediate(1.0f); | 249 | Node positive_zero = Immediate(std::copysignf(0, 1)); |
| 221 | return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one); | 250 | Node positive_one = Immediate(1.0f); |
| 251 | return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero), | ||
| 252 | std::move(positive_one)); | ||
| 222 | } | 253 | } |
| 223 | 254 | ||
| 224 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { | 255 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { |
| @@ -246,7 +277,6 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N | |||
| 246 | condition == PredCondition::LessEqualWithNan || | 277 | condition == PredCondition::LessEqualWithNan || |
| 247 | condition == PredCondition::GreaterThanWithNan || | 278 | condition == PredCondition::GreaterThanWithNan || |
| 248 | condition == PredCondition::GreaterEqualWithNan) { | 279 | condition == PredCondition::GreaterEqualWithNan) { |
| 249 | |||
| 250 | predicate = Operation(OperationCode::LogicalOr, predicate, | 280 | predicate = Operation(OperationCode::LogicalOr, predicate, |
| 251 | Operation(OperationCode::LogicalFIsNan, op_a)); | 281 | Operation(OperationCode::LogicalFIsNan, op_a)); |
| 252 | predicate = Operation(OperationCode::LogicalOr, predicate, | 282 | predicate = Operation(OperationCode::LogicalOr, predicate, |
| @@ -275,7 +305,8 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si | |||
| 275 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | 305 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), |
| 276 | "Unknown predicate comparison operation"); | 306 | "Unknown predicate comparison operation"); |
| 277 | 307 | ||
| 278 | Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b); | 308 | Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), |
| 309 | std::move(op_b)); | ||
| 279 | 310 | ||
| 280 | UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || | 311 | UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || |
| 281 | condition == PredCondition::NotEqualWithNan || | 312 | condition == PredCondition::NotEqualWithNan || |
| @@ -305,9 +336,7 @@ Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition | |||
| 305 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | 336 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), |
| 306 | "Unknown predicate comparison operation"); | 337 | "Unknown predicate comparison operation"); |
| 307 | 338 | ||
| 308 | const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); | 339 | return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); |
| 309 | |||
| 310 | return predicate; | ||
| 311 | } | 340 | } |
| 312 | 341 | ||
| 313 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { | 342 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { |
| @@ -333,31 +362,32 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) { | |||
| 333 | } | 362 | } |
| 334 | 363 | ||
| 335 | void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { | 364 | void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { |
| 336 | bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src)); | 365 | bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src))); |
| 337 | } | 366 | } |
| 338 | 367 | ||
| 339 | void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { | 368 | void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { |
| 340 | bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src)); | 369 | bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src))); |
| 341 | } | 370 | } |
| 342 | 371 | ||
| 343 | void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { | 372 | void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { |
| 344 | bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value)); | 373 | bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value))); |
| 345 | } | 374 | } |
| 346 | 375 | ||
| 347 | void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { | 376 | void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { |
| 348 | bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); | 377 | bb.push_back( |
| 378 | Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); | ||
| 349 | } | 379 | } |
| 350 | 380 | ||
| 351 | void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) { | 381 | void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { |
| 352 | SetRegister(bb, Register::ZeroIndex + 1 + id, value); | 382 | SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); |
| 353 | } | 383 | } |
| 354 | 384 | ||
| 355 | void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { | 385 | void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { |
| 356 | if (!sets_cc) { | 386 | if (!sets_cc) { |
| 357 | return; | 387 | return; |
| 358 | } | 388 | } |
| 359 | const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); | 389 | Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f)); |
| 360 | SetInternalFlag(bb, InternalFlag::Zero, zerop); | 390 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); |
| 361 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | 391 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); |
| 362 | } | 392 | } |
| 363 | 393 | ||
| @@ -365,13 +395,18 @@ void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_ | |||
| 365 | if (!sets_cc) { | 395 | if (!sets_cc) { |
| 366 | return; | 396 | return; |
| 367 | } | 397 | } |
| 368 | const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0)); | 398 | Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); |
| 369 | SetInternalFlag(bb, InternalFlag::Zero, zerop); | 399 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); |
| 370 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | 400 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); |
| 371 | } | 401 | } |
| 372 | 402 | ||
| 373 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { | 403 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { |
| 374 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), | 404 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value), |
| 405 | Immediate(offset), Immediate(bits)); | ||
| 406 | } | ||
| 407 | |||
| 408 | Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { | ||
| 409 | return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset), | ||
| 375 | Immediate(bits)); | 410 | Immediate(bits)); |
| 376 | } | 411 | } |
| 377 | 412 | ||
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index e22548208..bcc9b79b6 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -5,13 +5,10 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstring> | ||
| 9 | #include <map> | 8 | #include <map> |
| 10 | #include <optional> | 9 | #include <optional> |
| 11 | #include <set> | 10 | #include <set> |
| 12 | #include <string> | ||
| 13 | #include <tuple> | 11 | #include <tuple> |
| 14 | #include <variant> | ||
| 15 | #include <vector> | 12 | #include <vector> |
| 16 | 13 | ||
| 17 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| @@ -22,18 +19,12 @@ | |||
| 22 | 19 | ||
| 23 | namespace VideoCommon::Shader { | 20 | namespace VideoCommon::Shader { |
| 24 | 21 | ||
| 22 | struct ShaderBlock; | ||
| 23 | |||
| 25 | using ProgramCode = std::vector<u64>; | 24 | using ProgramCode = std::vector<u64>; |
| 26 | 25 | ||
| 27 | constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; | 26 | constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; |
| 28 | 27 | ||
| 29 | /// Describes the behaviour of code path of a given entry point and a return point. | ||
| 30 | enum class ExitMethod { | ||
| 31 | Undetermined, ///< Internal value. Only occur when analyzing JMP loop. | ||
| 32 | AlwaysReturn, ///< All code paths reach the return point. | ||
| 33 | Conditional, ///< Code path reaches the return point or an END instruction conditionally. | ||
| 34 | AlwaysEnd, ///< All code paths reach a END instruction. | ||
| 35 | }; | ||
| 36 | |||
| 37 | class ConstBuffer { | 28 | class ConstBuffer { |
| 38 | public: | 29 | public: |
| 39 | explicit ConstBuffer(u32 max_offset, bool is_indirect) | 30 | explicit ConstBuffer(u32 max_offset, bool is_indirect) |
| @@ -73,7 +64,7 @@ struct GlobalMemoryUsage { | |||
| 73 | 64 | ||
| 74 | class ShaderIR final { | 65 | class ShaderIR final { |
| 75 | public: | 66 | public: |
| 76 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset); | 67 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size); |
| 77 | ~ShaderIR(); | 68 | ~ShaderIR(); |
| 78 | 69 | ||
| 79 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { | 70 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { |
| @@ -121,6 +112,18 @@ public: | |||
| 121 | return static_cast<std::size_t>(coverage_end * sizeof(u64)); | 112 | return static_cast<std::size_t>(coverage_end * sizeof(u64)); |
| 122 | } | 113 | } |
| 123 | 114 | ||
| 115 | bool UsesLayer() const { | ||
| 116 | return uses_layer; | ||
| 117 | } | ||
| 118 | |||
| 119 | bool UsesViewportIndex() const { | ||
| 120 | return uses_viewport_index; | ||
| 121 | } | ||
| 122 | |||
| 123 | bool UsesPointSize() const { | ||
| 124 | return uses_point_size; | ||
| 125 | } | ||
| 126 | |||
| 124 | bool HasPhysicalAttributes() const { | 127 | bool HasPhysicalAttributes() const { |
| 125 | return uses_physical_attributes; | 128 | return uses_physical_attributes; |
| 126 | } | 129 | } |
| @@ -129,12 +132,20 @@ public: | |||
| 129 | return header; | 132 | return header; |
| 130 | } | 133 | } |
| 131 | 134 | ||
| 135 | bool IsFlowStackDisabled() const { | ||
| 136 | return disable_flow_stack; | ||
| 137 | } | ||
| 138 | |||
| 139 | u32 ConvertAddressToNvidiaSpace(const u32 address) const { | ||
| 140 | return (address - main_offset) * sizeof(Tegra::Shader::Instruction); | ||
| 141 | } | ||
| 142 | |||
| 132 | private: | 143 | private: |
| 133 | void Decode(); | 144 | void Decode(); |
| 134 | 145 | ||
| 135 | ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels); | ||
| 136 | |||
| 137 | NodeBlock DecodeRange(u32 begin, u32 end); | 146 | NodeBlock DecodeRange(u32 begin, u32 end); |
| 147 | void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); | ||
| 148 | void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); | ||
| 138 | 149 | ||
| 139 | /** | 150 | /** |
| 140 | * Decodes a single instruction from Tegra to IR. | 151 | * Decodes a single instruction from Tegra to IR. |
| @@ -156,6 +167,7 @@ private: | |||
| 156 | u32 DecodeFfma(NodeBlock& bb, u32 pc); | 167 | u32 DecodeFfma(NodeBlock& bb, u32 pc); |
| 157 | u32 DecodeHfma2(NodeBlock& bb, u32 pc); | 168 | u32 DecodeHfma2(NodeBlock& bb, u32 pc); |
| 158 | u32 DecodeConversion(NodeBlock& bb, u32 pc); | 169 | u32 DecodeConversion(NodeBlock& bb, u32 pc); |
| 170 | u32 DecodeWarp(NodeBlock& bb, u32 pc); | ||
| 159 | u32 DecodeMemory(NodeBlock& bb, u32 pc); | 171 | u32 DecodeMemory(NodeBlock& bb, u32 pc); |
| 160 | u32 DecodeTexture(NodeBlock& bb, u32 pc); | 172 | u32 DecodeTexture(NodeBlock& bb, u32 pc); |
| 161 | u32 DecodeImage(NodeBlock& bb, u32 pc); | 173 | u32 DecodeImage(NodeBlock& bb, u32 pc); |
| @@ -196,8 +208,8 @@ private: | |||
| 196 | Node GetInternalFlag(InternalFlag flag, bool negated = false); | 208 | Node GetInternalFlag(InternalFlag flag, bool negated = false); |
| 197 | /// Generates a node representing a local memory address | 209 | /// Generates a node representing a local memory address |
| 198 | Node GetLocalMemory(Node address); | 210 | Node GetLocalMemory(Node address); |
| 199 | /// Generates a temporal, internally it uses a post-RZ register | 211 | /// Generates a temporary, internally it uses a post-RZ register |
| 200 | Node GetTemporal(u32 id); | 212 | Node GetTemporary(u32 id); |
| 201 | 213 | ||
| 202 | /// Sets a register. src value must be a number-evaluated node. | 214 | /// Sets a register. src value must be a number-evaluated node. |
| 203 | void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); | 215 | void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); |
| @@ -207,8 +219,8 @@ private: | |||
| 207 | void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); | 219 | void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); |
| 208 | /// Sets a local memory address. address and value must be a number-evaluated node | 220 | /// Sets a local memory address. address and value must be a number-evaluated node |
| 209 | void SetLocalMemory(NodeBlock& bb, Node address, Node value); | 221 | void SetLocalMemory(NodeBlock& bb, Node address, Node value); |
| 210 | /// Sets a temporal. Internally it uses a post-RZ register | 222 | /// Sets a temporary. Internally it uses a post-RZ register |
| 211 | void SetTemporal(NodeBlock& bb, u32 id, Node value); | 223 | void SetTemporary(NodeBlock& bb, u32 id, Node value); |
| 212 | 224 | ||
| 213 | /// Sets internal flags from a float | 225 | /// Sets internal flags from a float |
| 214 | void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); | 226 | void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); |
| @@ -268,6 +280,9 @@ private: | |||
| 268 | /// Extracts a sequence of bits from a node | 280 | /// Extracts a sequence of bits from a node |
| 269 | Node BitfieldExtract(Node value, u32 offset, u32 bits); | 281 | Node BitfieldExtract(Node value, u32 offset, u32 bits); |
| 270 | 282 | ||
| 283 | /// Inserts a sequence of bits from a node | ||
| 284 | Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); | ||
| 285 | |||
| 271 | void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | 286 | void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, |
| 272 | const Node4& components); | 287 | const Node4& components); |
| 273 | 288 | ||
| @@ -314,7 +329,7 @@ private: | |||
| 314 | void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, | 329 | void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, |
| 315 | Node op_c, Node imm_lut, bool sets_cc); | 330 | Node op_c, Node imm_lut, bool sets_cc); |
| 316 | 331 | ||
| 317 | Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; | 332 | std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; |
| 318 | 333 | ||
| 319 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; | 334 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; |
| 320 | 335 | ||
| @@ -326,10 +341,11 @@ private: | |||
| 326 | 341 | ||
| 327 | const ProgramCode& program_code; | 342 | const ProgramCode& program_code; |
| 328 | const u32 main_offset; | 343 | const u32 main_offset; |
| 344 | const std::size_t program_size; | ||
| 345 | bool disable_flow_stack{}; | ||
| 329 | 346 | ||
| 330 | u32 coverage_begin{}; | 347 | u32 coverage_begin{}; |
| 331 | u32 coverage_end{}; | 348 | u32 coverage_end{}; |
| 332 | std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; | ||
| 333 | 349 | ||
| 334 | std::map<u32, NodeBlock> basic_blocks; | 350 | std::map<u32, NodeBlock> basic_blocks; |
| 335 | NodeBlock global_code; | 351 | NodeBlock global_code; |
| @@ -343,6 +359,9 @@ private: | |||
| 343 | std::set<Image> used_images; | 359 | std::set<Image> used_images; |
| 344 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; | 360 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; |
| 345 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; | 361 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; |
| 362 | bool uses_layer{}; | ||
| 363 | bool uses_viewport_index{}; | ||
| 364 | bool uses_point_size{}; | ||
| 346 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes | 365 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes |
| 347 | 366 | ||
| 348 | Tegra::Shader::Header header; | 367 | Tegra::Shader::Header header; |
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index fc957d980..55f5949e4 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -15,56 +15,63 @@ namespace { | |||
| 15 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | 15 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, |
| 16 | OperationCode operation_code) { | 16 | OperationCode operation_code) { |
| 17 | for (; cursor >= 0; --cursor) { | 17 | for (; cursor >= 0; --cursor) { |
| 18 | const Node node = code.at(cursor); | 18 | Node node = code.at(cursor); |
| 19 | |||
| 19 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | 20 | if (const auto operation = std::get_if<OperationNode>(&*node)) { |
| 20 | if (operation->GetCode() == operation_code) { | 21 | if (operation->GetCode() == operation_code) { |
| 21 | return {node, cursor}; | 22 | return {std::move(node), cursor}; |
| 22 | } | 23 | } |
| 23 | } | 24 | } |
| 25 | |||
| 24 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | 26 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { |
| 25 | const auto& conditional_code = conditional->GetCode(); | 27 | const auto& conditional_code = conditional->GetCode(); |
| 26 | const auto [found, internal_cursor] = FindOperation( | 28 | auto [found, internal_cursor] = FindOperation( |
| 27 | conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); | 29 | conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); |
| 28 | if (found) { | 30 | if (found) { |
| 29 | return {found, cursor}; | 31 | return {std::move(found), cursor}; |
| 30 | } | 32 | } |
| 31 | } | 33 | } |
| 32 | } | 34 | } |
| 33 | return {}; | 35 | return {}; |
| 34 | } | 36 | } |
| 35 | } // namespace | 37 | } // Anonymous namespace |
| 36 | 38 | ||
| 37 | Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { | 39 | std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, |
| 40 | s64 cursor) const { | ||
| 38 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | 41 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { |
| 39 | // Cbuf found, but it has to be immediate | 42 | // Constant buffer found, test if it's an immediate |
| 40 | return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; | 43 | const auto offset = cbuf->GetOffset(); |
| 44 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 45 | return {tracked, cbuf->GetIndex(), immediate->GetValue()}; | ||
| 46 | } | ||
| 47 | return {}; | ||
| 41 | } | 48 | } |
| 42 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { | 49 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { |
| 43 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | 50 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { |
| 44 | return nullptr; | 51 | return {}; |
| 45 | } | 52 | } |
| 46 | // Reduce the cursor in one to avoid infinite loops when the instruction sets the same | 53 | // Reduce the cursor in one to avoid infinite loops when the instruction sets the same |
| 47 | // register that it uses as operand | 54 | // register that it uses as operand |
| 48 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | 55 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); |
| 49 | if (!source) { | 56 | if (!source) { |
| 50 | return nullptr; | 57 | return {}; |
| 51 | } | 58 | } |
| 52 | return TrackCbuf(source, code, new_cursor); | 59 | return TrackCbuf(source, code, new_cursor); |
| 53 | } | 60 | } |
| 54 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { | 61 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { |
| 55 | for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { | 62 | for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { |
| 56 | if (const auto found = TrackCbuf((*operation)[i], code, cursor)) { | 63 | if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) { |
| 57 | // Cbuf found in operand | 64 | // Cbuf found in operand. |
| 58 | return found; | 65 | return found; |
| 59 | } | 66 | } |
| 60 | } | 67 | } |
| 61 | return nullptr; | 68 | return {}; |
| 62 | } | 69 | } |
| 63 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { | 70 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { |
| 64 | const auto& conditional_code = conditional->GetCode(); | 71 | const auto& conditional_code = conditional->GetCode(); |
| 65 | return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); | 72 | return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); |
| 66 | } | 73 | } |
| 67 | return nullptr; | 74 | return {}; |
| 68 | } | 75 | } |
| 69 | 76 | ||
| 70 | std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { | 77 | std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { |
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index c50f6354d..4ceb219be 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -445,11 +445,12 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat | |||
| 445 | switch (format) { | 445 | switch (format) { |
| 446 | case Tegra::FramebufferConfig::PixelFormat::ABGR8: | 446 | case Tegra::FramebufferConfig::PixelFormat::ABGR8: |
| 447 | return PixelFormat::ABGR8U; | 447 | return PixelFormat::ABGR8U; |
| 448 | case Tegra::FramebufferConfig::PixelFormat::RGB565: | ||
| 449 | return PixelFormat::B5G6R5U; | ||
| 448 | case Tegra::FramebufferConfig::PixelFormat::BGRA8: | 450 | case Tegra::FramebufferConfig::PixelFormat::BGRA8: |
| 449 | return PixelFormat::BGRA8; | 451 | return PixelFormat::BGRA8; |
| 450 | default: | 452 | default: |
| 451 | LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); | 453 | UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format)); |
| 452 | UNREACHABLE(); | ||
| 453 | return PixelFormat::ABGR8U; | 454 | return PixelFormat::ABGR8U; |
| 454 | } | 455 | } |
| 455 | } | 456 | } |
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 7a0fdb19b..683c49207 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp | |||
| @@ -24,9 +24,8 @@ StagingCache::StagingCache() = default; | |||
| 24 | StagingCache::~StagingCache() = default; | 24 | StagingCache::~StagingCache() = default; |
| 25 | 25 | ||
| 26 | SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) | 26 | SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) |
| 27 | : params{params}, mipmap_sizes(params.num_levels), | 27 | : params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr}, |
| 28 | mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ | 28 | mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) { |
| 29 | params.GetHostSizeInBytes()} { | ||
| 30 | std::size_t offset = 0; | 29 | std::size_t offset = 0; |
| 31 | for (u32 level = 0; level < params.num_levels; ++level) { | 30 | for (u32 level = 0; level < params.num_levels; ++level) { |
| 32 | const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; | 31 | const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; |
| @@ -75,9 +74,12 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) | |||
| 75 | 74 | ||
| 76 | // Linear Surface check | 75 | // Linear Surface check |
| 77 | if (!params.is_tiled) { | 76 | if (!params.is_tiled) { |
| 78 | if (std::tie(params.width, params.height, params.pitch) == | 77 | if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) { |
| 79 | std::tie(rhs.width, rhs.height, rhs.pitch)) { | 78 | if (params.width == rhs.width) { |
| 80 | return MatchStructureResult::FullMatch; | 79 | return MatchStructureResult::FullMatch; |
| 80 | } else { | ||
| 81 | return MatchStructureResult::SemiMatch; | ||
| 82 | } | ||
| 81 | } | 83 | } |
| 82 | return MatchStructureResult::None; | 84 | return MatchStructureResult::None; |
| 83 | } | 85 | } |
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 8ba386a8a..bcce8d863 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h | |||
| @@ -200,8 +200,9 @@ public: | |||
| 200 | modification_tick = tick; | 200 | modification_tick = tick; |
| 201 | } | 201 | } |
| 202 | 202 | ||
| 203 | void MarkAsRenderTarget(const bool is_target) { | 203 | void MarkAsRenderTarget(const bool is_target, const u32 index) { |
| 204 | this->is_target = is_target; | 204 | this->is_target = is_target; |
| 205 | this->index = index; | ||
| 205 | } | 206 | } |
| 206 | 207 | ||
| 207 | void MarkAsPicked(const bool is_picked) { | 208 | void MarkAsPicked(const bool is_picked) { |
| @@ -221,6 +222,10 @@ public: | |||
| 221 | return is_target; | 222 | return is_target; |
| 222 | } | 223 | } |
| 223 | 224 | ||
| 225 | u32 GetRenderTarget() const { | ||
| 226 | return index; | ||
| 227 | } | ||
| 228 | |||
| 224 | bool IsRegistered() const { | 229 | bool IsRegistered() const { |
| 225 | return is_registered; | 230 | return is_registered; |
| 226 | } | 231 | } |
| @@ -307,10 +312,13 @@ private: | |||
| 307 | return view; | 312 | return view; |
| 308 | } | 313 | } |
| 309 | 314 | ||
| 315 | static constexpr u32 NO_RT = 0xFFFFFFFF; | ||
| 316 | |||
| 310 | bool is_modified{}; | 317 | bool is_modified{}; |
| 311 | bool is_target{}; | 318 | bool is_target{}; |
| 312 | bool is_registered{}; | 319 | bool is_registered{}; |
| 313 | bool is_picked{}; | 320 | bool is_picked{}; |
| 321 | u32 index{NO_RT}; | ||
| 314 | u64 modification_tick{}; | 322 | u64 modification_tick{}; |
| 315 | }; | 323 | }; |
| 316 | 324 | ||
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 9c56e2b4f..fd5472451 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -290,12 +290,19 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co | |||
| 290 | 290 | ||
| 291 | std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, | 291 | std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, |
| 292 | bool uncompressed) const { | 292 | bool uncompressed) const { |
| 293 | const bool tiled{as_host_size ? false : is_tiled}; | ||
| 294 | const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; | 293 | const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; |
| 295 | const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; | 294 | const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; |
| 296 | const u32 depth{is_layered ? 1U : GetMipDepth(level)}; | 295 | const u32 depth{is_layered ? 1U : GetMipDepth(level)}; |
| 297 | return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, | 296 | if (is_tiled) { |
| 298 | GetMipBlockHeight(level), GetMipBlockDepth(level)); | 297 | return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height, |
| 298 | depth, GetMipBlockHeight(level), | ||
| 299 | GetMipBlockDepth(level)); | ||
| 300 | } else if (as_host_size || IsBuffer()) { | ||
| 301 | return GetBytesPerPixel() * width * height * depth; | ||
| 302 | } else { | ||
| 303 | // Linear Texture Case | ||
| 304 | return pitch * height * depth; | ||
| 305 | } | ||
| 299 | } | 306 | } |
| 300 | 307 | ||
| 301 | bool SurfaceParams::operator==(const SurfaceParams& rhs) const { | 308 | bool SurfaceParams::operator==(const SurfaceParams& rhs) const { |
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 358d6757c..e7ef66ee2 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h | |||
| @@ -58,7 +58,6 @@ public: | |||
| 58 | std::size_t GetHostSizeInBytes() const { | 58 | std::size_t GetHostSizeInBytes() const { |
| 59 | std::size_t host_size_in_bytes; | 59 | std::size_t host_size_in_bytes; |
| 60 | if (GetCompressionType() == SurfaceCompression::Converted) { | 60 | if (GetCompressionType() == SurfaceCompression::Converted) { |
| 61 | constexpr std::size_t rgb8_bpp = 4ULL; | ||
| 62 | // ASTC is uncompressed in software, in emulated as RGBA8 | 61 | // ASTC is uncompressed in software, in emulated as RGBA8 |
| 63 | host_size_in_bytes = 0; | 62 | host_size_in_bytes = 0; |
| 64 | for (u32 level = 0; level < num_levels; ++level) { | 63 | for (u32 level = 0; level < num_levels; ++level) { |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c9e72531a..2ec0203d1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -116,10 +116,10 @@ public: | |||
| 116 | std::lock_guard lock{mutex}; | 116 | std::lock_guard lock{mutex}; |
| 117 | auto& maxwell3d = system.GPU().Maxwell3D(); | 117 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 118 | 118 | ||
| 119 | if (!maxwell3d.dirty_flags.zeta_buffer) { | 119 | if (!maxwell3d.dirty.depth_buffer) { |
| 120 | return depth_buffer.view; | 120 | return depth_buffer.view; |
| 121 | } | 121 | } |
| 122 | maxwell3d.dirty_flags.zeta_buffer = false; | 122 | maxwell3d.dirty.depth_buffer = false; |
| 123 | 123 | ||
| 124 | const auto& regs{maxwell3d.regs}; | 124 | const auto& regs{maxwell3d.regs}; |
| 125 | const auto gpu_addr{regs.zeta.Address()}; | 125 | const auto gpu_addr{regs.zeta.Address()}; |
| @@ -133,11 +133,11 @@ public: | |||
| 133 | regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; | 133 | regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; |
| 134 | auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); | 134 | auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); |
| 135 | if (depth_buffer.target) | 135 | if (depth_buffer.target) |
| 136 | depth_buffer.target->MarkAsRenderTarget(false); | 136 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); |
| 137 | depth_buffer.target = surface_view.first; | 137 | depth_buffer.target = surface_view.first; |
| 138 | depth_buffer.view = surface_view.second; | 138 | depth_buffer.view = surface_view.second; |
| 139 | if (depth_buffer.target) | 139 | if (depth_buffer.target) |
| 140 | depth_buffer.target->MarkAsRenderTarget(true); | 140 | depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT); |
| 141 | return surface_view.second; | 141 | return surface_view.second; |
| 142 | } | 142 | } |
| 143 | 143 | ||
| @@ -145,10 +145,10 @@ public: | |||
| 145 | std::lock_guard lock{mutex}; | 145 | std::lock_guard lock{mutex}; |
| 146 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | 146 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); |
| 147 | auto& maxwell3d = system.GPU().Maxwell3D(); | 147 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 148 | if (!maxwell3d.dirty_flags.color_buffer[index]) { | 148 | if (!maxwell3d.dirty.render_target[index]) { |
| 149 | return render_targets[index].view; | 149 | return render_targets[index].view; |
| 150 | } | 150 | } |
| 151 | maxwell3d.dirty_flags.color_buffer.reset(index); | 151 | maxwell3d.dirty.render_target[index] = false; |
| 152 | 152 | ||
| 153 | const auto& regs{maxwell3d.regs}; | 153 | const auto& regs{maxwell3d.regs}; |
| 154 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || | 154 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || |
| @@ -167,11 +167,11 @@ public: | |||
| 167 | auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), | 167 | auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), |
| 168 | preserve_contents, true); | 168 | preserve_contents, true); |
| 169 | if (render_targets[index].target) | 169 | if (render_targets[index].target) |
| 170 | render_targets[index].target->MarkAsRenderTarget(false); | 170 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); |
| 171 | render_targets[index].target = surface_view.first; | 171 | render_targets[index].target = surface_view.first; |
| 172 | render_targets[index].view = surface_view.second; | 172 | render_targets[index].view = surface_view.second; |
| 173 | if (render_targets[index].target) | 173 | if (render_targets[index].target) |
| 174 | render_targets[index].target->MarkAsRenderTarget(true); | 174 | render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index)); |
| 175 | return surface_view.second; | 175 | return surface_view.second; |
| 176 | } | 176 | } |
| 177 | 177 | ||
| @@ -191,7 +191,7 @@ public: | |||
| 191 | if (depth_buffer.target == nullptr) { | 191 | if (depth_buffer.target == nullptr) { |
| 192 | return; | 192 | return; |
| 193 | } | 193 | } |
| 194 | depth_buffer.target->MarkAsRenderTarget(false); | 194 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); |
| 195 | depth_buffer.target = nullptr; | 195 | depth_buffer.target = nullptr; |
| 196 | depth_buffer.view = nullptr; | 196 | depth_buffer.view = nullptr; |
| 197 | } | 197 | } |
| @@ -200,7 +200,7 @@ public: | |||
| 200 | if (render_targets[index].target == nullptr) { | 200 | if (render_targets[index].target == nullptr) { |
| 201 | return; | 201 | return; |
| 202 | } | 202 | } |
| 203 | render_targets[index].target->MarkAsRenderTarget(false); | 203 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); |
| 204 | render_targets[index].target = nullptr; | 204 | render_targets[index].target = nullptr; |
| 205 | render_targets[index].view = nullptr; | 205 | render_targets[index].view = nullptr; |
| 206 | } | 206 | } |
| @@ -270,6 +270,17 @@ protected: | |||
| 270 | // and reading it from a sepparate buffer. | 270 | // and reading it from a sepparate buffer. |
| 271 | virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; | 271 | virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; |
| 272 | 272 | ||
| 273 | void ManageRenderTargetUnregister(TSurface& surface) { | ||
| 274 | auto& maxwell3d = system.GPU().Maxwell3D(); | ||
| 275 | const u32 index = surface->GetRenderTarget(); | ||
| 276 | if (index == DEPTH_RT) { | ||
| 277 | maxwell3d.dirty.depth_buffer = true; | ||
| 278 | } else { | ||
| 279 | maxwell3d.dirty.render_target[index] = true; | ||
| 280 | } | ||
| 281 | maxwell3d.dirty.render_settings = true; | ||
| 282 | } | ||
| 283 | |||
| 273 | void Register(TSurface surface) { | 284 | void Register(TSurface surface) { |
| 274 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); | 285 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); |
| 275 | const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); | 286 | const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); |
| @@ -294,8 +305,9 @@ protected: | |||
| 294 | if (guard_render_targets && surface->IsProtected()) { | 305 | if (guard_render_targets && surface->IsProtected()) { |
| 295 | return; | 306 | return; |
| 296 | } | 307 | } |
| 297 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); | 308 | if (!guard_render_targets && surface->IsRenderTarget()) { |
| 298 | const CacheAddr cache_ptr = surface->GetCacheAddr(); | 309 | ManageRenderTargetUnregister(surface); |
| 310 | } | ||
| 299 | const std::size_t size = surface->GetSizeInBytes(); | 311 | const std::size_t size = surface->GetSizeInBytes(); |
| 300 | const VAddr cpu_addr = surface->GetCpuAddr(); | 312 | const VAddr cpu_addr = surface->GetCpuAddr(); |
| 301 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | 313 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); |
| @@ -649,15 +661,6 @@ private: | |||
| 649 | } | 661 | } |
| 650 | return {current_surface, *view}; | 662 | return {current_surface, *view}; |
| 651 | } | 663 | } |
| 652 | // The next case is unsafe, so if we r in accurate GPU, just skip it | ||
| 653 | if (Settings::values.use_accurate_gpu_emulation) { | ||
| 654 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 655 | MatchTopologyResult::FullMatch); | ||
| 656 | } | ||
| 657 | // This is the case the texture is a part of the parent. | ||
| 658 | if (current_surface->MatchesSubTexture(params, gpu_addr)) { | ||
| 659 | return RebuildSurface(current_surface, params, is_render); | ||
| 660 | } | ||
| 661 | } else { | 664 | } else { |
| 662 | // If there are many overlaps, odds are they are subtextures of the candidate | 665 | // If there are many overlaps, odds are they are subtextures of the candidate |
| 663 | // surface. We try to construct a new surface based on the candidate parameters, | 666 | // surface. We try to construct a new surface based on the candidate parameters, |
| @@ -793,6 +796,9 @@ private: | |||
| 793 | static constexpr u64 registry_page_size{1 << registry_page_bits}; | 796 | static constexpr u64 registry_page_size{1 << registry_page_bits}; |
| 794 | std::unordered_map<CacheAddr, std::vector<TSurface>> registry; | 797 | std::unordered_map<CacheAddr, std::vector<TSurface>> registry; |
| 795 | 798 | ||
| 799 | static constexpr u32 DEPTH_RT = 8; | ||
| 800 | static constexpr u32 NO_RT = 0xFFFFFFFF; | ||
| 801 | |||
| 796 | // The L1 Cache is used for fast texture lookup before checking the overlaps | 802 | // The L1 Cache is used for fast texture lookup before checking the overlaps |
| 797 | // This avoids calculating size and other stuffs. | 803 | // This avoids calculating size and other stuffs. |
| 798 | std::unordered_map<CacheAddr, TSurface> l1_cache; | 804 | std::unordered_map<CacheAddr, TSurface> l1_cache; |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 7e8295944..7df5f1452 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -257,19 +257,21 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, | |||
| 257 | 257 | ||
| 258 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 258 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 259 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, | 259 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, |
| 260 | u32 block_height_bit) { | 260 | u32 block_height_bit, u32 offset_x, u32 offset_y) { |
| 261 | const u32 block_height = 1U << block_height_bit; | 261 | const u32 block_height = 1U << block_height_bit; |
| 262 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / | 262 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / |
| 263 | gob_size_x}; | 263 | gob_size_x}; |
| 264 | for (u32 line = 0; line < subrect_height; ++line) { | 264 | for (u32 line = 0; line < subrect_height; ++line) { |
| 265 | const u32 dst_y = line + offset_y; | ||
| 265 | const u32 gob_address_y = | 266 | const u32 gob_address_y = |
| 266 | (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + | 267 | (dst_y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + |
| 267 | ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size; | 268 | ((dst_y % (gob_size_y * block_height)) / gob_size_y) * gob_size; |
| 268 | const auto& table = legacy_swizzle_table[line % gob_size_y]; | 269 | const auto& table = legacy_swizzle_table[dst_y % gob_size_y]; |
| 269 | for (u32 x = 0; x < subrect_width; ++x) { | 270 | for (u32 x = 0; x < subrect_width; ++x) { |
| 271 | const u32 dst_x = x + offset_x; | ||
| 270 | const u32 gob_address = | 272 | const u32 gob_address = |
| 271 | gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; | 273 | gob_address_y + (dst_x * bytes_per_pixel / gob_size_x) * gob_size * block_height; |
| 272 | const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; | 274 | const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % gob_size_x]; |
| 273 | u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; | 275 | u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; |
| 274 | u8* dest_addr = swizzled_data + swizzled_offset; | 276 | u8* dest_addr = swizzled_data + swizzled_offset; |
| 275 | 277 | ||
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index eaec9b5a5..f1e3952bc 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -44,7 +44,8 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height | |||
| 44 | 44 | ||
| 45 | /// Copies an untiled subrectangle into a tiled surface. | 45 | /// Copies an untiled subrectangle into a tiled surface. |
| 46 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 46 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 47 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height); | 47 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, |
| 48 | u32 offset_x, u32 offset_y); | ||
| 48 | 49 | ||
| 49 | /// Copies a tiled subrectangle into a linear surface. | 50 | /// Copies a tiled subrectangle into a linear surface. |
| 50 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | 51 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, |
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index e3be018b9..e36bc2c04 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h | |||
| @@ -213,7 +213,7 @@ struct TICEntry { | |||
| 213 | if (header_version != TICHeaderVersion::OneDBuffer) { | 213 | if (header_version != TICHeaderVersion::OneDBuffer) { |
| 214 | return width_minus_1 + 1; | 214 | return width_minus_1 + 1; |
| 215 | } | 215 | } |
| 216 | return (buffer_high_width_minus_one << 16) | buffer_low_width_minus_one; | 216 | return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1; |
| 217 | } | 217 | } |
| 218 | 218 | ||
| 219 | u32 Height() const { | 219 | u32 Height() const { |
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index 3dc0e47d0..f051e17b4 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | set(CMAKE_AUTOMOC ON) | 1 | set(CMAKE_AUTOMOC ON) |
| 2 | set(CMAKE_AUTORCC ON) | 2 | set(CMAKE_AUTORCC ON) |
| 3 | set(CMAKE_AUTOUIC ON) | ||
| 3 | set(CMAKE_INCLUDE_CURRENT_DIR ON) | 4 | set(CMAKE_INCLUDE_CURRENT_DIR ON) |
| 4 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules) | 5 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules) |
| 5 | 6 | ||
| @@ -7,6 +8,7 @@ add_executable(yuzu | |||
| 7 | Info.plist | 8 | Info.plist |
| 8 | about_dialog.cpp | 9 | about_dialog.cpp |
| 9 | about_dialog.h | 10 | about_dialog.h |
| 11 | aboutdialog.ui | ||
| 10 | applets/error.cpp | 12 | applets/error.cpp |
| 11 | applets/error.h | 13 | applets/error.h |
| 12 | applets/profile_select.cpp | 14 | applets/profile_select.cpp |
| @@ -17,42 +19,59 @@ add_executable(yuzu | |||
| 17 | applets/web_browser.h | 19 | applets/web_browser.h |
| 18 | bootmanager.cpp | 20 | bootmanager.cpp |
| 19 | bootmanager.h | 21 | bootmanager.h |
| 22 | compatdb.ui | ||
| 20 | compatibility_list.cpp | 23 | compatibility_list.cpp |
| 21 | compatibility_list.h | 24 | compatibility_list.h |
| 22 | configuration/config.cpp | 25 | configuration/config.cpp |
| 23 | configuration/config.h | 26 | configuration/config.h |
| 27 | configuration/configure.ui | ||
| 24 | configuration/configure_audio.cpp | 28 | configuration/configure_audio.cpp |
| 25 | configuration/configure_audio.h | 29 | configuration/configure_audio.h |
| 30 | configuration/configure_audio.ui | ||
| 26 | configuration/configure_debug.cpp | 31 | configuration/configure_debug.cpp |
| 27 | configuration/configure_debug.h | 32 | configuration/configure_debug.h |
| 33 | configuration/configure_debug.ui | ||
| 28 | configuration/configure_dialog.cpp | 34 | configuration/configure_dialog.cpp |
| 29 | configuration/configure_dialog.h | 35 | configuration/configure_dialog.h |
| 30 | configuration/configure_gamelist.cpp | 36 | configuration/configure_gamelist.cpp |
| 31 | configuration/configure_gamelist.h | 37 | configuration/configure_gamelist.h |
| 38 | configuration/configure_gamelist.ui | ||
| 32 | configuration/configure_general.cpp | 39 | configuration/configure_general.cpp |
| 33 | configuration/configure_general.h | 40 | configuration/configure_general.h |
| 41 | configuration/configure_general.ui | ||
| 34 | configuration/configure_graphics.cpp | 42 | configuration/configure_graphics.cpp |
| 35 | configuration/configure_graphics.h | 43 | configuration/configure_graphics.h |
| 44 | configuration/configure_graphics.ui | ||
| 36 | configuration/configure_hotkeys.cpp | 45 | configuration/configure_hotkeys.cpp |
| 37 | configuration/configure_hotkeys.h | 46 | configuration/configure_hotkeys.h |
| 47 | configuration/configure_hotkeys.ui | ||
| 38 | configuration/configure_input.cpp | 48 | configuration/configure_input.cpp |
| 39 | configuration/configure_input.h | 49 | configuration/configure_input.h |
| 50 | configuration/configure_input.ui | ||
| 40 | configuration/configure_input_player.cpp | 51 | configuration/configure_input_player.cpp |
| 41 | configuration/configure_input_player.h | 52 | configuration/configure_input_player.h |
| 53 | configuration/configure_input_player.ui | ||
| 42 | configuration/configure_input_simple.cpp | 54 | configuration/configure_input_simple.cpp |
| 43 | configuration/configure_input_simple.h | 55 | configuration/configure_input_simple.h |
| 56 | configuration/configure_input_simple.ui | ||
| 44 | configuration/configure_mouse_advanced.cpp | 57 | configuration/configure_mouse_advanced.cpp |
| 45 | configuration/configure_mouse_advanced.h | 58 | configuration/configure_mouse_advanced.h |
| 59 | configuration/configure_mouse_advanced.ui | ||
| 60 | configuration/configure_per_general.cpp | ||
| 61 | configuration/configure_per_general.h | ||
| 62 | configuration/configure_per_general.ui | ||
| 46 | configuration/configure_profile_manager.cpp | 63 | configuration/configure_profile_manager.cpp |
| 47 | configuration/configure_profile_manager.h | 64 | configuration/configure_profile_manager.h |
| 65 | configuration/configure_profile_manager.ui | ||
| 48 | configuration/configure_system.cpp | 66 | configuration/configure_system.cpp |
| 49 | configuration/configure_system.h | 67 | configuration/configure_system.h |
| 50 | configuration/configure_per_general.cpp | 68 | configuration/configure_system.ui |
| 51 | configuration/configure_per_general.h | ||
| 52 | configuration/configure_touchscreen_advanced.cpp | 69 | configuration/configure_touchscreen_advanced.cpp |
| 53 | configuration/configure_touchscreen_advanced.h | 70 | configuration/configure_touchscreen_advanced.h |
| 71 | configuration/configure_touchscreen_advanced.ui | ||
| 54 | configuration/configure_web.cpp | 72 | configuration/configure_web.cpp |
| 55 | configuration/configure_web.h | 73 | configuration/configure_web.h |
| 74 | configuration/configure_web.ui | ||
| 56 | debugger/graphics/graphics_breakpoint_observer.cpp | 75 | debugger/graphics/graphics_breakpoint_observer.cpp |
| 57 | debugger/graphics/graphics_breakpoint_observer.h | 76 | debugger/graphics/graphics_breakpoint_observer.h |
| 58 | debugger/graphics/graphics_breakpoints.cpp | 77 | debugger/graphics/graphics_breakpoints.cpp |
| @@ -72,12 +91,14 @@ add_executable(yuzu | |||
| 72 | game_list_worker.h | 91 | game_list_worker.h |
| 73 | loading_screen.cpp | 92 | loading_screen.cpp |
| 74 | loading_screen.h | 93 | loading_screen.h |
| 94 | loading_screen.ui | ||
| 75 | hotkeys.cpp | 95 | hotkeys.cpp |
| 76 | hotkeys.h | 96 | hotkeys.h |
| 77 | main.cpp | 97 | main.cpp |
| 78 | main.h | 98 | main.h |
| 79 | ui_settings.cpp | 99 | main.ui |
| 80 | ui_settings.h | 100 | uisettings.cpp |
| 101 | uisettings.h | ||
| 81 | util/limitable_input_dialog.cpp | 102 | util/limitable_input_dialog.cpp |
| 82 | util/limitable_input_dialog.h | 103 | util/limitable_input_dialog.h |
| 83 | util/sequence_dialog/sequence_dialog.cpp | 104 | util/sequence_dialog/sequence_dialog.cpp |
| @@ -89,44 +110,18 @@ add_executable(yuzu | |||
| 89 | yuzu.rc | 110 | yuzu.rc |
| 90 | ) | 111 | ) |
| 91 | 112 | ||
| 92 | set(UIS | ||
| 93 | aboutdialog.ui | ||
| 94 | configuration/configure.ui | ||
| 95 | configuration/configure_audio.ui | ||
| 96 | configuration/configure_debug.ui | ||
| 97 | configuration/configure_gamelist.ui | ||
| 98 | configuration/configure_general.ui | ||
| 99 | configuration/configure_graphics.ui | ||
| 100 | configuration/configure_hotkeys.ui | ||
| 101 | configuration/configure_input.ui | ||
| 102 | configuration/configure_input_player.ui | ||
| 103 | configuration/configure_input_simple.ui | ||
| 104 | configuration/configure_mouse_advanced.ui | ||
| 105 | configuration/configure_per_general.ui | ||
| 106 | configuration/configure_profile_manager.ui | ||
| 107 | configuration/configure_system.ui | ||
| 108 | configuration/configure_touchscreen_advanced.ui | ||
| 109 | configuration/configure_web.ui | ||
| 110 | compatdb.ui | ||
| 111 | loading_screen.ui | ||
| 112 | main.ui | ||
| 113 | ) | ||
| 114 | |||
| 115 | file(GLOB COMPAT_LIST | 113 | file(GLOB COMPAT_LIST |
| 116 | ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.qrc | 114 | ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.qrc |
| 117 | ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.json) | 115 | ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.json) |
| 118 | file(GLOB_RECURSE ICONS ${PROJECT_SOURCE_DIR}/dist/icons/*) | 116 | file(GLOB_RECURSE ICONS ${PROJECT_SOURCE_DIR}/dist/icons/*) |
| 119 | file(GLOB_RECURSE THEMES ${PROJECT_SOURCE_DIR}/dist/qt_themes/*) | 117 | file(GLOB_RECURSE THEMES ${PROJECT_SOURCE_DIR}/dist/qt_themes/*) |
| 120 | 118 | ||
| 121 | qt5_wrap_ui(UI_HDRS ${UIS}) | ||
| 122 | 119 | ||
| 123 | target_sources(yuzu | 120 | target_sources(yuzu |
| 124 | PRIVATE | 121 | PRIVATE |
| 125 | ${COMPAT_LIST} | 122 | ${COMPAT_LIST} |
| 126 | ${ICONS} | 123 | ${ICONS} |
| 127 | ${THEMES} | 124 | ${THEMES} |
| 128 | ${UI_HDRS} | ||
| 129 | ${UIS} | ||
| 130 | ) | 125 | ) |
| 131 | 126 | ||
| 132 | if (APPLE) | 127 | if (APPLE) |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 73978ff5b..0456248ac 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | #include "core/hle/service/hid/controllers/npad.h" | 11 | #include "core/hle/service/hid/controllers/npad.h" |
| 12 | #include "input_common/main.h" | 12 | #include "input_common/main.h" |
| 13 | #include "yuzu/configuration/config.h" | 13 | #include "yuzu/configuration/config.h" |
| 14 | #include "yuzu/ui_settings.h" | 14 | #include "yuzu/uisettings.h" |
| 15 | 15 | ||
| 16 | Config::Config() { | 16 | Config::Config() { |
| 17 | // TODO: Don't hardcode the path; let the frontend decide where to put the config files. | 17 | // TODO: Don't hardcode the path; let the frontend decide where to put the config files. |
| @@ -436,8 +436,6 @@ void Config::ReadControlValues() { | |||
| 436 | void Config::ReadCoreValues() { | 436 | void Config::ReadCoreValues() { |
| 437 | qt_config->beginGroup(QStringLiteral("Core")); | 437 | qt_config->beginGroup(QStringLiteral("Core")); |
| 438 | 438 | ||
| 439 | Settings::values.cpu_jit_enabled = | ||
| 440 | ReadSetting(QStringLiteral("cpu_jit_enabled"), true).toBool(); | ||
| 441 | Settings::values.use_multi_core = ReadSetting(QStringLiteral("use_multi_core"), false).toBool(); | 439 | Settings::values.use_multi_core = ReadSetting(QStringLiteral("use_multi_core"), false).toBool(); |
| 442 | 440 | ||
| 443 | qt_config->endGroup(); | 441 | qt_config->endGroup(); |
| @@ -518,6 +516,7 @@ void Config::ReadPathValues() { | |||
| 518 | 516 | ||
| 519 | UISettings::values.roms_path = ReadSetting(QStringLiteral("romsPath")).toString(); | 517 | UISettings::values.roms_path = ReadSetting(QStringLiteral("romsPath")).toString(); |
| 520 | UISettings::values.symbols_path = ReadSetting(QStringLiteral("symbolsPath")).toString(); | 518 | UISettings::values.symbols_path = ReadSetting(QStringLiteral("symbolsPath")).toString(); |
| 519 | UISettings::values.screenshot_path = ReadSetting(QStringLiteral("screenshotPath")).toString(); | ||
| 521 | UISettings::values.game_directory_path = | 520 | UISettings::values.game_directory_path = |
| 522 | ReadSetting(QStringLiteral("gameListRootDir"), QStringLiteral(".")).toString(); | 521 | ReadSetting(QStringLiteral("gameListRootDir"), QStringLiteral(".")).toString(); |
| 523 | UISettings::values.game_directory_deepscan = | 522 | UISettings::values.game_directory_deepscan = |
| @@ -831,7 +830,6 @@ void Config::SaveControlValues() { | |||
| 831 | void Config::SaveCoreValues() { | 830 | void Config::SaveCoreValues() { |
| 832 | qt_config->beginGroup(QStringLiteral("Core")); | 831 | qt_config->beginGroup(QStringLiteral("Core")); |
| 833 | 832 | ||
| 834 | WriteSetting(QStringLiteral("cpu_jit_enabled"), Settings::values.cpu_jit_enabled, true); | ||
| 835 | WriteSetting(QStringLiteral("use_multi_core"), Settings::values.use_multi_core, false); | 833 | WriteSetting(QStringLiteral("use_multi_core"), Settings::values.use_multi_core, false); |
| 836 | 834 | ||
| 837 | qt_config->endGroup(); | 835 | qt_config->endGroup(); |
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index 9a13bb797..5b7e03056 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp | |||
| @@ -12,13 +12,13 @@ | |||
| 12 | #include "ui_configure_debug.h" | 12 | #include "ui_configure_debug.h" |
| 13 | #include "yuzu/configuration/configure_debug.h" | 13 | #include "yuzu/configuration/configure_debug.h" |
| 14 | #include "yuzu/debugger/console.h" | 14 | #include "yuzu/debugger/console.h" |
| 15 | #include "yuzu/ui_settings.h" | 15 | #include "yuzu/uisettings.h" |
| 16 | 16 | ||
| 17 | ConfigureDebug::ConfigureDebug(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureDebug) { | 17 | ConfigureDebug::ConfigureDebug(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureDebug) { |
| 18 | ui->setupUi(this); | 18 | ui->setupUi(this); |
| 19 | SetConfiguration(); | 19 | SetConfiguration(); |
| 20 | 20 | ||
| 21 | connect(ui->open_log_button, &QPushButton::pressed, []() { | 21 | connect(ui->open_log_button, &QPushButton::clicked, []() { |
| 22 | QString path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::LogDir)); | 22 | QString path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::LogDir)); |
| 23 | QDesktopServices::openUrl(QUrl::fromLocalFile(path)); | 23 | QDesktopServices::openUrl(QUrl::fromLocalFile(path)); |
| 24 | }); | 24 | }); |
diff --git a/src/yuzu/configuration/configure_gamelist.cpp b/src/yuzu/configuration/configure_gamelist.cpp index d1724ba89..daedbc33e 100644 --- a/src/yuzu/configuration/configure_gamelist.cpp +++ b/src/yuzu/configuration/configure_gamelist.cpp | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | #include "core/settings.h" | 9 | #include "core/settings.h" |
| 10 | #include "ui_configure_gamelist.h" | 10 | #include "ui_configure_gamelist.h" |
| 11 | #include "yuzu/configuration/configure_gamelist.h" | 11 | #include "yuzu/configuration/configure_gamelist.h" |
| 12 | #include "yuzu/ui_settings.h" | 12 | #include "yuzu/uisettings.h" |
| 13 | 13 | ||
| 14 | namespace { | 14 | namespace { |
| 15 | constexpr std::array default_icon_sizes{ | 15 | constexpr std::array default_icon_sizes{ |
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp index 7a6e921cd..75fcbfea3 100644 --- a/src/yuzu/configuration/configure_general.cpp +++ b/src/yuzu/configuration/configure_general.cpp | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | #include "core/settings.h" | 6 | #include "core/settings.h" |
| 7 | #include "ui_configure_general.h" | 7 | #include "ui_configure_general.h" |
| 8 | #include "yuzu/configuration/configure_general.h" | 8 | #include "yuzu/configuration/configure_general.h" |
| 9 | #include "yuzu/ui_settings.h" | 9 | #include "yuzu/uisettings.h" |
| 10 | 10 | ||
| 11 | ConfigureGeneral::ConfigureGeneral(QWidget* parent) | 11 | ConfigureGeneral::ConfigureGeneral(QWidget* parent) |
| 12 | : QWidget(parent), ui(new Ui::ConfigureGeneral) { | 12 | : QWidget(parent), ui(new Ui::ConfigureGeneral) { |
diff --git a/src/yuzu/configuration/configure_input.cpp b/src/yuzu/configuration/configure_input.cpp index 4dd775aab..7613197f2 100644 --- a/src/yuzu/configuration/configure_input.cpp +++ b/src/yuzu/configuration/configure_input.cpp | |||
| @@ -79,7 +79,7 @@ ConfigureInput::ConfigureInput(QWidget* parent) | |||
| 79 | LoadConfiguration(); | 79 | LoadConfiguration(); |
| 80 | UpdateUIEnabled(); | 80 | UpdateUIEnabled(); |
| 81 | 81 | ||
| 82 | connect(ui->restore_defaults_button, &QPushButton::pressed, this, | 82 | connect(ui->restore_defaults_button, &QPushButton::clicked, this, |
| 83 | &ConfigureInput::RestoreDefaults); | 83 | &ConfigureInput::RestoreDefaults); |
| 84 | 84 | ||
| 85 | for (auto* enabled : players_controller) { | 85 | for (auto* enabled : players_controller) { |
| @@ -96,20 +96,20 @@ ConfigureInput::ConfigureInput(QWidget* parent) | |||
| 96 | &ConfigureInput::UpdateUIEnabled); | 96 | &ConfigureInput::UpdateUIEnabled); |
| 97 | 97 | ||
| 98 | for (std::size_t i = 0; i < players_configure.size(); ++i) { | 98 | for (std::size_t i = 0; i < players_configure.size(); ++i) { |
| 99 | connect(players_configure[i], &QPushButton::pressed, this, | 99 | connect(players_configure[i], &QPushButton::clicked, this, |
| 100 | [this, i] { CallConfigureDialog<ConfigureInputPlayer>(*this, i, false); }); | 100 | [this, i] { CallConfigureDialog<ConfigureInputPlayer>(*this, i, false); }); |
| 101 | } | 101 | } |
| 102 | 102 | ||
| 103 | connect(ui->handheld_configure, &QPushButton::pressed, this, | 103 | connect(ui->handheld_configure, &QPushButton::clicked, this, |
| 104 | [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 8, false); }); | 104 | [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 8, false); }); |
| 105 | 105 | ||
| 106 | connect(ui->debug_configure, &QPushButton::pressed, this, | 106 | connect(ui->debug_configure, &QPushButton::clicked, this, |
| 107 | [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 9, true); }); | 107 | [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 9, true); }); |
| 108 | 108 | ||
| 109 | connect(ui->mouse_advanced, &QPushButton::pressed, this, | 109 | connect(ui->mouse_advanced, &QPushButton::clicked, this, |
| 110 | [this] { CallConfigureDialog<ConfigureMouseAdvanced>(*this); }); | 110 | [this] { CallConfigureDialog<ConfigureMouseAdvanced>(*this); }); |
| 111 | 111 | ||
| 112 | connect(ui->touchscreen_advanced, &QPushButton::pressed, this, | 112 | connect(ui->touchscreen_advanced, &QPushButton::clicked, this, |
| 113 | [this] { CallConfigureDialog<ConfigureTouchscreenAdvanced>(*this); }); | 113 | [this] { CallConfigureDialog<ConfigureTouchscreenAdvanced>(*this); }); |
| 114 | } | 114 | } |
| 115 | 115 | ||
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index 916baccc1..7b70f307c 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp | |||
| @@ -244,7 +244,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i | |||
| 244 | } | 244 | } |
| 245 | 245 | ||
| 246 | button->setContextMenuPolicy(Qt::CustomContextMenu); | 246 | button->setContextMenuPolicy(Qt::CustomContextMenu); |
| 247 | connect(button, &QPushButton::released, [=] { | 247 | connect(button, &QPushButton::clicked, [=] { |
| 248 | HandleClick( | 248 | HandleClick( |
| 249 | button_map[button_id], | 249 | button_map[button_id], |
| 250 | [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; }, | 250 | [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; }, |
| @@ -273,7 +273,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i | |||
| 273 | } | 273 | } |
| 274 | 274 | ||
| 275 | analog_button->setContextMenuPolicy(Qt::CustomContextMenu); | 275 | analog_button->setContextMenuPolicy(Qt::CustomContextMenu); |
| 276 | connect(analog_button, &QPushButton::released, [=]() { | 276 | connect(analog_button, &QPushButton::clicked, [=]() { |
| 277 | HandleClick(analog_map_buttons[analog_id][sub_button_id], | 277 | HandleClick(analog_map_buttons[analog_id][sub_button_id], |
| 278 | [=](const Common::ParamPackage& params) { | 278 | [=](const Common::ParamPackage& params) { |
| 279 | SetAnalogButton(params, analogs_param[analog_id], | 279 | SetAnalogButton(params, analogs_param[analog_id], |
| @@ -300,7 +300,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i | |||
| 300 | menu_location)); | 300 | menu_location)); |
| 301 | }); | 301 | }); |
| 302 | } | 302 | } |
| 303 | connect(analog_map_stick[analog_id], &QPushButton::released, [=] { | 303 | connect(analog_map_stick[analog_id], &QPushButton::clicked, [=] { |
| 304 | QMessageBox::information(this, tr("Information"), | 304 | QMessageBox::information(this, tr("Information"), |
| 305 | tr("After pressing OK, first move your joystick horizontally, " | 305 | tr("After pressing OK, first move your joystick horizontally, " |
| 306 | "and then vertically.")); | 306 | "and then vertically.")); |
| @@ -311,8 +311,8 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i | |||
| 311 | }); | 311 | }); |
| 312 | } | 312 | } |
| 313 | 313 | ||
| 314 | connect(ui->buttonClearAll, &QPushButton::released, [this] { ClearAll(); }); | 314 | connect(ui->buttonClearAll, &QPushButton::clicked, [this] { ClearAll(); }); |
| 315 | connect(ui->buttonRestoreDefaults, &QPushButton::released, [this] { RestoreDefaults(); }); | 315 | connect(ui->buttonRestoreDefaults, &QPushButton::clicked, [this] { RestoreDefaults(); }); |
| 316 | 316 | ||
| 317 | timeout_timer->setSingleShot(true); | 317 | timeout_timer->setSingleShot(true); |
| 318 | connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); }); | 318 | connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); }); |
diff --git a/src/yuzu/configuration/configure_input_simple.cpp b/src/yuzu/configuration/configure_input_simple.cpp index 864803ea3..ab3a11d30 100644 --- a/src/yuzu/configuration/configure_input_simple.cpp +++ b/src/yuzu/configuration/configure_input_simple.cpp | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | #include "yuzu/configuration/configure_input.h" | 9 | #include "yuzu/configuration/configure_input.h" |
| 10 | #include "yuzu/configuration/configure_input_player.h" | 10 | #include "yuzu/configuration/configure_input_player.h" |
| 11 | #include "yuzu/configuration/configure_input_simple.h" | 11 | #include "yuzu/configuration/configure_input_simple.h" |
| 12 | #include "yuzu/ui_settings.h" | 12 | #include "yuzu/uisettings.h" |
| 13 | 13 | ||
| 14 | namespace { | 14 | namespace { |
| 15 | 15 | ||
| @@ -101,7 +101,7 @@ ConfigureInputSimple::ConfigureInputSimple(QWidget* parent) | |||
| 101 | 101 | ||
| 102 | connect(ui->profile_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this, | 102 | connect(ui->profile_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this, |
| 103 | &ConfigureInputSimple::OnSelectProfile); | 103 | &ConfigureInputSimple::OnSelectProfile); |
| 104 | connect(ui->profile_configure, &QPushButton::pressed, this, &ConfigureInputSimple::OnConfigure); | 104 | connect(ui->profile_configure, &QPushButton::clicked, this, &ConfigureInputSimple::OnConfigure); |
| 105 | 105 | ||
| 106 | LoadConfiguration(); | 106 | LoadConfiguration(); |
| 107 | } | 107 | } |
diff --git a/src/yuzu/configuration/configure_mouse_advanced.cpp b/src/yuzu/configuration/configure_mouse_advanced.cpp index b7305e653..0a4abe34f 100644 --- a/src/yuzu/configuration/configure_mouse_advanced.cpp +++ b/src/yuzu/configuration/configure_mouse_advanced.cpp | |||
| @@ -83,7 +83,7 @@ ConfigureMouseAdvanced::ConfigureMouseAdvanced(QWidget* parent) | |||
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | button->setContextMenuPolicy(Qt::CustomContextMenu); | 85 | button->setContextMenuPolicy(Qt::CustomContextMenu); |
| 86 | connect(button, &QPushButton::released, [=] { | 86 | connect(button, &QPushButton::clicked, [=] { |
| 87 | HandleClick( | 87 | HandleClick( |
| 88 | button_map[button_id], | 88 | button_map[button_id], |
| 89 | [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; }, | 89 | [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; }, |
| @@ -104,8 +104,8 @@ ConfigureMouseAdvanced::ConfigureMouseAdvanced(QWidget* parent) | |||
| 104 | }); | 104 | }); |
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | connect(ui->buttonClearAll, &QPushButton::released, [this] { ClearAll(); }); | 107 | connect(ui->buttonClearAll, &QPushButton::clicked, [this] { ClearAll(); }); |
| 108 | connect(ui->buttonRestoreDefaults, &QPushButton::released, [this] { RestoreDefaults(); }); | 108 | connect(ui->buttonRestoreDefaults, &QPushButton::clicked, [this] { RestoreDefaults(); }); |
| 109 | 109 | ||
| 110 | timeout_timer->setSingleShot(true); | 110 | timeout_timer->setSingleShot(true); |
| 111 | connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); }); | 111 | connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); }); |
diff --git a/src/yuzu/configuration/configure_per_general.cpp b/src/yuzu/configuration/configure_per_general.cpp index 90336e235..d7f259f12 100644 --- a/src/yuzu/configuration/configure_per_general.cpp +++ b/src/yuzu/configuration/configure_per_general.cpp | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | #include "yuzu/configuration/config.h" | 23 | #include "yuzu/configuration/config.h" |
| 24 | #include "yuzu/configuration/configure_input.h" | 24 | #include "yuzu/configuration/configure_input.h" |
| 25 | #include "yuzu/configuration/configure_per_general.h" | 25 | #include "yuzu/configuration/configure_per_general.h" |
| 26 | #include "yuzu/ui_settings.h" | 26 | #include "yuzu/uisettings.h" |
| 27 | #include "yuzu/util/util.h" | 27 | #include "yuzu/util/util.h" |
| 28 | 28 | ||
| 29 | ConfigurePerGameGeneral::ConfigurePerGameGeneral(QWidget* parent, u64 title_id) | 29 | ConfigurePerGameGeneral::ConfigurePerGameGeneral(QWidget* parent, u64 title_id) |
diff --git a/src/yuzu/configuration/configure_profile_manager.cpp b/src/yuzu/configuration/configure_profile_manager.cpp index c90f4cdd8..f53423440 100644 --- a/src/yuzu/configuration/configure_profile_manager.cpp +++ b/src/yuzu/configuration/configure_profile_manager.cpp | |||
| @@ -108,10 +108,10 @@ ConfigureProfileManager ::ConfigureProfileManager(QWidget* parent) | |||
| 108 | 108 | ||
| 109 | connect(tree_view, &QTreeView::clicked, this, &ConfigureProfileManager::SelectUser); | 109 | connect(tree_view, &QTreeView::clicked, this, &ConfigureProfileManager::SelectUser); |
| 110 | 110 | ||
| 111 | connect(ui->pm_add, &QPushButton::pressed, this, &ConfigureProfileManager::AddUser); | 111 | connect(ui->pm_add, &QPushButton::clicked, this, &ConfigureProfileManager::AddUser); |
| 112 | connect(ui->pm_rename, &QPushButton::pressed, this, &ConfigureProfileManager::RenameUser); | 112 | connect(ui->pm_rename, &QPushButton::clicked, this, &ConfigureProfileManager::RenameUser); |
| 113 | connect(ui->pm_remove, &QPushButton::pressed, this, &ConfigureProfileManager::DeleteUser); | 113 | connect(ui->pm_remove, &QPushButton::clicked, this, &ConfigureProfileManager::DeleteUser); |
| 114 | connect(ui->pm_set_image, &QPushButton::pressed, this, &ConfigureProfileManager::SetUserImage); | 114 | connect(ui->pm_set_image, &QPushButton::clicked, this, &ConfigureProfileManager::SetUserImage); |
| 115 | 115 | ||
| 116 | scene = new QGraphicsScene; | 116 | scene = new QGraphicsScene; |
| 117 | ui->current_user_icon->setScene(scene); | 117 | ui->current_user_icon->setScene(scene); |
diff --git a/src/yuzu/configuration/configure_touchscreen_advanced.cpp b/src/yuzu/configuration/configure_touchscreen_advanced.cpp index 8ced28c75..7d7cc00b7 100644 --- a/src/yuzu/configuration/configure_touchscreen_advanced.cpp +++ b/src/yuzu/configuration/configure_touchscreen_advanced.cpp | |||
| @@ -11,7 +11,7 @@ ConfigureTouchscreenAdvanced::ConfigureTouchscreenAdvanced(QWidget* parent) | |||
| 11 | : QDialog(parent), ui(std::make_unique<Ui::ConfigureTouchscreenAdvanced>()) { | 11 | : QDialog(parent), ui(std::make_unique<Ui::ConfigureTouchscreenAdvanced>()) { |
| 12 | ui->setupUi(this); | 12 | ui->setupUi(this); |
| 13 | 13 | ||
| 14 | connect(ui->restore_defaults_button, &QPushButton::pressed, this, | 14 | connect(ui->restore_defaults_button, &QPushButton::clicked, this, |
| 15 | &ConfigureTouchscreenAdvanced::RestoreDefaults); | 15 | &ConfigureTouchscreenAdvanced::RestoreDefaults); |
| 16 | 16 | ||
| 17 | LoadConfiguration(); | 17 | LoadConfiguration(); |
diff --git a/src/yuzu/configuration/configure_web.cpp b/src/yuzu/configuration/configure_web.cpp index 5a70ef168..336b062b3 100644 --- a/src/yuzu/configuration/configure_web.cpp +++ b/src/yuzu/configuration/configure_web.cpp | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | #include "core/telemetry_session.h" | 9 | #include "core/telemetry_session.h" |
| 10 | #include "ui_configure_web.h" | 10 | #include "ui_configure_web.h" |
| 11 | #include "yuzu/configuration/configure_web.h" | 11 | #include "yuzu/configuration/configure_web.h" |
| 12 | #include "yuzu/ui_settings.h" | 12 | #include "yuzu/uisettings.h" |
| 13 | 13 | ||
| 14 | ConfigureWeb::ConfigureWeb(QWidget* parent) | 14 | ConfigureWeb::ConfigureWeb(QWidget* parent) |
| 15 | : QWidget(parent), ui(std::make_unique<Ui::ConfigureWeb>()) { | 15 | : QWidget(parent), ui(std::make_unique<Ui::ConfigureWeb>()) { |
diff --git a/src/yuzu/debugger/console.cpp b/src/yuzu/debugger/console.cpp index 320898f6a..207ff4d58 100644 --- a/src/yuzu/debugger/console.cpp +++ b/src/yuzu/debugger/console.cpp | |||
| @@ -10,7 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | #include "common/logging/backend.h" | 11 | #include "common/logging/backend.h" |
| 12 | #include "yuzu/debugger/console.h" | 12 | #include "yuzu/debugger/console.h" |
| 13 | #include "yuzu/ui_settings.h" | 13 | #include "yuzu/uisettings.h" |
| 14 | 14 | ||
| 15 | namespace Debugger { | 15 | namespace Debugger { |
| 16 | void ToggleConsole() { | 16 | void ToggleConsole() { |
diff --git a/src/yuzu/discord_impl.cpp b/src/yuzu/discord_impl.cpp index 9d87a41eb..ea0079353 100644 --- a/src/yuzu/discord_impl.cpp +++ b/src/yuzu/discord_impl.cpp | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | #include "core/core.h" | 9 | #include "core/core.h" |
| 10 | #include "core/loader/loader.h" | 10 | #include "core/loader/loader.h" |
| 11 | #include "yuzu/discord_impl.h" | 11 | #include "yuzu/discord_impl.h" |
| 12 | #include "yuzu/ui_settings.h" | 12 | #include "yuzu/uisettings.h" |
| 13 | 13 | ||
| 14 | namespace DiscordRPC { | 14 | namespace DiscordRPC { |
| 15 | 15 | ||
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index 1885587af..d18b96519 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | #include "yuzu/game_list_p.h" | 23 | #include "yuzu/game_list_p.h" |
| 24 | #include "yuzu/game_list_worker.h" | 24 | #include "yuzu/game_list_worker.h" |
| 25 | #include "yuzu/main.h" | 25 | #include "yuzu/main.h" |
| 26 | #include "yuzu/ui_settings.h" | 26 | #include "yuzu/uisettings.h" |
| 27 | 27 | ||
| 28 | GameListSearchField::KeyReleaseEater::KeyReleaseEater(GameList* gamelist) : gamelist{gamelist} {} | 28 | GameListSearchField::KeyReleaseEater::KeyReleaseEater(GameList* gamelist) : gamelist{gamelist} {} |
| 29 | 29 | ||
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h index 0b458ef48..ece534dd6 100644 --- a/src/yuzu/game_list_p.h +++ b/src/yuzu/game_list_p.h | |||
| @@ -19,7 +19,7 @@ | |||
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "common/logging/log.h" | 20 | #include "common/logging/log.h" |
| 21 | #include "common/string_util.h" | 21 | #include "common/string_util.h" |
| 22 | #include "yuzu/ui_settings.h" | 22 | #include "yuzu/uisettings.h" |
| 23 | #include "yuzu/util/util.h" | 23 | #include "yuzu/util/util.h" |
| 24 | 24 | ||
| 25 | /** | 25 | /** |
diff --git a/src/yuzu/game_list_worker.cpp b/src/yuzu/game_list_worker.cpp index 4f30e9147..77f358630 100644 --- a/src/yuzu/game_list_worker.cpp +++ b/src/yuzu/game_list_worker.cpp | |||
| @@ -29,7 +29,7 @@ | |||
| 29 | #include "yuzu/game_list.h" | 29 | #include "yuzu/game_list.h" |
| 30 | #include "yuzu/game_list_p.h" | 30 | #include "yuzu/game_list_p.h" |
| 31 | #include "yuzu/game_list_worker.h" | 31 | #include "yuzu/game_list_worker.h" |
| 32 | #include "yuzu/ui_settings.h" | 32 | #include "yuzu/uisettings.h" |
| 33 | 33 | ||
| 34 | namespace { | 34 | namespace { |
| 35 | 35 | ||
diff --git a/src/yuzu/hotkeys.cpp b/src/yuzu/hotkeys.cpp index 4582e7f21..d4e97fa16 100644 --- a/src/yuzu/hotkeys.cpp +++ b/src/yuzu/hotkeys.cpp | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | #include <QTreeWidgetItem> | 7 | #include <QTreeWidgetItem> |
| 8 | #include <QtGlobal> | 8 | #include <QtGlobal> |
| 9 | #include "yuzu/hotkeys.h" | 9 | #include "yuzu/hotkeys.h" |
| 10 | #include "yuzu/ui_settings.h" | 10 | #include "yuzu/uisettings.h" |
| 11 | 11 | ||
| 12 | HotkeyRegistry::HotkeyRegistry() = default; | 12 | HotkeyRegistry::HotkeyRegistry() = default; |
| 13 | HotkeyRegistry::~HotkeyRegistry() = default; | 13 | HotkeyRegistry::~HotkeyRegistry() = default; |
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index ae21f4753..ac57229d5 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -100,7 +100,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual | |||
| 100 | #include "yuzu/hotkeys.h" | 100 | #include "yuzu/hotkeys.h" |
| 101 | #include "yuzu/loading_screen.h" | 101 | #include "yuzu/loading_screen.h" |
| 102 | #include "yuzu/main.h" | 102 | #include "yuzu/main.h" |
| 103 | #include "yuzu/ui_settings.h" | 103 | #include "yuzu/uisettings.h" |
| 104 | 104 | ||
| 105 | #ifdef USE_DISCORD_PRESENCE | 105 | #ifdef USE_DISCORD_PRESENCE |
| 106 | #include "yuzu/discord_impl.h" | 106 | #include "yuzu/discord_impl.h" |
| @@ -119,6 +119,7 @@ Q_IMPORT_PLUGIN(QWindowsIntegrationPlugin); | |||
| 119 | #endif | 119 | #endif |
| 120 | 120 | ||
| 121 | #ifdef _WIN32 | 121 | #ifdef _WIN32 |
| 122 | #include <windows.h> | ||
| 122 | extern "C" { | 123 | extern "C" { |
| 123 | // tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable | 124 | // tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable |
| 124 | // graphics | 125 | // graphics |
| @@ -747,6 +748,18 @@ void GMainWindow::OnDisplayTitleBars(bool show) { | |||
| 747 | } | 748 | } |
| 748 | } | 749 | } |
| 749 | 750 | ||
| 751 | void GMainWindow::PreventOSSleep() { | ||
| 752 | #ifdef _WIN32 | ||
| 753 | SetThreadExecutionState(ES_CONTINUOUS | ES_SYSTEM_REQUIRED | ES_DISPLAY_REQUIRED); | ||
| 754 | #endif | ||
| 755 | } | ||
| 756 | |||
| 757 | void GMainWindow::AllowOSSleep() { | ||
| 758 | #ifdef _WIN32 | ||
| 759 | SetThreadExecutionState(ES_CONTINUOUS); | ||
| 760 | #endif | ||
| 761 | } | ||
| 762 | |||
| 750 | QStringList GMainWindow::GetUnsupportedGLExtensions() { | 763 | QStringList GMainWindow::GetUnsupportedGLExtensions() { |
| 751 | QStringList unsupported_ext; | 764 | QStringList unsupported_ext; |
| 752 | 765 | ||
| @@ -966,6 +979,8 @@ void GMainWindow::BootGame(const QString& filename) { | |||
| 966 | } | 979 | } |
| 967 | 980 | ||
| 968 | void GMainWindow::ShutdownGame() { | 981 | void GMainWindow::ShutdownGame() { |
| 982 | AllowOSSleep(); | ||
| 983 | |||
| 969 | discord_rpc->Pause(); | 984 | discord_rpc->Pause(); |
| 970 | emu_thread->RequestStop(); | 985 | emu_thread->RequestStop(); |
| 971 | 986 | ||
| @@ -1567,6 +1582,8 @@ void GMainWindow::OnMenuRecentFile() { | |||
| 1567 | } | 1582 | } |
| 1568 | 1583 | ||
| 1569 | void GMainWindow::OnStartGame() { | 1584 | void GMainWindow::OnStartGame() { |
| 1585 | PreventOSSleep(); | ||
| 1586 | |||
| 1570 | emu_thread->SetRunning(true); | 1587 | emu_thread->SetRunning(true); |
| 1571 | 1588 | ||
| 1572 | qRegisterMetaType<Core::Frontend::SoftwareKeyboardParameters>( | 1589 | qRegisterMetaType<Core::Frontend::SoftwareKeyboardParameters>( |
| @@ -1598,6 +1615,8 @@ void GMainWindow::OnPauseGame() { | |||
| 1598 | ui.action_Pause->setEnabled(false); | 1615 | ui.action_Pause->setEnabled(false); |
| 1599 | ui.action_Stop->setEnabled(true); | 1616 | ui.action_Stop->setEnabled(true); |
| 1600 | ui.action_Capture_Screenshot->setEnabled(false); | 1617 | ui.action_Capture_Screenshot->setEnabled(false); |
| 1618 | |||
| 1619 | AllowOSSleep(); | ||
| 1601 | } | 1620 | } |
| 1602 | 1621 | ||
| 1603 | void GMainWindow::OnStopGame() { | 1622 | void GMainWindow::OnStopGame() { |
| @@ -1843,13 +1862,14 @@ void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string det | |||
| 1843 | "data, or other bugs."); | 1862 | "data, or other bugs."); |
| 1844 | switch (result) { | 1863 | switch (result) { |
| 1845 | case Core::System::ResultStatus::ErrorSystemFiles: { | 1864 | case Core::System::ResultStatus::ErrorSystemFiles: { |
| 1846 | QString message = tr("yuzu was unable to locate a Switch system archive"); | 1865 | QString message; |
| 1847 | if (!details.empty()) { | 1866 | if (details.empty()) { |
| 1848 | message.append(tr(": %1. ").arg(QString::fromStdString(details))); | 1867 | message = |
| 1868 | tr("yuzu was unable to locate a Switch system archive. %1").arg(common_message); | ||
| 1849 | } else { | 1869 | } else { |
| 1850 | message.append(tr(". ")); | 1870 | message = tr("yuzu was unable to locate a Switch system archive: %1. %2") |
| 1871 | .arg(QString::fromStdString(details), common_message); | ||
| 1851 | } | 1872 | } |
| 1852 | message.append(common_message); | ||
| 1853 | 1873 | ||
| 1854 | answer = QMessageBox::question(this, tr("System Archive Not Found"), message, | 1874 | answer = QMessageBox::question(this, tr("System Archive Not Found"), message, |
| 1855 | QMessageBox::Yes | QMessageBox::No, QMessageBox::No); | 1875 | QMessageBox::Yes | QMessageBox::No, QMessageBox::No); |
| @@ -1858,8 +1878,8 @@ void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string det | |||
| 1858 | } | 1878 | } |
| 1859 | 1879 | ||
| 1860 | case Core::System::ResultStatus::ErrorSharedFont: { | 1880 | case Core::System::ResultStatus::ErrorSharedFont: { |
| 1861 | QString message = tr("yuzu was unable to locate the Switch shared fonts. "); | 1881 | const QString message = |
| 1862 | message.append(common_message); | 1882 | tr("yuzu was unable to locate the Switch shared fonts. %1").arg(common_message); |
| 1863 | answer = QMessageBox::question(this, tr("Shared Fonts Not Found"), message, | 1883 | answer = QMessageBox::question(this, tr("Shared Fonts Not Found"), message, |
| 1864 | QMessageBox::Yes | QMessageBox::No, QMessageBox::No); | 1884 | QMessageBox::Yes | QMessageBox::No, QMessageBox::No); |
| 1865 | status_message = tr("Shared Font Missing"); | 1885 | status_message = tr("Shared Font Missing"); |
diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 1137bbc7a..501608ddc 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h | |||
| @@ -130,6 +130,9 @@ private: | |||
| 130 | void ConnectWidgetEvents(); | 130 | void ConnectWidgetEvents(); |
| 131 | void ConnectMenuEvents(); | 131 | void ConnectMenuEvents(); |
| 132 | 132 | ||
| 133 | void PreventOSSleep(); | ||
| 134 | void AllowOSSleep(); | ||
| 135 | |||
| 133 | QStringList GetUnsupportedGLExtensions(); | 136 | QStringList GetUnsupportedGLExtensions(); |
| 134 | bool LoadROM(const QString& filename); | 137 | bool LoadROM(const QString& filename); |
| 135 | void BootGame(const QString& filename); | 138 | void BootGame(const QString& filename); |
diff --git a/src/yuzu/ui_settings.cpp b/src/yuzu/uisettings.cpp index 4bdc302e0..7f7d247a3 100644 --- a/src/yuzu/ui_settings.cpp +++ b/src/yuzu/uisettings.cpp | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "ui_settings.h" | 5 | #include "yuzu/uisettings.h" |
| 6 | 6 | ||
| 7 | namespace UISettings { | 7 | namespace UISettings { |
| 8 | 8 | ||
diff --git a/src/yuzu/ui_settings.h b/src/yuzu/uisettings.h index a62cd6911..a62cd6911 100644 --- a/src/yuzu/ui_settings.h +++ b/src/yuzu/uisettings.h | |||
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 30b22341b..067d58d80 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -340,7 +340,6 @@ void Config::ReadValues() { | |||
| 340 | } | 340 | } |
| 341 | 341 | ||
| 342 | // Core | 342 | // Core |
| 343 | Settings::values.cpu_jit_enabled = sdl2_config->GetBoolean("Core", "cpu_jit_enabled", true); | ||
| 344 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); | 343 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); |
| 345 | 344 | ||
| 346 | // Renderer | 345 | // Renderer |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 4f1add434..0cfc111a6 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -76,10 +76,6 @@ motion_device= | |||
| 76 | touch_device= | 76 | touch_device= |
| 77 | 77 | ||
| 78 | [Core] | 78 | [Core] |
| 79 | # Whether to use the Just-In-Time (JIT) compiler for CPU emulation | ||
| 80 | # 0: Interpreter (slow), 1 (default): JIT (fast) | ||
| 81 | cpu_jit_enabled = | ||
| 82 | |||
| 83 | # Whether to use multi-core for CPU emulation | 79 | # Whether to use multi-core for CPU emulation |
| 84 | # 0 (default): Disabled, 1: Enabled | 80 | # 0 (default): Disabled, 1: Enabled |
| 85 | use_multi_core= | 81 | use_multi_core= |
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index b96b7d279..9a11dc6c3 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp | |||
| @@ -114,7 +114,6 @@ void Config::ReadValues() { | |||
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | // Core | 116 | // Core |
| 117 | Settings::values.cpu_jit_enabled = sdl2_config->GetBoolean("Core", "cpu_jit_enabled", true); | ||
| 118 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); | 117 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); |
| 119 | 118 | ||
| 120 | // Renderer | 119 | // Renderer |
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h index 0f880d8c7..9a3e86d68 100644 --- a/src/yuzu_tester/default_ini.h +++ b/src/yuzu_tester/default_ini.h | |||
| @@ -8,10 +8,6 @@ namespace DefaultINI { | |||
| 8 | 8 | ||
| 9 | const char* sdl2_config_file = R"( | 9 | const char* sdl2_config_file = R"( |
| 10 | [Core] | 10 | [Core] |
| 11 | # Whether to use the Just-In-Time (JIT) compiler for CPU emulation | ||
| 12 | # 0: Interpreter (slow), 1 (default): JIT (fast) | ||
| 13 | cpu_jit_enabled = | ||
| 14 | |||
| 15 | # Whether to use multi-core for CPU emulation | 11 | # Whether to use multi-core for CPU emulation |
| 16 | # 0 (default): Disabled, 1: Enabled | 12 | # 0 (default): Disabled, 1: Enabled |
| 17 | use_multi_core= | 13 | use_multi_core= |
diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp index b589c3de3..0ee97aa54 100644 --- a/src/yuzu_tester/yuzu.cpp +++ b/src/yuzu_tester/yuzu.cpp | |||
| @@ -92,7 +92,6 @@ int main(int argc, char** argv) { | |||
| 92 | 92 | ||
| 93 | int option_index = 0; | 93 | int option_index = 0; |
| 94 | 94 | ||
| 95 | char* endarg; | ||
| 96 | #ifdef _WIN32 | 95 | #ifdef _WIN32 |
| 97 | int argc_w; | 96 | int argc_w; |
| 98 | auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w); | 97 | auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w); |
| @@ -226,7 +225,7 @@ int main(int argc, char** argv) { | |||
| 226 | 225 | ||
| 227 | switch (load_result) { | 226 | switch (load_result) { |
| 228 | case Core::System::ResultStatus::ErrorGetLoader: | 227 | case Core::System::ResultStatus::ErrorGetLoader: |
| 229 | LOG_CRITICAL(Frontend, "Failed to obtain loader for %s!", filepath.c_str()); | 228 | LOG_CRITICAL(Frontend, "Failed to obtain loader for {}!", filepath); |
| 230 | return -1; | 229 | return -1; |
| 231 | case Core::System::ResultStatus::ErrorLoader: | 230 | case Core::System::ResultStatus::ErrorLoader: |
| 232 | LOG_CRITICAL(Frontend, "Failed to load ROM!"); | 231 | LOG_CRITICAL(Frontend, "Failed to load ROM!"); |