diff options
25 files changed, 779 insertions, 349 deletions
diff --git a/.appveyor/UtilityFunctions.ps1 b/.appveyor/UtilityFunctions.ps1 deleted file mode 100644 index fd7476314..000000000 --- a/.appveyor/UtilityFunctions.ps1 +++ /dev/null | |||
| @@ -1,39 +0,0 @@ | |||
| 1 | # Set-up Visual Studio Command Prompt environment for PowerShell | ||
| 2 | pushd "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\" | ||
| 3 | cmd /c "VsDevCmd.bat -arch=x64 & set" | foreach { | ||
| 4 | if ($_ -match "=") { | ||
| 5 | $v = $_.split("="); Set-Item -Force -Path "ENV:\$($v[0])" -Value "$($v[1])" | ||
| 6 | } | ||
| 7 | } | ||
| 8 | popd | ||
| 9 | |||
| 10 | function Which ($search_path, $name) { | ||
| 11 | ($search_path).Split(";") | Get-ChildItem -Filter $name | Select -First 1 -Exp FullName | ||
| 12 | } | ||
| 13 | |||
| 14 | function GetDeps ($search_path, $binary) { | ||
| 15 | ((dumpbin /dependents $binary).Where({ $_ -match "dependencies:"}, "SkipUntil") | Select-String "[^ ]*\.dll").Matches | foreach { | ||
| 16 | Which $search_path $_.Value | ||
| 17 | } | ||
| 18 | } | ||
| 19 | |||
| 20 | function RecursivelyGetDeps ($search_path, $binary) { | ||
| 21 | $final_deps = @() | ||
| 22 | $deps_to_process = GetDeps $search_path $binary | ||
| 23 | while ($deps_to_process.Count -gt 0) { | ||
| 24 | $current, $deps_to_process = $deps_to_process | ||
| 25 | if ($final_deps -contains $current) { continue } | ||
| 26 | |||
| 27 | # Is this a system dll file? | ||
| 28 | # We use the same algorithm that cmake uses to determine this. | ||
| 29 | if ($current -match "$([regex]::Escape($env:SystemRoot))\\sys") { continue } | ||
| 30 | if ($current -match "$([regex]::Escape($env:WinDir))\\sys") { continue } | ||
| 31 | if ($current -match "\\msvc[^\\]+dll") { continue } | ||
| 32 | if ($current -match "\\api-ms-win-[^\\]+dll") { continue } | ||
| 33 | |||
| 34 | $final_deps += $current | ||
| 35 | $new_deps = GetDeps $search_path $current | ||
| 36 | $deps_to_process += ($new_deps | ?{-not ($final_deps -contains $_)}) | ||
| 37 | } | ||
| 38 | return $final_deps | ||
| 39 | } | ||
diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index cef19c259..000000000 --- a/appveyor.yml +++ /dev/null | |||
| @@ -1,178 +0,0 @@ | |||
| 1 | # shallow clone | ||
| 2 | clone_depth: 10 | ||
| 3 | |||
| 4 | cache: | ||
| 5 | - C:\ProgramData\chocolatey\bin -> appveyor.yml | ||
| 6 | - C:\ProgramData\chocolatey\lib -> appveyor.yml | ||
| 7 | |||
| 8 | os: Visual Studio 2017 | ||
| 9 | |||
| 10 | environment: | ||
| 11 | # Tell msys2 to add mingw64 to the path | ||
| 12 | MSYSTEM: MINGW64 | ||
| 13 | # Tell msys2 to inherit the current directory when starting the shell | ||
| 14 | CHERE_INVOKING: 1 | ||
| 15 | matrix: | ||
| 16 | - BUILD_TYPE: msvc | ||
| 17 | - BUILD_TYPE: mingw | ||
| 18 | |||
| 19 | platform: | ||
| 20 | - x64 | ||
| 21 | |||
| 22 | configuration: | ||
| 23 | - Release | ||
| 24 | |||
| 25 | install: | ||
| 26 | - git submodule update --init --recursive | ||
| 27 | - ps: | | ||
| 28 | if ($env:BUILD_TYPE -eq 'mingw') { | ||
| 29 | $dependencies = "mingw64/mingw-w64-x86_64-cmake", | ||
| 30 | "mingw64/mingw-w64-x86_64-qt5", | ||
| 31 | "mingw64/mingw-w64-x86_64-SDL2" | ||
| 32 | # redirect err to null to prevent warnings from becoming errors | ||
| 33 | # workaround to prevent pacman from failing due to cyclical dependencies | ||
| 34 | C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S mingw64/mingw-w64-x86_64-freetype mingw64/mingw-w64-x86_64-fontconfig" 2> $null | ||
| 35 | C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S $dependencies" 2> $null | ||
| 36 | } | ||
| 37 | |||
| 38 | before_build: | ||
| 39 | - mkdir %BUILD_TYPE%_build | ||
| 40 | - cd %BUILD_TYPE%_build | ||
| 41 | - ps: | | ||
| 42 | $COMPAT = if ($env:ENABLE_COMPATIBILITY_REPORTING -eq $null) {0} else {$env:ENABLE_COMPATIBILITY_REPORTING} | ||
| 43 | if ($env:BUILD_TYPE -eq 'msvc') { | ||
| 44 | # redirect stderr and change the exit code to prevent powershell from cancelling the build if cmake prints a warning | ||
| 45 | cmd /C 'cmake -G "Visual Studio 15 2017 Win64" -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_BUNDLED_UNICORN=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1 && exit 0' | ||
| 46 | } else { | ||
| 47 | C:\msys64\usr\bin\bash.exe -lc "cmake -G 'MSYS Makefiles' -DYUZU_BUILD_UNICORN=1 -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1" | ||
| 48 | } | ||
| 49 | - cd .. | ||
| 50 | |||
| 51 | build_script: | ||
| 52 | - ps: | | ||
| 53 | if ($env:BUILD_TYPE -eq 'msvc') { | ||
| 54 | # https://www.appveyor.com/docs/build-phase | ||
| 55 | msbuild msvc_build/yuzu.sln /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" | ||
| 56 | } else { | ||
| 57 | C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -C mingw_build/ 2>&1' | ||
| 58 | } | ||
| 59 | |||
| 60 | after_build: | ||
| 61 | - ps: | | ||
| 62 | $GITDATE = $(git show -s --date=short --format='%ad') -replace "-","" | ||
| 63 | $GITREV = $(git show -s --format='%h') | ||
| 64 | |||
| 65 | # Find out which kind of release we are producing by tag name | ||
| 66 | if ($env:APPVEYOR_REPO_TAG_NAME) { | ||
| 67 | $RELEASE_DIST, $RELEASE_VERSION = $env:APPVEYOR_REPO_TAG_NAME.split('-') | ||
| 68 | } else { | ||
| 69 | # There is no repo tag - make assumptions | ||
| 70 | $RELEASE_DIST = "head" | ||
| 71 | } | ||
| 72 | |||
| 73 | if ($env:BUILD_TYPE -eq 'msvc') { | ||
| 74 | # Where are these spaces coming from? Regardless, let's remove them | ||
| 75 | $MSVC_BUILD_ZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.zip" -replace " ", "" | ||
| 76 | $MSVC_BUILD_PDB = "yuzu-windows-msvc-$GITDATE-$GITREV-debugsymbols.zip" -replace " ", "" | ||
| 77 | $MSVC_SEVENZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.7z" -replace " ", "" | ||
| 78 | |||
| 79 | # set the build names as env vars so the artifacts can upload them | ||
| 80 | $env:BUILD_ZIP = $MSVC_BUILD_ZIP | ||
| 81 | $env:BUILD_SYMBOLS = $MSVC_BUILD_PDB | ||
| 82 | $env:BUILD_UPDATE = $MSVC_SEVENZIP | ||
| 83 | |||
| 84 | $BUILD_DIR = ".\msvc_build\bin\Release" | ||
| 85 | |||
| 86 | # Make a debug symbol upload | ||
| 87 | mkdir pdb | ||
| 88 | Get-ChildItem "$BUILD_DIR\" -Recurse -Filter "*.pdb" | Copy-Item -destination .\pdb | ||
| 89 | 7z a -tzip $MSVC_BUILD_PDB .\pdb\*.pdb | ||
| 90 | rm "$BUILD_DIR\*.pdb" | ||
| 91 | |||
| 92 | mkdir $RELEASE_DIST | ||
| 93 | # get rid of extra exes by copying everything over, then deleting all the exes, then copying just the exes we want | ||
| 94 | Copy-Item "$BUILD_DIR\*" -Destination $RELEASE_DIST -Recurse | ||
| 95 | rm "$RELEASE_DIST\*.exe" | ||
| 96 | Get-ChildItem "$BUILD_DIR" -Recurse -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST | ||
| 97 | Get-ChildItem "$BUILD_DIR" -Recurse -Filter "QtWebEngineProcess*.exe" | Copy-Item -destination $RELEASE_DIST | ||
| 98 | Copy-Item .\license.txt -Destination $RELEASE_DIST | ||
| 99 | Copy-Item .\README.md -Destination $RELEASE_DIST | ||
| 100 | 7z a -tzip $MSVC_BUILD_ZIP $RELEASE_DIST\* | ||
| 101 | 7z a $MSVC_SEVENZIP $RELEASE_DIST | ||
| 102 | } else { | ||
| 103 | $MINGW_BUILD_ZIP = "yuzu-windows-mingw-$GITDATE-$GITREV.zip" -replace " ", "" | ||
| 104 | $MINGW_SEVENZIP = "yuzu-windows-mingw-$GITDATE-$GITREV.7z" -replace " ", "" | ||
| 105 | # not going to bother adding separate debug symbols for mingw, so just upload a README for it | ||
| 106 | # if someone wants to add them, change mingw to compile with -g and use objdump and strip to separate the symbols from the binary | ||
| 107 | $MINGW_NO_DEBUG_SYMBOLS = "README_No_Debug_Symbols.txt" | ||
| 108 | Set-Content -Path $MINGW_NO_DEBUG_SYMBOLS -Value "This is a workaround for Appveyor since msvc has debug symbols but mingw doesnt" -Force | ||
| 109 | |||
| 110 | # store the build information in env vars so we can use them as artifacts | ||
| 111 | $env:BUILD_ZIP = $MINGW_BUILD_ZIP | ||
| 112 | $env:BUILD_SYMBOLS = $MINGW_NO_DEBUG_SYMBOLS | ||
| 113 | $env:BUILD_UPDATE = $MINGW_SEVENZIP | ||
| 114 | |||
| 115 | $CMAKE_SOURCE_DIR = "$env:APPVEYOR_BUILD_FOLDER" | ||
| 116 | $CMAKE_BINARY_DIR = "$CMAKE_SOURCE_DIR/mingw_build/bin" | ||
| 117 | $RELEASE_DIST = $RELEASE_DIST + "-mingw" | ||
| 118 | |||
| 119 | mkdir $RELEASE_DIST | ||
| 120 | mkdir $RELEASE_DIST/platforms | ||
| 121 | mkdir $RELEASE_DIST/styles | ||
| 122 | mkdir $RELEASE_DIST/imageformats | ||
| 123 | |||
| 124 | # copy the compiled binaries and other release files to the release folder | ||
| 125 | Get-ChildItem "$CMAKE_BINARY_DIR" -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST | ||
| 126 | Copy-Item -path "$CMAKE_SOURCE_DIR/license.txt" -destination $RELEASE_DIST | ||
| 127 | Copy-Item -path "$CMAKE_SOURCE_DIR/README.md" -destination $RELEASE_DIST | ||
| 128 | |||
| 129 | # copy the qt windows plugin dll to platforms | ||
| 130 | Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/platforms/qwindows.dll" -force -destination "$RELEASE_DIST/platforms" | ||
| 131 | |||
| 132 | # copy the qt windows vista style dll to platforms | ||
| 133 | Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/styles/qwindowsvistastyle.dll" -force -destination "$RELEASE_DIST/styles" | ||
| 134 | |||
| 135 | # copy the qt jpeg imageformat dll to platforms | ||
| 136 | Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/imageformats/qjpeg.dll" -force -destination "$RELEASE_DIST/imageformats" | ||
| 137 | |||
| 138 | # copy all the dll dependencies to the release folder | ||
| 139 | . "./.appveyor/UtilityFunctions.ps1" | ||
| 140 | $DLLSearchPath = "C:\msys64\mingw64\bin;$env:PATH" | ||
| 141 | $MingwDLLs = RecursivelyGetDeps $DLLSearchPath "$RELEASE_DIST\yuzu.exe" | ||
| 142 | $MingwDLLs += RecursivelyGetDeps $DLLSearchPath "$RELEASE_DIST\yuzu_cmd.exe" | ||
| 143 | $MingwDLLs += RecursivelyGetDeps $DLLSearchPath "$RELEASE_DIST\imageformats\qjpeg.dll" | ||
| 144 | Write-Host "Detected the following dependencies:" | ||
| 145 | Write-Host $MingwDLLs | ||
| 146 | foreach ($file in $MingwDLLs) { | ||
| 147 | Copy-Item -path "$file" -force -destination "$RELEASE_DIST" | ||
| 148 | } | ||
| 149 | |||
| 150 | 7z a -tzip $MINGW_BUILD_ZIP $RELEASE_DIST\* | ||
| 151 | 7z a $MINGW_SEVENZIP $RELEASE_DIST | ||
| 152 | } | ||
| 153 | |||
| 154 | test_script: | ||
| 155 | - cd %BUILD_TYPE%_build | ||
| 156 | - ps: | | ||
| 157 | if ($env:BUILD_TYPE -eq 'msvc') { | ||
| 158 | ctest -VV -C Release | ||
| 159 | } else { | ||
| 160 | C:\msys64\usr\bin\bash.exe -lc "ctest -VV -C Release" | ||
| 161 | } | ||
| 162 | - cd .. | ||
| 163 | |||
| 164 | artifacts: | ||
| 165 | - path: $(BUILD_ZIP) | ||
| 166 | name: build | ||
| 167 | type: zip | ||
| 168 | |||
| 169 | deploy: | ||
| 170 | provider: GitHub | ||
| 171 | release: $(appveyor_repo_tag_name) | ||
| 172 | auth_token: | ||
| 173 | secure: QqePPnXbkzmXct5c8hZ2X5AbsthbI6cS1Sr+VBzcD8oUOIjfWJJKXVAQGUbQAbb0 | ||
| 174 | artifact: update,build | ||
| 175 | draft: false | ||
| 176 | prerelease: false | ||
| 177 | on: | ||
| 178 | appveyor_repo_tag: true | ||
diff --git a/externals/sirit b/externals/sirit | |||
| Subproject 12f40a80324d7c154f19f25c448a5ce27d38cd1 | Subproject 9f4d057aa28c4e9509bdc767afb27b4aee303b7 | ||
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index d6a2cc8b8..dfb12cd2d 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -1973,7 +1973,7 @@ private: | |||
| 1973 | INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), | 1973 | INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), |
| 1974 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), | 1974 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), |
| 1975 | INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), | 1975 | INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), |
| 1976 | INST("11011111--00----", Id::TLD4S, Type::Texture, "TLD4S"), | 1976 | INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"), |
| 1977 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), | 1977 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), |
| 1978 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), | 1978 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), |
| 1979 | INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"), | 1979 | INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"), |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 270a9dc2b..de742d11c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -112,25 +112,25 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) { | |||
| 112 | } | 112 | } |
| 113 | 113 | ||
| 114 | /// Describes primitive behavior on geometry shaders | 114 | /// Describes primitive behavior on geometry shaders |
| 115 | constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { | 115 | constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { |
| 116 | switch (primitive_mode) { | 116 | switch (primitive_mode) { |
| 117 | case GL_POINTS: | 117 | case GL_POINTS: |
| 118 | return {"points", "Points", 1}; | 118 | return {"points", 1}; |
| 119 | case GL_LINES: | 119 | case GL_LINES: |
| 120 | case GL_LINE_STRIP: | 120 | case GL_LINE_STRIP: |
| 121 | return {"lines", "Lines", 2}; | 121 | return {"lines", 2}; |
| 122 | case GL_LINES_ADJACENCY: | 122 | case GL_LINES_ADJACENCY: |
| 123 | case GL_LINE_STRIP_ADJACENCY: | 123 | case GL_LINE_STRIP_ADJACENCY: |
| 124 | return {"lines_adjacency", "LinesAdj", 4}; | 124 | return {"lines_adjacency", 4}; |
| 125 | case GL_TRIANGLES: | 125 | case GL_TRIANGLES: |
| 126 | case GL_TRIANGLE_STRIP: | 126 | case GL_TRIANGLE_STRIP: |
| 127 | case GL_TRIANGLE_FAN: | 127 | case GL_TRIANGLE_FAN: |
| 128 | return {"triangles", "Triangles", 3}; | 128 | return {"triangles", 3}; |
| 129 | case GL_TRIANGLES_ADJACENCY: | 129 | case GL_TRIANGLES_ADJACENCY: |
| 130 | case GL_TRIANGLE_STRIP_ADJACENCY: | 130 | case GL_TRIANGLE_STRIP_ADJACENCY: |
| 131 | return {"triangles_adjacency", "TrianglesAdj", 6}; | 131 | return {"triangles_adjacency", 6}; |
| 132 | default: | 132 | default: |
| 133 | return {"points", "Invalid", 1}; | 133 | return {"points", 1}; |
| 134 | } | 134 | } |
| 135 | } | 135 | } |
| 136 | 136 | ||
| @@ -264,30 +264,25 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp | |||
| 264 | "#extension GL_NV_shader_thread_group : require\n" | 264 | "#extension GL_NV_shader_thread_group : require\n" |
| 265 | "#extension GL_NV_shader_thread_shuffle : require\n"; | 265 | "#extension GL_NV_shader_thread_shuffle : require\n"; |
| 266 | } | 266 | } |
| 267 | source += '\n'; | ||
| 268 | 267 | ||
| 269 | if (shader_type == ShaderType::Geometry) { | 268 | if (shader_type == ShaderType::Geometry) { |
| 270 | const auto [glsl_topology, debug_name, max_vertices] = | 269 | const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode); |
| 271 | GetPrimitiveDescription(variant.primitive_mode); | ||
| 272 | |||
| 273 | source += fmt::format("layout ({}) in;\n\n", glsl_topology); | ||
| 274 | source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices); | 270 | source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices); |
| 271 | source += fmt::format("layout ({}) in;\n", glsl_topology); | ||
| 275 | } | 272 | } |
| 276 | if (shader_type == ShaderType::Compute) { | 273 | if (shader_type == ShaderType::Compute) { |
| 274 | if (variant.local_memory_size > 0) { | ||
| 275 | source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n", | ||
| 276 | Common::AlignUp(variant.local_memory_size, 4) / 4); | ||
| 277 | } | ||
| 277 | source += | 278 | source += |
| 278 | fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n", | 279 | fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n", |
| 279 | variant.block_x, variant.block_y, variant.block_z); | 280 | variant.block_x, variant.block_y, variant.block_z); |
| 280 | 281 | ||
| 281 | if (variant.shared_memory_size > 0) { | 282 | if (variant.shared_memory_size > 0) { |
| 282 | // TODO(Rodrigo): We should divide by four here, but having a larger shared memory pool | 283 | // shared_memory_size is described in number of words |
| 283 | // avoids out of bound stores. Find out why shared memory size is being invalid. | ||
| 284 | source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size); | 284 | source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size); |
| 285 | } | 285 | } |
| 286 | |||
| 287 | if (variant.local_memory_size > 0) { | ||
| 288 | source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n", | ||
| 289 | Common::AlignUp(variant.local_memory_size, 4) / 4); | ||
| 290 | } | ||
| 291 | } | 286 | } |
| 292 | 287 | ||
| 293 | source += '\n'; | 288 | source += '\n'; |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d1ae4be6d..0389c2143 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -399,6 +399,7 @@ public: | |||
| 399 | DeclareConstantBuffers(); | 399 | DeclareConstantBuffers(); |
| 400 | DeclareGlobalMemory(); | 400 | DeclareGlobalMemory(); |
| 401 | DeclareSamplers(); | 401 | DeclareSamplers(); |
| 402 | DeclareImages(); | ||
| 402 | DeclarePhysicalAttributeReader(); | 403 | DeclarePhysicalAttributeReader(); |
| 403 | 404 | ||
| 404 | code.AddLine("void execute_{}() {{", suffix); | 405 | code.AddLine("void execute_{}() {{", suffix); |
diff --git a/src/video_core/renderer_vulkan/shaders/blit.frag b/src/video_core/renderer_vulkan/shaders/blit.frag new file mode 100644 index 000000000..a06ecd24a --- /dev/null +++ b/src/video_core/renderer_vulkan/shaders/blit.frag | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | ||
| 15 | |||
| 16 | layout (location = 0) in vec2 frag_tex_coord; | ||
| 17 | |||
| 18 | layout (location = 0) out vec4 color; | ||
| 19 | |||
| 20 | layout (binding = 1) uniform sampler2D color_texture; | ||
| 21 | |||
| 22 | void main() { | ||
| 23 | color = texture(color_texture, frag_tex_coord); | ||
| 24 | } | ||
diff --git a/src/video_core/renderer_vulkan/shaders/blit.vert b/src/video_core/renderer_vulkan/shaders/blit.vert new file mode 100644 index 000000000..c64d9235a --- /dev/null +++ b/src/video_core/renderer_vulkan/shaders/blit.vert | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | ||
| 15 | |||
| 16 | layout (location = 0) in vec2 vert_position; | ||
| 17 | layout (location = 1) in vec2 vert_tex_coord; | ||
| 18 | |||
| 19 | layout (location = 0) out vec2 frag_tex_coord; | ||
| 20 | |||
| 21 | layout (set = 0, binding = 0) uniform MatrixBlock { | ||
| 22 | mat4 modelview_matrix; | ||
| 23 | }; | ||
| 24 | |||
| 25 | void main() { | ||
| 26 | gl_Position = modelview_matrix * vec4(vert_position, 0.0, 1.0); | ||
| 27 | frag_tex_coord = vert_tex_coord; | ||
| 28 | } | ||
diff --git a/src/video_core/renderer_vulkan/shaders/quad_array.comp b/src/video_core/renderer_vulkan/shaders/quad_array.comp new file mode 100644 index 000000000..5a5703308 --- /dev/null +++ b/src/video_core/renderer_vulkan/shaders/quad_array.comp | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | ||
| 15 | |||
| 16 | layout (local_size_x = 1024) in; | ||
| 17 | |||
| 18 | layout (std430, set = 0, binding = 0) buffer OutputBuffer { | ||
| 19 | uint output_indexes[]; | ||
| 20 | }; | ||
| 21 | |||
| 22 | layout (push_constant) uniform PushConstants { | ||
| 23 | uint first; | ||
| 24 | }; | ||
| 25 | |||
| 26 | void main() { | ||
| 27 | uint primitive = gl_GlobalInvocationID.x; | ||
| 28 | if (primitive * 6 >= output_indexes.length()) { | ||
| 29 | return; | ||
| 30 | } | ||
| 31 | |||
| 32 | const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3); | ||
| 33 | for (uint vertex = 0; vertex < 6; ++vertex) { | ||
| 34 | uint index = first + primitive * 4 + quad_map[vertex]; | ||
| 35 | output_indexes[primitive * 6 + vertex] = index; | ||
| 36 | } | ||
| 37 | } | ||
diff --git a/src/video_core/renderer_vulkan/shaders/uint8.comp b/src/video_core/renderer_vulkan/shaders/uint8.comp new file mode 100644 index 000000000..a320f3ae0 --- /dev/null +++ b/src/video_core/renderer_vulkan/shaders/uint8.comp | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | ||
| 15 | #extension GL_EXT_shader_16bit_storage : require | ||
| 16 | #extension GL_EXT_shader_8bit_storage : require | ||
| 17 | |||
| 18 | layout (local_size_x = 1024) in; | ||
| 19 | |||
| 20 | layout (std430, set = 0, binding = 0) readonly buffer InputBuffer { | ||
| 21 | uint8_t input_indexes[]; | ||
| 22 | }; | ||
| 23 | |||
| 24 | layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { | ||
| 25 | uint16_t output_indexes[]; | ||
| 26 | }; | ||
| 27 | |||
| 28 | void main() { | ||
| 29 | uint id = gl_GlobalInvocationID.x; | ||
| 30 | if (id < input_indexes.length()) { | ||
| 31 | output_indexes[id] = uint16_t(input_indexes[id]); | ||
| 32 | } | ||
| 33 | } | ||
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 92854a4b3..939eebe83 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -3,12 +3,15 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <bitset> | 5 | #include <bitset> |
| 6 | #include <chrono> | ||
| 6 | #include <cstdlib> | 7 | #include <cstdlib> |
| 7 | #include <optional> | 8 | #include <optional> |
| 8 | #include <set> | 9 | #include <set> |
| 9 | #include <string_view> | 10 | #include <string_view> |
| 11 | #include <thread> | ||
| 10 | #include <vector> | 12 | #include <vector> |
| 11 | #include "common/assert.h" | 13 | #include "common/assert.h" |
| 14 | #include "core/settings.h" | ||
| 12 | #include "video_core/renderer_vulkan/declarations.h" | 15 | #include "video_core/renderer_vulkan/declarations.h" |
| 13 | #include "video_core/renderer_vulkan/vk_device.h" | 16 | #include "video_core/renderer_vulkan/vk_device.h" |
| 14 | 17 | ||
| @@ -201,6 +204,22 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, | |||
| 201 | return wanted_format; | 204 | return wanted_format; |
| 202 | } | 205 | } |
| 203 | 206 | ||
| 207 | void VKDevice::ReportLoss() const { | ||
| 208 | LOG_CRITICAL(Render_Vulkan, "Device loss occured!"); | ||
| 209 | |||
| 210 | // Wait some time to let the log flush | ||
| 211 | std::this_thread::sleep_for(std::chrono::seconds{1}); | ||
| 212 | |||
| 213 | if (!nv_device_diagnostic_checkpoints) { | ||
| 214 | return; | ||
| 215 | } | ||
| 216 | |||
| 217 | [[maybe_unused]] const std::vector data = graphics_queue.getCheckpointDataNV(dld); | ||
| 218 | // Catch here in debug builds (or with optimizations disabled) the last graphics pipeline to be | ||
| 219 | // executed. It can be done on a debugger by evaluating the expression: | ||
| 220 | // *(VKGraphicsPipeline*)data[0] | ||
| 221 | } | ||
| 222 | |||
| 204 | bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, | 223 | bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, |
| 205 | const vk::DispatchLoaderDynamic& dldi) const { | 224 | const vk::DispatchLoaderDynamic& dldi) const { |
| 206 | // Disable for now to avoid converting ASTC twice. | 225 | // Disable for now to avoid converting ASTC twice. |
| @@ -381,6 +400,8 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 381 | VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); | 400 | VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); |
| 382 | Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, | 401 | Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, |
| 383 | false); | 402 | false); |
| 403 | Test(extension, nv_device_diagnostic_checkpoints, | ||
| 404 | VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true); | ||
| 384 | } | 405 | } |
| 385 | 406 | ||
| 386 | if (khr_shader_float16_int8) { | 407 | if (khr_shader_float16_int8) { |
| @@ -464,6 +485,7 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con | |||
| 464 | std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( | 485 | std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( |
| 465 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { | 486 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { |
| 466 | static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, | 487 | static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, |
| 488 | vk::Format::eA8B8G8R8UintPack32, | ||
| 467 | vk::Format::eA8B8G8R8SnormPack32, | 489 | vk::Format::eA8B8G8R8SnormPack32, |
| 468 | vk::Format::eA8B8G8R8SrgbPack32, | 490 | vk::Format::eA8B8G8R8SrgbPack32, |
| 469 | vk::Format::eB5G6R5UnormPack16, | 491 | vk::Format::eB5G6R5UnormPack16, |
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index a844c52df..72603f9f6 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h | |||
| @@ -39,6 +39,9 @@ public: | |||
| 39 | vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, | 39 | vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, |
| 40 | FormatType format_type) const; | 40 | FormatType format_type) const; |
| 41 | 41 | ||
| 42 | /// Reports a device loss. | ||
| 43 | void ReportLoss() const; | ||
| 44 | |||
| 42 | /// Returns the dispatch loader with direct function pointers of the device. | 45 | /// Returns the dispatch loader with direct function pointers of the device. |
| 43 | const vk::DispatchLoaderDynamic& GetDispatchLoader() const { | 46 | const vk::DispatchLoaderDynamic& GetDispatchLoader() const { |
| 44 | return dld; | 47 | return dld; |
| @@ -159,6 +162,11 @@ public: | |||
| 159 | return ext_shader_viewport_index_layer; | 162 | return ext_shader_viewport_index_layer; |
| 160 | } | 163 | } |
| 161 | 164 | ||
| 165 | /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. | ||
| 166 | bool IsNvDeviceDiagnosticCheckpoints() const { | ||
| 167 | return nv_device_diagnostic_checkpoints; | ||
| 168 | } | ||
| 169 | |||
| 162 | /// Returns the vendor name reported from Vulkan. | 170 | /// Returns the vendor name reported from Vulkan. |
| 163 | std::string_view GetVendorName() const { | 171 | std::string_view GetVendorName() const { |
| 164 | return vendor_name; | 172 | return vendor_name; |
| @@ -218,6 +226,7 @@ private: | |||
| 218 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. | 226 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. |
| 219 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. | 227 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. |
| 220 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. | 228 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. |
| 229 | bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. | ||
| 221 | 230 | ||
| 222 | // Telemetry parameters | 231 | // Telemetry parameters |
| 223 | std::string vendor_name; ///< Device's driver name. | 232 | std::string vendor_name; ///< Device's driver name. |
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp index 13c46e5b8..525b4bb46 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp | |||
| @@ -72,12 +72,22 @@ VKFence::VKFence(const VKDevice& device, UniqueFence handle) | |||
| 72 | VKFence::~VKFence() = default; | 72 | VKFence::~VKFence() = default; |
| 73 | 73 | ||
| 74 | void VKFence::Wait() { | 74 | void VKFence::Wait() { |
| 75 | static constexpr u64 timeout = std::numeric_limits<u64>::max(); | ||
| 75 | const auto dev = device.GetLogical(); | 76 | const auto dev = device.GetLogical(); |
| 76 | const auto& dld = device.GetDispatchLoader(); | 77 | const auto& dld = device.GetDispatchLoader(); |
| 77 | dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld); | 78 | switch (const auto result = dev.waitForFences(1, &*handle, true, timeout, dld)) { |
| 79 | case vk::Result::eSuccess: | ||
| 80 | return; | ||
| 81 | case vk::Result::eErrorDeviceLost: | ||
| 82 | device.ReportLoss(); | ||
| 83 | [[fallthrough]]; | ||
| 84 | default: | ||
| 85 | vk::throwResultException(result, "vk::waitForFences"); | ||
| 86 | } | ||
| 78 | } | 87 | } |
| 79 | 88 | ||
| 80 | void VKFence::Release() { | 89 | void VKFence::Release() { |
| 90 | ASSERT(is_owned); | ||
| 81 | is_owned = false; | 91 | is_owned = false; |
| 82 | } | 92 | } |
| 83 | 93 | ||
| @@ -133,8 +143,32 @@ void VKFence::Unprotect(VKResource* resource) { | |||
| 133 | protected_resources.erase(it); | 143 | protected_resources.erase(it); |
| 134 | } | 144 | } |
| 135 | 145 | ||
| 146 | void VKFence::RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept { | ||
| 147 | std::replace(std::begin(protected_resources), std::end(protected_resources), old_resource, | ||
| 148 | new_resource); | ||
| 149 | } | ||
| 150 | |||
| 136 | VKFenceWatch::VKFenceWatch() = default; | 151 | VKFenceWatch::VKFenceWatch() = default; |
| 137 | 152 | ||
| 153 | VKFenceWatch::VKFenceWatch(VKFence& initial_fence) { | ||
| 154 | Watch(initial_fence); | ||
| 155 | } | ||
| 156 | |||
| 157 | VKFenceWatch::VKFenceWatch(VKFenceWatch&& rhs) noexcept { | ||
| 158 | fence = std::exchange(rhs.fence, nullptr); | ||
| 159 | if (fence) { | ||
| 160 | fence->RedirectProtection(&rhs, this); | ||
| 161 | } | ||
| 162 | } | ||
| 163 | |||
| 164 | VKFenceWatch& VKFenceWatch::operator=(VKFenceWatch&& rhs) noexcept { | ||
| 165 | fence = std::exchange(rhs.fence, nullptr); | ||
| 166 | if (fence) { | ||
| 167 | fence->RedirectProtection(&rhs, this); | ||
| 168 | } | ||
| 169 | return *this; | ||
| 170 | } | ||
| 171 | |||
| 138 | VKFenceWatch::~VKFenceWatch() { | 172 | VKFenceWatch::~VKFenceWatch() { |
| 139 | if (fence) { | 173 | if (fence) { |
| 140 | fence->Unprotect(this); | 174 | fence->Unprotect(this); |
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h index 08ee86fa6..d4cbc95a5 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.h +++ b/src/video_core/renderer_vulkan/vk_resource_manager.h | |||
| @@ -65,6 +65,9 @@ public: | |||
| 65 | /// Removes protection for a resource. | 65 | /// Removes protection for a resource. |
| 66 | void Unprotect(VKResource* resource); | 66 | void Unprotect(VKResource* resource); |
| 67 | 67 | ||
| 68 | /// Redirects one protected resource to a new address. | ||
| 69 | void RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept; | ||
| 70 | |||
| 68 | /// Retreives the fence. | 71 | /// Retreives the fence. |
| 69 | operator vk::Fence() const { | 72 | operator vk::Fence() const { |
| 70 | return *handle; | 73 | return *handle; |
| @@ -97,8 +100,13 @@ private: | |||
| 97 | class VKFenceWatch final : public VKResource { | 100 | class VKFenceWatch final : public VKResource { |
| 98 | public: | 101 | public: |
| 99 | explicit VKFenceWatch(); | 102 | explicit VKFenceWatch(); |
| 103 | VKFenceWatch(VKFence& initial_fence); | ||
| 104 | VKFenceWatch(VKFenceWatch&&) noexcept; | ||
| 105 | VKFenceWatch(const VKFenceWatch&) = delete; | ||
| 100 | ~VKFenceWatch() override; | 106 | ~VKFenceWatch() override; |
| 101 | 107 | ||
| 108 | VKFenceWatch& operator=(VKFenceWatch&&) noexcept; | ||
| 109 | |||
| 102 | /// Waits for the fence to be released. | 110 | /// Waits for the fence to be released. |
| 103 | void Wait(); | 111 | void Wait(); |
| 104 | 112 | ||
| @@ -116,6 +124,14 @@ public: | |||
| 116 | 124 | ||
| 117 | void OnFenceRemoval(VKFence* signaling_fence) override; | 125 | void OnFenceRemoval(VKFence* signaling_fence) override; |
| 118 | 126 | ||
| 127 | /** | ||
| 128 | * Do not use it paired with Watch. Use TryWatch instead. | ||
| 129 | * Returns true when the watch is free. | ||
| 130 | */ | ||
| 131 | bool IsUsed() const { | ||
| 132 | return fence != nullptr; | ||
| 133 | } | ||
| 134 | |||
| 119 | private: | 135 | private: |
| 120 | VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free. | 136 | VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free. |
| 121 | }; | 137 | }; |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 0f8116458..d66133ad1 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -3,7 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/microprofile.h" |
| 7 | #include "video_core/renderer_vulkan/declarations.h" | 7 | #include "video_core/renderer_vulkan/declarations.h" |
| 8 | #include "video_core/renderer_vulkan/vk_device.h" | 8 | #include "video_core/renderer_vulkan/vk_device.h" |
| 9 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 9 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| @@ -11,46 +11,172 @@ | |||
| 11 | 11 | ||
| 12 | namespace Vulkan { | 12 | namespace Vulkan { |
| 13 | 13 | ||
| 14 | MICROPROFILE_DECLARE(Vulkan_WaitForWorker); | ||
| 15 | |||
| 16 | void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf, | ||
| 17 | const vk::DispatchLoaderDynamic& dld) { | ||
| 18 | auto command = first; | ||
| 19 | while (command != nullptr) { | ||
| 20 | auto next = command->GetNext(); | ||
| 21 | command->Execute(cmdbuf, dld); | ||
| 22 | command->~Command(); | ||
| 23 | command = next; | ||
| 24 | } | ||
| 25 | |||
| 26 | command_offset = 0; | ||
| 27 | first = nullptr; | ||
| 28 | last = nullptr; | ||
| 29 | } | ||
| 30 | |||
| 14 | VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager) | 31 | VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager) |
| 15 | : device{device}, resource_manager{resource_manager} { | 32 | : device{device}, resource_manager{resource_manager}, next_fence{ |
| 16 | next_fence = &resource_manager.CommitFence(); | 33 | &resource_manager.CommitFence()} { |
| 34 | AcquireNewChunk(); | ||
| 17 | AllocateNewContext(); | 35 | AllocateNewContext(); |
| 36 | worker_thread = std::thread(&VKScheduler::WorkerThread, this); | ||
| 18 | } | 37 | } |
| 19 | 38 | ||
| 20 | VKScheduler::~VKScheduler() = default; | 39 | VKScheduler::~VKScheduler() { |
| 40 | quit = true; | ||
| 41 | cv.notify_all(); | ||
| 42 | worker_thread.join(); | ||
| 43 | } | ||
| 21 | 44 | ||
| 22 | void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) { | 45 | void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) { |
| 23 | SubmitExecution(semaphore); | 46 | SubmitExecution(semaphore); |
| 24 | if (release_fence) | 47 | if (release_fence) { |
| 25 | current_fence->Release(); | 48 | current_fence->Release(); |
| 49 | } | ||
| 26 | AllocateNewContext(); | 50 | AllocateNewContext(); |
| 27 | } | 51 | } |
| 28 | 52 | ||
| 29 | void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) { | 53 | void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) { |
| 30 | SubmitExecution(semaphore); | 54 | SubmitExecution(semaphore); |
| 31 | current_fence->Wait(); | 55 | current_fence->Wait(); |
| 32 | if (release_fence) | 56 | if (release_fence) { |
| 33 | current_fence->Release(); | 57 | current_fence->Release(); |
| 58 | } | ||
| 34 | AllocateNewContext(); | 59 | AllocateNewContext(); |
| 35 | } | 60 | } |
| 36 | 61 | ||
| 62 | void VKScheduler::WaitWorker() { | ||
| 63 | MICROPROFILE_SCOPE(Vulkan_WaitForWorker); | ||
| 64 | DispatchWork(); | ||
| 65 | |||
| 66 | bool finished = false; | ||
| 67 | do { | ||
| 68 | cv.notify_all(); | ||
| 69 | std::unique_lock lock{mutex}; | ||
| 70 | finished = chunk_queue.Empty(); | ||
| 71 | } while (!finished); | ||
| 72 | } | ||
| 73 | |||
| 74 | void VKScheduler::DispatchWork() { | ||
| 75 | if (chunk->Empty()) { | ||
| 76 | return; | ||
| 77 | } | ||
| 78 | chunk_queue.Push(std::move(chunk)); | ||
| 79 | cv.notify_all(); | ||
| 80 | AcquireNewChunk(); | ||
| 81 | } | ||
| 82 | |||
| 83 | void VKScheduler::RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi) { | ||
| 84 | if (state.renderpass && renderpass_bi == *state.renderpass) { | ||
| 85 | return; | ||
| 86 | } | ||
| 87 | const bool end_renderpass = state.renderpass.has_value(); | ||
| 88 | state.renderpass = renderpass_bi; | ||
| 89 | Record([renderpass_bi, end_renderpass](auto cmdbuf, auto& dld) { | ||
| 90 | if (end_renderpass) { | ||
| 91 | cmdbuf.endRenderPass(dld); | ||
| 92 | } | ||
| 93 | cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld); | ||
| 94 | }); | ||
| 95 | } | ||
| 96 | |||
| 97 | void VKScheduler::RequestOutsideRenderPassOperationContext() { | ||
| 98 | EndRenderPass(); | ||
| 99 | } | ||
| 100 | |||
| 101 | void VKScheduler::BindGraphicsPipeline(vk::Pipeline pipeline) { | ||
| 102 | if (state.graphics_pipeline == pipeline) { | ||
| 103 | return; | ||
| 104 | } | ||
| 105 | state.graphics_pipeline = pipeline; | ||
| 106 | Record([pipeline](auto cmdbuf, auto& dld) { | ||
| 107 | cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld); | ||
| 108 | }); | ||
| 109 | } | ||
| 110 | |||
| 111 | void VKScheduler::WorkerThread() { | ||
| 112 | std::unique_lock lock{mutex}; | ||
| 113 | do { | ||
| 114 | cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); | ||
| 115 | if (quit) { | ||
| 116 | continue; | ||
| 117 | } | ||
| 118 | auto extracted_chunk = std::move(chunk_queue.Front()); | ||
| 119 | chunk_queue.Pop(); | ||
| 120 | extracted_chunk->ExecuteAll(current_cmdbuf, device.GetDispatchLoader()); | ||
| 121 | chunk_reserve.Push(std::move(extracted_chunk)); | ||
| 122 | } while (!quit); | ||
| 123 | } | ||
| 124 | |||
| 37 | void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { | 125 | void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { |
| 126 | EndPendingOperations(); | ||
| 127 | InvalidateState(); | ||
| 128 | WaitWorker(); | ||
| 129 | |||
| 130 | std::unique_lock lock{mutex}; | ||
| 131 | |||
| 132 | const auto queue = device.GetGraphicsQueue(); | ||
| 38 | const auto& dld = device.GetDispatchLoader(); | 133 | const auto& dld = device.GetDispatchLoader(); |
| 39 | current_cmdbuf.end(dld); | 134 | current_cmdbuf.end(dld); |
| 40 | 135 | ||
| 41 | const auto queue = device.GetGraphicsQueue(); | 136 | const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, ¤t_cmdbuf, semaphore ? 1U : 0U, |
| 42 | const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, ¤t_cmdbuf, semaphore ? 1u : 0u, | ||
| 43 | &semaphore); | 137 | &semaphore); |
| 44 | queue.submit({submit_info}, *current_fence, dld); | 138 | queue.submit({submit_info}, static_cast<vk::Fence>(*current_fence), dld); |
| 45 | } | 139 | } |
| 46 | 140 | ||
| 47 | void VKScheduler::AllocateNewContext() { | 141 | void VKScheduler::AllocateNewContext() { |
| 142 | std::unique_lock lock{mutex}; | ||
| 48 | current_fence = next_fence; | 143 | current_fence = next_fence; |
| 49 | current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); | ||
| 50 | next_fence = &resource_manager.CommitFence(); | 144 | next_fence = &resource_manager.CommitFence(); |
| 51 | 145 | ||
| 52 | const auto& dld = device.GetDispatchLoader(); | 146 | current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); |
| 53 | current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld); | 147 | current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, |
| 148 | device.GetDispatchLoader()); | ||
| 149 | } | ||
| 150 | |||
| 151 | void VKScheduler::InvalidateState() { | ||
| 152 | state.graphics_pipeline = nullptr; | ||
| 153 | state.viewports = false; | ||
| 154 | state.scissors = false; | ||
| 155 | state.depth_bias = false; | ||
| 156 | state.blend_constants = false; | ||
| 157 | state.depth_bounds = false; | ||
| 158 | state.stencil_values = false; | ||
| 159 | } | ||
| 160 | |||
| 161 | void VKScheduler::EndPendingOperations() { | ||
| 162 | EndRenderPass(); | ||
| 163 | } | ||
| 164 | |||
| 165 | void VKScheduler::EndRenderPass() { | ||
| 166 | if (!state.renderpass) { | ||
| 167 | return; | ||
| 168 | } | ||
| 169 | state.renderpass = std::nullopt; | ||
| 170 | Record([](auto cmdbuf, auto& dld) { cmdbuf.endRenderPass(dld); }); | ||
| 171 | } | ||
| 172 | |||
| 173 | void VKScheduler::AcquireNewChunk() { | ||
| 174 | if (chunk_reserve.Empty()) { | ||
| 175 | chunk = std::make_unique<CommandChunk>(); | ||
| 176 | return; | ||
| 177 | } | ||
| 178 | chunk = std::move(chunk_reserve.Front()); | ||
| 179 | chunk_reserve.Pop(); | ||
| 54 | } | 180 | } |
| 55 | 181 | ||
| 56 | } // namespace Vulkan | 182 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 0e5b49c7f..bcdffbba0 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -4,7 +4,14 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <condition_variable> | ||
| 8 | #include <memory> | ||
| 9 | #include <optional> | ||
| 10 | #include <stack> | ||
| 11 | #include <thread> | ||
| 12 | #include <utility> | ||
| 7 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "common/threadsafe_queue.h" | ||
| 8 | #include "video_core/renderer_vulkan/declarations.h" | 15 | #include "video_core/renderer_vulkan/declarations.h" |
| 9 | 16 | ||
| 10 | namespace Vulkan { | 17 | namespace Vulkan { |
| @@ -30,56 +37,197 @@ private: | |||
| 30 | VKFence* const& fence; | 37 | VKFence* const& fence; |
| 31 | }; | 38 | }; |
| 32 | 39 | ||
| 33 | class VKCommandBufferView { | 40 | /// The scheduler abstracts command buffer and fence management with an interface that's able to do |
| 41 | /// OpenGL-like operations on Vulkan command buffers. | ||
| 42 | class VKScheduler { | ||
| 34 | public: | 43 | public: |
| 35 | VKCommandBufferView() = default; | 44 | explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); |
| 36 | VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {} | 45 | ~VKScheduler(); |
| 46 | |||
| 47 | /// Sends the current execution context to the GPU. | ||
| 48 | void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr); | ||
| 49 | |||
| 50 | /// Sends the current execution context to the GPU and waits for it to complete. | ||
| 51 | void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr); | ||
| 52 | |||
| 53 | /// Waits for the worker thread to finish executing everything. After this function returns it's | ||
| 54 | /// safe to touch worker resources. | ||
| 55 | void WaitWorker(); | ||
| 56 | |||
| 57 | /// Sends currently recorded work to the worker thread. | ||
| 58 | void DispatchWork(); | ||
| 59 | |||
| 60 | /// Requests to begin a renderpass. | ||
| 61 | void RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi); | ||
| 62 | |||
| 63 | /// Requests the current executino context to be able to execute operations only allowed outside | ||
| 64 | /// of a renderpass. | ||
| 65 | void RequestOutsideRenderPassOperationContext(); | ||
| 66 | |||
| 67 | /// Binds a pipeline to the current execution context. | ||
| 68 | void BindGraphicsPipeline(vk::Pipeline pipeline); | ||
| 37 | 69 | ||
| 38 | const vk::CommandBuffer* operator->() const noexcept { | 70 | /// Returns true when viewports have been set in the current command buffer. |
| 39 | return &cmdbuf; | 71 | bool TouchViewports() { |
| 72 | return std::exchange(state.viewports, true); | ||
| 40 | } | 73 | } |
| 41 | 74 | ||
| 42 | operator vk::CommandBuffer() const noexcept { | 75 | /// Returns true when scissors have been set in the current command buffer. |
| 43 | return cmdbuf; | 76 | bool TouchScissors() { |
| 77 | return std::exchange(state.scissors, true); | ||
| 44 | } | 78 | } |
| 45 | 79 | ||
| 46 | private: | 80 | /// Returns true when depth bias have been set in the current command buffer. |
| 47 | const vk::CommandBuffer& cmdbuf; | 81 | bool TouchDepthBias() { |
| 48 | }; | 82 | return std::exchange(state.depth_bias, true); |
| 83 | } | ||
| 49 | 84 | ||
| 50 | /// The scheduler abstracts command buffer and fence management with an interface that's able to do | 85 | /// Returns true when blend constants have been set in the current command buffer. |
| 51 | /// OpenGL-like operations on Vulkan command buffers. | 86 | bool TouchBlendConstants() { |
| 52 | class VKScheduler { | 87 | return std::exchange(state.blend_constants, true); |
| 53 | public: | 88 | } |
| 54 | explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); | 89 | |
| 55 | ~VKScheduler(); | 90 | /// Returns true when depth bounds have been set in the current command buffer. |
| 91 | bool TouchDepthBounds() { | ||
| 92 | return std::exchange(state.depth_bounds, true); | ||
| 93 | } | ||
| 94 | |||
| 95 | /// Returns true when stencil values have been set in the current command buffer. | ||
| 96 | bool TouchStencilValues() { | ||
| 97 | return std::exchange(state.stencil_values, true); | ||
| 98 | } | ||
| 99 | |||
| 100 | /// Send work to a separate thread. | ||
| 101 | template <typename T> | ||
| 102 | void Record(T&& command) { | ||
| 103 | if (chunk->Record(command)) { | ||
| 104 | return; | ||
| 105 | } | ||
| 106 | DispatchWork(); | ||
| 107 | (void)chunk->Record(command); | ||
| 108 | } | ||
| 56 | 109 | ||
| 57 | /// Gets a reference to the current fence. | 110 | /// Gets a reference to the current fence. |
| 58 | VKFenceView GetFence() const { | 111 | VKFenceView GetFence() const { |
| 59 | return current_fence; | 112 | return current_fence; |
| 60 | } | 113 | } |
| 61 | 114 | ||
| 62 | /// Gets a reference to the current command buffer. | 115 | private: |
| 63 | VKCommandBufferView GetCommandBuffer() const { | 116 | class Command { |
| 64 | return current_cmdbuf; | 117 | public: |
| 65 | } | 118 | virtual ~Command() = default; |
| 66 | 119 | ||
| 67 | /// Sends the current execution context to the GPU. | 120 | virtual void Execute(vk::CommandBuffer cmdbuf, |
| 68 | void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr); | 121 | const vk::DispatchLoaderDynamic& dld) const = 0; |
| 69 | 122 | ||
| 70 | /// Sends the current execution context to the GPU and waits for it to complete. | 123 | Command* GetNext() const { |
| 71 | void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr); | 124 | return next; |
| 125 | } | ||
| 126 | |||
| 127 | void SetNext(Command* next_) { | ||
| 128 | next = next_; | ||
| 129 | } | ||
| 130 | |||
| 131 | private: | ||
| 132 | Command* next = nullptr; | ||
| 133 | }; | ||
| 134 | |||
| 135 | template <typename T> | ||
| 136 | class TypedCommand final : public Command { | ||
| 137 | public: | ||
| 138 | explicit TypedCommand(T&& command) : command{std::move(command)} {} | ||
| 139 | ~TypedCommand() override = default; | ||
| 140 | |||
| 141 | TypedCommand(TypedCommand&&) = delete; | ||
| 142 | TypedCommand& operator=(TypedCommand&&) = delete; | ||
| 143 | |||
| 144 | void Execute(vk::CommandBuffer cmdbuf, | ||
| 145 | const vk::DispatchLoaderDynamic& dld) const override { | ||
| 146 | command(cmdbuf, dld); | ||
| 147 | } | ||
| 148 | |||
| 149 | private: | ||
| 150 | T command; | ||
| 151 | }; | ||
| 152 | |||
| 153 | class CommandChunk final { | ||
| 154 | public: | ||
| 155 | void ExecuteAll(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld); | ||
| 156 | |||
| 157 | template <typename T> | ||
| 158 | bool Record(T& command) { | ||
| 159 | using FuncType = TypedCommand<T>; | ||
| 160 | static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large"); | ||
| 161 | |||
| 162 | if (command_offset > sizeof(data) - sizeof(FuncType)) { | ||
| 163 | return false; | ||
| 164 | } | ||
| 165 | |||
| 166 | Command* current_last = last; | ||
| 167 | |||
| 168 | last = new (data.data() + command_offset) FuncType(std::move(command)); | ||
| 169 | |||
| 170 | if (current_last) { | ||
| 171 | current_last->SetNext(last); | ||
| 172 | } else { | ||
| 173 | first = last; | ||
| 174 | } | ||
| 175 | |||
| 176 | command_offset += sizeof(FuncType); | ||
| 177 | return true; | ||
| 178 | } | ||
| 179 | |||
| 180 | bool Empty() const { | ||
| 181 | return command_offset == 0; | ||
| 182 | } | ||
| 183 | |||
| 184 | private: | ||
| 185 | Command* first = nullptr; | ||
| 186 | Command* last = nullptr; | ||
| 187 | |||
| 188 | std::size_t command_offset = 0; | ||
| 189 | std::array<u8, 0x8000> data{}; | ||
| 190 | }; | ||
| 191 | |||
| 192 | void WorkerThread(); | ||
| 72 | 193 | ||
| 73 | private: | ||
| 74 | void SubmitExecution(vk::Semaphore semaphore); | 194 | void SubmitExecution(vk::Semaphore semaphore); |
| 75 | 195 | ||
| 76 | void AllocateNewContext(); | 196 | void AllocateNewContext(); |
| 77 | 197 | ||
| 198 | void InvalidateState(); | ||
| 199 | |||
| 200 | void EndPendingOperations(); | ||
| 201 | |||
| 202 | void EndRenderPass(); | ||
| 203 | |||
| 204 | void AcquireNewChunk(); | ||
| 205 | |||
| 78 | const VKDevice& device; | 206 | const VKDevice& device; |
| 79 | VKResourceManager& resource_manager; | 207 | VKResourceManager& resource_manager; |
| 80 | vk::CommandBuffer current_cmdbuf; | 208 | vk::CommandBuffer current_cmdbuf; |
| 81 | VKFence* current_fence = nullptr; | 209 | VKFence* current_fence = nullptr; |
| 82 | VKFence* next_fence = nullptr; | 210 | VKFence* next_fence = nullptr; |
| 211 | |||
| 212 | struct State { | ||
| 213 | std::optional<vk::RenderPassBeginInfo> renderpass; | ||
| 214 | vk::Pipeline graphics_pipeline; | ||
| 215 | bool viewports = false; | ||
| 216 | bool scissors = false; | ||
| 217 | bool depth_bias = false; | ||
| 218 | bool blend_constants = false; | ||
| 219 | bool depth_bounds = false; | ||
| 220 | bool stencil_values = false; | ||
| 221 | } state; | ||
| 222 | |||
| 223 | std::unique_ptr<CommandChunk> chunk; | ||
| 224 | std::thread worker_thread; | ||
| 225 | |||
| 226 | Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; | ||
| 227 | Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; | ||
| 228 | std::mutex mutex; | ||
| 229 | std::condition_variable cv; | ||
| 230 | bool quit = false; | ||
| 83 | }; | 231 | }; |
| 84 | 232 | ||
| 85 | } // namespace Vulkan | 233 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 6227bc70b..a8baf91de 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -543,7 +543,7 @@ private: | |||
| 543 | } | 543 | } |
| 544 | 544 | ||
| 545 | for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) { | 545 | for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) { |
| 546 | if (!IsRenderTargetUsed(rt)) { | 546 | if (!specialization.enabled_rendertargets[rt]) { |
| 547 | continue; | 547 | continue; |
| 548 | } | 548 | } |
| 549 | 549 | ||
| @@ -1555,40 +1555,48 @@ private: | |||
| 1555 | 1555 | ||
| 1556 | Expression Texture(Operation operation) { | 1556 | Expression Texture(Operation operation) { |
| 1557 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | 1557 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); |
| 1558 | UNIMPLEMENTED_IF(!meta.aoffi.empty()); | ||
| 1559 | 1558 | ||
| 1560 | const bool can_implicit = stage == ShaderType::Fragment; | 1559 | const bool can_implicit = stage == ShaderType::Fragment; |
| 1561 | const Id sampler = GetTextureSampler(operation); | 1560 | const Id sampler = GetTextureSampler(operation); |
| 1562 | const Id coords = GetCoordinates(operation, Type::Float); | 1561 | const Id coords = GetCoordinates(operation, Type::Float); |
| 1563 | 1562 | ||
| 1563 | std::vector<Id> operands; | ||
| 1564 | spv::ImageOperandsMask mask{}; | ||
| 1565 | if (meta.bias) { | ||
| 1566 | mask = mask | spv::ImageOperandsMask::Bias; | ||
| 1567 | operands.push_back(AsFloat(Visit(meta.bias))); | ||
| 1568 | } | ||
| 1569 | |||
| 1570 | if (!can_implicit) { | ||
| 1571 | mask = mask | spv::ImageOperandsMask::Lod; | ||
| 1572 | operands.push_back(v_float_zero); | ||
| 1573 | } | ||
| 1574 | |||
| 1575 | if (!meta.aoffi.empty()) { | ||
| 1576 | mask = mask | spv::ImageOperandsMask::Offset; | ||
| 1577 | operands.push_back(GetOffsetCoordinates(operation)); | ||
| 1578 | } | ||
| 1579 | |||
| 1564 | if (meta.depth_compare) { | 1580 | if (meta.depth_compare) { |
| 1565 | // Depth sampling | 1581 | // Depth sampling |
| 1566 | UNIMPLEMENTED_IF(meta.bias); | 1582 | UNIMPLEMENTED_IF(meta.bias); |
| 1567 | const Id dref = AsFloat(Visit(meta.depth_compare)); | 1583 | const Id dref = AsFloat(Visit(meta.depth_compare)); |
| 1568 | if (can_implicit) { | 1584 | if (can_implicit) { |
| 1569 | return {OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, {}), | 1585 | return { |
| 1570 | Type::Float}; | 1586 | OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands), |
| 1587 | Type::Float}; | ||
| 1571 | } else { | 1588 | } else { |
| 1572 | return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, | 1589 | return { |
| 1573 | spv::ImageOperandsMask::Lod, v_float_zero), | 1590 | OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), |
| 1574 | Type::Float}; | 1591 | Type::Float}; |
| 1575 | } | 1592 | } |
| 1576 | } | 1593 | } |
| 1577 | 1594 | ||
| 1578 | std::vector<Id> operands; | ||
| 1579 | spv::ImageOperandsMask mask{}; | ||
| 1580 | if (meta.bias) { | ||
| 1581 | mask = mask | spv::ImageOperandsMask::Bias; | ||
| 1582 | operands.push_back(AsFloat(Visit(meta.bias))); | ||
| 1583 | } | ||
| 1584 | |||
| 1585 | Id texture; | 1595 | Id texture; |
| 1586 | if (can_implicit) { | 1596 | if (can_implicit) { |
| 1587 | texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands); | 1597 | texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands); |
| 1588 | } else { | 1598 | } else { |
| 1589 | texture = OpImageSampleExplicitLod(t_float4, sampler, coords, | 1599 | texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); |
| 1590 | mask | spv::ImageOperandsMask::Lod, v_float_zero, | ||
| 1591 | operands); | ||
| 1592 | } | 1600 | } |
| 1593 | return GetTextureElement(operation, texture, Type::Float); | 1601 | return GetTextureElement(operation, texture, Type::Float); |
| 1594 | } | 1602 | } |
| @@ -1601,7 +1609,8 @@ private: | |||
| 1601 | const Id lod = AsFloat(Visit(meta.lod)); | 1609 | const Id lod = AsFloat(Visit(meta.lod)); |
| 1602 | 1610 | ||
| 1603 | spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod; | 1611 | spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod; |
| 1604 | std::vector<Id> operands; | 1612 | std::vector<Id> operands{lod}; |
| 1613 | |||
| 1605 | if (!meta.aoffi.empty()) { | 1614 | if (!meta.aoffi.empty()) { |
| 1606 | mask = mask | spv::ImageOperandsMask::Offset; | 1615 | mask = mask | spv::ImageOperandsMask::Offset; |
| 1607 | operands.push_back(GetOffsetCoordinates(operation)); | 1616 | operands.push_back(GetOffsetCoordinates(operation)); |
| @@ -1609,11 +1618,10 @@ private: | |||
| 1609 | 1618 | ||
| 1610 | if (meta.sampler.IsShadow()) { | 1619 | if (meta.sampler.IsShadow()) { |
| 1611 | const Id dref = AsFloat(Visit(meta.depth_compare)); | 1620 | const Id dref = AsFloat(Visit(meta.depth_compare)); |
| 1612 | return { | 1621 | return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), |
| 1613 | OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, lod, operands), | 1622 | Type::Float}; |
| 1614 | Type::Float}; | ||
| 1615 | } | 1623 | } |
| 1616 | const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, lod, operands); | 1624 | const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); |
| 1617 | return GetTextureElement(operation, texture, Type::Float); | 1625 | return GetTextureElement(operation, texture, Type::Float); |
| 1618 | } | 1626 | } |
| 1619 | 1627 | ||
| @@ -1722,7 +1730,7 @@ private: | |||
| 1722 | const std::vector grad = {dx, dy}; | 1730 | const std::vector grad = {dx, dy}; |
| 1723 | 1731 | ||
| 1724 | static constexpr auto mask = spv::ImageOperandsMask::Grad; | 1732 | static constexpr auto mask = spv::ImageOperandsMask::Grad; |
| 1725 | const Id texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, grad); | 1733 | const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad); |
| 1726 | return GetTextureElement(operation, texture, Type::Float); | 1734 | return GetTextureElement(operation, texture, Type::Float); |
| 1727 | } | 1735 | } |
| 1728 | 1736 | ||
| @@ -1833,7 +1841,7 @@ private: | |||
| 1833 | } | 1841 | } |
| 1834 | 1842 | ||
| 1835 | void PreExit() { | 1843 | void PreExit() { |
| 1836 | if (stage == ShaderType::Vertex) { | 1844 | if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) { |
| 1837 | const u32 position_index = out_indices.position.value(); | 1845 | const u32 position_index = out_indices.position.value(); |
| 1838 | const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U); | 1846 | const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U); |
| 1839 | const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U); | 1847 | const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U); |
| @@ -1860,12 +1868,18 @@ private: | |||
| 1860 | // rendertargets/components are skipped in the register assignment. | 1868 | // rendertargets/components are skipped in the register assignment. |
| 1861 | u32 current_reg = 0; | 1869 | u32 current_reg = 0; |
| 1862 | for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | 1870 | for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { |
| 1871 | if (!specialization.enabled_rendertargets[rt]) { | ||
| 1872 | // Skip rendertargets that are not enabled | ||
| 1873 | continue; | ||
| 1874 | } | ||
| 1863 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. | 1875 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. |
| 1864 | for (u32 component = 0; component < 4; ++component) { | 1876 | for (u32 component = 0; component < 4; ++component) { |
| 1877 | const Id pointer = AccessElement(t_out_float, frag_colors.at(rt), component); | ||
| 1865 | if (header.ps.IsColorComponentOutputEnabled(rt, component)) { | 1878 | if (header.ps.IsColorComponentOutputEnabled(rt, component)) { |
| 1866 | OpStore(AccessElement(t_out_float, frag_colors.at(rt), component), | 1879 | OpStore(pointer, SafeGetRegister(current_reg)); |
| 1867 | SafeGetRegister(current_reg)); | ||
| 1868 | ++current_reg; | 1880 | ++current_reg; |
| 1881 | } else { | ||
| 1882 | OpStore(pointer, component == 3 ? v_float_one : v_float_zero); | ||
| 1869 | } | 1883 | } |
| 1870 | } | 1884 | } |
| 1871 | } | 1885 | } |
| @@ -1995,15 +2009,6 @@ private: | |||
| 1995 | return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name)); | 2009 | return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name)); |
| 1996 | } | 2010 | } |
| 1997 | 2011 | ||
| 1998 | bool IsRenderTargetUsed(u32 rt) const { | ||
| 1999 | for (u32 component = 0; component < 4; ++component) { | ||
| 2000 | if (header.ps.IsColorComponentOutputEnabled(rt, component)) { | ||
| 2001 | return true; | ||
| 2002 | } | ||
| 2003 | } | ||
| 2004 | return false; | ||
| 2005 | } | ||
| 2006 | |||
| 2007 | template <typename... Args> | 2012 | template <typename... Args> |
| 2008 | Id AccessElement(Id pointer_type, Id composite, Args... elements_) { | 2013 | Id AccessElement(Id pointer_type, Id composite, Args... elements_) { |
| 2009 | std::vector<Id> members; | 2014 | std::vector<Id> members; |
| @@ -2552,29 +2557,7 @@ public: | |||
| 2552 | } | 2557 | } |
| 2553 | 2558 | ||
| 2554 | Id operator()(const ExprCondCode& expr) { | 2559 | Id operator()(const ExprCondCode& expr) { |
| 2555 | const Node cc = decomp.ir.GetConditionCode(expr.cc); | 2560 | return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc))); |
| 2556 | Id target; | ||
| 2557 | |||
| 2558 | if (const auto pred = std::get_if<PredicateNode>(&*cc)) { | ||
| 2559 | const auto index = pred->GetIndex(); | ||
| 2560 | switch (index) { | ||
| 2561 | case Tegra::Shader::Pred::NeverExecute: | ||
| 2562 | target = decomp.v_false; | ||
| 2563 | break; | ||
| 2564 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 2565 | target = decomp.v_true; | ||
| 2566 | break; | ||
| 2567 | default: | ||
| 2568 | target = decomp.predicates.at(index); | ||
| 2569 | break; | ||
| 2570 | } | ||
| 2571 | } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) { | ||
| 2572 | target = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag())); | ||
| 2573 | } else { | ||
| 2574 | UNREACHABLE(); | ||
| 2575 | } | ||
| 2576 | |||
| 2577 | return decomp.OpLoad(decomp.t_bool, target); | ||
| 2578 | } | 2561 | } |
| 2579 | 2562 | ||
| 2580 | Id operator()(const ExprVar& expr) { | 2563 | Id operator()(const ExprVar& expr) { |
| @@ -2589,7 +2572,7 @@ public: | |||
| 2589 | const Id target = decomp.Constant(decomp.t_uint, expr.value); | 2572 | const Id target = decomp.Constant(decomp.t_uint, expr.value); |
| 2590 | Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)); | 2573 | Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)); |
| 2591 | gpr = decomp.OpBitcast(decomp.t_uint, gpr); | 2574 | gpr = decomp.OpBitcast(decomp.t_uint, gpr); |
| 2592 | return decomp.OpLogicalEqual(decomp.t_uint, gpr, target); | 2575 | return decomp.OpIEqual(decomp.t_bool, gpr, target); |
| 2593 | } | 2576 | } |
| 2594 | 2577 | ||
| 2595 | Id Visit(const Expr& node) { | 2578 | Id Visit(const Expr& node) { |
| @@ -2659,11 +2642,11 @@ public: | |||
| 2659 | const Id loop_label = decomp.OpLabel(); | 2642 | const Id loop_label = decomp.OpLabel(); |
| 2660 | const Id endloop_label = decomp.OpLabel(); | 2643 | const Id endloop_label = decomp.OpLabel(); |
| 2661 | const Id loop_start_block = decomp.OpLabel(); | 2644 | const Id loop_start_block = decomp.OpLabel(); |
| 2662 | const Id loop_end_block = decomp.OpLabel(); | 2645 | const Id loop_continue_block = decomp.OpLabel(); |
| 2663 | current_loop_exit = endloop_label; | 2646 | current_loop_exit = endloop_label; |
| 2664 | decomp.OpBranch(loop_label); | 2647 | decomp.OpBranch(loop_label); |
| 2665 | decomp.AddLabel(loop_label); | 2648 | decomp.AddLabel(loop_label); |
| 2666 | decomp.OpLoopMerge(endloop_label, loop_end_block, spv::LoopControlMask::MaskNone); | 2649 | decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone); |
| 2667 | decomp.OpBranch(loop_start_block); | 2650 | decomp.OpBranch(loop_start_block); |
| 2668 | decomp.AddLabel(loop_start_block); | 2651 | decomp.AddLabel(loop_start_block); |
| 2669 | ASTNode current = ast.nodes.GetFirst(); | 2652 | ASTNode current = ast.nodes.GetFirst(); |
| @@ -2671,6 +2654,8 @@ public: | |||
| 2671 | Visit(current); | 2654 | Visit(current); |
| 2672 | current = current->GetNext(); | 2655 | current = current->GetNext(); |
| 2673 | } | 2656 | } |
| 2657 | decomp.OpBranch(loop_continue_block); | ||
| 2658 | decomp.AddLabel(loop_continue_block); | ||
| 2674 | ExprDecompiler expr_parser{decomp}; | 2659 | ExprDecompiler expr_parser{decomp}; |
| 2675 | const Id condition = expr_parser.Visit(ast.condition); | 2660 | const Id condition = expr_parser.Visit(ast.condition); |
| 2676 | decomp.OpBranchConditional(condition, loop_label, endloop_label); | 2661 | decomp.OpBranchConditional(condition, loop_label, endloop_label); |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 2b01321b6..10794be1c 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -94,6 +94,7 @@ struct Specialization final { | |||
| 94 | Maxwell::PrimitiveTopology primitive_topology{}; | 94 | Maxwell::PrimitiveTopology primitive_topology{}; |
| 95 | std::optional<float> point_size{}; | 95 | std::optional<float> point_size{}; |
| 96 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; | 96 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; |
| 97 | bool ndc_minus_one_to_one{}; | ||
| 97 | 98 | ||
| 98 | // Tessellation specific | 99 | // Tessellation specific |
| 99 | struct { | 100 | struct { |
| @@ -101,6 +102,9 @@ struct Specialization final { | |||
| 101 | Maxwell::TessellationSpacing spacing{}; | 102 | Maxwell::TessellationSpacing spacing{}; |
| 102 | bool clockwise{}; | 103 | bool clockwise{}; |
| 103 | } tessellation; | 104 | } tessellation; |
| 105 | |||
| 106 | // Fragment specific | ||
| 107 | std::bitset<8> enabled_rendertargets; | ||
| 104 | }; | 108 | }; |
| 105 | // Old gcc versions don't consider this trivially copyable. | 109 | // Old gcc versions don't consider this trivially copyable. |
| 106 | // static_assert(std::is_trivially_copyable_v<Specialization>); | 110 | // static_assert(std::is_trivially_copyable_v<Specialization>); |
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 32facd6ba..0eeb75559 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp | |||
| @@ -63,12 +63,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 63 | case OpCode::Id::I2F_R: | 63 | case OpCode::Id::I2F_R: |
| 64 | case OpCode::Id::I2F_C: | 64 | case OpCode::Id::I2F_C: |
| 65 | case OpCode::Id::I2F_IMM: { | 65 | case OpCode::Id::I2F_IMM: { |
| 66 | UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); | ||
| 67 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); | 66 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); |
| 68 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 67 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 69 | "Condition codes generation in I2F is not implemented"); | 68 | "Condition codes generation in I2F is not implemented"); |
| 70 | 69 | ||
| 71 | Node value = [&]() { | 70 | Node value = [&] { |
| 72 | switch (opcode->get().GetId()) { | 71 | switch (opcode->get().GetId()) { |
| 73 | case OpCode::Id::I2F_R: | 72 | case OpCode::Id::I2F_R: |
| 74 | return GetRegister(instr.gpr20); | 73 | return GetRegister(instr.gpr20); |
| @@ -81,7 +80,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 81 | return Immediate(0); | 80 | return Immediate(0); |
| 82 | } | 81 | } |
| 83 | }(); | 82 | }(); |
| 83 | |||
| 84 | const bool input_signed = instr.conversion.is_input_signed; | 84 | const bool input_signed = instr.conversion.is_input_signed; |
| 85 | |||
| 86 | if (instr.conversion.src_size == Register::Size::Byte) { | ||
| 87 | const u32 offset = static_cast<u32>(instr.conversion.int_src.selector) * 8; | ||
| 88 | if (offset > 0) { | ||
| 89 | value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, | ||
| 90 | std::move(value), Immediate(offset)); | ||
| 91 | } | ||
| 92 | } else { | ||
| 93 | UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); | ||
| 94 | } | ||
| 95 | |||
| 85 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); | 96 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); |
| 86 | value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); | 97 | value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); |
| 87 | value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); | 98 | value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 78e92f52e..c934d0719 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -22,7 +22,23 @@ using Tegra::Shader::Register; | |||
| 22 | 22 | ||
| 23 | namespace { | 23 | namespace { |
| 24 | 24 | ||
| 25 | u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { | 25 | u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) { |
| 26 | switch (uniform_type) { | ||
| 27 | case Tegra::Shader::UniformType::UnsignedByte: | ||
| 28 | case Tegra::Shader::UniformType::Single: | ||
| 29 | return 1; | ||
| 30 | case Tegra::Shader::UniformType::Double: | ||
| 31 | return 2; | ||
| 32 | case Tegra::Shader::UniformType::Quad: | ||
| 33 | case Tegra::Shader::UniformType::UnsignedQuad: | ||
| 34 | return 4; | ||
| 35 | default: | ||
| 36 | UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); | ||
| 37 | return 1; | ||
| 38 | } | ||
| 39 | } | ||
| 40 | |||
| 41 | u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) { | ||
| 26 | switch (uniform_type) { | 42 | switch (uniform_type) { |
| 27 | case Tegra::Shader::UniformType::Single: | 43 | case Tegra::Shader::UniformType::Single: |
| 28 | return 1; | 44 | return 1; |
| @@ -170,7 +186,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 170 | const auto [real_address_base, base_address, descriptor] = | 186 | const auto [real_address_base, base_address, descriptor] = |
| 171 | TrackGlobalMemory(bb, instr, false); | 187 | TrackGlobalMemory(bb, instr, false); |
| 172 | 188 | ||
| 173 | const u32 count = GetUniformTypeElementsCount(type); | 189 | const u32 count = GetLdgMemorySize(type); |
| 174 | if (!real_address_base || !base_address) { | 190 | if (!real_address_base || !base_address) { |
| 175 | // Tracking failed, load zeroes. | 191 | // Tracking failed, load zeroes. |
| 176 | for (u32 i = 0; i < count; ++i) { | 192 | for (u32 i = 0; i < count; ++i) { |
| @@ -181,12 +197,22 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 181 | 197 | ||
| 182 | for (u32 i = 0; i < count; ++i) { | 198 | for (u32 i = 0; i < count; ++i) { |
| 183 | const Node it_offset = Immediate(i * 4); | 199 | const Node it_offset = Immediate(i * 4); |
| 184 | const Node real_address = | 200 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); |
| 185 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | 201 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 186 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 202 | |
| 203 | if (type == Tegra::Shader::UniformType::UnsignedByte) { | ||
| 204 | // To handle unaligned loads get the byte used to dereferenced global memory | ||
| 205 | // To handle unaligned loads get the byte used to dereference global memory | ||
| 206 | Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3)); | ||
| 207 | byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3)); | ||
| 208 | |||
| 209 | gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte), | ||
| 210 | Immediate(8)); | ||
| 211 | } | ||
| 187 | 212 | ||
| 188 | SetTemporary(bb, i, gmem); | 213 | SetTemporary(bb, i, gmem); |
| 189 | } | 214 | } |
| 215 | |||
| 190 | for (u32 i = 0; i < count; ++i) { | 216 | for (u32 i = 0; i < count; ++i) { |
| 191 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | 217 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 192 | } | 218 | } |
| @@ -276,7 +302,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 276 | break; | 302 | break; |
| 277 | } | 303 | } |
| 278 | 304 | ||
| 279 | const u32 count = GetUniformTypeElementsCount(type); | 305 | const u32 count = GetStgMemorySize(type); |
| 280 | for (u32 i = 0; i < count; ++i) { | 306 | for (u32 i = 0; i < count; ++i) { |
| 281 | const Node it_offset = Immediate(i * 4); | 307 | const Node it_offset = Immediate(i * 4); |
| 282 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); | 308 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 994c05611..dff01a541 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -743,13 +743,18 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||
| 743 | // When lod is used always is in gpr20 | 743 | // When lod is used always is in gpr20 |
| 744 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); | 744 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); |
| 745 | 745 | ||
| 746 | // Fill empty entries from the guest sampler. | 746 | // Fill empty entries from the guest sampler |
| 747 | const std::size_t entry_coord_count = GetCoordCount(sampler.GetType()); | 747 | const std::size_t entry_coord_count = GetCoordCount(sampler.GetType()); |
| 748 | if (type_coord_count != entry_coord_count) { | 748 | if (type_coord_count != entry_coord_count) { |
| 749 | LOG_WARNING(HW_GPU, "Bound and built texture types mismatch"); | 749 | LOG_WARNING(HW_GPU, "Bound and built texture types mismatch"); |
| 750 | } | 750 | |
| 751 | for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) { | 751 | // When the size is higher we insert zeroes |
| 752 | coords.push_back(GetRegister(Register::ZeroIndex)); | 752 | for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) { |
| 753 | coords.push_back(GetRegister(Register::ZeroIndex)); | ||
| 754 | } | ||
| 755 | |||
| 756 | // Then we ensure the size matches the number of entries (dropping unused values) | ||
| 757 | coords.resize(entry_coord_count); | ||
| 753 | } | 758 | } |
| 754 | 759 | ||
| 755 | Node4 values; | 760 | Node4 values; |
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index a4f1edd9a..38b3a4ba8 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -392,4 +392,42 @@ std::string SurfaceParams::TargetName() const { | |||
| 392 | } | 392 | } |
| 393 | } | 393 | } |
| 394 | 394 | ||
| 395 | u32 SurfaceParams::GetBlockSize() const { | ||
| 396 | const u32 x = 64U << block_width; | ||
| 397 | const u32 y = 8U << block_height; | ||
| 398 | const u32 z = 1U << block_depth; | ||
| 399 | return x * y * z; | ||
| 400 | } | ||
| 401 | |||
| 402 | std::pair<u32, u32> SurfaceParams::GetBlockXY() const { | ||
| 403 | const u32 x_pixels = 64U / GetBytesPerPixel(); | ||
| 404 | const u32 x = x_pixels << block_width; | ||
| 405 | const u32 y = 8U << block_height; | ||
| 406 | return {x, y}; | ||
| 407 | } | ||
| 408 | |||
| 409 | std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const { | ||
| 410 | const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; | ||
| 411 | const u32 block_size = GetBlockSize(); | ||
| 412 | const u32 block_index = offset / block_size; | ||
| 413 | const u32 gob_offset = offset % block_size; | ||
| 414 | const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GetGOBSize()); | ||
| 415 | const u32 x_gob_pixels = 64U / GetBytesPerPixel(); | ||
| 416 | const u32 x_block_pixels = x_gob_pixels << block_width; | ||
| 417 | const u32 y_block_pixels = 8U << block_height; | ||
| 418 | const u32 z_block_pixels = 1U << block_depth; | ||
| 419 | const u32 x_blocks = div_ceil(width, x_block_pixels); | ||
| 420 | const u32 y_blocks = div_ceil(height, y_block_pixels); | ||
| 421 | const u32 z_blocks = div_ceil(depth, z_block_pixels); | ||
| 422 | const u32 base_x = block_index % x_blocks; | ||
| 423 | const u32 base_y = (block_index / x_blocks) % y_blocks; | ||
| 424 | const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks; | ||
| 425 | u32 x = base_x * x_block_pixels; | ||
| 426 | u32 y = base_y * y_block_pixels; | ||
| 427 | u32 z = base_z * z_block_pixels; | ||
| 428 | z += gob_index >> block_height; | ||
| 429 | y += (gob_index * 8U) % y_block_pixels; | ||
| 430 | return {x, y, z}; | ||
| 431 | } | ||
| 432 | |||
| 395 | } // namespace VideoCommon | 433 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 129817ad3..992b5c022 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h | |||
| @@ -4,6 +4,8 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <utility> | ||
| 8 | |||
| 7 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| 8 | #include "common/bit_util.h" | 10 | #include "common/bit_util.h" |
| 9 | #include "common/cityhash.h" | 11 | #include "common/cityhash.h" |
| @@ -136,6 +138,15 @@ public: | |||
| 136 | 138 | ||
| 137 | std::size_t GetConvertedMipmapSize(u32 level) const; | 139 | std::size_t GetConvertedMipmapSize(u32 level) const; |
| 138 | 140 | ||
| 141 | /// Get this texture's Tegra block size in the guest memory layout | ||
| 142 | u32 GetBlockSize() const; | ||
| 143 | |||
| 144 | /// Get X, Y coordinates max sizes of a single block. | ||
| 145 | std::pair<u32, u32> GetBlockXY() const; | ||
| 146 | |||
| 147 | /// Get the offset in x, y, z coordinates from a memory offset | ||
| 148 | std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const; | ||
| 149 | |||
| 139 | /// Returns the size of a layer in bytes in guest memory. | 150 | /// Returns the size of a layer in bytes in guest memory. |
| 140 | std::size_t GetGuestLayerSize() const { | 151 | std::size_t GetGuestLayerSize() const { |
| 141 | return GetLayerSize(false, false); | 152 | return GetLayerSize(false, false); |
| @@ -269,7 +280,8 @@ private: | |||
| 269 | 280 | ||
| 270 | /// Returns the size of all mipmap levels and aligns as needed. | 281 | /// Returns the size of all mipmap levels and aligns as needed. |
| 271 | std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { | 282 | std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { |
| 272 | return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : depth); | 283 | return GetLayerSize(as_host_size, uncompressed) * |
| 284 | (layer_only ? 1U : (is_layered ? depth : 1U)); | ||
| 273 | } | 285 | } |
| 274 | 286 | ||
| 275 | /// Returns the size of a layer | 287 | /// Returns the size of a layer |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 02d2e9136..f4c015635 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -616,6 +616,86 @@ private: | |||
| 616 | } | 616 | } |
| 617 | 617 | ||
| 618 | /** | 618 | /** |
| 619 | * Takes care of managing 3D textures and their slices. Uses HLE methods for reconstructing the 3D | ||
| 620 | * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of | ||
| 621 | * the HLE methods. | ||
| 622 | * | ||
| 623 | * @param overlaps The overlapping surfaces registered in the cache. | ||
| 624 | * @param params The parameters on the new surface. | ||
| 625 | * @param gpu_addr The starting address of the new surface. | ||
| 626 | * @param cache_addr The starting address of the new surface on physical memory. | ||
| 627 | * @param preserve_contents Indicates that the new surface should be loaded from memory or | ||
| 628 | * left blank. | ||
| 629 | */ | ||
| 630 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, | ||
| 631 | const SurfaceParams& params, | ||
| 632 | const GPUVAddr gpu_addr, | ||
| 633 | const CacheAddr cache_addr, | ||
| 634 | bool preserve_contents) { | ||
| 635 | if (params.target == SurfaceTarget::Texture3D) { | ||
| 636 | bool failed = false; | ||
| 637 | if (params.num_levels > 1) { | ||
| 638 | // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach | ||
| 639 | return std::nullopt; | ||
| 640 | } | ||
| 641 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 642 | bool modified = false; | ||
| 643 | for (auto& surface : overlaps) { | ||
| 644 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | ||
| 645 | if (src_params.target != SurfaceTarget::Texture2D) { | ||
| 646 | failed = true; | ||
| 647 | break; | ||
| 648 | } | ||
| 649 | if (src_params.height != params.height) { | ||
| 650 | failed = true; | ||
| 651 | break; | ||
| 652 | } | ||
| 653 | if (src_params.block_depth != params.block_depth || | ||
| 654 | src_params.block_height != params.block_height) { | ||
| 655 | failed = true; | ||
| 656 | break; | ||
| 657 | } | ||
| 658 | const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr); | ||
| 659 | const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); | ||
| 660 | modified |= surface->IsModified(); | ||
| 661 | const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, | ||
| 662 | 1); | ||
| 663 | ImageCopy(surface, new_surface, copy_params); | ||
| 664 | } | ||
| 665 | if (failed) { | ||
| 666 | return std::nullopt; | ||
| 667 | } | ||
| 668 | for (const auto& surface : overlaps) { | ||
| 669 | Unregister(surface); | ||
| 670 | } | ||
| 671 | new_surface->MarkAsModified(modified, Tick()); | ||
| 672 | Register(new_surface); | ||
| 673 | auto view = new_surface->GetMainView(); | ||
| 674 | return {{std::move(new_surface), view}}; | ||
| 675 | } else { | ||
| 676 | for (const auto& surface : overlaps) { | ||
| 677 | if (!surface->MatchTarget(params.target)) { | ||
| 678 | if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) { | ||
| 679 | if (Settings::values.use_accurate_gpu_emulation) { | ||
| 680 | return std::nullopt; | ||
| 681 | } | ||
| 682 | Unregister(surface); | ||
| 683 | return InitializeSurface(gpu_addr, params, preserve_contents); | ||
| 684 | } | ||
| 685 | return std::nullopt; | ||
| 686 | } | ||
| 687 | if (surface->GetCacheAddr() != cache_addr) { | ||
| 688 | continue; | ||
| 689 | } | ||
| 690 | if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { | ||
| 691 | return {{surface, surface->GetMainView()}}; | ||
| 692 | } | ||
| 693 | } | ||
| 694 | return InitializeSurface(gpu_addr, params, preserve_contents); | ||
| 695 | } | ||
| 696 | } | ||
| 697 | |||
| 698 | /** | ||
| 619 | * Gets the starting address and parameters of a candidate surface and tries | 699 | * Gets the starting address and parameters of a candidate surface and tries |
| 620 | * to find a matching surface within the cache. This is done in 3 big steps: | 700 | * to find a matching surface within the cache. This is done in 3 big steps: |
| 621 | * | 701 | * |
| @@ -687,6 +767,15 @@ private: | |||
| 687 | } | 767 | } |
| 688 | } | 768 | } |
| 689 | 769 | ||
| 770 | // Check if it's a 3D texture | ||
| 771 | if (params.block_depth > 0) { | ||
| 772 | auto surface = | ||
| 773 | Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents); | ||
| 774 | if (surface) { | ||
| 775 | return *surface; | ||
| 776 | } | ||
| 777 | } | ||
| 778 | |||
| 690 | // Split cases between 1 overlap or many. | 779 | // Split cases between 1 overlap or many. |
| 691 | if (overlaps.size() == 1) { | 780 | if (overlaps.size() == 1) { |
| 692 | TSurface current_surface = overlaps[0]; | 781 | TSurface current_surface = overlaps[0]; |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index f1e3952bc..e5eac3f3b 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -12,6 +12,10 @@ namespace Tegra::Texture { | |||
| 12 | 12 | ||
| 13 | // GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents | 13 | // GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents |
| 14 | // a small rect of (64/bytes_per_pixel)X8. | 14 | // a small rect of (64/bytes_per_pixel)X8. |
| 15 | inline std::size_t GetGOBSize() { | ||
| 16 | return 512; | ||
| 17 | } | ||
| 18 | |||
| 15 | inline std::size_t GetGOBSizeShift() { | 19 | inline std::size_t GetGOBSizeShift() { |
| 16 | return 9; | 20 | return 9; |
| 17 | } | 21 | } |