summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.travis.yml2
-rwxr-xr-x.travis/linux-mingw/build.sh2
-rwxr-xr-x.travis/linux-mingw/deps.sh2
-rwxr-xr-x.travis/linux-mingw/docker.sh14
-rwxr-xr-x.travis/linux/build.sh2
-rwxr-xr-x.travis/linux/deps.sh2
-rwxr-xr-x.travis/linux/docker.sh5
-rwxr-xr-x.travis/macos/build.sh1
-rw-r--r--CMakeModules/GenerateSCMRev.cmake2
-rw-r--r--externals/CMakeLists.txt8
m---------externals/dynarmic0
-rw-r--r--src/CMakeLists.txt30
-rw-r--r--src/common/zstd_compression.cpp2
-rw-r--r--src/core/frontend/emu_window.cpp2
-rw-r--r--src/core/frontend/emu_window.h2
-rw-r--r--src/core/hle/kernel/process.cpp3
-rw-r--r--src/core/hle/kernel/vm_manager.cpp7
-rw-r--r--src/core/hle/kernel/vm_manager.h8
-rw-r--r--src/core/loader/nso.cpp2
-rw-r--r--src/core/memory.h9
-rw-r--r--src/core/telemetry_session.cpp17
-rw-r--r--src/core/telemetry_session.h1
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/dma_pusher.cpp2
-rw-r--r--src/video_core/engines/engine_upload.cpp48
-rw-r--r--src/video_core/engines/engine_upload.h75
-rw-r--r--src/video_core/engines/fermi_2d.h6
-rw-r--r--src/video_core/engines/kepler_compute.cpp37
-rw-r--r--src/video_core/engines/kepler_compute.h175
-rw-r--r--src/video_core/engines/kepler_memory.cpp45
-rw-r--r--src/video_core/engines/kepler_memory.h66
-rw-r--r--src/video_core/engines/maxwell_3d.cpp16
-rw-r--r--src/video_core/engines/maxwell_3d.h30
-rw-r--r--src/video_core/engines/maxwell_dma.cpp83
-rw-r--r--src/video_core/engines/maxwell_dma.h43
-rw-r--r--src/video_core/gpu.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp10
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp244
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h54
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h53
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp10
-rw-r--r--src/video_core/shader/decode.cpp4
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp22
-rw-r--r--src/video_core/shader/decode/conversion.cpp7
-rw-r--r--src/video_core/shader/decode/hfma2.cpp9
-rw-r--r--src/video_core/shader/decode/texture.cpp3
-rw-r--r--src/video_core/shader/decode/xmad.cpp5
-rw-r--r--src/video_core/shader/shader_ir.cpp9
-rw-r--r--src/video_core/shader/shader_ir.h5
-rw-r--r--src/video_core/surface.cpp86
-rw-r--r--src/video_core/textures/astc.cpp4
-rw-r--r--src/yuzu/compatdb.cpp6
-rw-r--r--src/yuzu/configuration/configure_dialog.cpp4
-rw-r--r--src/yuzu/hotkeys.h2
55 files changed, 854 insertions, 444 deletions
diff --git a/.travis.yml b/.travis.yml
index 9512f7843..93fda1dfa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,7 +24,7 @@ matrix:
24 - os: osx 24 - os: osx
25 env: NAME="macos build" 25 env: NAME="macos build"
26 sudo: false 26 sudo: false
27 osx_image: xcode10.1 27 osx_image: xcode10.2
28 install: "./.travis/macos/deps.sh" 28 install: "./.travis/macos/deps.sh"
29 script: "./.travis/macos/build.sh" 29 script: "./.travis/macos/build.sh"
30 after_success: "./.travis/macos/upload.sh" 30 after_success: "./.travis/macos/upload.sh"
diff --git a/.travis/linux-mingw/build.sh b/.travis/linux-mingw/build.sh
index be03cc0f3..b12d70b12 100755
--- a/.travis/linux-mingw/build.sh
+++ b/.travis/linux-mingw/build.sh
@@ -1,3 +1,3 @@
1#!/bin/bash -ex 1#!/bin/bash -ex
2mkdir "$HOME/.ccache" || true 2mkdir "$HOME/.ccache" || true
3docker run --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache ubuntu:18.04 /bin/bash -ex /yuzu/.travis/linux-mingw/docker.sh 3docker run --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache yuzuemu/build-environments:linux-mingw /bin/bash -ex /yuzu/.travis/linux-mingw/docker.sh
diff --git a/.travis/linux-mingw/deps.sh b/.travis/linux-mingw/deps.sh
index 540bb934a..55b5d6006 100755
--- a/.travis/linux-mingw/deps.sh
+++ b/.travis/linux-mingw/deps.sh
@@ -1,3 +1,3 @@
1#!/bin/sh -ex 1#!/bin/sh -ex
2 2
3docker pull ubuntu:18.04 3docker pull yuzuemu/build-environments:linux-mingw
diff --git a/.travis/linux-mingw/docker.sh b/.travis/linux-mingw/docker.sh
index 6cf43a006..28033acfb 100755
--- a/.travis/linux-mingw/docker.sh
+++ b/.travis/linux-mingw/docker.sh
@@ -1,16 +1,6 @@
1#!/bin/bash -ex 1#!/bin/bash -ex
2 2
3cd /yuzu 3cd /yuzu
4MINGW_PACKAGES="sdl2-mingw-w64 qt5base-mingw-w64 qt5tools-mingw-w64 libsamplerate-mingw-w64 qt5multimedia-mingw-w64"
5apt-get update
6apt-get install -y gpg wget git python3-pip python ccache g++-mingw-w64-x86-64 gcc-mingw-w64-x86-64 mingw-w64-tools cmake
7echo 'deb http://ppa.launchpad.net/tobydox/mingw-w64/ubuntu bionic main ' > /etc/apt/sources.list.d/extras.list
8apt-key adv --keyserver keyserver.ubuntu.com --recv '72931B477E22FEFD47F8DECE02FE5F12ADDE29B2'
9apt-get update
10apt-get install -y ${MINGW_PACKAGES}
11
12# fix a problem in current MinGW headers
13wget -q https://raw.githubusercontent.com/Alexpux/mingw-w64/d0d7f784833bbb0b2d279310ddc6afb52fe47a46/mingw-w64-headers/crt/errno.h -O /usr/x86_64-w64-mingw32/include/errno.h
14# override Travis CI unreasonable ccache size 4# override Travis CI unreasonable ccache size
15echo 'max_size = 3.0G' > "$HOME/.ccache/ccache.conf" 5echo 'max_size = 3.0G' > "$HOME/.ccache/ccache.conf"
16 6
@@ -23,8 +13,8 @@ echo '' >> /bin/cmd
23chmod +x /bin/cmd 13chmod +x /bin/cmd
24 14
25mkdir build && cd build 15mkdir build && cd build
26cmake .. -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release 16cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
27make -j4 17ninja
28 18
29# Clean up the dirty hacks 19# Clean up the dirty hacks
30rm /bin/uname && mv /bin/uname1 /bin/uname 20rm /bin/uname && mv /bin/uname1 /bin/uname
diff --git a/.travis/linux/build.sh b/.travis/linux/build.sh
index 2fced727d..3929f97fc 100755
--- a/.travis/linux/build.sh
+++ b/.travis/linux/build.sh
@@ -1,4 +1,4 @@
1#!/bin/bash -ex 1#!/bin/bash -ex
2 2
3mkdir -p "$HOME/.ccache" 3mkdir -p "$HOME/.ccache"
4docker run -e ENABLE_COMPATIBILITY_REPORTING --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache ubuntu:18.04 /bin/bash /yuzu/.travis/linux/docker.sh 4docker run -e ENABLE_COMPATIBILITY_REPORTING --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache yuzuemu/build-environments:linux-fresh /bin/bash /yuzu/.travis/linux/docker.sh
diff --git a/.travis/linux/deps.sh b/.travis/linux/deps.sh
index 540bb934a..8d23c517d 100755
--- a/.travis/linux/deps.sh
+++ b/.travis/linux/deps.sh
@@ -1,3 +1,3 @@
1#!/bin/sh -ex 1#!/bin/sh -ex
2 2
3docker pull ubuntu:18.04 3docker pull yuzuemu/build-environments:linux-fresh
diff --git a/.travis/linux/docker.sh b/.travis/linux/docker.sh
index 8b7e65911..3a9970384 100755
--- a/.travis/linux/docker.sh
+++ b/.travis/linux/docker.sh
@@ -1,12 +1,9 @@
1#!/bin/bash -ex 1#!/bin/bash -ex
2 2
3apt-get update
4apt-get install --no-install-recommends -y build-essential git libqt5opengl5-dev libsdl2-dev libssl-dev python qtbase5-dev qtwebengine5-dev wget cmake ninja-build ccache
5
6cd /yuzu 3cd /yuzu
7 4
8mkdir build && cd build 5mkdir build && cd build
9cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON -G Ninja 6cmake .. -G Ninja -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON
10ninja 7ninja
11 8
12ccache -s 9ccache -s
diff --git a/.travis/macos/build.sh b/.travis/macos/build.sh
index b7b4c6f8c..0abd1a93a 100755
--- a/.travis/macos/build.sh
+++ b/.travis/macos/build.sh
@@ -7,6 +7,7 @@ export Qt5_DIR=$(brew --prefix)/opt/qt5
7export UNICORNDIR=$(pwd)/externals/unicorn 7export UNICORNDIR=$(pwd)/externals/unicorn
8export PATH="/usr/local/opt/ccache/libexec:$PATH" 8export PATH="/usr/local/opt/ccache/libexec:$PATH"
9 9
10# TODO: Build using ninja instead of make
10mkdir build && cd build 11mkdir build && cd build
11cmake --version 12cmake --version
12cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DUSE_DISCORD_PRESENCE=ON 13cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DUSE_DISCORD_PRESENCE=ON
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 08315a1f1..5e00d839f 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -19,7 +19,7 @@ set(BUILD_VERSION "0")
19if (BUILD_REPOSITORY) 19if (BUILD_REPOSITORY)
20 # regex capture the string nightly or canary into CMAKE_MATCH_1 20 # regex capture the string nightly or canary into CMAKE_MATCH_1
21 string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY}) 21 string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY})
22 if (${CMAKE_MATCH_COUNT} GREATER 0) 22 if ("${CMAKE_MATCH_COUNT}" GREATER 0)
23 # capitalize the first letter of each word in the repo name. 23 # capitalize the first letter of each word in the repo name.
24 string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1}) 24 string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
25 foreach(WORD ${REPO_NAME_LIST}) 25 foreach(WORD ${REPO_NAME_LIST})
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index 3f8b6cda8..e6fa11a03 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -7,6 +7,10 @@ include(DownloadExternals)
7add_library(catch-single-include INTERFACE) 7add_library(catch-single-include INTERFACE)
8target_include_directories(catch-single-include INTERFACE catch/single_include) 8target_include_directories(catch-single-include INTERFACE catch/single_include)
9 9
10# libfmt
11add_subdirectory(fmt)
12add_library(fmt::fmt ALIAS fmt)
13
10# Dynarmic 14# Dynarmic
11if (ARCHITECTURE_x86_64) 15if (ARCHITECTURE_x86_64)
12 set(DYNARMIC_TESTS OFF) 16 set(DYNARMIC_TESTS OFF)
@@ -14,10 +18,6 @@ if (ARCHITECTURE_x86_64)
14 add_subdirectory(dynarmic) 18 add_subdirectory(dynarmic)
15endif() 19endif()
16 20
17# libfmt
18add_subdirectory(fmt)
19add_library(fmt::fmt ALIAS fmt)
20
21# getopt 21# getopt
22if (MSVC) 22if (MSVC)
23 add_subdirectory(getopt) 23 add_subdirectory(getopt)
diff --git a/externals/dynarmic b/externals/dynarmic
Subproject 4e6848d1c9e8dadc70595c15b5589f8b14aad47 Subproject 2683a9a3e316b5c3f387bbe6787732b9ff44b8d
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9aea4af87..a1d87bbbc 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -21,15 +21,27 @@ if (MSVC)
21 # Ensure that projects build with Unicode support. 21 # Ensure that projects build with Unicode support.
22 add_definitions(-DUNICODE -D_UNICODE) 22 add_definitions(-DUNICODE -D_UNICODE)
23 23
24 # /W3 - Level 3 warnings 24 # /W3 - Level 3 warnings
25 # /MP - Multi-threaded compilation 25 # /MP - Multi-threaded compilation
26 # /Zi - Output debugging information 26 # /Zi - Output debugging information
27 # /Zo - enhanced debug info for optimized builds 27 # /Zo - Enhanced debug info for optimized builds
28 # /permissive- - enables stricter C++ standards conformance checks 28 # /permissive- - Enables stricter C++ standards conformance checks
29 # /EHsc - C++-only exception handling semantics 29 # /EHsc - C++-only exception handling semantics
30 # /Zc:throwingNew - let codegen assume `operator new` will never return null 30 # /Zc:externConstexpr - Allow extern constexpr variables to have external linkage, like the standard mandates
31 # /Zc:inline - let codegen omit inline functions in object files 31 # /Zc:inline - Let codegen omit inline functions in object files
32 add_compile_options(/W3 /MP /Zi /Zo /permissive- /EHsc /std:c++latest /Zc:throwingNew,inline) 32 # /Zc:throwingNew - Let codegen assume `operator new` (without std::nothrow) will never return null
33 add_compile_options(
34 /W3
35 /MP
36 /Zi
37 /Zo
38 /permissive-
39 /EHsc
40 /std:c++latest
41 /Zc:externConstexpr
42 /Zc:inline
43 /Zc:throwingNew
44 )
33 45
34 # /GS- - No stack buffer overflow checks 46 # /GS- - No stack buffer overflow checks
35 add_compile_options("$<$<CONFIG:Release>:/GS->") 47 add_compile_options("$<$<CONFIG:Release>:/GS->")
diff --git a/src/common/zstd_compression.cpp b/src/common/zstd_compression.cpp
index 60a35c67c..978526492 100644
--- a/src/common/zstd_compression.cpp
+++ b/src/common/zstd_compression.cpp
@@ -2,8 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once
6
7#include <algorithm> 5#include <algorithm>
8#include <zstd.h> 6#include <zstd.h>
9 7
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index 1320bbe77..eda466a5d 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -10,6 +10,8 @@
10 10
11namespace Core::Frontend { 11namespace Core::Frontend {
12 12
13GraphicsContext::~GraphicsContext() = default;
14
13class EmuWindow::TouchState : public Input::Factory<Input::TouchDevice>, 15class EmuWindow::TouchState : public Input::Factory<Input::TouchDevice>,
14 public std::enable_shared_from_this<TouchState> { 16 public std::enable_shared_from_this<TouchState> {
15public: 17public:
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index 70a522556..e2c290dc1 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -19,6 +19,8 @@ namespace Core::Frontend {
19 */ 19 */
20class GraphicsContext { 20class GraphicsContext {
21public: 21public:
22 virtual ~GraphicsContext();
23
22 /// Makes the graphics context current for the caller thread 24 /// Makes the graphics context current for the caller thread
23 virtual void MakeCurrent() = 0; 25 virtual void MakeCurrent() = 0;
24 26
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 20d01fc88..0775a89fb 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -241,7 +241,8 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
241} 241}
242 242
243Process::Process(Core::System& system) 243Process::Process(Core::System& system)
244 : WaitObject{system.Kernel()}, address_arbiter{system}, mutex{system}, system{system} {} 244 : WaitObject{system.Kernel()}, vm_manager{system},
245 address_arbiter{system}, mutex{system}, system{system} {}
245 246
246Process::~Process() = default; 247Process::~Process() = default;
247 248
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index f0c0c12fc..48b13cfdd 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -62,7 +62,7 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
62 return true; 62 return true;
63} 63}
64 64
65VMManager::VMManager() { 65VMManager::VMManager(Core::System& system) : system{system} {
66 // Default to assuming a 39-bit address space. This way we have a sane 66 // Default to assuming a 39-bit address space. This way we have a sane
67 // starting point with executables that don't provide metadata. 67 // starting point with executables that don't provide metadata.
68 Reset(FileSys::ProgramAddressSpaceType::Is39Bit); 68 Reset(FileSys::ProgramAddressSpaceType::Is39Bit);
@@ -111,7 +111,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
111 VirtualMemoryArea& final_vma = vma_handle->second; 111 VirtualMemoryArea& final_vma = vma_handle->second;
112 ASSERT(final_vma.size == size); 112 ASSERT(final_vma.size == size);
113 113
114 auto& system = Core::System::GetInstance();
115 system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset, 114 system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset,
116 VMAPermission::ReadWriteExecute); 115 VMAPermission::ReadWriteExecute);
117 system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset, 116 system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset,
@@ -140,7 +139,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me
140 VirtualMemoryArea& final_vma = vma_handle->second; 139 VirtualMemoryArea& final_vma = vma_handle->second;
141 ASSERT(final_vma.size == size); 140 ASSERT(final_vma.size == size);
142 141
143 auto& system = Core::System::GetInstance();
144 system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); 142 system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
145 system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); 143 system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
146 system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); 144 system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
@@ -223,7 +221,6 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) {
223 221
224 ASSERT(FindVMA(target)->second.size >= size); 222 ASSERT(FindVMA(target)->second.size >= size);
225 223
226 auto& system = Core::System::GetInstance();
227 system.ArmInterface(0).UnmapMemory(target, size); 224 system.ArmInterface(0).UnmapMemory(target, size);
228 system.ArmInterface(1).UnmapMemory(target, size); 225 system.ArmInterface(1).UnmapMemory(target, size);
229 system.ArmInterface(2).UnmapMemory(target, size); 226 system.ArmInterface(2).UnmapMemory(target, size);
@@ -376,7 +373,7 @@ ResultCode VMManager::UnmapCodeMemory(VAddr dst_address, VAddr src_address, u64
376 Reprotect(src_vma_iter, VMAPermission::ReadWrite); 373 Reprotect(src_vma_iter, VMAPermission::ReadWrite);
377 374
378 if (dst_memory_state == MemoryState::ModuleCode) { 375 if (dst_memory_state == MemoryState::ModuleCode) {
379 Core::System::GetInstance().InvalidateCpuInstructionCaches(); 376 system.InvalidateCpuInstructionCaches();
380 } 377 }
381 378
382 return unmap_result; 379 return unmap_result;
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 288eb9450..ec84d9a70 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -14,6 +14,10 @@
14#include "core/hle/result.h" 14#include "core/hle/result.h"
15#include "core/memory.h" 15#include "core/memory.h"
16 16
17namespace Core {
18class System;
19}
20
17namespace FileSys { 21namespace FileSys {
18enum class ProgramAddressSpaceType : u8; 22enum class ProgramAddressSpaceType : u8;
19} 23}
@@ -321,7 +325,7 @@ class VMManager final {
321public: 325public:
322 using VMAHandle = VMAMap::const_iterator; 326 using VMAHandle = VMAMap::const_iterator;
323 327
324 VMManager(); 328 explicit VMManager(Core::System& system);
325 ~VMManager(); 329 ~VMManager();
326 330
327 /// Clears the address space map, re-initializing with a single free area. 331 /// Clears the address space map, re-initializing with a single free area.
@@ -712,5 +716,7 @@ private:
712 // The end of the currently allocated heap. This is not an inclusive 716 // The end of the currently allocated heap. This is not an inclusive
713 // end of the range. This is essentially 'base_address + current_size'. 717 // end of the range. This is essentially 'base_address + current_size'.
714 VAddr heap_end = 0; 718 VAddr heap_end = 0;
719
720 Core::System& system;
715}; 721};
716} // namespace Kernel 722} // namespace Kernel
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index a86653204..8592b1f44 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -21,8 +21,6 @@
21#include "core/memory.h" 21#include "core/memory.h"
22#include "core/settings.h" 22#include "core/settings.h"
23 23
24#pragma optimize("", off)
25
26namespace Loader { 24namespace Loader {
27namespace { 25namespace {
28struct MODHeader { 26struct MODHeader {
diff --git a/src/core/memory.h b/src/core/memory.h
index b9fa18b1d..04e2c5f1d 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -72,15 +72,6 @@ u8* GetPointer(VAddr vaddr);
72 72
73std::string ReadCString(VAddr vaddr, std::size_t max_length); 73std::string ReadCString(VAddr vaddr, std::size_t max_length);
74 74
75enum class FlushMode {
76 /// Write back modified surfaces to RAM
77 Flush,
78 /// Remove region from the cache
79 Invalidate,
80 /// Write back modified surfaces to RAM, and also remove them from the cache
81 FlushAndInvalidate,
82};
83
84/** 75/**
85 * Mark each page touching the region as cached. 76 * Mark each page touching the region as cached.
86 */ 77 */
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index e1db06811..4b17bada5 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -102,12 +102,6 @@ bool VerifyLogin(const std::string& username, const std::string& token) {
102} 102}
103 103
104TelemetrySession::TelemetrySession() { 104TelemetrySession::TelemetrySession() {
105#ifdef ENABLE_WEB_SERVICE
106 backend = std::make_unique<WebService::TelemetryJson>(
107 Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
108#else
109 backend = std::make_unique<Telemetry::NullVisitor>();
110#endif
111 // Log one-time top-level information 105 // Log one-time top-level information
112 AddField(Telemetry::FieldType::None, "TelemetryId", GetTelemetryId()); 106 AddField(Telemetry::FieldType::None, "TelemetryId", GetTelemetryId());
113 107
@@ -175,9 +169,14 @@ TelemetrySession::~TelemetrySession() {
175 .count()}; 169 .count()};
176 AddField(Telemetry::FieldType::Session, "Shutdown_Time", shutdown_time); 170 AddField(Telemetry::FieldType::Session, "Shutdown_Time", shutdown_time);
177 171
172#ifdef ENABLE_WEB_SERVICE
173 auto backend = std::make_unique<WebService::TelemetryJson>(
174 Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
175#else
176 auto backend = std::make_unique<Telemetry::NullVisitor>();
177#endif
178
178 // Complete the session, submitting to web service if necessary 179 // Complete the session, submitting to web service if necessary
179 // This is just a placeholder to wrap up the session once the core completes and this is
180 // destroyed. This will be moved elsewhere once we are actually doing real I/O with the service.
181 field_collection.Accept(*backend); 180 field_collection.Accept(*backend);
182 if (Settings::values.enable_telemetry) 181 if (Settings::values.enable_telemetry)
183 backend->Complete(); 182 backend->Complete();
@@ -186,6 +185,8 @@ TelemetrySession::~TelemetrySession() {
186 185
187bool TelemetrySession::SubmitTestcase() { 186bool TelemetrySession::SubmitTestcase() {
188#ifdef ENABLE_WEB_SERVICE 187#ifdef ENABLE_WEB_SERVICE
188 auto backend = std::make_unique<WebService::TelemetryJson>(
189 Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
189 field_collection.Accept(*backend); 190 field_collection.Accept(*backend);
190 return backend->SubmitTestcase(); 191 return backend->SubmitTestcase();
191#else 192#else
diff --git a/src/core/telemetry_session.h b/src/core/telemetry_session.h
index 023612b79..cae5a45a0 100644
--- a/src/core/telemetry_session.h
+++ b/src/core/telemetry_session.h
@@ -39,7 +39,6 @@ public:
39 39
40private: 40private:
41 Telemetry::FieldCollection field_collection; ///< Tracks all added fields for the session 41 Telemetry::FieldCollection field_collection; ///< Tracks all added fields for the session
42 std::unique_ptr<Telemetry::VisitorInterface> backend; ///< Backend interface that logs fields
43}; 42};
44 43
45/** 44/**
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6821f275d..1e010e4da 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -3,6 +3,8 @@ add_library(video_core STATIC
3 dma_pusher.h 3 dma_pusher.h
4 debug_utils/debug_utils.cpp 4 debug_utils/debug_utils.cpp
5 debug_utils/debug_utils.h 5 debug_utils/debug_utils.h
6 engines/engine_upload.cpp
7 engines/engine_upload.h
6 engines/fermi_2d.cpp 8 engines/fermi_2d.cpp
7 engines/fermi_2d.h 9 engines/fermi_2d.h
8 engines/kepler_compute.cpp 10 engines/kepler_compute.cpp
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 6674d9405..036e66f05 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -105,6 +105,8 @@ bool DmaPusher::Step() {
105 dma_state.non_incrementing = false; 105 dma_state.non_incrementing = false;
106 dma_increment_once = true; 106 dma_increment_once = true;
107 break; 107 break;
108 default:
109 break;
108 } 110 }
109 } 111 }
110 } 112 }
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
new file mode 100644
index 000000000..f8aa4ff55
--- /dev/null
+++ b/src/video_core/engines/engine_upload.cpp
@@ -0,0 +1,48 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "video_core/engines/engine_upload.h"
7#include "video_core/memory_manager.h"
8#include "video_core/textures/decoders.h"
9
10namespace Tegra::Engines::Upload {
11
12State::State(MemoryManager& memory_manager, Registers& regs)
13 : memory_manager(memory_manager), regs(regs) {}
14
15void State::ProcessExec(const bool is_linear) {
16 write_offset = 0;
17 copy_size = regs.line_length_in * regs.line_count;
18 inner_buffer.resize(copy_size);
19 this->is_linear = is_linear;
20}
21
22void State::ProcessData(const u32 data, const bool is_last_call) {
23 const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
24 std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size);
25 write_offset += sub_copy_size;
26 if (!is_last_call) {
27 return;
28 }
29 const GPUVAddr address{regs.dest.Address()};
30 if (is_linear) {
31 memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
32 } else {
33 UNIMPLEMENTED_IF(regs.dest.z != 0);
34 UNIMPLEMENTED_IF(regs.dest.depth != 1);
35 UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
36 UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
37 const std::size_t dst_size = Tegra::Texture::CalculateSize(
38 true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
39 tmp_buffer.resize(dst_size);
40 memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
41 Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
42 regs.dest.BlockHeight(), copy_size, inner_buffer.data(),
43 tmp_buffer.data());
44 memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
45 }
46}
47
48} // namespace Tegra::Engines::Upload
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
new file mode 100644
index 000000000..9c6e0d21c
--- /dev/null
+++ b/src/video_core/engines/engine_upload.h
@@ -0,0 +1,75 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <vector>
9#include "common/bit_field.h"
10#include "common/common_funcs.h"
11#include "common/common_types.h"
12
13namespace Tegra {
14class MemoryManager;
15}
16
17namespace Tegra::Engines::Upload {
18
19struct Registers {
20 u32 line_length_in;
21 u32 line_count;
22
23 struct {
24 u32 address_high;
25 u32 address_low;
26 u32 pitch;
27 union {
28 BitField<0, 4, u32> block_width;
29 BitField<4, 4, u32> block_height;
30 BitField<8, 4, u32> block_depth;
31 };
32 u32 width;
33 u32 height;
34 u32 depth;
35 u32 z;
36 u32 x;
37 u32 y;
38
39 GPUVAddr Address() const {
40 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
41 }
42
43 u32 BlockWidth() const {
44 return 1U << block_width.Value();
45 }
46
47 u32 BlockHeight() const {
48 return 1U << block_height.Value();
49 }
50
51 u32 BlockDepth() const {
52 return 1U << block_depth.Value();
53 }
54 } dest;
55};
56
57class State {
58public:
59 State(MemoryManager& memory_manager, Registers& regs);
60 ~State() = default;
61
62 void ProcessExec(const bool is_linear);
63 void ProcessData(const u32 data, const bool is_last_call);
64
65private:
66 u32 write_offset = 0;
67 u32 copy_size = 0;
68 std::vector<u8> inner_buffer;
69 std::vector<u8> tmp_buffer;
70 bool is_linear = false;
71 Registers& regs;
72 MemoryManager& memory_manager;
73};
74
75} // namespace Tegra::Engines::Upload
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 2e51b7f13..45f59a4d9 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -21,6 +21,12 @@ class RasterizerInterface;
21 21
22namespace Tegra::Engines { 22namespace Tegra::Engines {
23 23
24/**
25 * This Engine is known as G80_2D. Documentation can be found in:
26 * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
27 * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
28 */
29
24#define FERMI2D_REG_INDEX(field_name) \ 30#define FERMI2D_REG_INDEX(field_name) \
25 (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) 31 (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
26 32
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index b1d950460..7404a8163 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -4,12 +4,21 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "core/core.h"
7#include "video_core/engines/kepler_compute.h" 8#include "video_core/engines/kepler_compute.h"
9#include "video_core/engines/maxwell_3d.h"
8#include "video_core/memory_manager.h" 10#include "video_core/memory_manager.h"
11#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_base.h"
13#include "video_core/textures/decoders.h"
9 14
10namespace Tegra::Engines { 15namespace Tegra::Engines {
11 16
12KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {} 17KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
18 MemoryManager& memory_manager)
19 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{
20 memory_manager,
21 regs.upload} {}
13 22
14KeplerCompute::~KeplerCompute() = default; 23KeplerCompute::~KeplerCompute() = default;
15 24
@@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
20 regs.reg_array[method_call.method] = method_call.argument; 29 regs.reg_array[method_call.method] = method_call.argument;
21 30
22 switch (method_call.method) { 31 switch (method_call.method) {
32 case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
33 upload_state.ProcessExec(regs.exec_upload.linear != 0);
34 break;
35 }
36 case KEPLER_COMPUTE_REG_INDEX(data_upload): {
37 const bool is_last_call = method_call.IsLastCall();
38 upload_state.ProcessData(method_call.argument, is_last_call);
39 if (is_last_call) {
40 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
41 }
42 break;
43 }
23 case KEPLER_COMPUTE_REG_INDEX(launch): 44 case KEPLER_COMPUTE_REG_INDEX(launch):
24 // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA 45 ProcessLaunch();
25 // kernels)
26 UNREACHABLE_MSG("Compute shaders are not implemented");
27 break; 46 break;
28 default: 47 default:
29 break; 48 break;
30 } 49 }
31} 50}
32 51
52void KeplerCompute::ProcessLaunch() {
53
54 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
55 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
56 LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
57
58 const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start;
59 LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc);
60}
61
33} // namespace Tegra::Engines 62} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index fb6cdf432..5250b8d9b 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -6,22 +6,40 @@
6 6
7#include <array> 7#include <array>
8#include <cstddef> 8#include <cstddef>
9#include <vector>
10#include "common/bit_field.h"
9#include "common/common_funcs.h" 11#include "common/common_funcs.h"
10#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/engine_upload.h"
11#include "video_core/gpu.h" 14#include "video_core/gpu.h"
12 15
16namespace Core {
17class System;
18}
19
13namespace Tegra { 20namespace Tegra {
14class MemoryManager; 21class MemoryManager;
15} 22}
16 23
24namespace VideoCore {
25class RasterizerInterface;
26}
27
17namespace Tegra::Engines { 28namespace Tegra::Engines {
18 29
30/**
31 * This Engine is known as GK104_Compute. Documentation can be found in:
32 * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
33 * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
34 */
35
19#define KEPLER_COMPUTE_REG_INDEX(field_name) \ 36#define KEPLER_COMPUTE_REG_INDEX(field_name) \
20 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) 37 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
21 38
22class KeplerCompute final { 39class KeplerCompute final {
23public: 40public:
24 explicit KeplerCompute(MemoryManager& memory_manager); 41 explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
42 MemoryManager& memory_manager);
25 ~KeplerCompute(); 43 ~KeplerCompute();
26 44
27 static constexpr std::size_t NumConstBuffers = 8; 45 static constexpr std::size_t NumConstBuffers = 8;
@@ -31,30 +49,181 @@ public:
31 49
32 union { 50 union {
33 struct { 51 struct {
34 INSERT_PADDING_WORDS(0xAF); 52 INSERT_PADDING_WORDS(0x60);
53
54 Upload::Registers upload;
55
56 struct {
57 union {
58 BitField<0, 1, u32> linear;
59 };
60 } exec_upload;
61
62 u32 data_upload;
63
64 INSERT_PADDING_WORDS(0x3F);
65
66 struct {
67 u32 address;
68 GPUVAddr Address() const {
69 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
70 }
71 } launch_desc_loc;
72
73 INSERT_PADDING_WORDS(0x1);
35 74
36 u32 launch; 75 u32 launch;
37 76
38 INSERT_PADDING_WORDS(0xC48); 77 INSERT_PADDING_WORDS(0x4A7);
78
79 struct {
80 u32 address_high;
81 u32 address_low;
82 u32 limit;
83 GPUVAddr Address() const {
84 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
85 address_low);
86 }
87 } tsc;
88
89 INSERT_PADDING_WORDS(0x3);
90
91 struct {
92 u32 address_high;
93 u32 address_low;
94 u32 limit;
95 GPUVAddr Address() const {
96 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
97 address_low);
98 }
99 } tic;
100
101 INSERT_PADDING_WORDS(0x22);
102
103 struct {
104 u32 address_high;
105 u32 address_low;
106 GPUVAddr Address() const {
107 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
108 address_low);
109 }
110 } code_loc;
111
112 INSERT_PADDING_WORDS(0x3FE);
113
114 u32 texture_const_buffer_index;
115
116 INSERT_PADDING_WORDS(0x374);
39 }; 117 };
40 std::array<u32, NUM_REGS> reg_array; 118 std::array<u32, NUM_REGS> reg_array;
41 }; 119 };
42 } regs{}; 120 } regs{};
121
122 struct LaunchParams {
123 static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40;
124
125 INSERT_PADDING_WORDS(0x8);
126
127 u32 program_start;
128
129 INSERT_PADDING_WORDS(0x2);
130
131 BitField<30, 1, u32> linked_tsc;
132
133 BitField<0, 31, u32> grid_dim_x;
134 union {
135 BitField<0, 16, u32> grid_dim_y;
136 BitField<16, 16, u32> grid_dim_z;
137 };
138
139 INSERT_PADDING_WORDS(0x3);
140
141 BitField<0, 16, u32> shared_alloc;
142
143 BitField<0, 31, u32> block_dim_x;
144 union {
145 BitField<0, 16, u32> block_dim_y;
146 BitField<16, 16, u32> block_dim_z;
147 };
148
149 union {
150 BitField<0, 8, u32> const_buffer_enable_mask;
151 BitField<29, 2, u32> cache_layout;
152 } memory_config;
153
154 INSERT_PADDING_WORDS(0x8);
155
156 struct {
157 u32 address_low;
158 union {
159 BitField<0, 8, u32> address_high;
160 BitField<15, 17, u32> size;
161 };
162 GPUVAddr Address() const {
163 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
164 address_low);
165 }
166 } const_buffer_config[8];
167
168 union {
169 BitField<0, 20, u32> local_pos_alloc;
170 BitField<27, 5, u32> barrier_alloc;
171 };
172
173 union {
174 BitField<0, 20, u32> local_neg_alloc;
175 BitField<24, 5, u32> gpr_alloc;
176 };
177
178 INSERT_PADDING_WORDS(0x11);
179 } launch_description;
180
181 struct {
182 u32 write_offset = 0;
183 u32 copy_size = 0;
184 std::vector<u8> inner_buffer;
185 } state{};
186
43 static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), 187 static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
44 "KeplerCompute Regs has wrong size"); 188 "KeplerCompute Regs has wrong size");
45 189
190 static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
191 "KeplerCompute LaunchParams has wrong size");
192
46 /// Write the value to the register identified by method. 193 /// Write the value to the register identified by method.
47 void CallMethod(const GPU::MethodCall& method_call); 194 void CallMethod(const GPU::MethodCall& method_call);
48 195
49private: 196private:
197 Core::System& system;
198 VideoCore::RasterizerInterface& rasterizer;
50 MemoryManager& memory_manager; 199 MemoryManager& memory_manager;
200 Upload::State upload_state;
201
202 void ProcessLaunch();
51}; 203};
52 204
53#define ASSERT_REG_POSITION(field_name, position) \ 205#define ASSERT_REG_POSITION(field_name, position) \
54 static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \ 206 static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
55 "Field " #field_name " has invalid position") 207 "Field " #field_name " has invalid position")
56 208
209#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position) \
210 static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4, \
211 "Field " #field_name " has invalid position")
212
213ASSERT_REG_POSITION(upload, 0x60);
214ASSERT_REG_POSITION(exec_upload, 0x6C);
215ASSERT_REG_POSITION(data_upload, 0x6D);
57ASSERT_REG_POSITION(launch, 0xAF); 216ASSERT_REG_POSITION(launch, 0xAF);
217ASSERT_REG_POSITION(tsc, 0x557);
218ASSERT_REG_POSITION(tic, 0x55D);
219ASSERT_REG_POSITION(code_loc, 0x582);
220ASSERT_REG_POSITION(texture_const_buffer_index, 0x982);
221ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
222ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
223ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
224ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
225ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14);
226ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
58 227
59#undef ASSERT_REG_POSITION 228#undef ASSERT_REG_POSITION
60 229
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 7387886a3..0561f676c 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -14,9 +14,8 @@
14 14
15namespace Tegra::Engines { 15namespace Tegra::Engines {
16 16
17KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 17KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
18 MemoryManager& memory_manager) 18 : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}
19 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
20 19
21KeplerMemory::~KeplerMemory() = default; 20KeplerMemory::~KeplerMemory() = default;
22 21
@@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
28 27
29 switch (method_call.method) { 28 switch (method_call.method) {
30 case KEPLERMEMORY_REG_INDEX(exec): { 29 case KEPLERMEMORY_REG_INDEX(exec): {
31 ProcessExec(); 30 upload_state.ProcessExec(regs.exec.linear != 0);
32 break; 31 break;
33 } 32 }
34 case KEPLERMEMORY_REG_INDEX(data): { 33 case KEPLERMEMORY_REG_INDEX(data): {
35 ProcessData(method_call.argument, method_call.IsLastCall()); 34 const bool is_last_call = method_call.IsLastCall();
35 upload_state.ProcessData(method_call.argument, is_last_call);
36 if (is_last_call) {
37 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
38 }
36 break; 39 break;
37 } 40 }
38 } 41 }
39} 42}
40 43
41void KeplerMemory::ProcessExec() {
42 state.write_offset = 0;
43 state.copy_size = regs.line_length_in * regs.line_count;
44 state.inner_buffer.resize(state.copy_size);
45}
46
47void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
48 const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
49 std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
50 state.write_offset += sub_copy_size;
51 if (is_last_call) {
52 const GPUVAddr address{regs.dest.Address()};
53 if (regs.exec.linear != 0) {
54 memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
55 } else {
56 UNIMPLEMENTED_IF(regs.dest.z != 0);
57 UNIMPLEMENTED_IF(regs.dest.depth != 1);
58 UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
59 UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
60 const std::size_t dst_size = Tegra::Texture::CalculateSize(
61 true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
62 std::vector<u8> tmp_buffer(dst_size);
63 memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
64 Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
65 regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
66 state.inner_buffer.data(), tmp_buffer.data());
67 memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
68 }
69 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
70 }
71}
72
73} // namespace Tegra::Engines 44} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 5f892ddad..f3bc675a9 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -10,6 +10,7 @@
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/engine_upload.h"
13#include "video_core/gpu.h" 14#include "video_core/gpu.h"
14 15
15namespace Core { 16namespace Core {
@@ -20,19 +21,20 @@ namespace Tegra {
20class MemoryManager; 21class MemoryManager;
21} 22}
22 23
23namespace VideoCore {
24class RasterizerInterface;
25}
26
27namespace Tegra::Engines { 24namespace Tegra::Engines {
28 25
26/**
27 * This Engine is known as P2MF. Documentation can be found in:
28 * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
29 * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
30 */
31
29#define KEPLERMEMORY_REG_INDEX(field_name) \ 32#define KEPLERMEMORY_REG_INDEX(field_name) \
30 (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32)) 33 (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
31 34
32class KeplerMemory final { 35class KeplerMemory final {
33public: 36public:
34 KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 37 KeplerMemory(Core::System& system, MemoryManager& memory_manager);
35 MemoryManager& memory_manager);
36 ~KeplerMemory(); 38 ~KeplerMemory();
37 39
38 /// Write the value to the register identified by method. 40 /// Write the value to the register identified by method.
@@ -45,42 +47,7 @@ public:
45 struct { 47 struct {
46 INSERT_PADDING_WORDS(0x60); 48 INSERT_PADDING_WORDS(0x60);
47 49
48 u32 line_length_in; 50 Upload::Registers upload;
49 u32 line_count;
50
51 struct {
52 u32 address_high;
53 u32 address_low;
54 u32 pitch;
55 union {
56 BitField<0, 4, u32> block_width;
57 BitField<4, 4, u32> block_height;
58 BitField<8, 4, u32> block_depth;
59 };
60 u32 width;
61 u32 height;
62 u32 depth;
63 u32 z;
64 u32 x;
65 u32 y;
66
67 GPUVAddr Address() const {
68 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
69 address_low);
70 }
71
72 u32 BlockWidth() const {
73 return 1U << block_width.Value();
74 }
75
76 u32 BlockHeight() const {
77 return 1U << block_height.Value();
78 }
79
80 u32 BlockDepth() const {
81 return 1U << block_depth.Value();
82 }
83 } dest;
84 51
85 struct { 52 struct {
86 union { 53 union {
@@ -96,28 +63,17 @@ public:
96 }; 63 };
97 } regs{}; 64 } regs{};
98 65
99 struct {
100 u32 write_offset = 0;
101 u32 copy_size = 0;
102 std::vector<u8> inner_buffer;
103 } state{};
104
105private: 66private:
106 Core::System& system; 67 Core::System& system;
107 VideoCore::RasterizerInterface& rasterizer;
108 MemoryManager& memory_manager; 68 MemoryManager& memory_manager;
109 69 Upload::State upload_state;
110 void ProcessExec();
111 void ProcessData(u32 data, bool is_last_call);
112}; 70};
113 71
114#define ASSERT_REG_POSITION(field_name, position) \ 72#define ASSERT_REG_POSITION(field_name, position) \
115 static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \ 73 static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \
116 "Field " #field_name " has invalid position") 74 "Field " #field_name " has invalid position")
117 75
118ASSERT_REG_POSITION(line_length_in, 0x60); 76ASSERT_REG_POSITION(upload, 0x60);
119ASSERT_REG_POSITION(line_count, 0x61);
120ASSERT_REG_POSITION(dest, 0x62);
121ASSERT_REG_POSITION(exec, 0x6C); 77ASSERT_REG_POSITION(exec, 0x6C);
122ASSERT_REG_POSITION(data, 0x6D); 78ASSERT_REG_POSITION(data, 0x6D);
123#undef ASSERT_REG_POSITION 79#undef ASSERT_REG_POSITION
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 9780417f2..d7b586db9 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
20 20
21Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 21Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
22 MemoryManager& memory_manager) 22 MemoryManager& memory_manager)
23 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{ 23 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
24 *this} { 24 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
25 InitializeRegisterDefaults(); 25 InitializeRegisterDefaults();
26} 26}
27 27
@@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
253 ProcessSyncPoint(); 253 ProcessSyncPoint();
254 break; 254 break;
255 } 255 }
256 case MAXWELL3D_REG_INDEX(exec_upload): {
257 upload_state.ProcessExec(regs.exec_upload.linear != 0);
258 break;
259 }
260 case MAXWELL3D_REG_INDEX(data_upload): {
261 const bool is_last_call = method_call.IsLastCall();
262 upload_state.ProcessData(method_call.argument, is_last_call);
263 if (is_last_call) {
264 dirty_flags.OnMemoryWrite();
265 }
266 break;
267 }
256 default: 268 default:
257 break; 269 break;
258 } 270 }
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index cc2424d38..4883b582a 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -14,6 +14,7 @@
14#include "common/common_funcs.h" 14#include "common/common_funcs.h"
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "common/math_util.h" 16#include "common/math_util.h"
17#include "video_core/engines/engine_upload.h"
17#include "video_core/gpu.h" 18#include "video_core/gpu.h"
18#include "video_core/macro_interpreter.h" 19#include "video_core/macro_interpreter.h"
19#include "video_core/textures/texture.h" 20#include "video_core/textures/texture.h"
@@ -32,6 +33,12 @@ class RasterizerInterface;
32 33
33namespace Tegra::Engines { 34namespace Tegra::Engines {
34 35
36/**
37 * This Engine is known as GF100_3D. Documentation can be found in:
38 * https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml
39 * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
40 */
41
35#define MAXWELL3D_REG_INDEX(field_name) \ 42#define MAXWELL3D_REG_INDEX(field_name) \
36 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) 43 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
37 44
@@ -243,9 +250,10 @@ public:
243 return "10_10_10_2"; 250 return "10_10_10_2";
244 case Size::Size_11_11_10: 251 case Size::Size_11_11_10:
245 return "11_11_10"; 252 return "11_11_10";
253 default:
254 UNREACHABLE();
255 return {};
246 } 256 }
247 UNREACHABLE();
248 return {};
249 } 257 }
250 258
251 std::string TypeString() const { 259 std::string TypeString() const {
@@ -579,7 +587,18 @@ public:
579 u32 bind; 587 u32 bind;
580 } macros; 588 } macros;
581 589
582 INSERT_PADDING_WORDS(0x69); 590 INSERT_PADDING_WORDS(0x17);
591
592 Upload::Registers upload;
593 struct {
594 union {
595 BitField<0, 1, u32> linear;
596 };
597 } exec_upload;
598
599 u32 data_upload;
600
601 INSERT_PADDING_WORDS(0x44);
583 602
584 struct { 603 struct {
585 union { 604 union {
@@ -1175,6 +1194,8 @@ private:
1175 /// Interpreter for the macro codes uploaded to the GPU. 1194 /// Interpreter for the macro codes uploaded to the GPU.
1176 MacroInterpreter macro_interpreter; 1195 MacroInterpreter macro_interpreter;
1177 1196
1197 Upload::State upload_state;
1198
1178 /// Retrieves information about a specific TIC entry from the TIC buffer. 1199 /// Retrieves information about a specific TIC entry from the TIC buffer.
1179 Texture::TICEntry GetTICEntry(u32 tic_index) const; 1200 Texture::TICEntry GetTICEntry(u32 tic_index) const;
1180 1201
@@ -1218,6 +1239,9 @@ private:
1218 "Field " #field_name " has invalid position") 1239 "Field " #field_name " has invalid position")
1219 1240
1220ASSERT_REG_POSITION(macros, 0x45); 1241ASSERT_REG_POSITION(macros, 0x45);
1242ASSERT_REG_POSITION(upload, 0x60);
1243ASSERT_REG_POSITION(exec_upload, 0x6C);
1244ASSERT_REG_POSITION(data_upload, 0x6D);
1221ASSERT_REG_POSITION(sync_info, 0xB2); 1245ASSERT_REG_POSITION(sync_info, 0xB2);
1222ASSERT_REG_POSITION(tfb_enabled, 0x1D1); 1246ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
1223ASSERT_REG_POSITION(rt, 0x200); 1247ASSERT_REG_POSITION(rt, 0x200);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 2426d0067..3a5dfef0c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() {
83 83
84 ASSERT(regs.exec.enable_2d == 1); 84 ASSERT(regs.exec.enable_2d == 1);
85 85
86 const std::size_t copy_size = regs.x_count * regs.y_count; 86 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
87 ASSERT(regs.src_params.size_z == 1);
88 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
89 const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
90 const std::size_t src_size = Texture::CalculateSize(
91 true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
92 regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
87 93
88 auto source_ptr{memory_manager.GetPointer(source)}; 94 const std::size_t dst_size = regs.dst_pitch * regs.y_count;
89 auto dst_ptr{memory_manager.GetPointer(dest)};
90 95
91 if (!source_ptr) { 96 if (read_buffer.size() < src_size) {
92 LOG_ERROR(HW_GPU, "source_ptr is invalid"); 97 read_buffer.resize(src_size);
93 return; 98 }
94 }
95 99
96 if (!dst_ptr) { 100 if (write_buffer.size() < dst_size) {
97 LOG_ERROR(HW_GPU, "dst_ptr is invalid"); 101 write_buffer.resize(dst_size);
98 return; 102 }
99 }
100 103
101 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { 104 memory_manager.ReadBlock(source, read_buffer.data(), src_size);
102 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated 105 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
103 // copying.
104 rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
105 106
106 // We have to invalidate the destination region to evict any outdated surfaces from the 107 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
107 // cache. We do this before actually writing the new data because the destination address 108 regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
108 // might contain a dirty surface that will have to be written back to memory. 109 write_buffer.data(), regs.src_params.BlockHeight(),
109 rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size); 110 regs.src_params.pos_x, regs.src_params.pos_y);
110 };
111 111
112 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 112 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
113 ASSERT(regs.src_params.size_z == 1); 113 } else {
114 // If the input is tiled and the output is linear, deswizzle the input and copy it over. 114 ASSERT(regs.dst_params.BlockDepth() == 1);
115 115
116 const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; 116 const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
117 117
118 FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y, 118 const std::size_t dst_size = Texture::CalculateSize(
119 copy_size * src_bytes_per_pixel); 119 true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
120 regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
120 121
121 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, 122 const std::size_t dst_layer_size = Texture::CalculateSize(
122 regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr, 123 true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
123 regs.src_params.BlockHeight(), regs.src_params.pos_x, 124 regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
124 regs.src_params.pos_y);
125 } else {
126 ASSERT(regs.dst_params.size_z == 1);
127 ASSERT(regs.src_pitch == regs.x_count);
128 125
129 const u32 src_bpp = regs.src_pitch / regs.x_count; 126 const std::size_t src_size = regs.src_pitch * regs.y_count;
130 127
131 FlushAndInvalidate(regs.src_pitch * regs.y_count, 128 if (read_buffer.size() < src_size) {
132 regs.dst_params.size_x * regs.dst_params.size_y * src_bpp); 129 read_buffer.resize(src_size);
130 }
131
132 if (write_buffer.size() < dst_size) {
133 write_buffer.resize(dst_size);
134 }
135
136 memory_manager.ReadBlock(source, read_buffer.data(), src_size);
137 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
133 138
134 // If the input is linear and the output is tiled, swizzle the input and copy it over. 139 // If the input is linear and the output is tiled, swizzle the input and copy it over.
135 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, 140 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
136 src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight()); 141 src_bytes_per_pixel,
142 write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
143 read_buffer.data(), regs.dst_params.BlockHeight());
144
145 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
137 } 146 }
138} 147}
139 148
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index c6b649842..e5942f671 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <cstddef> 8#include <cstddef>
9#include <vector>
9#include "common/bit_field.h" 10#include "common/bit_field.h"
10#include "common/common_funcs.h" 11#include "common/common_funcs.h"
11#include "common/common_types.h" 12#include "common/common_types.h"
@@ -25,6 +26,11 @@ class RasterizerInterface;
25 26
26namespace Tegra::Engines { 27namespace Tegra::Engines {
27 28
29/**
30 * This Engine is known as GK104_Copy. Documentation can be found in:
31 * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
32 */
33
28class MaxwellDMA final { 34class MaxwellDMA final {
29public: 35public:
30 explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 36 explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
@@ -63,6 +69,16 @@ public:
63 69
64 static_assert(sizeof(Parameters) == 24, "Parameters has wrong size"); 70 static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
65 71
72 enum class ComponentMode : u32 {
73 Src0 = 0,
74 Src1 = 1,
75 Src2 = 2,
76 Src3 = 3,
77 Const0 = 4,
78 Const1 = 5,
79 Zero = 6,
80 };
81
66 enum class CopyMode : u32 { 82 enum class CopyMode : u32 {
67 None = 0, 83 None = 0,
68 Unk1 = 1, 84 Unk1 = 1,
@@ -128,7 +144,26 @@ public:
128 u32 x_count; 144 u32 x_count;
129 u32 y_count; 145 u32 y_count;
130 146
131 INSERT_PADDING_WORDS(0xBB); 147 INSERT_PADDING_WORDS(0xB8);
148
149 u32 const0;
150 u32 const1;
151 union {
152 BitField<0, 4, ComponentMode> component0;
153 BitField<4, 4, ComponentMode> component1;
154 BitField<8, 4, ComponentMode> component2;
155 BitField<12, 4, ComponentMode> component3;
156 BitField<16, 2, u32> component_size;
157 BitField<20, 3, u32> src_num_components;
158 BitField<24, 3, u32> dst_num_components;
159
160 u32 SrcBytePerPixel() const {
161 return src_num_components.Value() * component_size.Value();
162 }
163 u32 DstBytePerPixel() const {
164 return dst_num_components.Value() * component_size.Value();
165 }
166 } swizzle_config;
132 167
133 Parameters dst_params; 168 Parameters dst_params;
134 169
@@ -149,6 +184,9 @@ private:
149 184
150 MemoryManager& memory_manager; 185 MemoryManager& memory_manager;
151 186
187 std::vector<u8> read_buffer;
188 std::vector<u8> write_buffer;
189
152 /// Performs the copy from the source buffer to the destination buffer as configured in the 190 /// Performs the copy from the source buffer to the destination buffer as configured in the
153 /// registers. 191 /// registers.
154 void HandleCopy(); 192 void HandleCopy();
@@ -165,6 +203,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104);
165ASSERT_REG_POSITION(dst_pitch, 0x105); 203ASSERT_REG_POSITION(dst_pitch, 0x105);
166ASSERT_REG_POSITION(x_count, 0x106); 204ASSERT_REG_POSITION(x_count, 0x106);
167ASSERT_REG_POSITION(y_count, 0x107); 205ASSERT_REG_POSITION(y_count, 0x107);
206ASSERT_REG_POSITION(const0, 0x1C0);
207ASSERT_REG_POSITION(const1, 0x1C1);
208ASSERT_REG_POSITION(swizzle_config, 0x1C2);
168ASSERT_REG_POSITION(dst_params, 0x1C3); 209ASSERT_REG_POSITION(dst_params, 0x1C3);
169ASSERT_REG_POSITION(src_params, 0x1CA); 210ASSERT_REG_POSITION(src_params, 0x1CA);
170 211
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 4461083ff..52706505b 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -35,9 +35,9 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
35 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 35 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
36 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); 36 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
37 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); 37 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
38 kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); 38 kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
39 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); 39 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
40 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager); 40 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
41} 41}
42 42
43GPU::~GPU() = default; 43GPU::~GPU() = default;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 9a088a503..3cc945235 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -305,6 +305,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
305 case Maxwell::ShaderProgram::Geometry: 305 case Maxwell::ShaderProgram::Geometry:
306 shader_program_manager->UseTrivialGeometryShader(); 306 shader_program_manager->UseTrivialGeometryShader();
307 break; 307 break;
308 default:
309 break;
308 } 310 }
309 continue; 311 continue;
310 } 312 }
@@ -920,8 +922,8 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
920 viewport.y = viewport_rect.bottom; 922 viewport.y = viewport_rect.bottom;
921 viewport.width = viewport_rect.GetWidth(); 923 viewport.width = viewport_rect.GetWidth();
922 viewport.height = viewport_rect.GetHeight(); 924 viewport.height = viewport_rect.GetHeight();
923 viewport.depth_range_far = regs.viewports[i].depth_range_far; 925 viewport.depth_range_far = src.depth_range_far;
924 viewport.depth_range_near = regs.viewports[i].depth_range_near; 926 viewport.depth_range_near = src.depth_range_near;
925 } 927 }
926 state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0; 928 state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
927 state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0; 929 state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 2a81b1169..b1c8f7c35 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -363,6 +363,10 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
363 if (stop_loading) 363 if (stop_loading)
364 return; 364 return;
365 365
366 // Track if precompiled cache was altered during loading to know if we have to serialize the
367 // virtual precompiled cache file back to the hard drive
368 bool precompiled_cache_altered = false;
369
366 // Build shaders 370 // Build shaders
367 if (callback) 371 if (callback)
368 callback(VideoCore::LoadCallbackStage::Build, 0, usages.size()); 372 callback(VideoCore::LoadCallbackStage::Build, 0, usages.size());
@@ -384,6 +388,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
384 if (!shader) { 388 if (!shader) {
385 // Invalidate the precompiled cache if a dumped shader was rejected 389 // Invalidate the precompiled cache if a dumped shader was rejected
386 disk_cache.InvalidatePrecompiled(); 390 disk_cache.InvalidatePrecompiled();
391 precompiled_cache_altered = true;
387 dumps.clear(); 392 dumps.clear();
388 } 393 }
389 } 394 }
@@ -405,8 +410,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
405 if (dumps.find(usage) == dumps.end()) { 410 if (dumps.find(usage) == dumps.end()) {
406 const auto& program = precompiled_programs.at(usage); 411 const auto& program = precompiled_programs.at(usage);
407 disk_cache.SaveDump(usage, program->handle); 412 disk_cache.SaveDump(usage, program->handle);
413 precompiled_cache_altered = true;
408 } 414 }
409 } 415 }
416
417 if (precompiled_cache_altered) {
418 disk_cache.SaveVirtualPrecompiledFile();
419 }
410} 420}
411 421
412CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( 422CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 53752b38d..254c0d499 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -104,7 +104,8 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
104 return true; 104 return true;
105} 105}
106 106
107ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} 107ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system)
108 : system{system}, precompiled_cache_virtual_file_offset{0} {}
108 109
109std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> 110std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
110ShaderDiskCacheOpenGL::LoadTransferable() { 111ShaderDiskCacheOpenGL::LoadTransferable() {
@@ -177,6 +178,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
177 return {}; 178 return {};
178 } 179 }
179 } 180 }
181
180 return {{raws, usages}}; 182 return {{raws, usages}};
181} 183}
182 184
@@ -208,59 +210,64 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
208std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, 210std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
209 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>> 211 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
210ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { 212ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
213 // Read compressed file from disk and decompress to virtual precompiled cache file
214 std::vector<u8> compressed(file.GetSize());
215 file.ReadBytes(compressed.data(), compressed.size());
216 const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
217 SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
218 precompiled_cache_virtual_file_offset = 0;
219
211 ShaderCacheVersionHash file_hash{}; 220 ShaderCacheVersionHash file_hash{};
212 if (file.ReadArray(file_hash.data(), file_hash.size()) != file_hash.size()) { 221 if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
222 precompiled_cache_virtual_file_offset = 0;
213 return {}; 223 return {};
214 } 224 }
215 if (GetShaderCacheVersionHash() != file_hash) { 225 if (GetShaderCacheVersionHash() != file_hash) {
216 LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); 226 LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
227 precompiled_cache_virtual_file_offset = 0;
217 return {}; 228 return {};
218 } 229 }
219 230
220 std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled; 231 std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
221 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps; 232 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps;
222 while (file.Tell() < file.GetSize()) { 233 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
223 PrecompiledEntryKind kind{}; 234 PrecompiledEntryKind kind{};
224 if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { 235 if (!LoadObjectFromPrecompiled(kind)) {
225 return {}; 236 return {};
226 } 237 }
227 238
228 switch (kind) { 239 switch (kind) {
229 case PrecompiledEntryKind::Decompiled: { 240 case PrecompiledEntryKind::Decompiled: {
230 u64 unique_identifier{}; 241 u64 unique_identifier{};
231 if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64)) 242 if (!LoadObjectFromPrecompiled(unique_identifier)) {
232 return {}; 243 return {};
244 }
233 245
234 const auto entry = LoadDecompiledEntry(file); 246 const auto entry = LoadDecompiledEntry();
235 if (!entry) 247 if (!entry) {
236 return {}; 248 return {};
249 }
237 decompiled.insert({unique_identifier, std::move(*entry)}); 250 decompiled.insert({unique_identifier, std::move(*entry)});
238 break; 251 break;
239 } 252 }
240 case PrecompiledEntryKind::Dump: { 253 case PrecompiledEntryKind::Dump: {
241 ShaderDiskCacheUsage usage; 254 ShaderDiskCacheUsage usage;
242 if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) 255 if (!LoadObjectFromPrecompiled(usage)) {
243 return {}; 256 return {};
257 }
244 258
245 ShaderDiskCacheDump dump; 259 ShaderDiskCacheDump dump;
246 if (file.ReadBytes(&dump.binary_format, sizeof(u32)) != sizeof(u32)) 260 if (!LoadObjectFromPrecompiled(dump.binary_format)) {
247 return {};
248
249 u32 binary_length{};
250 u32 compressed_size{};
251 if (file.ReadBytes(&binary_length, sizeof(u32)) != sizeof(u32) ||
252 file.ReadBytes(&compressed_size, sizeof(u32)) != sizeof(u32)) {
253 return {}; 261 return {};
254 } 262 }
255 263
256 std::vector<u8> compressed_binary(compressed_size); 264 u32 binary_length{};
257 if (file.ReadArray(compressed_binary.data(), compressed_binary.size()) != 265 if (!LoadObjectFromPrecompiled(binary_length)) {
258 compressed_binary.size()) {
259 return {}; 266 return {};
260 } 267 }
261 268
262 dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary); 269 dump.binary.resize(binary_length);
263 if (dump.binary.empty()) { 270 if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
264 return {}; 271 return {};
265 } 272 }
266 273
@@ -274,45 +281,41 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
274 return {{decompiled, dumps}}; 281 return {{decompiled, dumps}};
275} 282}
276 283
277std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry( 284std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() {
278 FileUtil::IOFile& file) {
279 u32 code_size{}; 285 u32 code_size{};
280 u32 compressed_code_size{}; 286 if (!LoadObjectFromPrecompiled(code_size)) {
281 if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
282 file.ReadBytes(&compressed_code_size, sizeof(u32)) != sizeof(u32)) {
283 return {}; 287 return {};
284 } 288 }
285 289
286 std::vector<u8> compressed_code(compressed_code_size); 290 std::vector<u8> code(code_size);
287 if (file.ReadArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) { 291 if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
288 return {}; 292 return {};
289 } 293 }
290 294
291 const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code);
292 if (code.empty()) {
293 return {};
294 }
295 ShaderDiskCacheDecompiled entry; 295 ShaderDiskCacheDecompiled entry;
296 entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size); 296 entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
297 297
298 u32 const_buffers_count{}; 298 u32 const_buffers_count{};
299 if (file.ReadBytes(&const_buffers_count, sizeof(u32)) != sizeof(u32)) 299 if (!LoadObjectFromPrecompiled(const_buffers_count)) {
300 return {}; 300 return {};
301 }
302
301 for (u32 i = 0; i < const_buffers_count; ++i) { 303 for (u32 i = 0; i < const_buffers_count; ++i) {
302 u32 max_offset{}; 304 u32 max_offset{};
303 u32 index{}; 305 u32 index{};
304 u8 is_indirect{}; 306 u8 is_indirect{};
305 if (file.ReadBytes(&max_offset, sizeof(u32)) != sizeof(u32) || 307 if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
306 file.ReadBytes(&index, sizeof(u32)) != sizeof(u32) || 308 !LoadObjectFromPrecompiled(is_indirect)) {
307 file.ReadBytes(&is_indirect, sizeof(u8)) != sizeof(u8)) {
308 return {}; 309 return {};
309 } 310 }
310 entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index); 311 entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
311 } 312 }
312 313
313 u32 samplers_count{}; 314 u32 samplers_count{};
314 if (file.ReadBytes(&samplers_count, sizeof(u32)) != sizeof(u32)) 315 if (!LoadObjectFromPrecompiled(samplers_count)) {
315 return {}; 316 return {};
317 }
318
316 for (u32 i = 0; i < samplers_count; ++i) { 319 for (u32 i = 0; i < samplers_count; ++i) {
317 u64 offset{}; 320 u64 offset{};
318 u64 index{}; 321 u64 index{};
@@ -320,12 +323,9 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
320 u8 is_array{}; 323 u8 is_array{};
321 u8 is_shadow{}; 324 u8 is_shadow{};
322 u8 is_bindless{}; 325 u8 is_bindless{};
323 if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) || 326 if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
324 file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) || 327 !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
325 file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) || 328 !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
326 file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) ||
327 file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8) ||
328 file.ReadBytes(&is_bindless, sizeof(u8)) != sizeof(u8)) {
329 return {}; 329 return {};
330 } 330 }
331 entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset), 331 entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
@@ -335,17 +335,17 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
335 } 335 }
336 336
337 u32 global_memory_count{}; 337 u32 global_memory_count{};
338 if (file.ReadBytes(&global_memory_count, sizeof(u32)) != sizeof(u32)) 338 if (!LoadObjectFromPrecompiled(global_memory_count)) {
339 return {}; 339 return {};
340 }
341
340 for (u32 i = 0; i < global_memory_count; ++i) { 342 for (u32 i = 0; i < global_memory_count; ++i) {
341 u32 cbuf_index{}; 343 u32 cbuf_index{};
342 u32 cbuf_offset{}; 344 u32 cbuf_offset{};
343 u8 is_read{}; 345 u8 is_read{};
344 u8 is_written{}; 346 u8 is_written{};
345 if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) || 347 if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
346 file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32) || 348 !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
347 file.ReadBytes(&is_read, sizeof(u8)) != sizeof(u8) ||
348 file.ReadBytes(&is_written, sizeof(u8)) != sizeof(u8)) {
349 return {}; 349 return {};
350 } 350 }
351 entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0, 351 entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
@@ -354,74 +354,81 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
354 354
355 for (auto& clip_distance : entry.entries.clip_distances) { 355 for (auto& clip_distance : entry.entries.clip_distances) {
356 u8 clip_distance_raw{}; 356 u8 clip_distance_raw{};
357 if (file.ReadBytes(&clip_distance_raw, sizeof(u8)) != sizeof(u8)) 357 if (!LoadObjectFromPrecompiled(clip_distance_raw))
358 return {}; 358 return {};
359 clip_distance = clip_distance_raw != 0; 359 clip_distance = clip_distance_raw != 0;
360 } 360 }
361 361
362 u64 shader_length{}; 362 u64 shader_length{};
363 if (file.ReadBytes(&shader_length, sizeof(u64)) != sizeof(u64)) 363 if (!LoadObjectFromPrecompiled(shader_length)) {
364 return {}; 364 return {};
365 }
366
365 entry.entries.shader_length = static_cast<std::size_t>(shader_length); 367 entry.entries.shader_length = static_cast<std::size_t>(shader_length);
366 368
367 return entry; 369 return entry;
368} 370}
369 371
370bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier, 372bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code,
371 const std::string& code,
372 const std::vector<u8>& compressed_code,
373 const GLShader::ShaderEntries& entries) { 373 const GLShader::ShaderEntries& entries) {
374 if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Decompiled)) != 1 || 374 if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) ||
375 file.WriteObject(unique_identifier) != 1 || 375 !SaveObjectToPrecompiled(unique_identifier) ||
376 file.WriteObject(static_cast<u32>(code.size())) != 1 || 376 !SaveObjectToPrecompiled(static_cast<u32>(code.size())) ||
377 file.WriteObject(static_cast<u32>(compressed_code.size())) != 1 || 377 !SaveArrayToPrecompiled(code.data(), code.size())) {
378 file.WriteArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
379 return false; 378 return false;
380 } 379 }
381 380
382 if (file.WriteObject(static_cast<u32>(entries.const_buffers.size())) != 1) 381 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) {
383 return false; 382 return false;
383 }
384 for (const auto& cbuf : entries.const_buffers) { 384 for (const auto& cbuf : entries.const_buffers) {
385 if (file.WriteObject(static_cast<u32>(cbuf.GetMaxOffset())) != 1 || 385 if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
386 file.WriteObject(static_cast<u32>(cbuf.GetIndex())) != 1 || 386 !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
387 file.WriteObject(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0)) != 1) { 387 !SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) {
388 return false; 388 return false;
389 } 389 }
390 } 390 }
391 391
392 if (file.WriteObject(static_cast<u32>(entries.samplers.size())) != 1) 392 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) {
393 return false; 393 return false;
394 }
394 for (const auto& sampler : entries.samplers) { 395 for (const auto& sampler : entries.samplers) {
395 if (file.WriteObject(static_cast<u64>(sampler.GetOffset())) != 1 || 396 if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
396 file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 || 397 !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
397 file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 || 398 !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
398 file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 || 399 !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) ||
399 file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1 || 400 !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) ||
400 file.WriteObject(static_cast<u8>(sampler.IsBindless() ? 1 : 0)) != 1) { 401 !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) {
401 return false; 402 return false;
402 } 403 }
403 } 404 }
404 405
405 if (file.WriteObject(static_cast<u32>(entries.global_memory_entries.size())) != 1) 406 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) {
406 return false; 407 return false;
408 }
407 for (const auto& gmem : entries.global_memory_entries) { 409 for (const auto& gmem : entries.global_memory_entries) {
408 if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 || 410 if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
409 file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1 || 411 !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
410 file.WriteObject(static_cast<u8>(gmem.IsRead() ? 1 : 0)) != 1 || 412 !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) ||
411 file.WriteObject(static_cast<u8>(gmem.IsWritten() ? 1 : 0)) != 1) { 413 !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) {
412 return false; 414 return false;
413 } 415 }
414 } 416 }
415 417
416 for (const bool clip_distance : entries.clip_distances) { 418 for (const bool clip_distance : entries.clip_distances) {
417 if (file.WriteObject(static_cast<u8>(clip_distance ? 1 : 0)) != 1) 419 if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 1 : 0))) {
418 return false; 420 return false;
421 }
419 } 422 }
420 423
421 return file.WriteObject(static_cast<u64>(entries.shader_length)) == 1; 424 if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) {
425 return false;
426 }
427
428 return true;
422} 429}
423 430
424void ShaderDiskCacheOpenGL::InvalidateTransferable() const { 431void ShaderDiskCacheOpenGL::InvalidateTransferable() {
425 if (!FileUtil::Delete(GetTransferablePath())) { 432 if (!FileUtil::Delete(GetTransferablePath())) {
426 LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", 433 LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
427 GetTransferablePath()); 434 GetTransferablePath());
@@ -429,7 +436,10 @@ void ShaderDiskCacheOpenGL::InvalidateTransferable() const {
429 InvalidatePrecompiled(); 436 InvalidatePrecompiled();
430} 437}
431 438
432void ShaderDiskCacheOpenGL::InvalidatePrecompiled() const { 439void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
440 // Clear virtual precompiled cache file
441 precompiled_cache_virtual_file.Resize(0);
442
433 if (!FileUtil::Delete(GetPrecompiledPath())) { 443 if (!FileUtil::Delete(GetPrecompiledPath())) {
434 LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath()); 444 LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
435 } 445 }
@@ -465,7 +475,10 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
465 ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously"); 475 ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
466 476
467 auto& usages{it->second}; 477 auto& usages{it->second};
468 ASSERT(usages.find(usage) == usages.end()); 478 if (usages.find(usage) != usages.end()) {
479 // Skip this variant since the shader is already stored.
480 return;
481 }
469 usages.insert(usage); 482 usages.insert(usage);
470 483
471 FileUtil::IOFile file = AppendTransferableFile(); 484 FileUtil::IOFile file = AppendTransferableFile();
@@ -485,22 +498,13 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str
485 if (!IsUsable()) 498 if (!IsUsable())
486 return; 499 return;
487 500
488 const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault( 501 if (precompiled_cache_virtual_file.GetSize() == 0) {
489 reinterpret_cast<const u8*>(code.data()), code.size())}; 502 SavePrecompiledHeaderToVirtualPrecompiledCache();
490 if (compressed_code.empty()) {
491 LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
492 unique_identifier);
493 return;
494 } 503 }
495 504
496 FileUtil::IOFile file = AppendPrecompiledFile(); 505 if (!SaveDecompiledFile(unique_identifier, code, entries)) {
497 if (!file.IsOpen())
498 return;
499
500 if (!SaveDecompiledFile(file, unique_identifier, code, compressed_code, entries)) {
501 LOG_ERROR(Render_OpenGL, 506 LOG_ERROR(Render_OpenGL,
502 "Failed to save decompiled entry to the precompiled file - removing"); 507 "Failed to save decompiled entry to the precompiled file - removing");
503 file.Close();
504 InvalidatePrecompiled(); 508 InvalidatePrecompiled();
505 } 509 }
506} 510}
@@ -516,28 +520,13 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
516 std::vector<u8> binary(binary_length); 520 std::vector<u8> binary(binary_length);
517 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); 521 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
518 522
519 const std::vector<u8> compressed_binary = 523 if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) ||
520 Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size()); 524 !SaveObjectToPrecompiled(usage) ||
521 525 !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
522 if (compressed_binary.empty()) { 526 !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
523 LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}", 527 !SaveArrayToPrecompiled(binary.data(), binary.size())) {
524 usage.unique_identifier);
525 return;
526 }
527
528 FileUtil::IOFile file = AppendPrecompiledFile();
529 if (!file.IsOpen())
530 return;
531
532 if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Dump)) != 1 ||
533 file.WriteObject(usage) != 1 || file.WriteObject(static_cast<u32>(binary_format)) != 1 ||
534 file.WriteObject(static_cast<u32>(binary_length)) != 1 ||
535 file.WriteObject(static_cast<u32>(compressed_binary.size())) != 1 ||
536 file.WriteArray(compressed_binary.data(), compressed_binary.size()) !=
537 compressed_binary.size()) {
538 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing", 528 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
539 usage.unique_identifier); 529 usage.unique_identifier);
540 file.Close();
541 InvalidatePrecompiled(); 530 InvalidatePrecompiled();
542 return; 531 return;
543 } 532 }
@@ -570,28 +559,33 @@ FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
570 return file; 559 return file;
571} 560}
572 561
573FileUtil::IOFile ShaderDiskCacheOpenGL::AppendPrecompiledFile() const { 562void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
574 if (!EnsureDirectories()) 563 const auto hash{GetShaderCacheVersionHash()};
575 return {}; 564 if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
565 LOG_ERROR(
566 Render_OpenGL,
567 "Failed to write precompiled cache version hash to virtual precompiled cache file");
568 }
569}
570
571void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
572 precompiled_cache_virtual_file_offset = 0;
573 const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
574 const std::vector<u8>& compressed =
575 Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
576 576
577 const auto precompiled_path{GetPrecompiledPath()}; 577 const auto precompiled_path{GetPrecompiledPath()};
578 const bool existed = FileUtil::Exists(precompiled_path); 578 FileUtil::IOFile file(precompiled_path, "wb");
579 579
580 FileUtil::IOFile file(precompiled_path, "ab");
581 if (!file.IsOpen()) { 580 if (!file.IsOpen()) {
582 LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path); 581 LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
583 return {}; 582 return;
584 } 583 }
585 584 if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) {
586 if (!existed || file.GetSize() == 0) { 585 LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
587 const auto hash{GetShaderCacheVersionHash()}; 586 precompiled_path);
588 if (file.WriteArray(hash.data(), hash.size()) != hash.size()) { 587 return;
589 LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version hash in path={}",
590 precompiled_path);
591 return {};
592 }
593 } 588 }
594 return file;
595} 589}
596 590
597bool ShaderDiskCacheOpenGL::EnsureDirectories() const { 591bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 6be0c0547..0142b2e3b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -16,6 +16,7 @@
16 16
17#include "common/assert.h" 17#include "common/assert.h"
18#include "common/common_types.h" 18#include "common/common_types.h"
19#include "core/file_sys/vfs_vector.h"
19#include "video_core/engines/maxwell_3d.h" 20#include "video_core/engines/maxwell_3d.h"
20#include "video_core/renderer_opengl/gl_shader_gen.h" 21#include "video_core/renderer_opengl/gl_shader_gen.h"
21 22
@@ -172,10 +173,10 @@ public:
172 LoadPrecompiled(); 173 LoadPrecompiled();
173 174
174 /// Removes the transferable (and precompiled) cache file. 175 /// Removes the transferable (and precompiled) cache file.
175 void InvalidateTransferable() const; 176 void InvalidateTransferable();
176 177
177 /// Removes the precompiled cache file. 178 /// Removes the precompiled cache file and clears virtual precompiled cache file.
178 void InvalidatePrecompiled() const; 179 void InvalidatePrecompiled();
179 180
180 /// Saves a raw dump to the transferable file. Checks for collisions. 181 /// Saves a raw dump to the transferable file. Checks for collisions.
181 void SaveRaw(const ShaderDiskCacheRaw& entry); 182 void SaveRaw(const ShaderDiskCacheRaw& entry);
@@ -190,18 +191,21 @@ public:
190 /// Saves a dump entry to the precompiled file. Does not check for collisions. 191 /// Saves a dump entry to the precompiled file. Does not check for collisions.
191 void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); 192 void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
192 193
194 /// Serializes virtual precompiled shader cache file to real file
195 void SaveVirtualPrecompiledFile();
196
193private: 197private:
194 /// Loads the transferable cache. Returns empty on failure. 198 /// Loads the transferable cache. Returns empty on failure.
195 std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, 199 std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
196 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>> 200 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
197 LoadPrecompiledFile(FileUtil::IOFile& file); 201 LoadPrecompiledFile(FileUtil::IOFile& file);
198 202
199 /// Loads a decompiled cache entry from the passed file. Returns empty on failure. 203 /// Loads a decompiled cache entry from precompiled_cache_virtual_file. Returns empty on
200 std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(FileUtil::IOFile& file); 204 /// failure.
205 std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry();
201 206
202 /// Saves a decompiled entry to the passed file. Returns true on success. 207 /// Saves a decompiled entry to the passed file. Returns true on success.
203 bool SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier, const std::string& code, 208 bool SaveDecompiledFile(u64 unique_identifier, const std::string& code,
204 const std::vector<u8>& compressed_code,
205 const GLShader::ShaderEntries& entries); 209 const GLShader::ShaderEntries& entries);
206 210
207 /// Returns if the cache can be used 211 /// Returns if the cache can be used
@@ -210,8 +214,8 @@ private:
210 /// Opens current game's transferable file and write it's header if it doesn't exist 214 /// Opens current game's transferable file and write it's header if it doesn't exist
211 FileUtil::IOFile AppendTransferableFile() const; 215 FileUtil::IOFile AppendTransferableFile() const;
212 216
213 /// Opens current game's precompiled file and write it's header if it doesn't exist 217 /// Save precompiled header to precompiled_cache_virtual_file
214 FileUtil::IOFile AppendPrecompiledFile() const; 218 void SavePrecompiledHeaderToVirtualPrecompiledCache();
215 219
216 /// Create shader disk cache directories. Returns true on success. 220 /// Create shader disk cache directories. Returns true on success.
217 bool EnsureDirectories() const; 221 bool EnsureDirectories() const;
@@ -234,10 +238,42 @@ private:
234 /// Get current game's title id 238 /// Get current game's title id
235 std::string GetTitleID() const; 239 std::string GetTitleID() const;
236 240
241 template <typename T>
242 bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
243 const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
244 data, length, precompiled_cache_virtual_file_offset);
245 precompiled_cache_virtual_file_offset += write_length;
246 return write_length == sizeof(T) * length;
247 }
248
249 template <typename T>
250 bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
251 const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
252 data, length, precompiled_cache_virtual_file_offset);
253 precompiled_cache_virtual_file_offset += read_length;
254 return read_length == sizeof(T) * length;
255 }
256
257 template <typename T>
258 bool SaveObjectToPrecompiled(const T& object) {
259 return SaveArrayToPrecompiled(&object, 1);
260 }
261
262 template <typename T>
263 bool LoadObjectFromPrecompiled(T& object) {
264 return LoadArrayFromPrecompiled(&object, 1);
265 }
266
237 // Core system 267 // Core system
238 Core::System& system; 268 Core::System& system;
239 // Stored transferable shaders 269 // Stored transferable shaders
240 std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; 270 std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
271 // Stores whole precompiled cache which will be read from or saved to the precompiled cache
272 // file
273 FileSys::VectorVfsFile precompiled_cache_virtual_file;
274 // Stores the current offset of the precompiled cache file for IO purposes
275 std::size_t precompiled_cache_virtual_file_offset;
276
241 // The cache has been loaded at boot 277 // The cache has been loaded at boot
242 bool tried_to_load{}; 278 bool tried_to_load{};
243}; 279};
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index a8833c06e..95b773135 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -27,8 +27,7 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
27inline GLenum VertexType(Maxwell::VertexAttribute attrib) { 27inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
28 switch (attrib.type) { 28 switch (attrib.type) {
29 case Maxwell::VertexAttribute::Type::UnsignedInt: 29 case Maxwell::VertexAttribute::Type::UnsignedInt:
30 case Maxwell::VertexAttribute::Type::UnsignedNorm: { 30 case Maxwell::VertexAttribute::Type::UnsignedNorm:
31
32 switch (attrib.size) { 31 switch (attrib.size) {
33 case Maxwell::VertexAttribute::Size::Size_8: 32 case Maxwell::VertexAttribute::Size::Size_8:
34 case Maxwell::VertexAttribute::Size::Size_8_8: 33 case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -47,16 +46,13 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
47 return GL_UNSIGNED_INT; 46 return GL_UNSIGNED_INT;
48 case Maxwell::VertexAttribute::Size::Size_10_10_10_2: 47 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
49 return GL_UNSIGNED_INT_2_10_10_10_REV; 48 return GL_UNSIGNED_INT_2_10_10_10_REV;
49 default:
50 LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
51 UNREACHABLE();
52 return {};
50 } 53 }
51
52 LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
53 UNREACHABLE();
54 return {};
55 }
56
57 case Maxwell::VertexAttribute::Type::SignedInt: 54 case Maxwell::VertexAttribute::Type::SignedInt:
58 case Maxwell::VertexAttribute::Type::SignedNorm: { 55 case Maxwell::VertexAttribute::Type::SignedNorm:
59
60 switch (attrib.size) { 56 switch (attrib.size) {
61 case Maxwell::VertexAttribute::Size::Size_8: 57 case Maxwell::VertexAttribute::Size::Size_8:
62 case Maxwell::VertexAttribute::Size::Size_8_8: 58 case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -75,14 +71,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
75 return GL_INT; 71 return GL_INT;
76 case Maxwell::VertexAttribute::Size::Size_10_10_10_2: 72 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
77 return GL_INT_2_10_10_10_REV; 73 return GL_INT_2_10_10_10_REV;
74 default:
75 LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
76 UNREACHABLE();
77 return {};
78 } 78 }
79 79 case Maxwell::VertexAttribute::Type::Float:
80 LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
81 UNREACHABLE();
82 return {};
83 }
84
85 case Maxwell::VertexAttribute::Type::Float: {
86 switch (attrib.size) { 80 switch (attrib.size) {
87 case Maxwell::VertexAttribute::Size::Size_16: 81 case Maxwell::VertexAttribute::Size::Size_16:
88 case Maxwell::VertexAttribute::Size::Size_16_16: 82 case Maxwell::VertexAttribute::Size::Size_16_16:
@@ -94,13 +88,16 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
94 case Maxwell::VertexAttribute::Size::Size_32_32_32: 88 case Maxwell::VertexAttribute::Size::Size_32_32_32:
95 case Maxwell::VertexAttribute::Size::Size_32_32_32_32: 89 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
96 return GL_FLOAT; 90 return GL_FLOAT;
91 default:
92 LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
93 UNREACHABLE();
94 return {};
97 } 95 }
96 default:
97 LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
98 UNREACHABLE();
99 return {};
98 } 100 }
99 }
100
101 LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
102 UNREACHABLE();
103 return {};
104} 101}
105 102
106inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { 103inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
@@ -129,10 +126,11 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
129 return GL_TRIANGLES; 126 return GL_TRIANGLES;
130 case Maxwell::PrimitiveTopology::TriangleStrip: 127 case Maxwell::PrimitiveTopology::TriangleStrip:
131 return GL_TRIANGLE_STRIP; 128 return GL_TRIANGLE_STRIP;
129 default:
130 LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
131 UNREACHABLE();
132 return {};
132 } 133 }
133 LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
134 UNREACHABLE();
135 return {};
136} 134}
137 135
138inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, 136inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
@@ -186,9 +184,10 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
186 } else { 184 } else {
187 return GL_MIRROR_CLAMP_TO_EDGE; 185 return GL_MIRROR_CLAMP_TO_EDGE;
188 } 186 }
187 default:
188 LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
189 return GL_REPEAT;
189 } 190 }
190 LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
191 return GL_REPEAT;
192} 191}
193 192
194inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) { 193inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 34bf26ff2..9fe1e3280 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -62,9 +62,10 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
62 case Tegra::Texture::WrapMode::MirrorOnceBorder: 62 case Tegra::Texture::WrapMode::MirrorOnceBorder:
63 UNIMPLEMENTED(); 63 UNIMPLEMENTED();
64 return vk::SamplerAddressMode::eMirrorClampToEdge; 64 return vk::SamplerAddressMode::eMirrorClampToEdge;
65 default:
66 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
67 return {};
65 } 68 }
66 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
67 return {};
68} 69}
69 70
70vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { 71vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
@@ -225,9 +226,10 @@ vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
225 return vk::PrimitiveTopology::eTriangleList; 226 return vk::PrimitiveTopology::eTriangleList;
226 case Maxwell::PrimitiveTopology::TriangleStrip: 227 case Maxwell::PrimitiveTopology::TriangleStrip:
227 return vk::PrimitiveTopology::eTriangleStrip; 228 return vk::PrimitiveTopology::eTriangleStrip;
229 default:
230 UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
231 return {};
228 } 232 }
229 UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
230 return {};
231} 233}
232 234
233vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { 235vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index e4c438792..2da595c0d 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -116,6 +116,8 @@ ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
116 // Continue scanning for an exit method. 116 // Continue scanning for an exit method.
117 break; 117 break;
118 } 118 }
119 default:
120 break;
119 } 121 }
120 } 122 }
121 return exit_method = ExitMethod::AlwaysReturn; 123 return exit_method = ExitMethod::AlwaysReturn;
@@ -206,4 +208,4 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
206 return pc + 1; 208 return pc + 1;
207} 209}
208 210
209} // namespace VideoCommon::Shader \ No newline at end of file 211} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index 9467f9417..2098c1170 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -9,6 +9,7 @@
9 9
10namespace VideoCommon::Shader { 10namespace VideoCommon::Shader {
11 11
12using Tegra::Shader::HalfType;
12using Tegra::Shader::Instruction; 13using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 14using Tegra::Shader::OpCode;
14 15
@@ -22,7 +23,6 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
22 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); 23 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
23 } 24 }
24 } 25 }
25 UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");
26 26
27 const bool negate_a = 27 const bool negate_a =
28 opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; 28 opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
@@ -32,35 +32,37 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
32 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); 32 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
33 op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); 33 op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a);
34 34
35 Node op_b = [&]() { 35 auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> {
36 switch (opcode->get().GetId()) { 36 switch (opcode->get().GetId()) {
37 case OpCode::Id::HADD2_C: 37 case OpCode::Id::HADD2_C:
38 case OpCode::Id::HMUL2_C: 38 case OpCode::Id::HMUL2_C:
39 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); 39 return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
40 case OpCode::Id::HADD2_R: 40 case OpCode::Id::HADD2_R:
41 case OpCode::Id::HMUL2_R: 41 case OpCode::Id::HMUL2_R:
42 return GetRegister(instr.gpr20); 42 return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
43 default: 43 default:
44 UNREACHABLE(); 44 UNREACHABLE();
45 return Immediate(0); 45 return {HalfType::F32, Immediate(0)};
46 } 46 }
47 }(); 47 }();
48 op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b); 48 op_b = UnpackHalfFloat(op_b, type_b);
49 op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); 49 // redeclaration to avoid a bug in clang with reusing local bindings in lambdas
50 Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
50 51
51 Node value = [&]() { 52 Node value = [&]() {
52 switch (opcode->get().GetId()) { 53 switch (opcode->get().GetId()) {
53 case OpCode::Id::HADD2_C: 54 case OpCode::Id::HADD2_C:
54 case OpCode::Id::HADD2_R: 55 case OpCode::Id::HADD2_R:
55 return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); 56 return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt);
56 case OpCode::Id::HMUL2_C: 57 case OpCode::Id::HMUL2_C:
57 case OpCode::Id::HMUL2_R: 58 case OpCode::Id::HMUL2_R:
58 return Operation(OperationCode::HMul, PRECISE, op_a, op_b); 59 return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt);
59 default: 60 default:
60 UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); 61 UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
61 return Immediate(0); 62 return Immediate(0);
62 } 63 }
63 }(); 64 }();
65 value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
64 value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); 66 value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
65 67
66 SetRegister(bb, instr.gpr0, value); 68 SetRegister(bb, instr.gpr0, value);
@@ -68,4 +70,4 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
68 return pc; 70 return pc;
69} 71}
70 72
71} // namespace VideoCommon::Shader \ No newline at end of file 73} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index ba15b1115..b5ec9a6f5 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -120,10 +120,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
120 return Operation(OperationCode::FCeil, PRECISE, value); 120 return Operation(OperationCode::FCeil, PRECISE, value);
121 case Tegra::Shader::F2fRoundingOp::Trunc: 121 case Tegra::Shader::F2fRoundingOp::Trunc:
122 return Operation(OperationCode::FTrunc, PRECISE, value); 122 return Operation(OperationCode::FTrunc, PRECISE, value);
123 default:
124 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
125 static_cast<u32>(instr.conversion.f2f.rounding.Value()));
126 return Immediate(0);
123 } 127 }
124 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
125 static_cast<u32>(instr.conversion.f2f.rounding.Value()));
126 return Immediate(0);
127 }(); 128 }();
128 value = GetSaturatedFloat(value, instr.alu.saturate_d); 129 value = GetSaturatedFloat(value, instr.alu.saturate_d);
129 130
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index 5c1becce5..a425f9eb7 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -34,15 +34,14 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
34 case OpCode::Id::HFMA2_CR: 34 case OpCode::Id::HFMA2_CR:
35 neg_b = instr.hfma2.negate_b; 35 neg_b = instr.hfma2.negate_b;
36 neg_c = instr.hfma2.negate_c; 36 neg_c = instr.hfma2.negate_c;
37 return {instr.hfma2.saturate, instr.hfma2.type_b, 37 return {instr.hfma2.saturate, HalfType::F32,
38 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), 38 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
39 instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; 39 instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
40 case OpCode::Id::HFMA2_RC: 40 case OpCode::Id::HFMA2_RC:
41 neg_b = instr.hfma2.negate_b; 41 neg_b = instr.hfma2.negate_b;
42 neg_c = instr.hfma2.negate_c; 42 neg_c = instr.hfma2.negate_c;
43 return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), 43 return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
44 instr.hfma2.type_b, 44 HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
45 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
46 case OpCode::Id::HFMA2_RR: 45 case OpCode::Id::HFMA2_RR:
47 neg_b = instr.hfma2.rr.negate_b; 46 neg_b = instr.hfma2.rr.negate_b;
48 neg_c = instr.hfma2.rr.negate_c; 47 neg_c = instr.hfma2.rr.negate_c;
@@ -56,13 +55,13 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
56 return {false, identity, Immediate(0), identity, Immediate(0)}; 55 return {false, identity, Immediate(0), identity, Immediate(0)};
57 } 56 }
58 }(); 57 }();
59 UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
60 58
61 const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); 59 const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
62 op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); 60 op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
63 op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); 61 op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
64 62
65 Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); 63 Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
64 value = GetSaturatedHalfFloat(value, saturate);
66 value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); 65 value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
67 66
68 SetRegister(bb, instr.gpr0, value); 67 SetRegister(bb, instr.gpr0, value);
@@ -70,4 +69,4 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
70 return pc; 69 return pc;
71} 70}
72 71
73} // namespace VideoCommon::Shader \ No newline at end of file 72} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index fa65ac9a9..8b574d4e5 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -296,7 +296,7 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
296 ASSERT(cbuf_offset_imm != nullptr); 296 ASSERT(cbuf_offset_imm != nullptr);
297 const auto cbuf_offset = cbuf_offset_imm->GetValue(); 297 const auto cbuf_offset = cbuf_offset_imm->GetValue();
298 const auto cbuf_index = cbuf->GetIndex(); 298 const auto cbuf_index = cbuf->GetIndex();
299 const u64 cbuf_key = (cbuf_index << 32) | cbuf_offset; 299 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
300 300
301 // If this sampler has already been used, return the existing mapping. 301 // If this sampler has already been used, return the existing mapping.
302 const auto itr = 302 const auto itr =
@@ -541,7 +541,6 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
541 bool is_array, bool is_aoffi) { 541 bool is_array, bool is_aoffi) {
542 const std::size_t coord_count = GetCoordCount(texture_type); 542 const std::size_t coord_count = GetCoordCount(texture_type);
543 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); 543 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
544 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
545 544
546 // If enabled arrays index is always stored in the gpr8 field 545 // If enabled arrays index is always stored in the gpr8 field
547 const u64 array_register = instr.gpr8.Value(); 546 const u64 array_register = instr.gpr8.Value();
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index db15c0718..04a776398 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -56,9 +56,10 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
56 instr.xmad.mode, 56 instr.xmad.mode,
57 Immediate(static_cast<u32>(instr.xmad.imm20_16)), 57 Immediate(static_cast<u32>(instr.xmad.imm20_16)),
58 GetRegister(instr.gpr39)}; 58 GetRegister(instr.gpr39)};
59 default:
60 UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
61 return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
59 } 62 }
60 UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
61 return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
62 }(); 63 }();
63 64
64 op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16); 65 op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 17f2f711c..e4eb0dfd9 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -439,11 +439,14 @@ Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
439 return OperationCode::LogicalUGreaterEqual; 439 return OperationCode::LogicalUGreaterEqual;
440 case OperationCode::INegate: 440 case OperationCode::INegate:
441 UNREACHABLE_MSG("Can't negate an unsigned integer"); 441 UNREACHABLE_MSG("Can't negate an unsigned integer");
442 return {};
442 case OperationCode::IAbsolute: 443 case OperationCode::IAbsolute:
443 UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); 444 UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
445 return {};
446 default:
447 UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
448 return {};
444 } 449 }
445 UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
446 return {};
447} 450}
448 451
449} // namespace VideoCommon::Shader \ No newline at end of file 452} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 81278fb33..65f1e1de9 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -251,8 +251,9 @@ public:
251 } 251 }
252 252
253 bool operator<(const Sampler& rhs) const { 253 bool operator<(const Sampler& rhs) const {
254 return std::tie(offset, index, type, is_array, is_shadow) < 254 return std::tie(index, offset, type, is_array, is_shadow, is_bindless) <
255 std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow); 255 std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow,
256 rhs.is_bindless);
256 } 257 }
257 258
258private: 259private:
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 3b022a456..6384fa8d2 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -178,39 +178,44 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
178 return PixelFormat::ABGR8S; 178 return PixelFormat::ABGR8S;
179 case Tegra::Texture::ComponentType::UINT: 179 case Tegra::Texture::ComponentType::UINT:
180 return PixelFormat::ABGR8UI; 180 return PixelFormat::ABGR8UI;
181 default:
182 break;
181 } 183 }
182 LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); 184 break;
183 UNREACHABLE();
184 case Tegra::Texture::TextureFormat::B5G6R5: 185 case Tegra::Texture::TextureFormat::B5G6R5:
185 switch (component_type) { 186 switch (component_type) {
186 case Tegra::Texture::ComponentType::UNORM: 187 case Tegra::Texture::ComponentType::UNORM:
187 return PixelFormat::B5G6R5U; 188 return PixelFormat::B5G6R5U;
189 default:
190 break;
188 } 191 }
189 LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); 192 break;
190 UNREACHABLE();
191 case Tegra::Texture::TextureFormat::A2B10G10R10: 193 case Tegra::Texture::TextureFormat::A2B10G10R10:
192 switch (component_type) { 194 switch (component_type) {
193 case Tegra::Texture::ComponentType::UNORM: 195 case Tegra::Texture::ComponentType::UNORM:
194 return PixelFormat::A2B10G10R10U; 196 return PixelFormat::A2B10G10R10U;
197 default:
198 break;
195 } 199 }
196 LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); 200 break;
197 UNREACHABLE();
198 case Tegra::Texture::TextureFormat::A1B5G5R5: 201 case Tegra::Texture::TextureFormat::A1B5G5R5:
199 switch (component_type) { 202 switch (component_type) {
200 case Tegra::Texture::ComponentType::UNORM: 203 case Tegra::Texture::ComponentType::UNORM:
201 return PixelFormat::A1B5G5R5U; 204 return PixelFormat::A1B5G5R5U;
205 default:
206 break;
202 } 207 }
203 LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); 208 break;
204 UNREACHABLE();
205 case Tegra::Texture::TextureFormat::R8: 209 case Tegra::Texture::TextureFormat::R8:
206 switch (component_type) { 210 switch (component_type) {
207 case Tegra::Texture::ComponentType::UNORM: 211 case Tegra::Texture::ComponentType::UNORM:
208 return PixelFormat::R8U; 212 return PixelFormat::R8U;
209 case Tegra::Texture::ComponentType::UINT: 213 case Tegra::Texture::ComponentType::UINT:
210 return PixelFormat::R8UI; 214 return PixelFormat::R8UI;
215 default:
216 break;
211 } 217 }
212 LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); 218 break;
213 UNREACHABLE();
214 case Tegra::Texture::TextureFormat::G8R8: 219 case Tegra::Texture::TextureFormat::G8R8:
215 // TextureFormat::G8R8 is actually ordered red then green, as such we can use 220 // TextureFormat::G8R8 is actually ordered red then green, as such we can use
216 // PixelFormat::RG8U and PixelFormat::RG8S. This was tested with The Legend of Zelda: Breath 221 // PixelFormat::RG8U and PixelFormat::RG8S. This was tested with The Legend of Zelda: Breath
@@ -220,50 +225,55 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
220 return PixelFormat::RG8U; 225 return PixelFormat::RG8U;
221 case Tegra::Texture::ComponentType::SNORM: 226 case Tegra::Texture::ComponentType::SNORM:
222 return PixelFormat::RG8S; 227 return PixelFormat::RG8S;
228 default:
229 break;
223 } 230 }
224 LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); 231 break;
225 UNREACHABLE();
226 case Tegra::Texture::TextureFormat::R16_G16_B16_A16: 232 case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
227 switch (component_type) { 233 switch (component_type) {
228 case Tegra::Texture::ComponentType::UNORM: 234 case Tegra::Texture::ComponentType::UNORM:
229 return PixelFormat::RGBA16U; 235 return PixelFormat::RGBA16U;
230 case Tegra::Texture::ComponentType::FLOAT: 236 case Tegra::Texture::ComponentType::FLOAT:
231 return PixelFormat::RGBA16F; 237 return PixelFormat::RGBA16F;
238 default:
239 break;
232 } 240 }
233 LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); 241 break;
234 UNREACHABLE();
235 case Tegra::Texture::TextureFormat::BF10GF11RF11: 242 case Tegra::Texture::TextureFormat::BF10GF11RF11:
236 switch (component_type) { 243 switch (component_type) {
237 case Tegra::Texture::ComponentType::FLOAT: 244 case Tegra::Texture::ComponentType::FLOAT:
238 return PixelFormat::R11FG11FB10F; 245 return PixelFormat::R11FG11FB10F;
246 default:
247 break;
239 } 248 }
240 LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
241 UNREACHABLE();
242 case Tegra::Texture::TextureFormat::R32_G32_B32_A32: 249 case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
243 switch (component_type) { 250 switch (component_type) {
244 case Tegra::Texture::ComponentType::FLOAT: 251 case Tegra::Texture::ComponentType::FLOAT:
245 return PixelFormat::RGBA32F; 252 return PixelFormat::RGBA32F;
246 case Tegra::Texture::ComponentType::UINT: 253 case Tegra::Texture::ComponentType::UINT:
247 return PixelFormat::RGBA32UI; 254 return PixelFormat::RGBA32UI;
255 default:
256 break;
248 } 257 }
249 LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); 258 break;
250 UNREACHABLE();
251 case Tegra::Texture::TextureFormat::R32_G32: 259 case Tegra::Texture::TextureFormat::R32_G32:
252 switch (component_type) { 260 switch (component_type) {
253 case Tegra::Texture::ComponentType::FLOAT: 261 case Tegra::Texture::ComponentType::FLOAT:
254 return PixelFormat::RG32F; 262 return PixelFormat::RG32F;
255 case Tegra::Texture::ComponentType::UINT: 263 case Tegra::Texture::ComponentType::UINT:
256 return PixelFormat::RG32UI; 264 return PixelFormat::RG32UI;
265 default:
266 break;
257 } 267 }
258 LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); 268 break;
259 UNREACHABLE();
260 case Tegra::Texture::TextureFormat::R32_G32_B32: 269 case Tegra::Texture::TextureFormat::R32_G32_B32:
261 switch (component_type) { 270 switch (component_type) {
262 case Tegra::Texture::ComponentType::FLOAT: 271 case Tegra::Texture::ComponentType::FLOAT:
263 return PixelFormat::RGB32F; 272 return PixelFormat::RGB32F;
273 default:
274 break;
264 } 275 }
265 LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); 276 break;
266 UNREACHABLE();
267 case Tegra::Texture::TextureFormat::R16: 277 case Tegra::Texture::TextureFormat::R16:
268 switch (component_type) { 278 switch (component_type) {
269 case Tegra::Texture::ComponentType::FLOAT: 279 case Tegra::Texture::ComponentType::FLOAT:
@@ -276,18 +286,20 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
276 return PixelFormat::R16UI; 286 return PixelFormat::R16UI;
277 case Tegra::Texture::ComponentType::SINT: 287 case Tegra::Texture::ComponentType::SINT:
278 return PixelFormat::R16I; 288 return PixelFormat::R16I;
289 default:
290 break;
279 } 291 }
280 LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); 292 break;
281 UNREACHABLE();
282 case Tegra::Texture::TextureFormat::R32: 293 case Tegra::Texture::TextureFormat::R32:
283 switch (component_type) { 294 switch (component_type) {
284 case Tegra::Texture::ComponentType::FLOAT: 295 case Tegra::Texture::ComponentType::FLOAT:
285 return PixelFormat::R32F; 296 return PixelFormat::R32F;
286 case Tegra::Texture::ComponentType::UINT: 297 case Tegra::Texture::ComponentType::UINT:
287 return PixelFormat::R32UI; 298 return PixelFormat::R32UI;
299 default:
300 break;
288 } 301 }
289 LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); 302 break;
290 UNREACHABLE();
291 case Tegra::Texture::TextureFormat::ZF32: 303 case Tegra::Texture::TextureFormat::ZF32:
292 return PixelFormat::Z32F; 304 return PixelFormat::Z32F;
293 case Tegra::Texture::TextureFormat::Z16: 305 case Tegra::Texture::TextureFormat::Z16:
@@ -310,9 +322,10 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
310 return PixelFormat::DXN2UNORM; 322 return PixelFormat::DXN2UNORM;
311 case Tegra::Texture::ComponentType::SNORM: 323 case Tegra::Texture::ComponentType::SNORM:
312 return PixelFormat::DXN2SNORM; 324 return PixelFormat::DXN2SNORM;
325 default:
326 break;
313 } 327 }
314 LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); 328 break;
315 UNREACHABLE();
316 case Tegra::Texture::TextureFormat::BC7U: 329 case Tegra::Texture::TextureFormat::BC7U:
317 return is_srgb ? PixelFormat::BC7U_SRGB : PixelFormat::BC7U; 330 return is_srgb ? PixelFormat::BC7U_SRGB : PixelFormat::BC7U;
318 case Tegra::Texture::TextureFormat::BC6H_UF16: 331 case Tegra::Texture::TextureFormat::BC6H_UF16:
@@ -343,15 +356,17 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
343 return PixelFormat::RG16UI; 356 return PixelFormat::RG16UI;
344 case Tegra::Texture::ComponentType::SINT: 357 case Tegra::Texture::ComponentType::SINT:
345 return PixelFormat::RG16I; 358 return PixelFormat::RG16I;
359 default:
360 break;
346 } 361 }
347 LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); 362 break;
348 UNREACHABLE();
349 default: 363 default:
350 LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format), 364 break;
351 static_cast<u32>(component_type));
352 UNREACHABLE();
353 return PixelFormat::ABGR8U;
354 } 365 }
366 LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format),
367 static_cast<u32>(component_type));
368 UNREACHABLE();
369 return PixelFormat::ABGR8U;
355} 370}
356 371
357ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) { 372ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
@@ -513,8 +528,9 @@ bool IsFormatBCn(PixelFormat format) {
513 case PixelFormat::DXT45_SRGB: 528 case PixelFormat::DXT45_SRGB:
514 case PixelFormat::BC7U_SRGB: 529 case PixelFormat::BC7U_SRGB:
515 return true; 530 return true;
531 default:
532 return false;
516 } 533 }
517 return false;
518} 534}
519 535
520} // namespace VideoCore::Surface 536} // namespace VideoCore::Surface
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index b508d64e9..eafb6b73a 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -1616,6 +1616,7 @@ namespace Tegra::Texture::ASTC {
1616std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, 1616std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
1617 uint32_t depth, uint32_t block_width, uint32_t block_height) { 1617 uint32_t depth, uint32_t block_width, uint32_t block_height) {
1618 uint32_t blockIdx = 0; 1618 uint32_t blockIdx = 0;
1619 std::size_t depth_offset = 0;
1619 std::vector<uint8_t> outData(height * width * depth * 4); 1620 std::vector<uint8_t> outData(height * width * depth * 4);
1620 for (uint32_t k = 0; k < depth; k++) { 1621 for (uint32_t k = 0; k < depth; k++) {
1621 for (uint32_t j = 0; j < height; j += block_height) { 1622 for (uint32_t j = 0; j < height; j += block_height) {
@@ -1630,7 +1631,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
1630 uint32_t decompWidth = std::min(block_width, width - i); 1631 uint32_t decompWidth = std::min(block_width, width - i);
1631 uint32_t decompHeight = std::min(block_height, height - j); 1632 uint32_t decompHeight = std::min(block_height, height - j);
1632 1633
1633 uint8_t* outRow = outData.data() + (j * width + i) * 4; 1634 uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4;
1634 for (uint32_t jj = 0; jj < decompHeight; jj++) { 1635 for (uint32_t jj = 0; jj < decompHeight; jj++) {
1635 memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); 1636 memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
1636 } 1637 }
@@ -1638,6 +1639,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
1638 blockIdx++; 1639 blockIdx++;
1639 } 1640 }
1640 } 1641 }
1642 depth_offset += height * width * 4;
1641 } 1643 }
1642 1644
1643 return outData; 1645 return outData;
diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp
index c8b0a5ec0..5477f050c 100644
--- a/src/yuzu/compatdb.cpp
+++ b/src/yuzu/compatdb.cpp
@@ -58,7 +58,7 @@ void CompatDB::Submit() {
58 58
59 button(NextButton)->setEnabled(false); 59 button(NextButton)->setEnabled(false);
60 button(NextButton)->setText(tr("Submitting")); 60 button(NextButton)->setText(tr("Submitting"));
61 button(QWizard::CancelButton)->setVisible(false); 61 button(CancelButton)->setVisible(false);
62 62
63 testcase_watcher.setFuture(QtConcurrent::run( 63 testcase_watcher.setFuture(QtConcurrent::run(
64 [] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); })); 64 [] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
@@ -74,12 +74,12 @@ void CompatDB::OnTestcaseSubmitted() {
74 tr("An error occured while sending the Testcase")); 74 tr("An error occured while sending the Testcase"));
75 button(NextButton)->setEnabled(true); 75 button(NextButton)->setEnabled(true);
76 button(NextButton)->setText(tr("Next")); 76 button(NextButton)->setText(tr("Next"));
77 button(QWizard::CancelButton)->setVisible(true); 77 button(CancelButton)->setVisible(true);
78 } else { 78 } else {
79 next(); 79 next();
80 // older versions of QT don't support the "NoCancelButtonOnLastPage" option, this is a 80 // older versions of QT don't support the "NoCancelButtonOnLastPage" option, this is a
81 // workaround 81 // workaround
82 button(QWizard::CancelButton)->setVisible(false); 82 button(CancelButton)->setVisible(false);
83 } 83 }
84} 84}
85 85
diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp
index a5218b051..32c05b797 100644
--- a/src/yuzu/configuration/configure_dialog.cpp
+++ b/src/yuzu/configuration/configure_dialog.cpp
@@ -17,8 +17,12 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry)
17 ui->hotkeysTab->Populate(registry); 17 ui->hotkeysTab->Populate(registry);
18 this->setConfiguration(); 18 this->setConfiguration();
19 this->PopulateSelectionList(); 19 this->PopulateSelectionList();
20
21 setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint);
22
20 connect(ui->selectorList, &QListWidget::itemSelectionChanged, this, 23 connect(ui->selectorList, &QListWidget::itemSelectionChanged, this,
21 &ConfigureDialog::UpdateVisibleTabs); 24 &ConfigureDialog::UpdateVisibleTabs);
25
22 adjustSize(); 26 adjustSize();
23 ui->selectorList->setCurrentRow(0); 27 ui->selectorList->setCurrentRow(0);
24 28
diff --git a/src/yuzu/hotkeys.h b/src/yuzu/hotkeys.h
index 4f526dc7e..248fadaf3 100644
--- a/src/yuzu/hotkeys.h
+++ b/src/yuzu/hotkeys.h
@@ -67,8 +67,6 @@ public:
67 67
68private: 68private:
69 struct Hotkey { 69 struct Hotkey {
70 Hotkey() : shortcut(nullptr), context(Qt::WindowShortcut) {}
71
72 QKeySequence keyseq; 70 QKeySequence keyseq;
73 QShortcut* shortcut = nullptr; 71 QShortcut* shortcut = nullptr;
74 Qt::ShortcutContext context = Qt::WindowShortcut; 72 Qt::ShortcutContext context = Qt::WindowShortcut;