diff options
191 files changed, 6972 insertions, 5184 deletions
diff --git a/CMakeModules/CopyYuzuQt5Deps.cmake b/CMakeModules/CopyYuzuQt5Deps.cmake index 1e9810bba..2598b9b60 100644 --- a/CMakeModules/CopyYuzuQt5Deps.cmake +++ b/CMakeModules/CopyYuzuQt5Deps.cmake | |||
| @@ -6,9 +6,9 @@ function(copy_yuzu_Qt5_deps target_dir) | |||
| 6 | set(Qt5_STYLES_DIR "${Qt5_DIR}/../../../plugins/styles/") | 6 | set(Qt5_STYLES_DIR "${Qt5_DIR}/../../../plugins/styles/") |
| 7 | set(Qt5_IMAGEFORMATS_DIR "${Qt5_DIR}/../../../plugins/imageformats/") | 7 | set(Qt5_IMAGEFORMATS_DIR "${Qt5_DIR}/../../../plugins/imageformats/") |
| 8 | set(Qt5_RESOURCES_DIR "${Qt5_DIR}/../../../resources/") | 8 | set(Qt5_RESOURCES_DIR "${Qt5_DIR}/../../../resources/") |
| 9 | set(PLATFORMS ${DLL_DEST}platforms/) | 9 | set(PLATFORMS ${DLL_DEST}plugins/platforms/) |
| 10 | set(STYLES ${DLL_DEST}styles/) | 10 | set(STYLES ${DLL_DEST}plugins/styles/) |
| 11 | set(IMAGEFORMATS ${DLL_DEST}imageformats/) | 11 | set(IMAGEFORMATS ${DLL_DEST}plugins/imageformats/) |
| 12 | windows_copy_files(${target_dir} ${Qt5_DLL_DIR} ${DLL_DEST} | 12 | windows_copy_files(${target_dir} ${Qt5_DLL_DIR} ${DLL_DEST} |
| 13 | icudt*.dll | 13 | icudt*.dll |
| 14 | icuin*.dll | 14 | icuin*.dll |
| @@ -42,11 +42,15 @@ function(copy_yuzu_Qt5_deps target_dir) | |||
| 42 | icudtl.dat | 42 | icudtl.dat |
| 43 | ) | 43 | ) |
| 44 | endif () | 44 | endif () |
| 45 | |||
| 46 | windows_copy_files(yuzu ${Qt5_PLATFORMS_DIR} ${PLATFORMS} qwindows$<$<CONFIG:Debug>:d>.*) | 45 | windows_copy_files(yuzu ${Qt5_PLATFORMS_DIR} ${PLATFORMS} qwindows$<$<CONFIG:Debug>:d>.*) |
| 47 | windows_copy_files(yuzu ${Qt5_STYLES_DIR} ${STYLES} qwindowsvistastyle$<$<CONFIG:Debug>:d>.*) | 46 | windows_copy_files(yuzu ${Qt5_STYLES_DIR} ${STYLES} qwindowsvistastyle$<$<CONFIG:Debug>:d>.*) |
| 48 | windows_copy_files(yuzu ${Qt5_IMAGEFORMATS_DIR} ${IMAGEFORMATS} | 47 | windows_copy_files(yuzu ${Qt5_IMAGEFORMATS_DIR} ${IMAGEFORMATS} |
| 49 | qjpeg$<$<CONFIG:Debug>:d>.* | 48 | qjpeg$<$<CONFIG:Debug>:d>.* |
| 50 | qgif$<$<CONFIG:Debug>:d>.* | 49 | qgif$<$<CONFIG:Debug>:d>.* |
| 51 | ) | 50 | ) |
| 51 | # Create an empty qt.conf file. Qt will detect that this file exists, and use the folder that it's in as the root folder. | ||
| 52 | # This way it'll look for plugins in the root/plugins/ folder | ||
| 53 | add_custom_command(TARGET yuzu POST_BUILD | ||
| 54 | COMMAND ${CMAKE_COMMAND} -E touch ${DLL_DEST}qt.conf | ||
| 55 | ) | ||
| 52 | endfunction(copy_yuzu_Qt5_deps) | 56 | endfunction(copy_yuzu_Qt5_deps) |
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index fa7ae835f..83e4e9df2 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake | |||
| @@ -57,8 +57,6 @@ set(HASH_FILES | |||
| 57 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" | 57 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" |
| 58 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" | 58 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" |
| 59 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" | 59 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" |
| 60 | "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp" | ||
| 61 | "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h" | ||
| 62 | "${VIDEO_CORE}/shader/decode/arithmetic.cpp" | 60 | "${VIDEO_CORE}/shader/decode/arithmetic.cpp" |
| 63 | "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" | 61 | "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" |
| 64 | "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" | 62 | "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" |
| @@ -91,8 +89,6 @@ set(HASH_FILES | |||
| 91 | "${VIDEO_CORE}/shader/ast.h" | 89 | "${VIDEO_CORE}/shader/ast.h" |
| 92 | "${VIDEO_CORE}/shader/compiler_settings.cpp" | 90 | "${VIDEO_CORE}/shader/compiler_settings.cpp" |
| 93 | "${VIDEO_CORE}/shader/compiler_settings.h" | 91 | "${VIDEO_CORE}/shader/compiler_settings.h" |
| 94 | "${VIDEO_CORE}/shader/const_buffer_locker.cpp" | ||
| 95 | "${VIDEO_CORE}/shader/const_buffer_locker.h" | ||
| 96 | "${VIDEO_CORE}/shader/control_flow.cpp" | 92 | "${VIDEO_CORE}/shader/control_flow.cpp" |
| 97 | "${VIDEO_CORE}/shader/control_flow.h" | 93 | "${VIDEO_CORE}/shader/control_flow.h" |
| 98 | "${VIDEO_CORE}/shader/decode.cpp" | 94 | "${VIDEO_CORE}/shader/decode.cpp" |
| @@ -101,9 +97,13 @@ set(HASH_FILES | |||
| 101 | "${VIDEO_CORE}/shader/node.h" | 97 | "${VIDEO_CORE}/shader/node.h" |
| 102 | "${VIDEO_CORE}/shader/node_helper.cpp" | 98 | "${VIDEO_CORE}/shader/node_helper.cpp" |
| 103 | "${VIDEO_CORE}/shader/node_helper.h" | 99 | "${VIDEO_CORE}/shader/node_helper.h" |
| 100 | "${VIDEO_CORE}/shader/registry.cpp" | ||
| 101 | "${VIDEO_CORE}/shader/registry.h" | ||
| 104 | "${VIDEO_CORE}/shader/shader_ir.cpp" | 102 | "${VIDEO_CORE}/shader/shader_ir.cpp" |
| 105 | "${VIDEO_CORE}/shader/shader_ir.h" | 103 | "${VIDEO_CORE}/shader/shader_ir.h" |
| 106 | "${VIDEO_CORE}/shader/track.cpp" | 104 | "${VIDEO_CORE}/shader/track.cpp" |
| 105 | "${VIDEO_CORE}/shader/transform_feedback.cpp" | ||
| 106 | "${VIDEO_CORE}/shader/transform_feedback.h" | ||
| 107 | ) | 107 | ) |
| 108 | set(COMBINED "") | 108 | set(COMBINED "") |
| 109 | foreach (F IN LISTS HASH_FILES) | 109 | foreach (F IN LISTS HASH_FILES) |
| @@ -1,7 +1,8 @@ | |||
| 1 | yuzu emulator | 1 | yuzu emulator |
| 2 | ============= | 2 | ============= |
| 3 | [Travis CI build status](https://travis-ci.org/yuzu-emu/yuzu) | 3 | [Travis CI build status](https://travis-ci.com/yuzu-emu/yuzu) |
| 4 | [Azure Pipelines build status](https://dev.azure.com/yuzu-emu/yuzu/) | 4 | [Azure Pipelines build status](https://dev.azure.com/yuzu-emu/yuzu/) |
| 5 | [Discord](https://discord.gg/XQV6dn9) | ||
| 5 | 6 | ||
| 6 | yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/). | 7 | yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/). |
| 7 | 8 | ||
| @@ -21,7 +22,7 @@ For development discussion, please join us on [Discord](https://discord.gg/XQV6d | |||
| 21 | 22 | ||
| 22 | Most of the development happens on GitHub. It's also where [our central repository](https://github.com/yuzu-emu/yuzu) is hosted. | 23 | Most of the development happens on GitHub. It's also where [our central repository](https://github.com/yuzu-emu/yuzu) is hosted. |
| 23 | 24 | ||
| 24 | If you want to contribute please take a look at the [Contributor's Guide](CONTRIBUTING.md) and [Developer Information](https://github.com/yuzu-emu/yuzu/wiki/Developer-Information). You should as well contact any of the developers on Discord in order to know about the current state of the emulator. | 25 | If you want to contribute please take a look at the [Contributor's Guide](https://github.com/yuzu-emu/yuzu/wiki/Contributing) and [Developer Information](https://github.com/yuzu-emu/yuzu/wiki/Developer-Information). You should also contact any of the developers on Discord in order to know about the current state of the emulator. |
| 25 | 26 | ||
| 26 | ### Building | 27 | ### Building |
| 27 | 28 | ||
diff --git a/externals/microprofile/microprofile.h b/externals/microprofile/microprofile.h index cdb312b87..9d830f7bf 100644 --- a/externals/microprofile/microprofile.h +++ b/externals/microprofile/microprofile.h | |||
| @@ -243,6 +243,7 @@ typedef uint32_t ThreadIdType; | |||
| 243 | #define MICROPROFILE_DEFINE_GPU(var, name, color) MicroProfileToken g_mp_##var = MicroProfileGetToken("GPU", name, color, MicroProfileTokenTypeGpu) | 243 | #define MICROPROFILE_DEFINE_GPU(var, name, color) MicroProfileToken g_mp_##var = MicroProfileGetToken("GPU", name, color, MicroProfileTokenTypeGpu) |
| 244 | #define MICROPROFILE_TOKEN_PASTE0(a, b) a ## b | 244 | #define MICROPROFILE_TOKEN_PASTE0(a, b) a ## b |
| 245 | #define MICROPROFILE_TOKEN_PASTE(a, b) MICROPROFILE_TOKEN_PASTE0(a,b) | 245 | #define MICROPROFILE_TOKEN_PASTE(a, b) MICROPROFILE_TOKEN_PASTE0(a,b) |
| 246 | #define MICROPROFILE_TOKEN(var) g_mp_##var | ||
| 246 | #define MICROPROFILE_SCOPE(var) MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(g_mp_##var) | 247 | #define MICROPROFILE_SCOPE(var) MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(g_mp_##var) |
| 247 | #define MICROPROFILE_SCOPE_TOKEN(token) MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(token) | 248 | #define MICROPROFILE_SCOPE_TOKEN(token) MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(token) |
| 248 | #define MICROPROFILE_SCOPEI(group, name, color) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__) = MicroProfileGetToken(group, name, color, MicroProfileTokenTypeCpu); MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo,__LINE__)( MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__)) | 249 | #define MICROPROFILE_SCOPEI(group, name, color) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__) = MicroProfileGetToken(group, name, color, MicroProfileTokenTypeCpu); MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo,__LINE__)( MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__)) |
| @@ -827,7 +828,7 @@ inline MicroProfileLogEntry MicroProfileMakeLogIndex(uint64_t nBegin, MicroProfi | |||
| 827 | MicroProfileLogEntry Entry = (nBegin<<62) | ((0x3fff&nToken)<<48) | (MP_LOG_TICK_MASK&nTick); | 828 | MicroProfileLogEntry Entry = (nBegin<<62) | ((0x3fff&nToken)<<48) | (MP_LOG_TICK_MASK&nTick); |
| 828 | int t = MicroProfileLogType(Entry); | 829 | int t = MicroProfileLogType(Entry); |
| 829 | uint64_t nTimerIndex = MicroProfileLogTimerIndex(Entry); | 830 | uint64_t nTimerIndex = MicroProfileLogTimerIndex(Entry); |
| 830 | MP_ASSERT(t == nBegin); | 831 | MP_ASSERT((uint64_t)t == nBegin); |
| 831 | MP_ASSERT(nTimerIndex == (nToken&0x3fff)); | 832 | MP_ASSERT(nTimerIndex == (nToken&0x3fff)); |
| 832 | return Entry; | 833 | return Entry; |
| 833 | 834 | ||
| @@ -1555,10 +1556,10 @@ void MicroProfileFlip() | |||
| 1555 | 1556 | ||
| 1556 | pFramePut->nFrameStartCpu = MP_TICK(); | 1557 | pFramePut->nFrameStartCpu = MP_TICK(); |
| 1557 | pFramePut->nFrameStartGpu = (uint32_t)MicroProfileGpuInsertTimeStamp(); | 1558 | pFramePut->nFrameStartGpu = (uint32_t)MicroProfileGpuInsertTimeStamp(); |
| 1558 | if(pFrameNext->nFrameStartGpu != (uint64_t)-1) | 1559 | if(pFrameNext->nFrameStartGpu != -1) |
| 1559 | pFrameNext->nFrameStartGpu = MicroProfileGpuGetTimeStamp((uint32_t)pFrameNext->nFrameStartGpu); | 1560 | pFrameNext->nFrameStartGpu = MicroProfileGpuGetTimeStamp((uint32_t)pFrameNext->nFrameStartGpu); |
| 1560 | 1561 | ||
| 1561 | if(pFrameCurrent->nFrameStartGpu == (uint64_t)-1) | 1562 | if(pFrameCurrent->nFrameStartGpu == -1) |
| 1562 | pFrameCurrent->nFrameStartGpu = pFrameNext->nFrameStartGpu + 1; | 1563 | pFrameCurrent->nFrameStartGpu = pFrameNext->nFrameStartGpu + 1; |
| 1563 | 1564 | ||
| 1564 | uint64_t nFrameStartCpu = pFrameCurrent->nFrameStartCpu; | 1565 | uint64_t nFrameStartCpu = pFrameCurrent->nFrameStartCpu; |
diff --git a/src/audio_core/algorithm/interpolate.cpp b/src/audio_core/algorithm/interpolate.cpp index a58f24169..49ab9d3e1 100644 --- a/src/audio_core/algorithm/interpolate.cpp +++ b/src/audio_core/algorithm/interpolate.cpp | |||
| @@ -8,13 +8,14 @@ | |||
| 8 | #include <climits> | 8 | #include <climits> |
| 9 | #include <cmath> | 9 | #include <cmath> |
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | |||
| 11 | #include "audio_core/algorithm/interpolate.h" | 12 | #include "audio_core/algorithm/interpolate.h" |
| 12 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 13 | #include "common/logging/log.h" | 14 | #include "common/logging/log.h" |
| 14 | 15 | ||
| 15 | namespace AudioCore { | 16 | namespace AudioCore { |
| 16 | 17 | ||
| 17 | constexpr std::array<s16, 512> curve_lut0 = { | 18 | constexpr std::array<s16, 512> curve_lut0{ |
| 18 | 6600, 19426, 6722, 3, 6479, 19424, 6845, 9, 6359, 19419, 6968, 15, 6239, | 19 | 6600, 19426, 6722, 3, 6479, 19424, 6845, 9, 6359, 19419, 6968, 15, 6239, |
| 19 | 19412, 7093, 22, 6121, 19403, 7219, 28, 6004, 19391, 7345, 34, 5888, 19377, | 20 | 19412, 7093, 22, 6121, 19403, 7219, 28, 6004, 19391, 7345, 34, 5888, 19377, |
| 20 | 7472, 41, 5773, 19361, 7600, 48, 5659, 19342, 7728, 55, 5546, 19321, 7857, | 21 | 7472, 41, 5773, 19361, 7600, 48, 5659, 19342, 7728, 55, 5546, 19321, 7857, |
| @@ -56,7 +57,7 @@ constexpr std::array<s16, 512> curve_lut0 = { | |||
| 56 | 19403, 6121, 22, 7093, 19412, 6239, 15, 6968, 19419, 6359, 9, 6845, 19424, | 57 | 19403, 6121, 22, 7093, 19412, 6239, 15, 6968, 19419, 6359, 9, 6845, 19424, |
| 57 | 6479, 3, 6722, 19426, 6600}; | 58 | 6479, 3, 6722, 19426, 6600}; |
| 58 | 59 | ||
| 59 | constexpr std::array<s16, 512> curve_lut1 = { | 60 | constexpr std::array<s16, 512> curve_lut1{ |
| 60 | -68, 32639, 69, -5, -200, 32630, 212, -15, -328, 32613, 359, -26, -450, | 61 | -68, 32639, 69, -5, -200, 32630, 212, -15, -328, 32613, 359, -26, -450, |
| 61 | 32586, 512, -36, -568, 32551, 669, -47, -680, 32507, 832, -58, -788, 32454, | 62 | 32586, 512, -36, -568, 32551, 669, -47, -680, 32507, 832, -58, -788, 32454, |
| 62 | 1000, -69, -891, 32393, 1174, -80, -990, 32323, 1352, -92, -1084, 32244, 1536, | 63 | 1000, -69, -891, 32393, 1174, -80, -990, 32323, 1352, -92, -1084, 32244, 1536, |
| @@ -98,7 +99,7 @@ constexpr std::array<s16, 512> curve_lut1 = { | |||
| 98 | 32551, -568, -36, 512, 32586, -450, -26, 359, 32613, -328, -15, 212, 32630, | 99 | 32551, -568, -36, 512, 32586, -450, -26, 359, 32613, -328, -15, 212, 32630, |
| 99 | -200, -5, 69, 32639, -68}; | 100 | -200, -5, 69, 32639, -68}; |
| 100 | 101 | ||
| 101 | constexpr std::array<s16, 512> curve_lut2 = { | 102 | constexpr std::array<s16, 512> curve_lut2{ |
| 102 | 3195, 26287, 3329, -32, 3064, 26281, 3467, -34, 2936, 26270, 3608, -38, 2811, | 103 | 3195, 26287, 3329, -32, 3064, 26281, 3467, -34, 2936, 26270, 3608, -38, 2811, |
| 103 | 26253, 3751, -42, 2688, 26230, 3897, -46, 2568, 26202, 4046, -50, 2451, 26169, | 104 | 26253, 3751, -42, 2688, 26230, 3897, -46, 2568, 26202, 4046, -50, 2451, 26169, |
| 104 | 4199, -54, 2338, 26130, 4354, -58, 2227, 26085, 4512, -63, 2120, 26035, 4673, | 105 | 4199, -54, 2338, 26130, 4354, -58, 2227, 26085, 4512, -63, 2120, 26035, 4673, |
| @@ -146,10 +147,10 @@ std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, | |||
| 146 | 147 | ||
| 147 | if (ratio <= 0) { | 148 | if (ratio <= 0) { |
| 148 | LOG_CRITICAL(Audio, "Nonsensical interpolation ratio {}", ratio); | 149 | LOG_CRITICAL(Audio, "Nonsensical interpolation ratio {}", ratio); |
| 149 | ratio = 1.0; | 150 | return input; |
| 150 | } | 151 | } |
| 151 | 152 | ||
| 152 | const int step = static_cast<int>(ratio * 0x8000); | 153 | const s32 step{static_cast<s32>(ratio * 0x8000)}; |
| 153 | const std::array<s16, 512>& lut = [step] { | 154 | const std::array<s16, 512>& lut = [step] { |
| 154 | if (step > 0xaaaa) { | 155 | if (step > 0xaaaa) { |
| 155 | return curve_lut0; | 156 | return curve_lut0; |
| @@ -160,28 +161,37 @@ std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, | |||
| 160 | return curve_lut2; | 161 | return curve_lut2; |
| 161 | }(); | 162 | }(); |
| 162 | 163 | ||
| 163 | std::vector<s16> output(static_cast<std::size_t>(input.size() / ratio)); | 164 | const std::size_t num_frames{input.size() / 2}; |
| 164 | int in_offset = 0; | 165 | |
| 165 | for (std::size_t out_offset = 0; out_offset < output.size(); out_offset += 2) { | 166 | std::vector<s16> output; |
| 166 | const int lut_index = (state.fraction >> 8) * 4; | 167 | output.reserve(static_cast<std::size_t>(input.size() / ratio + InterpolationState::taps)); |
| 167 | 168 | ||
| 168 | const int l = input[(in_offset + 0) * 2 + 0] * lut[lut_index + 0] + | 169 | for (std::size_t frame{}; frame < num_frames; ++frame) { |
| 169 | input[(in_offset + 1) * 2 + 0] * lut[lut_index + 1] + | 170 | const std::size_t lut_index{(state.fraction >> 8) * InterpolationState::taps}; |
| 170 | input[(in_offset + 2) * 2 + 0] * lut[lut_index + 2] + | ||
| 171 | input[(in_offset + 3) * 2 + 0] * lut[lut_index + 3]; | ||
| 172 | 171 | ||
| 173 | const int r = input[(in_offset + 0) * 2 + 1] * lut[lut_index + 0] + | 172 | std::rotate(state.history.begin(), state.history.end() - 1, state.history.end()); |
| 174 | input[(in_offset + 1) * 2 + 1] * lut[lut_index + 1] + | 173 | state.history[0][0] = input[frame * 2 + 0]; |
| 175 | input[(in_offset + 2) * 2 + 1] * lut[lut_index + 2] + | 174 | state.history[0][1] = input[frame * 2 + 1]; |
| 176 | input[(in_offset + 3) * 2 + 1] * lut[lut_index + 3]; | ||
| 177 | 175 | ||
| 178 | const int new_offset = state.fraction + step; | 176 | while (state.position <= 1.0) { |
| 177 | const s32 left{state.history[0][0] * lut[lut_index + 0] + | ||
| 178 | state.history[1][0] * lut[lut_index + 1] + | ||
| 179 | state.history[2][0] * lut[lut_index + 2] + | ||
| 180 | state.history[3][0] * lut[lut_index + 3]}; | ||
| 181 | const s32 right{state.history[0][1] * lut[lut_index + 0] + | ||
| 182 | state.history[1][1] * lut[lut_index + 1] + | ||
| 183 | state.history[2][1] * lut[lut_index + 2] + | ||
| 184 | state.history[3][1] * lut[lut_index + 3]}; | ||
| 185 | const s32 new_offset{state.fraction + step}; | ||
| 179 | 186 | ||
| 180 | in_offset += new_offset >> 15; | 187 | state.fraction = new_offset & 0x7fff; |
| 181 | state.fraction = new_offset & 0x7fff; | ||
| 182 | 188 | ||
| 183 | output[out_offset + 0] = static_cast<s16>(std::clamp(l >> 15, SHRT_MIN, SHRT_MAX)); | 189 | output.emplace_back(static_cast<s16>(std::clamp(left >> 15, SHRT_MIN, SHRT_MAX))); |
| 184 | output[out_offset + 1] = static_cast<s16>(std::clamp(r >> 15, SHRT_MIN, SHRT_MAX)); | 190 | output.emplace_back(static_cast<s16>(std::clamp(right >> 15, SHRT_MIN, SHRT_MAX))); |
| 191 | |||
| 192 | state.position += ratio; | ||
| 193 | } | ||
| 194 | state.position -= 1.0; | ||
| 185 | } | 195 | } |
| 186 | 196 | ||
| 187 | return output; | 197 | return output; |
diff --git a/src/audio_core/algorithm/interpolate.h b/src/audio_core/algorithm/interpolate.h index 1b9831a75..ab1a31754 100644 --- a/src/audio_core/algorithm/interpolate.h +++ b/src/audio_core/algorithm/interpolate.h | |||
| @@ -6,12 +6,17 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | |||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 10 | 11 | ||
| 11 | namespace AudioCore { | 12 | namespace AudioCore { |
| 12 | 13 | ||
| 13 | struct InterpolationState { | 14 | struct InterpolationState { |
| 14 | int fraction = 0; | 15 | static constexpr std::size_t taps{4}; |
| 16 | static constexpr std::size_t history_size{taps * 2 - 1}; | ||
| 17 | std::array<std::array<s16, 2>, history_size> history{}; | ||
| 18 | double position{}; | ||
| 19 | s32 fraction{}; | ||
| 15 | }; | 20 | }; |
| 16 | 21 | ||
| 17 | /// Interpolates input signal to produce output signal. | 22 | /// Interpolates input signal to produce output signal. |
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp index 7047ed9cf..c4e0e30fe 100644 --- a/src/audio_core/cubeb_sink.cpp +++ b/src/audio_core/cubeb_sink.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "audio_core/cubeb_sink.h" | 8 | #include "audio_core/cubeb_sink.h" |
| 9 | #include "audio_core/stream.h" | 9 | #include "audio_core/stream.h" |
| 10 | #include "audio_core/time_stretch.h" | 10 | #include "audio_core/time_stretch.h" |
| 11 | #include "common/assert.h" | ||
| 11 | #include "common/logging/log.h" | 12 | #include "common/logging/log.h" |
| 12 | #include "common/ring_buffer.h" | 13 | #include "common/ring_buffer.h" |
| 13 | #include "core/settings.h" | 14 | #include "core/settings.h" |
| @@ -65,12 +66,25 @@ public: | |||
| 65 | void EnqueueSamples(u32 source_num_channels, const std::vector<s16>& samples) override { | 66 | void EnqueueSamples(u32 source_num_channels, const std::vector<s16>& samples) override { |
| 66 | if (source_num_channels > num_channels) { | 67 | if (source_num_channels > num_channels) { |
| 67 | // Downmix 6 channels to 2 | 68 | // Downmix 6 channels to 2 |
| 69 | ASSERT_MSG(source_num_channels == 6, "Channel count must be 6"); | ||
| 70 | |||
| 68 | std::vector<s16> buf; | 71 | std::vector<s16> buf; |
| 69 | buf.reserve(samples.size() * num_channels / source_num_channels); | 72 | buf.reserve(samples.size() * num_channels / source_num_channels); |
| 70 | for (std::size_t i = 0; i < samples.size(); i += source_num_channels) { | 73 | for (std::size_t i = 0; i < samples.size(); i += source_num_channels) { |
| 71 | for (std::size_t ch = 0; ch < num_channels; ch++) { | 74 | // Downmixing implementation taken from the ATSC standard |
| 72 | buf.push_back(samples[i + ch]); | 75 | const s16 left{samples[i + 0]}; |
| 73 | } | 76 | const s16 right{samples[i + 1]}; |
| 77 | const s16 center{samples[i + 2]}; | ||
| 78 | const s16 surround_left{samples[i + 4]}; | ||
| 79 | const s16 surround_right{samples[i + 5]}; | ||
| 80 | // Not used in the ATSC reference implementation | ||
| 81 | [[maybe_unused]] const s16 low_frequency_effects { samples[i + 3] }; | ||
| 82 | |||
| 83 | constexpr s32 clev{707}; // center mixing level coefficient | ||
| 84 | constexpr s32 slev{707}; // surround mixing level coefficient | ||
| 85 | |||
| 86 | buf.push_back(left + (clev * center / 1000) + (slev * surround_left / 1000)); | ||
| 87 | buf.push_back(right + (clev * center / 1000) + (slev * surround_right / 1000)); | ||
| 74 | } | 88 | } |
| 75 | queue.Push(buf); | 89 | queue.Push(buf); |
| 76 | return; | 90 | return; |
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 9afc6105d..fbebed715 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -38,8 +38,6 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 38 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" | 38 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" |
| 39 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" | 39 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" |
| 40 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" | 40 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" |
| 41 | "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp" | ||
| 42 | "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h" | ||
| 43 | "${VIDEO_CORE}/shader/decode/arithmetic.cpp" | 41 | "${VIDEO_CORE}/shader/decode/arithmetic.cpp" |
| 44 | "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" | 42 | "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" |
| 45 | "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" | 43 | "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" |
| @@ -72,8 +70,6 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 72 | "${VIDEO_CORE}/shader/ast.h" | 70 | "${VIDEO_CORE}/shader/ast.h" |
| 73 | "${VIDEO_CORE}/shader/compiler_settings.cpp" | 71 | "${VIDEO_CORE}/shader/compiler_settings.cpp" |
| 74 | "${VIDEO_CORE}/shader/compiler_settings.h" | 72 | "${VIDEO_CORE}/shader/compiler_settings.h" |
| 75 | "${VIDEO_CORE}/shader/const_buffer_locker.cpp" | ||
| 76 | "${VIDEO_CORE}/shader/const_buffer_locker.h" | ||
| 77 | "${VIDEO_CORE}/shader/control_flow.cpp" | 73 | "${VIDEO_CORE}/shader/control_flow.cpp" |
| 78 | "${VIDEO_CORE}/shader/control_flow.h" | 74 | "${VIDEO_CORE}/shader/control_flow.h" |
| 79 | "${VIDEO_CORE}/shader/decode.cpp" | 75 | "${VIDEO_CORE}/shader/decode.cpp" |
| @@ -82,9 +78,13 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 82 | "${VIDEO_CORE}/shader/node.h" | 78 | "${VIDEO_CORE}/shader/node.h" |
| 83 | "${VIDEO_CORE}/shader/node_helper.cpp" | 79 | "${VIDEO_CORE}/shader/node_helper.cpp" |
| 84 | "${VIDEO_CORE}/shader/node_helper.h" | 80 | "${VIDEO_CORE}/shader/node_helper.h" |
| 81 | "${VIDEO_CORE}/shader/registry.cpp" | ||
| 82 | "${VIDEO_CORE}/shader/registry.h" | ||
| 85 | "${VIDEO_CORE}/shader/shader_ir.cpp" | 83 | "${VIDEO_CORE}/shader/shader_ir.cpp" |
| 86 | "${VIDEO_CORE}/shader/shader_ir.h" | 84 | "${VIDEO_CORE}/shader/shader_ir.h" |
| 87 | "${VIDEO_CORE}/shader/track.cpp" | 85 | "${VIDEO_CORE}/shader/track.cpp" |
| 86 | "${VIDEO_CORE}/shader/transform_feedback.cpp" | ||
| 87 | "${VIDEO_CORE}/shader/transform_feedback.h" | ||
| 88 | # and also check that the scm_rev files haven't changed | 88 | # and also check that the scm_rev files haven't changed |
| 89 | "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" | 89 | "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" |
| 90 | "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" | 90 | "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" |
diff --git a/src/common/math_util.h b/src/common/math_util.h index d6c35ee89..83ef0201f 100644 --- a/src/common/math_util.h +++ b/src/common/math_util.h | |||
| @@ -24,17 +24,29 @@ struct Rectangle { | |||
| 24 | : left(left), top(top), right(right), bottom(bottom) {} | 24 | : left(left), top(top), right(right), bottom(bottom) {} |
| 25 | 25 | ||
| 26 | T GetWidth() const { | 26 | T GetWidth() const { |
| 27 | return std::abs(static_cast<std::make_signed_t<T>>(right - left)); | 27 | if constexpr (std::is_floating_point_v<T>) { |
| 28 | return std::abs(right - left); | ||
| 29 | } else { | ||
| 30 | return std::abs(static_cast<std::make_signed_t<T>>(right - left)); | ||
| 31 | } | ||
| 28 | } | 32 | } |
| 33 | |||
| 29 | T GetHeight() const { | 34 | T GetHeight() const { |
| 30 | return std::abs(static_cast<std::make_signed_t<T>>(bottom - top)); | 35 | if constexpr (std::is_floating_point_v<T>) { |
| 36 | return std::abs(bottom - top); | ||
| 37 | } else { | ||
| 38 | return std::abs(static_cast<std::make_signed_t<T>>(bottom - top)); | ||
| 39 | } | ||
| 31 | } | 40 | } |
| 41 | |||
| 32 | Rectangle<T> TranslateX(const T x) const { | 42 | Rectangle<T> TranslateX(const T x) const { |
| 33 | return Rectangle{left + x, top, right + x, bottom}; | 43 | return Rectangle{left + x, top, right + x, bottom}; |
| 34 | } | 44 | } |
| 45 | |||
| 35 | Rectangle<T> TranslateY(const T y) const { | 46 | Rectangle<T> TranslateY(const T y) const { |
| 36 | return Rectangle{left, top + y, right, bottom + y}; | 47 | return Rectangle{left, top + y, right, bottom + y}; |
| 37 | } | 48 | } |
| 49 | |||
| 38 | Rectangle<T> Scale(const float s) const { | 50 | Rectangle<T> Scale(const float s) const { |
| 39 | return Rectangle{left, top, static_cast<T>(left + GetWidth() * s), | 51 | return Rectangle{left, top, static_cast<T>(left + GetWidth() * s), |
| 40 | static_cast<T>(top + GetHeight() * s)}; | 52 | static_cast<T>(top + GetHeight() * s)}; |
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp index 69b7abc54..566b57b62 100644 --- a/src/common/page_table.cpp +++ b/src/common/page_table.cpp | |||
| @@ -16,7 +16,6 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) { | |||
| 16 | 16 | ||
| 17 | pointers.resize(num_page_table_entries); | 17 | pointers.resize(num_page_table_entries); |
| 18 | attributes.resize(num_page_table_entries); | 18 | attributes.resize(num_page_table_entries); |
| 19 | backing_addr.resize(num_page_table_entries); | ||
| 20 | 19 | ||
| 21 | // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the | 20 | // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the |
| 22 | // vector size is subsequently decreased (via resize), the vector might not automatically | 21 | // vector size is subsequently decreased (via resize), the vector might not automatically |
| @@ -25,6 +24,17 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) { | |||
| 25 | 24 | ||
| 26 | pointers.shrink_to_fit(); | 25 | pointers.shrink_to_fit(); |
| 27 | attributes.shrink_to_fit(); | 26 | attributes.shrink_to_fit(); |
| 27 | } | ||
| 28 | |||
| 29 | BackingPageTable::BackingPageTable(std::size_t page_size_in_bits) : PageTable{page_size_in_bits} {} | ||
| 30 | |||
| 31 | BackingPageTable::~BackingPageTable() = default; | ||
| 32 | |||
| 33 | void BackingPageTable::Resize(std::size_t address_space_width_in_bits) { | ||
| 34 | PageTable::Resize(address_space_width_in_bits); | ||
| 35 | const std::size_t num_page_table_entries = 1ULL | ||
| 36 | << (address_space_width_in_bits - page_size_in_bits); | ||
| 37 | backing_addr.resize(num_page_table_entries); | ||
| 28 | backing_addr.shrink_to_fit(); | 38 | backing_addr.shrink_to_fit(); |
| 29 | } | 39 | } |
| 30 | 40 | ||
diff --git a/src/common/page_table.h b/src/common/page_table.h index 8b8ff0bb8..dbc272ab7 100644 --- a/src/common/page_table.h +++ b/src/common/page_table.h | |||
| @@ -76,9 +76,20 @@ struct PageTable { | |||
| 76 | */ | 76 | */ |
| 77 | std::vector<PageType> attributes; | 77 | std::vector<PageType> attributes; |
| 78 | 78 | ||
| 79 | std::vector<u64> backing_addr; | ||
| 80 | |||
| 81 | const std::size_t page_size_in_bits{}; | 79 | const std::size_t page_size_in_bits{}; |
| 82 | }; | 80 | }; |
| 83 | 81 | ||
| 82 | /** | ||
| 83 | * A more advanced page table that also stores a backing address for each entry, for cases | ||
| 84 | * where its use depends on another MMU. | ||
| 85 | */ | ||
| 86 | struct BackingPageTable : PageTable { | ||
| 87 | explicit BackingPageTable(std::size_t page_size_in_bits); | ||
| 88 | ~BackingPageTable(); | ||
| 89 | |||
| 90 | void Resize(std::size_t address_space_width_in_bits); | ||
| 91 | |||
| 92 | std::vector<u64> backing_addr; | ||
| 93 | }; | ||
| 94 | |||
| 84 | } // namespace Common | 95 | } // namespace Common |
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 88c06b2ce..b31a0328c 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt | |||
| @@ -131,8 +131,8 @@ add_library(core STATIC | |||
| 131 | frontend/framebuffer_layout.cpp | 131 | frontend/framebuffer_layout.cpp |
| 132 | frontend/framebuffer_layout.h | 132 | frontend/framebuffer_layout.h |
| 133 | frontend/input.h | 133 | frontend/input.h |
| 134 | frontend/scope_acquire_window_context.cpp | 134 | frontend/scope_acquire_context.cpp |
| 135 | frontend/scope_acquire_window_context.h | 135 | frontend/scope_acquire_context.h |
| 136 | gdbstub/gdbstub.cpp | 136 | gdbstub/gdbstub.cpp |
| 137 | gdbstub/gdbstub.h | 137 | gdbstub/gdbstub.h |
| 138 | hardware_interrupt_manager.cpp | 138 | hardware_interrupt_manager.cpp |
| @@ -595,8 +595,12 @@ endif() | |||
| 595 | 595 | ||
| 596 | if (ARCHITECTURE_x86_64) | 596 | if (ARCHITECTURE_x86_64) |
| 597 | target_sources(core PRIVATE | 597 | target_sources(core PRIVATE |
| 598 | arm/dynarmic/arm_dynarmic.cpp | 598 | arm/dynarmic/arm_dynarmic_32.cpp |
| 599 | arm/dynarmic/arm_dynarmic.h | 599 | arm/dynarmic/arm_dynarmic_32.h |
| 600 | arm/dynarmic/arm_dynarmic_64.cpp | ||
| 601 | arm/dynarmic/arm_dynarmic_64.h | ||
| 602 | arm/dynarmic/arm_dynarmic_cp15.cpp | ||
| 603 | arm/dynarmic/arm_dynarmic_cp15.h | ||
| 600 | ) | 604 | ) |
| 601 | target_link_libraries(core PRIVATE dynarmic) | 605 | target_link_libraries(core PRIVATE dynarmic) |
| 602 | endif() | 606 | endif() |
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index 47b964eb7..57eae839e 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h | |||
| @@ -25,7 +25,20 @@ public: | |||
| 25 | explicit ARM_Interface(System& system_) : system{system_} {} | 25 | explicit ARM_Interface(System& system_) : system{system_} {} |
| 26 | virtual ~ARM_Interface() = default; | 26 | virtual ~ARM_Interface() = default; |
| 27 | 27 | ||
| 28 | struct ThreadContext { | 28 | struct ThreadContext32 { |
| 29 | std::array<u32, 16> cpu_registers; | ||
| 30 | u32 cpsr; | ||
| 31 | std::array<u8, 4> padding; | ||
| 32 | std::array<u64, 32> fprs; | ||
| 33 | u32 fpscr; | ||
| 34 | u32 fpexc; | ||
| 35 | u32 tpidr; | ||
| 36 | }; | ||
| 37 | // Internally within the kernel, it expects the AArch32 version of the | ||
| 38 | // thread context to be 344 bytes in size. | ||
| 39 | static_assert(sizeof(ThreadContext32) == 0x158); | ||
| 40 | |||
| 41 | struct ThreadContext64 { | ||
| 29 | std::array<u64, 31> cpu_registers; | 42 | std::array<u64, 31> cpu_registers; |
| 30 | u64 sp; | 43 | u64 sp; |
| 31 | u64 pc; | 44 | u64 pc; |
| @@ -38,7 +51,7 @@ public: | |||
| 38 | }; | 51 | }; |
| 39 | // Internally within the kernel, it expects the AArch64 version of the | 52 | // Internally within the kernel, it expects the AArch64 version of the |
| 40 | // thread context to be 800 bytes in size. | 53 | // thread context to be 800 bytes in size. |
| 41 | static_assert(sizeof(ThreadContext) == 0x320); | 54 | static_assert(sizeof(ThreadContext64) == 0x320); |
| 42 | 55 | ||
| 43 | /// Runs the CPU until an event happens | 56 | /// Runs the CPU until an event happens |
| 44 | virtual void Run() = 0; | 57 | virtual void Run() = 0; |
| @@ -130,17 +143,10 @@ public: | |||
| 130 | */ | 143 | */ |
| 131 | virtual void SetTPIDR_EL0(u64 value) = 0; | 144 | virtual void SetTPIDR_EL0(u64 value) = 0; |
| 132 | 145 | ||
| 133 | /** | 146 | virtual void SaveContext(ThreadContext32& ctx) = 0; |
| 134 | * Saves the current CPU context | 147 | virtual void SaveContext(ThreadContext64& ctx) = 0; |
| 135 | * @param ctx Thread context to save | 148 | virtual void LoadContext(const ThreadContext32& ctx) = 0; |
| 136 | */ | 149 | virtual void LoadContext(const ThreadContext64& ctx) = 0; |
| 137 | virtual void SaveContext(ThreadContext& ctx) = 0; | ||
| 138 | |||
| 139 | /** | ||
| 140 | * Loads a CPU context | ||
| 141 | * @param ctx Thread context to load | ||
| 142 | */ | ||
| 143 | virtual void LoadContext(const ThreadContext& ctx) = 0; | ||
| 144 | 150 | ||
| 145 | /// Clears the exclusive monitor's state. | 151 | /// Clears the exclusive monitor's state. |
| 146 | virtual void ClearExclusiveState() = 0; | 152 | virtual void ClearExclusiveState() = 0; |
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp new file mode 100644 index 000000000..187a972ac --- /dev/null +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp | |||
| @@ -0,0 +1,208 @@ | |||
| 1 | // Copyright 2020 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cinttypes> | ||
| 6 | #include <memory> | ||
| 7 | #include <dynarmic/A32/a32.h> | ||
| 8 | #include <dynarmic/A32/config.h> | ||
| 9 | #include <dynarmic/A32/context.h> | ||
| 10 | #include "common/microprofile.h" | ||
| 11 | #include "core/arm/dynarmic/arm_dynarmic_32.h" | ||
| 12 | #include "core/arm/dynarmic/arm_dynarmic_64.h" | ||
| 13 | #include "core/arm/dynarmic/arm_dynarmic_cp15.h" | ||
| 14 | #include "core/core.h" | ||
| 15 | #include "core/core_manager.h" | ||
| 16 | #include "core/core_timing.h" | ||
| 17 | #include "core/hle/kernel/svc.h" | ||
| 18 | #include "core/memory.h" | ||
| 19 | |||
| 20 | namespace Core { | ||
| 21 | |||
| 22 | class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks { | ||
| 23 | public: | ||
| 24 | explicit DynarmicCallbacks32(ARM_Dynarmic_32& parent) : parent(parent) {} | ||
| 25 | |||
| 26 | u8 MemoryRead8(u32 vaddr) override { | ||
| 27 | return parent.system.Memory().Read8(vaddr); | ||
| 28 | } | ||
| 29 | u16 MemoryRead16(u32 vaddr) override { | ||
| 30 | return parent.system.Memory().Read16(vaddr); | ||
| 31 | } | ||
| 32 | u32 MemoryRead32(u32 vaddr) override { | ||
| 33 | return parent.system.Memory().Read32(vaddr); | ||
| 34 | } | ||
| 35 | u64 MemoryRead64(u32 vaddr) override { | ||
| 36 | return parent.system.Memory().Read64(vaddr); | ||
| 37 | } | ||
| 38 | |||
| 39 | void MemoryWrite8(u32 vaddr, u8 value) override { | ||
| 40 | parent.system.Memory().Write8(vaddr, value); | ||
| 41 | } | ||
| 42 | void MemoryWrite16(u32 vaddr, u16 value) override { | ||
| 43 | parent.system.Memory().Write16(vaddr, value); | ||
| 44 | } | ||
| 45 | void MemoryWrite32(u32 vaddr, u32 value) override { | ||
| 46 | parent.system.Memory().Write32(vaddr, value); | ||
| 47 | } | ||
| 48 | void MemoryWrite64(u32 vaddr, u64 value) override { | ||
| 49 | parent.system.Memory().Write64(vaddr, value); | ||
| 50 | } | ||
| 51 | |||
| 52 | void InterpreterFallback(u32 pc, std::size_t num_instructions) override { | ||
| 53 | UNIMPLEMENTED(); | ||
| 54 | } | ||
| 55 | |||
| 56 | void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override { | ||
| 57 | switch (exception) { | ||
| 58 | case Dynarmic::A32::Exception::UndefinedInstruction: | ||
| 59 | case Dynarmic::A32::Exception::UnpredictableInstruction: | ||
| 60 | break; | ||
| 61 | case Dynarmic::A32::Exception::Breakpoint: | ||
| 62 | break; | ||
| 63 | } | ||
| 64 | LOG_CRITICAL(HW_GPU, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", | ||
| 65 | static_cast<std::size_t>(exception), pc, MemoryReadCode(pc)); | ||
| 66 | UNIMPLEMENTED(); | ||
| 67 | } | ||
| 68 | |||
| 69 | void CallSVC(u32 swi) override { | ||
| 70 | Kernel::CallSVC(parent.system, swi); | ||
| 71 | } | ||
| 72 | |||
| 73 | void AddTicks(u64 ticks) override { | ||
| 74 | // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a | ||
| 75 | // rough approximation of the amount of executed ticks in the system, it may be thrown off | ||
| 76 | // if not all cores are doing a similar amount of work. Instead of doing this, we should | ||
| 77 | // devise a way so that timing is consistent across all cores without increasing the ticks 4 | ||
| 78 | // times. | ||
| 79 | u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES; | ||
| 80 | // Always execute at least one tick. | ||
| 81 | amortized_ticks = std::max<u64>(amortized_ticks, 1); | ||
| 82 | |||
| 83 | parent.system.CoreTiming().AddTicks(amortized_ticks); | ||
| 84 | num_interpreted_instructions = 0; | ||
| 85 | } | ||
| 86 | u64 GetTicksRemaining() override { | ||
| 87 | return std::max(parent.system.CoreTiming().GetDowncount(), {}); | ||
| 88 | } | ||
| 89 | |||
| 90 | ARM_Dynarmic_32& parent; | ||
| 91 | std::size_t num_interpreted_instructions{}; | ||
| 92 | u64 tpidrro_el0{}; | ||
| 93 | u64 tpidr_el0{}; | ||
| 94 | }; | ||
| 95 | |||
| 96 | std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table, | ||
| 97 | std::size_t address_space_bits) const { | ||
| 98 | Dynarmic::A32::UserConfig config; | ||
| 99 | config.callbacks = cb.get(); | ||
| 100 | // TODO(bunnei): Implement page table for 32-bit | ||
| 101 | // config.page_table = &page_table.pointers; | ||
| 102 | config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]); | ||
| 103 | config.define_unpredictable_behaviour = true; | ||
| 104 | return std::make_unique<Dynarmic::A32::Jit>(config); | ||
| 105 | } | ||
| 106 | |||
| 107 | MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_32, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)); | ||
| 108 | |||
| 109 | void ARM_Dynarmic_32::Run() { | ||
| 110 | MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_32); | ||
| 111 | jit->Run(); | ||
| 112 | } | ||
| 113 | |||
| 114 | void ARM_Dynarmic_32::Step() { | ||
| 115 | cb->InterpreterFallback(jit->Regs()[15], 1); | ||
| 116 | } | ||
| 117 | |||
| 118 | ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, | ||
| 119 | std::size_t core_index) | ||
| 120 | : ARM_Interface{system}, | ||
| 121 | cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index}, | ||
| 122 | exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} | ||
| 123 | |||
| 124 | ARM_Dynarmic_32::~ARM_Dynarmic_32() = default; | ||
| 125 | |||
| 126 | void ARM_Dynarmic_32::SetPC(u64 pc) { | ||
| 127 | jit->Regs()[15] = static_cast<u32>(pc); | ||
| 128 | } | ||
| 129 | |||
| 130 | u64 ARM_Dynarmic_32::GetPC() const { | ||
| 131 | return jit->Regs()[15]; | ||
| 132 | } | ||
| 133 | |||
| 134 | u64 ARM_Dynarmic_32::GetReg(int index) const { | ||
| 135 | return jit->Regs()[index]; | ||
| 136 | } | ||
| 137 | |||
| 138 | void ARM_Dynarmic_32::SetReg(int index, u64 value) { | ||
| 139 | jit->Regs()[index] = static_cast<u32>(value); | ||
| 140 | } | ||
| 141 | |||
| 142 | u128 ARM_Dynarmic_32::GetVectorReg(int index) const { | ||
| 143 | return {}; | ||
| 144 | } | ||
| 145 | |||
| 146 | void ARM_Dynarmic_32::SetVectorReg(int index, u128 value) {} | ||
| 147 | |||
| 148 | u32 ARM_Dynarmic_32::GetPSTATE() const { | ||
| 149 | return jit->Cpsr(); | ||
| 150 | } | ||
| 151 | |||
| 152 | void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) { | ||
| 153 | jit->SetCpsr(cpsr); | ||
| 154 | } | ||
| 155 | |||
| 156 | u64 ARM_Dynarmic_32::GetTlsAddress() const { | ||
| 157 | return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; | ||
| 158 | } | ||
| 159 | |||
| 160 | void ARM_Dynarmic_32::SetTlsAddress(VAddr address) { | ||
| 161 | CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address); | ||
| 162 | } | ||
| 163 | |||
| 164 | u64 ARM_Dynarmic_32::GetTPIDR_EL0() const { | ||
| 165 | return cb->tpidr_el0; | ||
| 166 | } | ||
| 167 | |||
| 168 | void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) { | ||
| 169 | cb->tpidr_el0 = value; | ||
| 170 | } | ||
| 171 | |||
| 172 | void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) { | ||
| 173 | Dynarmic::A32::Context context; | ||
| 174 | jit->SaveContext(context); | ||
| 175 | ctx.cpu_registers = context.Regs(); | ||
| 176 | ctx.cpsr = context.Cpsr(); | ||
| 177 | } | ||
| 178 | |||
| 179 | void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) { | ||
| 180 | Dynarmic::A32::Context context; | ||
| 181 | context.Regs() = ctx.cpu_registers; | ||
| 182 | context.SetCpsr(ctx.cpsr); | ||
| 183 | jit->LoadContext(context); | ||
| 184 | } | ||
| 185 | |||
| 186 | void ARM_Dynarmic_32::PrepareReschedule() { | ||
| 187 | jit->HaltExecution(); | ||
| 188 | } | ||
| 189 | |||
| 190 | void ARM_Dynarmic_32::ClearInstructionCache() { | ||
| 191 | jit->ClearCache(); | ||
| 192 | } | ||
| 193 | |||
| 194 | void ARM_Dynarmic_32::ClearExclusiveState() {} | ||
| 195 | |||
| 196 | void ARM_Dynarmic_32::PageTableChanged(Common::PageTable& page_table, | ||
| 197 | std::size_t new_address_space_size_in_bits) { | ||
| 198 | auto key = std::make_pair(&page_table, new_address_space_size_in_bits); | ||
| 199 | auto iter = jit_cache.find(key); | ||
| 200 | if (iter != jit_cache.end()) { | ||
| 201 | jit = iter->second; | ||
| 202 | return; | ||
| 203 | } | ||
| 204 | jit = MakeJit(page_table, new_address_space_size_in_bits); | ||
| 205 | jit_cache.emplace(key, jit); | ||
| 206 | } | ||
| 207 | |||
| 208 | } // namespace Core | ||
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h new file mode 100644 index 000000000..143e46e4d --- /dev/null +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h | |||
| @@ -0,0 +1,77 @@ | |||
| 1 | // Copyright 2020 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <unordered_map> | ||
| 9 | |||
| 10 | #include <dynarmic/A32/a32.h> | ||
| 11 | #include <dynarmic/A64/a64.h> | ||
| 12 | #include <dynarmic/A64/exclusive_monitor.h> | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "common/hash.h" | ||
| 15 | #include "core/arm/arm_interface.h" | ||
| 16 | #include "core/arm/exclusive_monitor.h" | ||
| 17 | |||
| 18 | namespace Memory { | ||
| 19 | class Memory; | ||
| 20 | } | ||
| 21 | |||
| 22 | namespace Core { | ||
| 23 | |||
| 24 | class DynarmicCallbacks32; | ||
| 25 | class DynarmicExclusiveMonitor; | ||
| 26 | class System; | ||
| 27 | |||
| 28 | class ARM_Dynarmic_32 final : public ARM_Interface { | ||
| 29 | public: | ||
| 30 | ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); | ||
| 31 | ~ARM_Dynarmic_32() override; | ||
| 32 | |||
| 33 | void SetPC(u64 pc) override; | ||
| 34 | u64 GetPC() const override; | ||
| 35 | u64 GetReg(int index) const override; | ||
| 36 | void SetReg(int index, u64 value) override; | ||
| 37 | u128 GetVectorReg(int index) const override; | ||
| 38 | void SetVectorReg(int index, u128 value) override; | ||
| 39 | u32 GetPSTATE() const override; | ||
| 40 | void SetPSTATE(u32 pstate) override; | ||
| 41 | void Run() override; | ||
| 42 | void Step() override; | ||
| 43 | VAddr GetTlsAddress() const override; | ||
| 44 | void SetTlsAddress(VAddr address) override; | ||
| 45 | void SetTPIDR_EL0(u64 value) override; | ||
| 46 | u64 GetTPIDR_EL0() const override; | ||
| 47 | |||
| 48 | void SaveContext(ThreadContext32& ctx) override; | ||
| 49 | void SaveContext(ThreadContext64& ctx) override {} | ||
| 50 | void LoadContext(const ThreadContext32& ctx) override; | ||
| 51 | void LoadContext(const ThreadContext64& ctx) override {} | ||
| 52 | |||
| 53 | void PrepareReschedule() override; | ||
| 54 | void ClearExclusiveState() override; | ||
| 55 | |||
| 56 | void ClearInstructionCache() override; | ||
| 57 | void PageTableChanged(Common::PageTable& new_page_table, | ||
| 58 | std::size_t new_address_space_size_in_bits) override; | ||
| 59 | |||
| 60 | private: | ||
| 61 | std::shared_ptr<Dynarmic::A32::Jit> MakeJit(Common::PageTable& page_table, | ||
| 62 | std::size_t address_space_bits) const; | ||
| 63 | |||
| 64 | using JitCacheKey = std::pair<Common::PageTable*, std::size_t>; | ||
| 65 | using JitCacheType = | ||
| 66 | std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>; | ||
| 67 | |||
| 68 | friend class DynarmicCallbacks32; | ||
| 69 | std::unique_ptr<DynarmicCallbacks32> cb; | ||
| 70 | JitCacheType jit_cache; | ||
| 71 | std::shared_ptr<Dynarmic::A32::Jit> jit; | ||
| 72 | std::size_t core_index; | ||
| 73 | DynarmicExclusiveMonitor& exclusive_monitor; | ||
| 74 | std::array<u32, 84> CP15_regs{}; | ||
| 75 | }; | ||
| 76 | |||
| 77 | } // namespace Core | ||
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 29eaf74e5..a53a58ba0 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | #include <dynarmic/A64/config.h> | 8 | #include <dynarmic/A64/config.h> |
| 9 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 10 | #include "common/microprofile.h" | 10 | #include "common/microprofile.h" |
| 11 | #include "core/arm/dynarmic/arm_dynarmic.h" | 11 | #include "core/arm/dynarmic/arm_dynarmic_64.h" |
| 12 | #include "core/core.h" | 12 | #include "core/core.h" |
| 13 | #include "core/core_manager.h" | 13 | #include "core/core_manager.h" |
| 14 | #include "core/core_timing.h" | 14 | #include "core/core_timing.h" |
| @@ -25,9 +25,9 @@ namespace Core { | |||
| 25 | 25 | ||
| 26 | using Vector = Dynarmic::A64::Vector; | 26 | using Vector = Dynarmic::A64::Vector; |
| 27 | 27 | ||
| 28 | class ARM_Dynarmic_Callbacks : public Dynarmic::A64::UserCallbacks { | 28 | class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks { |
| 29 | public: | 29 | public: |
| 30 | explicit ARM_Dynarmic_Callbacks(ARM_Dynarmic& parent) : parent(parent) {} | 30 | explicit DynarmicCallbacks64(ARM_Dynarmic_64& parent) : parent(parent) {} |
| 31 | 31 | ||
| 32 | u8 MemoryRead8(u64 vaddr) override { | 32 | u8 MemoryRead8(u64 vaddr) override { |
| 33 | return parent.system.Memory().Read8(vaddr); | 33 | return parent.system.Memory().Read8(vaddr); |
| @@ -68,7 +68,7 @@ public: | |||
| 68 | LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc, | 68 | LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc, |
| 69 | num_instructions, MemoryReadCode(pc)); | 69 | num_instructions, MemoryReadCode(pc)); |
| 70 | 70 | ||
| 71 | ARM_Interface::ThreadContext ctx; | 71 | ARM_Interface::ThreadContext64 ctx; |
| 72 | parent.SaveContext(ctx); | 72 | parent.SaveContext(ctx); |
| 73 | parent.inner_unicorn.LoadContext(ctx); | 73 | parent.inner_unicorn.LoadContext(ctx); |
| 74 | parent.inner_unicorn.ExecuteInstructions(num_instructions); | 74 | parent.inner_unicorn.ExecuteInstructions(num_instructions); |
| @@ -90,7 +90,7 @@ public: | |||
| 90 | parent.jit->HaltExecution(); | 90 | parent.jit->HaltExecution(); |
| 91 | parent.SetPC(pc); | 91 | parent.SetPC(pc); |
| 92 | Kernel::Thread* const thread = parent.system.CurrentScheduler().GetCurrentThread(); | 92 | Kernel::Thread* const thread = parent.system.CurrentScheduler().GetCurrentThread(); |
| 93 | parent.SaveContext(thread->GetContext()); | 93 | parent.SaveContext(thread->GetContext64()); |
| 94 | GDBStub::Break(); | 94 | GDBStub::Break(); |
| 95 | GDBStub::SendTrap(thread, 5); | 95 | GDBStub::SendTrap(thread, 5); |
| 96 | return; | 96 | return; |
| @@ -126,14 +126,14 @@ public: | |||
| 126 | return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks()); | 126 | return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks()); |
| 127 | } | 127 | } |
| 128 | 128 | ||
| 129 | ARM_Dynarmic& parent; | 129 | ARM_Dynarmic_64& parent; |
| 130 | std::size_t num_interpreted_instructions = 0; | 130 | std::size_t num_interpreted_instructions = 0; |
| 131 | u64 tpidrro_el0 = 0; | 131 | u64 tpidrro_el0 = 0; |
| 132 | u64 tpidr_el0 = 0; | 132 | u64 tpidr_el0 = 0; |
| 133 | }; | 133 | }; |
| 134 | 134 | ||
| 135 | std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& page_table, | 135 | std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table, |
| 136 | std::size_t address_space_bits) const { | 136 | std::size_t address_space_bits) const { |
| 137 | Dynarmic::A64::UserConfig config; | 137 | Dynarmic::A64::UserConfig config; |
| 138 | 138 | ||
| 139 | // Callbacks | 139 | // Callbacks |
| @@ -159,79 +159,79 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag | |||
| 159 | // Unpredictable instructions | 159 | // Unpredictable instructions |
| 160 | config.define_unpredictable_behaviour = true; | 160 | config.define_unpredictable_behaviour = true; |
| 161 | 161 | ||
| 162 | return std::make_unique<Dynarmic::A64::Jit>(config); | 162 | return std::make_shared<Dynarmic::A64::Jit>(config); |
| 163 | } | 163 | } |
| 164 | 164 | ||
| 165 | MICROPROFILE_DEFINE(ARM_Jit_Dynarmic, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)); | 165 | MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_64, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)); |
| 166 | 166 | ||
| 167 | void ARM_Dynarmic::Run() { | 167 | void ARM_Dynarmic_64::Run() { |
| 168 | MICROPROFILE_SCOPE(ARM_Jit_Dynarmic); | 168 | MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_64); |
| 169 | 169 | ||
| 170 | jit->Run(); | 170 | jit->Run(); |
| 171 | } | 171 | } |
| 172 | 172 | ||
| 173 | void ARM_Dynarmic::Step() { | 173 | void ARM_Dynarmic_64::Step() { |
| 174 | cb->InterpreterFallback(jit->GetPC(), 1); | 174 | cb->InterpreterFallback(jit->GetPC(), 1); |
| 175 | } | 175 | } |
| 176 | 176 | ||
| 177 | ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, | 177 | ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, |
| 178 | std::size_t core_index) | 178 | std::size_t core_index) |
| 179 | : ARM_Interface{system}, | 179 | : ARM_Interface{system}, |
| 180 | cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system}, | 180 | cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system}, |
| 181 | core_index{core_index}, exclusive_monitor{ | 181 | core_index{core_index}, exclusive_monitor{ |
| 182 | dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} | 182 | dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} |
| 183 | 183 | ||
| 184 | ARM_Dynarmic::~ARM_Dynarmic() = default; | 184 | ARM_Dynarmic_64::~ARM_Dynarmic_64() = default; |
| 185 | 185 | ||
| 186 | void ARM_Dynarmic::SetPC(u64 pc) { | 186 | void ARM_Dynarmic_64::SetPC(u64 pc) { |
| 187 | jit->SetPC(pc); | 187 | jit->SetPC(pc); |
| 188 | } | 188 | } |
| 189 | 189 | ||
| 190 | u64 ARM_Dynarmic::GetPC() const { | 190 | u64 ARM_Dynarmic_64::GetPC() const { |
| 191 | return jit->GetPC(); | 191 | return jit->GetPC(); |
| 192 | } | 192 | } |
| 193 | 193 | ||
| 194 | u64 ARM_Dynarmic::GetReg(int index) const { | 194 | u64 ARM_Dynarmic_64::GetReg(int index) const { |
| 195 | return jit->GetRegister(index); | 195 | return jit->GetRegister(index); |
| 196 | } | 196 | } |
| 197 | 197 | ||
| 198 | void ARM_Dynarmic::SetReg(int index, u64 value) { | 198 | void ARM_Dynarmic_64::SetReg(int index, u64 value) { |
| 199 | jit->SetRegister(index, value); | 199 | jit->SetRegister(index, value); |
| 200 | } | 200 | } |
| 201 | 201 | ||
| 202 | u128 ARM_Dynarmic::GetVectorReg(int index) const { | 202 | u128 ARM_Dynarmic_64::GetVectorReg(int index) const { |
| 203 | return jit->GetVector(index); | 203 | return jit->GetVector(index); |
| 204 | } | 204 | } |
| 205 | 205 | ||
| 206 | void ARM_Dynarmic::SetVectorReg(int index, u128 value) { | 206 | void ARM_Dynarmic_64::SetVectorReg(int index, u128 value) { |
| 207 | jit->SetVector(index, value); | 207 | jit->SetVector(index, value); |
| 208 | } | 208 | } |
| 209 | 209 | ||
| 210 | u32 ARM_Dynarmic::GetPSTATE() const { | 210 | u32 ARM_Dynarmic_64::GetPSTATE() const { |
| 211 | return jit->GetPstate(); | 211 | return jit->GetPstate(); |
| 212 | } | 212 | } |
| 213 | 213 | ||
| 214 | void ARM_Dynarmic::SetPSTATE(u32 pstate) { | 214 | void ARM_Dynarmic_64::SetPSTATE(u32 pstate) { |
| 215 | jit->SetPstate(pstate); | 215 | jit->SetPstate(pstate); |
| 216 | } | 216 | } |
| 217 | 217 | ||
| 218 | u64 ARM_Dynarmic::GetTlsAddress() const { | 218 | u64 ARM_Dynarmic_64::GetTlsAddress() const { |
| 219 | return cb->tpidrro_el0; | 219 | return cb->tpidrro_el0; |
| 220 | } | 220 | } |
| 221 | 221 | ||
| 222 | void ARM_Dynarmic::SetTlsAddress(VAddr address) { | 222 | void ARM_Dynarmic_64::SetTlsAddress(VAddr address) { |
| 223 | cb->tpidrro_el0 = address; | 223 | cb->tpidrro_el0 = address; |
| 224 | } | 224 | } |
| 225 | 225 | ||
| 226 | u64 ARM_Dynarmic::GetTPIDR_EL0() const { | 226 | u64 ARM_Dynarmic_64::GetTPIDR_EL0() const { |
| 227 | return cb->tpidr_el0; | 227 | return cb->tpidr_el0; |
| 228 | } | 228 | } |
| 229 | 229 | ||
| 230 | void ARM_Dynarmic::SetTPIDR_EL0(u64 value) { | 230 | void ARM_Dynarmic_64::SetTPIDR_EL0(u64 value) { |
| 231 | cb->tpidr_el0 = value; | 231 | cb->tpidr_el0 = value; |
| 232 | } | 232 | } |
| 233 | 233 | ||
| 234 | void ARM_Dynarmic::SaveContext(ThreadContext& ctx) { | 234 | void ARM_Dynarmic_64::SaveContext(ThreadContext64& ctx) { |
| 235 | ctx.cpu_registers = jit->GetRegisters(); | 235 | ctx.cpu_registers = jit->GetRegisters(); |
| 236 | ctx.sp = jit->GetSP(); | 236 | ctx.sp = jit->GetSP(); |
| 237 | ctx.pc = jit->GetPC(); | 237 | ctx.pc = jit->GetPC(); |
| @@ -242,7 +242,7 @@ void ARM_Dynarmic::SaveContext(ThreadContext& ctx) { | |||
| 242 | ctx.tpidr = cb->tpidr_el0; | 242 | ctx.tpidr = cb->tpidr_el0; |
| 243 | } | 243 | } |
| 244 | 244 | ||
| 245 | void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) { | 245 | void ARM_Dynarmic_64::LoadContext(const ThreadContext64& ctx) { |
| 246 | jit->SetRegisters(ctx.cpu_registers); | 246 | jit->SetRegisters(ctx.cpu_registers); |
| 247 | jit->SetSP(ctx.sp); | 247 | jit->SetSP(ctx.sp); |
| 248 | jit->SetPC(ctx.pc); | 248 | jit->SetPC(ctx.pc); |
| @@ -253,25 +253,32 @@ void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) { | |||
| 253 | SetTPIDR_EL0(ctx.tpidr); | 253 | SetTPIDR_EL0(ctx.tpidr); |
| 254 | } | 254 | } |
| 255 | 255 | ||
| 256 | void ARM_Dynarmic::PrepareReschedule() { | 256 | void ARM_Dynarmic_64::PrepareReschedule() { |
| 257 | jit->HaltExecution(); | 257 | jit->HaltExecution(); |
| 258 | } | 258 | } |
| 259 | 259 | ||
| 260 | void ARM_Dynarmic::ClearInstructionCache() { | 260 | void ARM_Dynarmic_64::ClearInstructionCache() { |
| 261 | jit->ClearCache(); | 261 | jit->ClearCache(); |
| 262 | } | 262 | } |
| 263 | 263 | ||
| 264 | void ARM_Dynarmic::ClearExclusiveState() { | 264 | void ARM_Dynarmic_64::ClearExclusiveState() { |
| 265 | jit->ClearExclusiveState(); | 265 | jit->ClearExclusiveState(); |
| 266 | } | 266 | } |
| 267 | 267 | ||
| 268 | void ARM_Dynarmic::PageTableChanged(Common::PageTable& page_table, | 268 | void ARM_Dynarmic_64::PageTableChanged(Common::PageTable& page_table, |
| 269 | std::size_t new_address_space_size_in_bits) { | 269 | std::size_t new_address_space_size_in_bits) { |
| 270 | auto key = std::make_pair(&page_table, new_address_space_size_in_bits); | ||
| 271 | auto iter = jit_cache.find(key); | ||
| 272 | if (iter != jit_cache.end()) { | ||
| 273 | jit = iter->second; | ||
| 274 | return; | ||
| 275 | } | ||
| 270 | jit = MakeJit(page_table, new_address_space_size_in_bits); | 276 | jit = MakeJit(page_table, new_address_space_size_in_bits); |
| 277 | jit_cache.emplace(key, jit); | ||
| 271 | } | 278 | } |
| 272 | 279 | ||
| 273 | DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory_, std::size_t core_count) | 280 | DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count) |
| 274 | : monitor(core_count), memory{memory_} {} | 281 | : monitor(core_count), memory{memory} {} |
| 275 | 282 | ||
| 276 | DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default; | 283 | DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default; |
| 277 | 284 | ||
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic_64.h index 9cd475cfb..e71240a96 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic_64.h | |||
| @@ -5,9 +5,12 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <unordered_map> | ||
| 9 | |||
| 8 | #include <dynarmic/A64/a64.h> | 10 | #include <dynarmic/A64/a64.h> |
| 9 | #include <dynarmic/A64/exclusive_monitor.h> | 11 | #include <dynarmic/A64/exclusive_monitor.h> |
| 10 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "common/hash.h" | ||
| 11 | #include "core/arm/arm_interface.h" | 14 | #include "core/arm/arm_interface.h" |
| 12 | #include "core/arm/exclusive_monitor.h" | 15 | #include "core/arm/exclusive_monitor.h" |
| 13 | #include "core/arm/unicorn/arm_unicorn.h" | 16 | #include "core/arm/unicorn/arm_unicorn.h" |
| @@ -18,14 +21,14 @@ class Memory; | |||
| 18 | 21 | ||
| 19 | namespace Core { | 22 | namespace Core { |
| 20 | 23 | ||
| 21 | class ARM_Dynarmic_Callbacks; | 24 | class DynarmicCallbacks64; |
| 22 | class DynarmicExclusiveMonitor; | 25 | class DynarmicExclusiveMonitor; |
| 23 | class System; | 26 | class System; |
| 24 | 27 | ||
| 25 | class ARM_Dynarmic final : public ARM_Interface { | 28 | class ARM_Dynarmic_64 final : public ARM_Interface { |
| 26 | public: | 29 | public: |
| 27 | ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); | 30 | ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); |
| 28 | ~ARM_Dynarmic() override; | 31 | ~ARM_Dynarmic_64() override; |
| 29 | 32 | ||
| 30 | void SetPC(u64 pc) override; | 33 | void SetPC(u64 pc) override; |
| 31 | u64 GetPC() const override; | 34 | u64 GetPC() const override; |
| @@ -42,8 +45,10 @@ public: | |||
| 42 | void SetTPIDR_EL0(u64 value) override; | 45 | void SetTPIDR_EL0(u64 value) override; |
| 43 | u64 GetTPIDR_EL0() const override; | 46 | u64 GetTPIDR_EL0() const override; |
| 44 | 47 | ||
| 45 | void SaveContext(ThreadContext& ctx) override; | 48 | void SaveContext(ThreadContext32& ctx) override {} |
| 46 | void LoadContext(const ThreadContext& ctx) override; | 49 | void SaveContext(ThreadContext64& ctx) override; |
| 50 | void LoadContext(const ThreadContext32& ctx) override {} | ||
| 51 | void LoadContext(const ThreadContext64& ctx) override; | ||
| 47 | 52 | ||
| 48 | void PrepareReschedule() override; | 53 | void PrepareReschedule() override; |
| 49 | void ClearExclusiveState() override; | 54 | void ClearExclusiveState() override; |
| @@ -53,12 +58,17 @@ public: | |||
| 53 | std::size_t new_address_space_size_in_bits) override; | 58 | std::size_t new_address_space_size_in_bits) override; |
| 54 | 59 | ||
| 55 | private: | 60 | private: |
| 56 | std::unique_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable& page_table, | 61 | std::shared_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable& page_table, |
| 57 | std::size_t address_space_bits) const; | 62 | std::size_t address_space_bits) const; |
| 58 | 63 | ||
| 59 | friend class ARM_Dynarmic_Callbacks; | 64 | using JitCacheKey = std::pair<Common::PageTable*, std::size_t>; |
| 60 | std::unique_ptr<ARM_Dynarmic_Callbacks> cb; | 65 | using JitCacheType = |
| 61 | std::unique_ptr<Dynarmic::A64::Jit> jit; | 66 | std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A64::Jit>, Common::PairHash>; |
| 67 | |||
| 68 | friend class DynarmicCallbacks64; | ||
| 69 | std::unique_ptr<DynarmicCallbacks64> cb; | ||
| 70 | JitCacheType jit_cache; | ||
| 71 | std::shared_ptr<Dynarmic::A64::Jit> jit; | ||
| 62 | ARM_Unicorn inner_unicorn; | 72 | ARM_Unicorn inner_unicorn; |
| 63 | 73 | ||
| 64 | std::size_t core_index; | 74 | std::size_t core_index; |
| @@ -67,7 +77,7 @@ private: | |||
| 67 | 77 | ||
| 68 | class DynarmicExclusiveMonitor final : public ExclusiveMonitor { | 78 | class DynarmicExclusiveMonitor final : public ExclusiveMonitor { |
| 69 | public: | 79 | public: |
| 70 | explicit DynarmicExclusiveMonitor(Memory::Memory& memory_, std::size_t core_count); | 80 | explicit DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count); |
| 71 | ~DynarmicExclusiveMonitor() override; | 81 | ~DynarmicExclusiveMonitor() override; |
| 72 | 82 | ||
| 73 | void SetExclusive(std::size_t core_index, VAddr addr) override; | 83 | void SetExclusive(std::size_t core_index, VAddr addr) override; |
| @@ -80,7 +90,7 @@ public: | |||
| 80 | bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override; | 90 | bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override; |
| 81 | 91 | ||
| 82 | private: | 92 | private: |
| 83 | friend class ARM_Dynarmic; | 93 | friend class ARM_Dynarmic_64; |
| 84 | Dynarmic::A64::ExclusiveMonitor monitor; | 94 | Dynarmic::A64::ExclusiveMonitor monitor; |
| 85 | Memory::Memory& memory; | 95 | Memory::Memory& memory; |
| 86 | }; | 96 | }; |
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp new file mode 100644 index 000000000..3fdcdebde --- /dev/null +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp | |||
| @@ -0,0 +1,80 @@ | |||
| 1 | // Copyright 2017 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "core/arm/dynarmic/arm_dynarmic_cp15.h" | ||
| 6 | |||
| 7 | using Callback = Dynarmic::A32::Coprocessor::Callback; | ||
| 8 | using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord; | ||
| 9 | using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords; | ||
| 10 | |||
| 11 | std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1, | ||
| 12 | CoprocReg CRd, CoprocReg CRn, | ||
| 13 | CoprocReg CRm, unsigned opc2) { | ||
| 14 | return {}; | ||
| 15 | } | ||
| 16 | |||
| 17 | CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, | ||
| 18 | CoprocReg CRm, unsigned opc2) { | ||
| 19 | // TODO(merry): Privileged CP15 registers | ||
| 20 | |||
| 21 | if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) { | ||
| 22 | // This is a dummy write, we ignore the value written here. | ||
| 23 | return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)]; | ||
| 24 | } | ||
| 25 | |||
| 26 | if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) { | ||
| 27 | switch (opc2) { | ||
| 28 | case 4: | ||
| 29 | // This is a dummy write, we ignore the value written here. | ||
| 30 | return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)]; | ||
| 31 | case 5: | ||
| 32 | // This is a dummy write, we ignore the value written here. | ||
| 33 | return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)]; | ||
| 34 | default: | ||
| 35 | return {}; | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 | if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) { | ||
| 40 | return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; | ||
| 41 | } | ||
| 42 | |||
| 43 | return {}; | ||
| 44 | } | ||
| 45 | |||
| 46 | CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) { | ||
| 47 | return {}; | ||
| 48 | } | ||
| 49 | |||
| 50 | CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, | ||
| 51 | CoprocReg CRm, unsigned opc2) { | ||
| 52 | // TODO(merry): Privileged CP15 registers | ||
| 53 | |||
| 54 | if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) { | ||
| 55 | switch (opc2) { | ||
| 56 | case 2: | ||
| 57 | return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; | ||
| 58 | case 3: | ||
| 59 | return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; | ||
| 60 | default: | ||
| 61 | return {}; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | return {}; | ||
| 66 | } | ||
| 67 | |||
| 68 | CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) { | ||
| 69 | return {}; | ||
| 70 | } | ||
| 71 | |||
| 72 | std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, | ||
| 73 | std::optional<u8> option) { | ||
| 74 | return {}; | ||
| 75 | } | ||
| 76 | |||
| 77 | std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, | ||
| 78 | std::optional<u8> option) { | ||
| 79 | return {}; | ||
| 80 | } | ||
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h new file mode 100644 index 000000000..07bcde5f9 --- /dev/null +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h | |||
| @@ -0,0 +1,152 @@ | |||
| 1 | // Copyright 2017 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <optional> | ||
| 9 | |||
| 10 | #include <dynarmic/A32/coprocessor.h> | ||
| 11 | #include "common/common_types.h" | ||
| 12 | |||
| 13 | enum class CP15Register { | ||
| 14 | // c0 - Information registers | ||
| 15 | CP15_MAIN_ID, | ||
| 16 | CP15_CACHE_TYPE, | ||
| 17 | CP15_TCM_STATUS, | ||
| 18 | CP15_TLB_TYPE, | ||
| 19 | CP15_CPU_ID, | ||
| 20 | CP15_PROCESSOR_FEATURE_0, | ||
| 21 | CP15_PROCESSOR_FEATURE_1, | ||
| 22 | CP15_DEBUG_FEATURE_0, | ||
| 23 | CP15_AUXILIARY_FEATURE_0, | ||
| 24 | CP15_MEMORY_MODEL_FEATURE_0, | ||
| 25 | CP15_MEMORY_MODEL_FEATURE_1, | ||
| 26 | CP15_MEMORY_MODEL_FEATURE_2, | ||
| 27 | CP15_MEMORY_MODEL_FEATURE_3, | ||
| 28 | CP15_ISA_FEATURE_0, | ||
| 29 | CP15_ISA_FEATURE_1, | ||
| 30 | CP15_ISA_FEATURE_2, | ||
| 31 | CP15_ISA_FEATURE_3, | ||
| 32 | CP15_ISA_FEATURE_4, | ||
| 33 | |||
| 34 | // c1 - Control registers | ||
| 35 | CP15_CONTROL, | ||
| 36 | CP15_AUXILIARY_CONTROL, | ||
| 37 | CP15_COPROCESSOR_ACCESS_CONTROL, | ||
| 38 | |||
| 39 | // c2 - Translation table registers | ||
| 40 | CP15_TRANSLATION_BASE_TABLE_0, | ||
| 41 | CP15_TRANSLATION_BASE_TABLE_1, | ||
| 42 | CP15_TRANSLATION_BASE_CONTROL, | ||
| 43 | CP15_DOMAIN_ACCESS_CONTROL, | ||
| 44 | CP15_RESERVED, | ||
| 45 | |||
| 46 | // c5 - Fault status registers | ||
| 47 | CP15_FAULT_STATUS, | ||
| 48 | CP15_INSTR_FAULT_STATUS, | ||
| 49 | CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS, | ||
| 50 | CP15_INST_FSR, | ||
| 51 | |||
| 52 | // c6 - Fault Address registers | ||
| 53 | CP15_FAULT_ADDRESS, | ||
| 54 | CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS, | ||
| 55 | CP15_WFAR, | ||
| 56 | CP15_IFAR, | ||
| 57 | |||
| 58 | // c7 - Cache operation registers | ||
| 59 | CP15_WAIT_FOR_INTERRUPT, | ||
| 60 | CP15_PHYS_ADDRESS, | ||
| 61 | CP15_INVALIDATE_INSTR_CACHE, | ||
| 62 | CP15_INVALIDATE_INSTR_CACHE_USING_MVA, | ||
| 63 | CP15_INVALIDATE_INSTR_CACHE_USING_INDEX, | ||
| 64 | CP15_FLUSH_PREFETCH_BUFFER, | ||
| 65 | CP15_FLUSH_BRANCH_TARGET_CACHE, | ||
| 66 | CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY, | ||
| 67 | CP15_INVALIDATE_DATA_CACHE, | ||
| 68 | CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA, | ||
| 69 | CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX, | ||
| 70 | CP15_INVALIDATE_DATA_AND_INSTR_CACHE, | ||
| 71 | CP15_CLEAN_DATA_CACHE, | ||
| 72 | CP15_CLEAN_DATA_CACHE_LINE_USING_MVA, | ||
| 73 | CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX, | ||
| 74 | CP15_DATA_SYNC_BARRIER, | ||
| 75 | CP15_DATA_MEMORY_BARRIER, | ||
| 76 | CP15_CLEAN_AND_INVALIDATE_DATA_CACHE, | ||
| 77 | CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA, | ||
| 78 | CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX, | ||
| 79 | |||
| 80 | // c8 - TLB operations | ||
| 81 | CP15_INVALIDATE_ITLB, | ||
| 82 | CP15_INVALIDATE_ITLB_SINGLE_ENTRY, | ||
| 83 | CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH, | ||
| 84 | CP15_INVALIDATE_ITLB_ENTRY_ON_MVA, | ||
| 85 | CP15_INVALIDATE_DTLB, | ||
| 86 | CP15_INVALIDATE_DTLB_SINGLE_ENTRY, | ||
| 87 | CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH, | ||
| 88 | CP15_INVALIDATE_DTLB_ENTRY_ON_MVA, | ||
| 89 | CP15_INVALIDATE_UTLB, | ||
| 90 | CP15_INVALIDATE_UTLB_SINGLE_ENTRY, | ||
| 91 | CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH, | ||
| 92 | CP15_INVALIDATE_UTLB_ENTRY_ON_MVA, | ||
| 93 | |||
| 94 | // c9 - Data cache lockdown register | ||
| 95 | CP15_DATA_CACHE_LOCKDOWN, | ||
| 96 | |||
| 97 | // c10 - TLB/Memory map registers | ||
| 98 | CP15_TLB_LOCKDOWN, | ||
| 99 | CP15_PRIMARY_REGION_REMAP, | ||
| 100 | CP15_NORMAL_REGION_REMAP, | ||
| 101 | |||
| 102 | // c13 - Thread related registers | ||
| 103 | CP15_PID, | ||
| 104 | CP15_CONTEXT_ID, | ||
| 105 | CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write | ||
| 106 | CP15_THREAD_URO, // Thread ID register - User Read Only (Privileged R/W) | ||
| 107 | CP15_THREAD_PRW, // Thread ID register - Privileged R/W only. | ||
| 108 | |||
| 109 | // c15 - Performance and TLB lockdown registers | ||
| 110 | CP15_PERFORMANCE_MONITOR_CONTROL, | ||
| 111 | CP15_CYCLE_COUNTER, | ||
| 112 | CP15_COUNT_0, | ||
| 113 | CP15_COUNT_1, | ||
| 114 | CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY, | ||
| 115 | CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY, | ||
| 116 | CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS, | ||
| 117 | CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS, | ||
| 118 | CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE, | ||
| 119 | CP15_TLB_DEBUG_CONTROL, | ||
| 120 | |||
| 121 | // Skyeye defined | ||
| 122 | CP15_TLB_FAULT_ADDR, | ||
| 123 | CP15_TLB_FAULT_STATUS, | ||
| 124 | |||
| 125 | // Not an actual register. | ||
| 126 | // All registers should be defined above this. | ||
| 127 | CP15_REGISTER_COUNT, | ||
| 128 | }; | ||
| 129 | |||
| 130 | class DynarmicCP15 final : public Dynarmic::A32::Coprocessor { | ||
| 131 | public: | ||
| 132 | using CoprocReg = Dynarmic::A32::CoprocReg; | ||
| 133 | |||
| 134 | explicit DynarmicCP15(u32* cp15) : CP15(cp15){}; | ||
| 135 | |||
| 136 | std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd, | ||
| 137 | CoprocReg CRn, CoprocReg CRm, | ||
| 138 | unsigned opc2) override; | ||
| 139 | CallbackOrAccessOneWord CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, | ||
| 140 | CoprocReg CRm, unsigned opc2) override; | ||
| 141 | CallbackOrAccessTwoWords CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) override; | ||
| 142 | CallbackOrAccessOneWord CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, CoprocReg CRm, | ||
| 143 | unsigned opc2) override; | ||
| 144 | CallbackOrAccessTwoWords CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) override; | ||
| 145 | std::optional<Callback> CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, | ||
| 146 | std::optional<u8> option) override; | ||
| 147 | std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, | ||
| 148 | std::optional<u8> option) override; | ||
| 149 | |||
| 150 | private: | ||
| 151 | u32* CP15{}; | ||
| 152 | }; | ||
diff --git a/src/core/arm/exclusive_monitor.cpp b/src/core/arm/exclusive_monitor.cpp index 94570e520..b32401e0b 100644 --- a/src/core/arm/exclusive_monitor.cpp +++ b/src/core/arm/exclusive_monitor.cpp | |||
| @@ -3,7 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #ifdef ARCHITECTURE_x86_64 | 5 | #ifdef ARCHITECTURE_x86_64 |
| 6 | #include "core/arm/dynarmic/arm_dynarmic.h" | 6 | #include "core/arm/dynarmic/arm_dynarmic_64.h" |
| 7 | #endif | 7 | #endif |
| 8 | #include "core/arm/exclusive_monitor.h" | 8 | #include "core/arm/exclusive_monitor.h" |
| 9 | #include "core/memory.h" | 9 | #include "core/memory.h" |
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index f99ad5802..8a9800a96 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp | |||
| @@ -53,7 +53,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si | |||
| 53 | void* user_data) { | 53 | void* user_data) { |
| 54 | auto* const system = static_cast<System*>(user_data); | 54 | auto* const system = static_cast<System*>(user_data); |
| 55 | 55 | ||
| 56 | ARM_Interface::ThreadContext ctx{}; | 56 | ARM_Interface::ThreadContext64 ctx{}; |
| 57 | system->CurrentArmInterface().SaveContext(ctx); | 57 | system->CurrentArmInterface().SaveContext(ctx); |
| 58 | ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr, | 58 | ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr, |
| 59 | ctx.pc, ctx.cpu_registers[30]); | 59 | ctx.pc, ctx.cpu_registers[30]); |
| @@ -179,7 +179,7 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) { | |||
| 179 | } | 179 | } |
| 180 | 180 | ||
| 181 | Kernel::Thread* const thread = system.CurrentScheduler().GetCurrentThread(); | 181 | Kernel::Thread* const thread = system.CurrentScheduler().GetCurrentThread(); |
| 182 | SaveContext(thread->GetContext()); | 182 | SaveContext(thread->GetContext64()); |
| 183 | if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) { | 183 | if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) { |
| 184 | last_bkpt_hit = false; | 184 | last_bkpt_hit = false; |
| 185 | GDBStub::Break(); | 185 | GDBStub::Break(); |
| @@ -188,7 +188,7 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) { | |||
| 188 | } | 188 | } |
| 189 | } | 189 | } |
| 190 | 190 | ||
| 191 | void ARM_Unicorn::SaveContext(ThreadContext& ctx) { | 191 | void ARM_Unicorn::SaveContext(ThreadContext64& ctx) { |
| 192 | int uregs[32]; | 192 | int uregs[32]; |
| 193 | void* tregs[32]; | 193 | void* tregs[32]; |
| 194 | 194 | ||
| @@ -215,7 +215,7 @@ void ARM_Unicorn::SaveContext(ThreadContext& ctx) { | |||
| 215 | CHECKED(uc_reg_read_batch(uc, uregs, tregs, 32)); | 215 | CHECKED(uc_reg_read_batch(uc, uregs, tregs, 32)); |
| 216 | } | 216 | } |
| 217 | 217 | ||
| 218 | void ARM_Unicorn::LoadContext(const ThreadContext& ctx) { | 218 | void ARM_Unicorn::LoadContext(const ThreadContext64& ctx) { |
| 219 | int uregs[32]; | 219 | int uregs[32]; |
| 220 | void* tregs[32]; | 220 | void* tregs[32]; |
| 221 | 221 | ||
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index 3c5b155f9..f30d13cb6 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h | |||
| @@ -30,8 +30,6 @@ public: | |||
| 30 | void SetTlsAddress(VAddr address) override; | 30 | void SetTlsAddress(VAddr address) override; |
| 31 | void SetTPIDR_EL0(u64 value) override; | 31 | void SetTPIDR_EL0(u64 value) override; |
| 32 | u64 GetTPIDR_EL0() const override; | 32 | u64 GetTPIDR_EL0() const override; |
| 33 | void SaveContext(ThreadContext& ctx) override; | ||
| 34 | void LoadContext(const ThreadContext& ctx) override; | ||
| 35 | void PrepareReschedule() override; | 33 | void PrepareReschedule() override; |
| 36 | void ClearExclusiveState() override; | 34 | void ClearExclusiveState() override; |
| 37 | void ExecuteInstructions(std::size_t num_instructions); | 35 | void ExecuteInstructions(std::size_t num_instructions); |
| @@ -41,6 +39,11 @@ public: | |||
| 41 | void PageTableChanged(Common::PageTable&, std::size_t) override {} | 39 | void PageTableChanged(Common::PageTable&, std::size_t) override {} |
| 42 | void RecordBreak(GDBStub::BreakpointAddress bkpt); | 40 | void RecordBreak(GDBStub::BreakpointAddress bkpt); |
| 43 | 41 | ||
| 42 | void SaveContext(ThreadContext32& ctx) override {} | ||
| 43 | void SaveContext(ThreadContext64& ctx) override; | ||
| 44 | void LoadContext(const ThreadContext32& ctx) override {} | ||
| 45 | void LoadContext(const ThreadContext64& ctx) override; | ||
| 46 | |||
| 44 | private: | 47 | private: |
| 45 | static void InterruptHook(uc_engine* uc, u32 int_no, void* user_data); | 48 | static void InterruptHook(uc_engine* uc, u32 int_no, void* user_data); |
| 46 | 49 | ||
diff --git a/src/core/core.cpp b/src/core/core.cpp index 86e314c94..d1bc9340d 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include "core/file_sys/sdmc_factory.h" | 24 | #include "core/file_sys/sdmc_factory.h" |
| 25 | #include "core/file_sys/vfs_concat.h" | 25 | #include "core/file_sys/vfs_concat.h" |
| 26 | #include "core/file_sys/vfs_real.h" | 26 | #include "core/file_sys/vfs_real.h" |
| 27 | #include "core/frontend/scope_acquire_context.h" | ||
| 27 | #include "core/gdbstub/gdbstub.h" | 28 | #include "core/gdbstub/gdbstub.h" |
| 28 | #include "core/hardware_interrupt_manager.h" | 29 | #include "core/hardware_interrupt_manager.h" |
| 29 | #include "core/hle/kernel/client_port.h" | 30 | #include "core/hle/kernel/client_port.h" |
| @@ -165,7 +166,7 @@ struct System::Impl { | |||
| 165 | service_manager = std::make_shared<Service::SM::ServiceManager>(); | 166 | service_manager = std::make_shared<Service::SM::ServiceManager>(); |
| 166 | 167 | ||
| 167 | Service::Init(service_manager, system); | 168 | Service::Init(service_manager, system); |
| 168 | GDBStub::Init(); | 169 | GDBStub::DeferStart(); |
| 169 | 170 | ||
| 170 | renderer = VideoCore::CreateRenderer(emu_window, system); | 171 | renderer = VideoCore::CreateRenderer(emu_window, system); |
| 171 | if (!renderer->Init()) { | 172 | if (!renderer->Init()) { |
| @@ -173,6 +174,7 @@ struct System::Impl { | |||
| 173 | } | 174 | } |
| 174 | interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system); | 175 | interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system); |
| 175 | gpu_core = VideoCore::CreateGPU(system); | 176 | gpu_core = VideoCore::CreateGPU(system); |
| 177 | renderer->Rasterizer().SetupDirtyFlags(); | ||
| 176 | 178 | ||
| 177 | is_powered_on = true; | 179 | is_powered_on = true; |
| 178 | exit_lock = false; | 180 | exit_lock = false; |
| @@ -184,6 +186,8 @@ struct System::Impl { | |||
| 184 | 186 | ||
| 185 | ResultStatus Load(System& system, Frontend::EmuWindow& emu_window, | 187 | ResultStatus Load(System& system, Frontend::EmuWindow& emu_window, |
| 186 | const std::string& filepath) { | 188 | const std::string& filepath) { |
| 189 | Core::Frontend::ScopeAcquireContext acquire_context{emu_window}; | ||
| 190 | |||
| 187 | app_loader = Loader::GetLoader(GetGameFileFromPath(virtual_filesystem, filepath)); | 191 | app_loader = Loader::GetLoader(GetGameFileFromPath(virtual_filesystem, filepath)); |
| 188 | if (!app_loader) { | 192 | if (!app_loader) { |
| 189 | LOG_CRITICAL(Core, "Failed to obtain loader for {}!", filepath); | 193 | LOG_CRITICAL(Core, "Failed to obtain loader for {}!", filepath); |
diff --git a/src/core/core_manager.cpp b/src/core/core_manager.cpp index 8eacf92dd..b6b797c80 100644 --- a/src/core/core_manager.cpp +++ b/src/core/core_manager.cpp | |||
| @@ -6,9 +6,6 @@ | |||
| 6 | #include <mutex> | 6 | #include <mutex> |
| 7 | 7 | ||
| 8 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 9 | #ifdef ARCHITECTURE_x86_64 | ||
| 10 | #include "core/arm/dynarmic/arm_dynarmic.h" | ||
| 11 | #endif | ||
| 12 | #include "core/arm/exclusive_monitor.h" | 9 | #include "core/arm/exclusive_monitor.h" |
| 13 | #include "core/arm/unicorn/arm_unicorn.h" | 10 | #include "core/arm/unicorn/arm_unicorn.h" |
| 14 | #include "core/core.h" | 11 | #include "core/core.h" |
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index 3376eedc5..5eb87fb63 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h | |||
| @@ -26,9 +26,6 @@ public: | |||
| 26 | 26 | ||
| 27 | /// Releases (dunno if this is the "right" word) the context from the caller thread | 27 | /// Releases (dunno if this is the "right" word) the context from the caller thread |
| 28 | virtual void DoneCurrent() = 0; | 28 | virtual void DoneCurrent() = 0; |
| 29 | |||
| 30 | /// Swap buffers to display the next frame | ||
| 31 | virtual void SwapBuffers() = 0; | ||
| 32 | }; | 29 | }; |
| 33 | 30 | ||
| 34 | /** | 31 | /** |
diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index 2dc795d56..68a0e0906 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp | |||
| @@ -48,8 +48,8 @@ FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale) { | |||
| 48 | u32 width, height; | 48 | u32 width, height; |
| 49 | 49 | ||
| 50 | if (Settings::values.use_docked_mode) { | 50 | if (Settings::values.use_docked_mode) { |
| 51 | width = ScreenDocked::WidthDocked * res_scale; | 51 | width = ScreenDocked::Width * res_scale; |
| 52 | height = ScreenDocked::HeightDocked * res_scale; | 52 | height = ScreenDocked::Height * res_scale; |
| 53 | } else { | 53 | } else { |
| 54 | width = ScreenUndocked::Width * res_scale; | 54 | width = ScreenUndocked::Width * res_scale; |
| 55 | height = ScreenUndocked::Height * res_scale; | 55 | height = ScreenUndocked::Height * res_scale; |
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h index 1d39c1faf..15ecfb13d 100644 --- a/src/core/frontend/framebuffer_layout.h +++ b/src/core/frontend/framebuffer_layout.h | |||
| @@ -8,15 +8,15 @@ | |||
| 8 | 8 | ||
| 9 | namespace Layout { | 9 | namespace Layout { |
| 10 | 10 | ||
| 11 | enum ScreenUndocked : u32 { | 11 | namespace ScreenUndocked { |
| 12 | Width = 1280, | 12 | constexpr u32 Width = 1280; |
| 13 | Height = 720, | 13 | constexpr u32 Height = 720; |
| 14 | }; | 14 | } // namespace ScreenUndocked |
| 15 | 15 | ||
| 16 | enum ScreenDocked : u32 { | 16 | namespace ScreenDocked { |
| 17 | WidthDocked = 1920, | 17 | constexpr u32 Width = 1920; |
| 18 | HeightDocked = 1080, | 18 | constexpr u32 Height = 1080; |
| 19 | }; | 19 | } // namespace ScreenDocked |
| 20 | 20 | ||
| 21 | enum class AspectRatio { | 21 | enum class AspectRatio { |
| 22 | Default, | 22 | Default, |
| @@ -29,6 +29,7 @@ enum class AspectRatio { | |||
| 29 | struct FramebufferLayout { | 29 | struct FramebufferLayout { |
| 30 | u32 width{ScreenUndocked::Width}; | 30 | u32 width{ScreenUndocked::Width}; |
| 31 | u32 height{ScreenUndocked::Height}; | 31 | u32 height{ScreenUndocked::Height}; |
| 32 | bool is_srgb{}; | ||
| 32 | 33 | ||
| 33 | Common::Rectangle<u32> screen; | 34 | Common::Rectangle<u32> screen; |
| 34 | 35 | ||
diff --git a/src/core/frontend/scope_acquire_context.cpp b/src/core/frontend/scope_acquire_context.cpp new file mode 100644 index 000000000..878c3157c --- /dev/null +++ b/src/core/frontend/scope_acquire_context.cpp | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "core/frontend/emu_window.h" | ||
| 6 | #include "core/frontend/scope_acquire_context.h" | ||
| 7 | |||
| 8 | namespace Core::Frontend { | ||
| 9 | |||
| 10 | ScopeAcquireContext::ScopeAcquireContext(Core::Frontend::GraphicsContext& context) | ||
| 11 | : context{context} { | ||
| 12 | context.MakeCurrent(); | ||
| 13 | } | ||
| 14 | ScopeAcquireContext::~ScopeAcquireContext() { | ||
| 15 | context.DoneCurrent(); | ||
| 16 | } | ||
| 17 | |||
| 18 | } // namespace Core::Frontend | ||
diff --git a/src/core/frontend/scope_acquire_window_context.h b/src/core/frontend/scope_acquire_context.h index 2d9f6e825..7a65c0623 100644 --- a/src/core/frontend/scope_acquire_window_context.h +++ b/src/core/frontend/scope_acquire_context.h | |||
| @@ -8,16 +8,16 @@ | |||
| 8 | 8 | ||
| 9 | namespace Core::Frontend { | 9 | namespace Core::Frontend { |
| 10 | 10 | ||
| 11 | class EmuWindow; | 11 | class GraphicsContext; |
| 12 | 12 | ||
| 13 | /// Helper class to acquire/release window context within a given scope | 13 | /// Helper class to acquire/release window context within a given scope |
| 14 | class ScopeAcquireWindowContext : NonCopyable { | 14 | class ScopeAcquireContext : NonCopyable { |
| 15 | public: | 15 | public: |
| 16 | explicit ScopeAcquireWindowContext(Core::Frontend::EmuWindow& window); | 16 | explicit ScopeAcquireContext(Core::Frontend::GraphicsContext& context); |
| 17 | ~ScopeAcquireWindowContext(); | 17 | ~ScopeAcquireContext(); |
| 18 | 18 | ||
| 19 | private: | 19 | private: |
| 20 | Core::Frontend::EmuWindow& emu_window; | 20 | Core::Frontend::GraphicsContext& context; |
| 21 | }; | 21 | }; |
| 22 | 22 | ||
| 23 | } // namespace Core::Frontend | 23 | } // namespace Core::Frontend |
diff --git a/src/core/frontend/scope_acquire_window_context.cpp b/src/core/frontend/scope_acquire_window_context.cpp deleted file mode 100644 index 3663dad17..000000000 --- a/src/core/frontend/scope_acquire_window_context.cpp +++ /dev/null | |||
| @@ -1,18 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "core/frontend/emu_window.h" | ||
| 6 | #include "core/frontend/scope_acquire_window_context.h" | ||
| 7 | |||
| 8 | namespace Core::Frontend { | ||
| 9 | |||
| 10 | ScopeAcquireWindowContext::ScopeAcquireWindowContext(Core::Frontend::EmuWindow& emu_window_) | ||
| 11 | : emu_window{emu_window_} { | ||
| 12 | emu_window.MakeCurrent(); | ||
| 13 | } | ||
| 14 | ScopeAcquireWindowContext::~ScopeAcquireWindowContext() { | ||
| 15 | emu_window.DoneCurrent(); | ||
| 16 | } | ||
| 17 | |||
| 18 | } // namespace Core::Frontend | ||
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index 67e95999d..6d15aeed9 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp | |||
| @@ -141,6 +141,7 @@ constexpr char target_xml[] = | |||
| 141 | )"; | 141 | )"; |
| 142 | 142 | ||
| 143 | int gdbserver_socket = -1; | 143 | int gdbserver_socket = -1; |
| 144 | bool defer_start = false; | ||
| 144 | 145 | ||
| 145 | u8 command_buffer[GDB_BUFFER_SIZE]; | 146 | u8 command_buffer[GDB_BUFFER_SIZE]; |
| 146 | u32 command_length; | 147 | u32 command_length; |
| @@ -217,7 +218,7 @@ static u64 RegRead(std::size_t id, Kernel::Thread* thread = nullptr) { | |||
| 217 | return 0; | 218 | return 0; |
| 218 | } | 219 | } |
| 219 | 220 | ||
| 220 | const auto& thread_context = thread->GetContext(); | 221 | const auto& thread_context = thread->GetContext64(); |
| 221 | 222 | ||
| 222 | if (id < SP_REGISTER) { | 223 | if (id < SP_REGISTER) { |
| 223 | return thread_context.cpu_registers[id]; | 224 | return thread_context.cpu_registers[id]; |
| @@ -239,7 +240,7 @@ static void RegWrite(std::size_t id, u64 val, Kernel::Thread* thread = nullptr) | |||
| 239 | return; | 240 | return; |
| 240 | } | 241 | } |
| 241 | 242 | ||
| 242 | auto& thread_context = thread->GetContext(); | 243 | auto& thread_context = thread->GetContext64(); |
| 243 | 244 | ||
| 244 | if (id < SP_REGISTER) { | 245 | if (id < SP_REGISTER) { |
| 245 | thread_context.cpu_registers[id] = val; | 246 | thread_context.cpu_registers[id] = val; |
| @@ -259,7 +260,7 @@ static u128 FpuRead(std::size_t id, Kernel::Thread* thread = nullptr) { | |||
| 259 | return u128{0}; | 260 | return u128{0}; |
| 260 | } | 261 | } |
| 261 | 262 | ||
| 262 | auto& thread_context = thread->GetContext(); | 263 | auto& thread_context = thread->GetContext64(); |
| 263 | 264 | ||
| 264 | if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { | 265 | if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { |
| 265 | return thread_context.vector_registers[id - UC_ARM64_REG_Q0]; | 266 | return thread_context.vector_registers[id - UC_ARM64_REG_Q0]; |
| @@ -275,7 +276,7 @@ static void FpuWrite(std::size_t id, u128 val, Kernel::Thread* thread = nullptr) | |||
| 275 | return; | 276 | return; |
| 276 | } | 277 | } |
| 277 | 278 | ||
| 278 | auto& thread_context = thread->GetContext(); | 279 | auto& thread_context = thread->GetContext64(); |
| 279 | 280 | ||
| 280 | if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { | 281 | if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { |
| 281 | thread_context.vector_registers[id - UC_ARM64_REG_Q0] = val; | 282 | thread_context.vector_registers[id - UC_ARM64_REG_Q0] = val; |
| @@ -916,7 +917,7 @@ static void WriteRegister() { | |||
| 916 | // Update ARM context, skipping scheduler - no running threads at this point | 917 | // Update ARM context, skipping scheduler - no running threads at this point |
| 917 | Core::System::GetInstance() | 918 | Core::System::GetInstance() |
| 918 | .ArmInterface(current_core) | 919 | .ArmInterface(current_core) |
| 919 | .LoadContext(current_thread->GetContext()); | 920 | .LoadContext(current_thread->GetContext64()); |
| 920 | 921 | ||
| 921 | SendReply("OK"); | 922 | SendReply("OK"); |
| 922 | } | 923 | } |
| @@ -947,7 +948,7 @@ static void WriteRegisters() { | |||
| 947 | // Update ARM context, skipping scheduler - no running threads at this point | 948 | // Update ARM context, skipping scheduler - no running threads at this point |
| 948 | Core::System::GetInstance() | 949 | Core::System::GetInstance() |
| 949 | .ArmInterface(current_core) | 950 | .ArmInterface(current_core) |
| 950 | .LoadContext(current_thread->GetContext()); | 951 | .LoadContext(current_thread->GetContext64()); |
| 951 | 952 | ||
| 952 | SendReply("OK"); | 953 | SendReply("OK"); |
| 953 | } | 954 | } |
| @@ -1019,7 +1020,7 @@ static void Step() { | |||
| 1019 | // Update ARM context, skipping scheduler - no running threads at this point | 1020 | // Update ARM context, skipping scheduler - no running threads at this point |
| 1020 | Core::System::GetInstance() | 1021 | Core::System::GetInstance() |
| 1021 | .ArmInterface(current_core) | 1022 | .ArmInterface(current_core) |
| 1022 | .LoadContext(current_thread->GetContext()); | 1023 | .LoadContext(current_thread->GetContext64()); |
| 1023 | } | 1024 | } |
| 1024 | step_loop = true; | 1025 | step_loop = true; |
| 1025 | halt_loop = true; | 1026 | halt_loop = true; |
| @@ -1166,6 +1167,9 @@ static void RemoveBreakpoint() { | |||
| 1166 | 1167 | ||
| 1167 | void HandlePacket() { | 1168 | void HandlePacket() { |
| 1168 | if (!IsConnected()) { | 1169 | if (!IsConnected()) { |
| 1170 | if (defer_start) { | ||
| 1171 | ToggleServer(true); | ||
| 1172 | } | ||
| 1169 | return; | 1173 | return; |
| 1170 | } | 1174 | } |
| 1171 | 1175 | ||
| @@ -1256,6 +1260,10 @@ void ToggleServer(bool status) { | |||
| 1256 | } | 1260 | } |
| 1257 | } | 1261 | } |
| 1258 | 1262 | ||
| 1263 | void DeferStart() { | ||
| 1264 | defer_start = true; | ||
| 1265 | } | ||
| 1266 | |||
| 1259 | static void Init(u16 port) { | 1267 | static void Init(u16 port) { |
| 1260 | if (!server_enabled) { | 1268 | if (!server_enabled) { |
| 1261 | // Set the halt loop to false in case the user enabled the gdbstub mid-execution. | 1269 | // Set the halt loop to false in case the user enabled the gdbstub mid-execution. |
| @@ -1341,6 +1349,7 @@ void Shutdown() { | |||
| 1341 | if (!server_enabled) { | 1349 | if (!server_enabled) { |
| 1342 | return; | 1350 | return; |
| 1343 | } | 1351 | } |
| 1352 | defer_start = false; | ||
| 1344 | 1353 | ||
| 1345 | LOG_INFO(Debug_GDBStub, "Stopping GDB ..."); | 1354 | LOG_INFO(Debug_GDBStub, "Stopping GDB ..."); |
| 1346 | if (gdbserver_socket != -1) { | 1355 | if (gdbserver_socket != -1) { |
diff --git a/src/core/gdbstub/gdbstub.h b/src/core/gdbstub/gdbstub.h index 5a36524b2..8fe3c320b 100644 --- a/src/core/gdbstub/gdbstub.h +++ b/src/core/gdbstub/gdbstub.h | |||
| @@ -43,6 +43,13 @@ void ToggleServer(bool status); | |||
| 43 | /// Start the gdbstub server. | 43 | /// Start the gdbstub server. |
| 44 | void Init(); | 44 | void Init(); |
| 45 | 45 | ||
| 46 | /** | ||
| 47 | * Defer initialization of the gdbstub to the first packet processing functions. | ||
| 48 | * This avoids a case where the gdbstub thread is frozen after initialization | ||
| 49 | * and fails to respond in time to packets. | ||
| 50 | */ | ||
| 51 | void DeferStart(); | ||
| 52 | |||
| 46 | /// Stop gdbstub server. | 53 | /// Stop gdbstub server. |
| 47 | void Shutdown(); | 54 | void Shutdown(); |
| 48 | 55 | ||
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 9232f4d7e..e47f1deed 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp | |||
| @@ -186,6 +186,10 @@ struct KernelCore::Impl { | |||
| 186 | return; | 186 | return; |
| 187 | } | 187 | } |
| 188 | 188 | ||
| 189 | for (auto& core : cores) { | ||
| 190 | core.SetIs64Bit(process->Is64BitProcess()); | ||
| 191 | } | ||
| 192 | |||
| 189 | system.Memory().SetCurrentPageTable(*process); | 193 | system.Memory().SetCurrentPageTable(*process); |
| 190 | } | 194 | } |
| 191 | 195 | ||
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp index 9303dd273..aa2787467 100644 --- a/src/core/hle/kernel/physical_core.cpp +++ b/src/core/hle/kernel/physical_core.cpp | |||
| @@ -5,7 +5,8 @@ | |||
| 5 | #include "common/logging/log.h" | 5 | #include "common/logging/log.h" |
| 6 | #include "core/arm/arm_interface.h" | 6 | #include "core/arm/arm_interface.h" |
| 7 | #ifdef ARCHITECTURE_x86_64 | 7 | #ifdef ARCHITECTURE_x86_64 |
| 8 | #include "core/arm/dynarmic/arm_dynarmic.h" | 8 | #include "core/arm/dynarmic/arm_dynarmic_32.h" |
| 9 | #include "core/arm/dynarmic/arm_dynarmic_64.h" | ||
| 9 | #endif | 10 | #endif |
| 10 | #include "core/arm/exclusive_monitor.h" | 11 | #include "core/arm/exclusive_monitor.h" |
| 11 | #include "core/arm/unicorn/arm_unicorn.h" | 12 | #include "core/arm/unicorn/arm_unicorn.h" |
| @@ -20,13 +21,17 @@ PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, | |||
| 20 | Core::ExclusiveMonitor& exclusive_monitor) | 21 | Core::ExclusiveMonitor& exclusive_monitor) |
| 21 | : core_index{id} { | 22 | : core_index{id} { |
| 22 | #ifdef ARCHITECTURE_x86_64 | 23 | #ifdef ARCHITECTURE_x86_64 |
| 23 | arm_interface = std::make_unique<Core::ARM_Dynarmic>(system, exclusive_monitor, core_index); | 24 | arm_interface_32 = |
| 25 | std::make_unique<Core::ARM_Dynarmic_32>(system, exclusive_monitor, core_index); | ||
| 26 | arm_interface_64 = | ||
| 27 | std::make_unique<Core::ARM_Dynarmic_64>(system, exclusive_monitor, core_index); | ||
| 28 | |||
| 24 | #else | 29 | #else |
| 25 | arm_interface = std::make_shared<Core::ARM_Unicorn>(system); | 30 | arm_interface = std::make_shared<Core::ARM_Unicorn>(system); |
| 26 | LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); | 31 | LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); |
| 27 | #endif | 32 | #endif |
| 28 | 33 | ||
| 29 | scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface, core_index); | 34 | scheduler = std::make_unique<Kernel::Scheduler>(system, core_index); |
| 30 | } | 35 | } |
| 31 | 36 | ||
| 32 | PhysicalCore::~PhysicalCore() = default; | 37 | PhysicalCore::~PhysicalCore() = default; |
| @@ -48,4 +53,12 @@ void PhysicalCore::Shutdown() { | |||
| 48 | scheduler->Shutdown(); | 53 | scheduler->Shutdown(); |
| 49 | } | 54 | } |
| 50 | 55 | ||
| 56 | void PhysicalCore::SetIs64Bit(bool is_64_bit) { | ||
| 57 | /* Both per-width interfaces stay owned by their unique_ptr members; arm_interface is */ | ||
| 58 | /* only repointed (non-owning) at the one that matches the requested width. */ | ||
| 59 | /* NOTE(review): only the ARCHITECTURE_x86_64 constructor branch visibly fills those members. */ | ||
| 60 | const auto& selected_backend = is_64_bit ? arm_interface_64 : arm_interface_32; | ||
| 61 | arm_interface = selected_backend.get(); | ||
| 62 | } | ||
| 63 | |||
| 51 | } // namespace Kernel | 64 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h index 4c32c0f1b..3269166be 100644 --- a/src/core/hle/kernel/physical_core.h +++ b/src/core/hle/kernel/physical_core.h | |||
| @@ -68,10 +68,14 @@ public: | |||
| 68 | return *scheduler; | 68 | return *scheduler; |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | void SetIs64Bit(bool is_64_bit); | ||
| 72 | |||
| 71 | private: | 73 | private: |
| 72 | std::size_t core_index; | 74 | std::size_t core_index; |
| 73 | std::unique_ptr<Core::ARM_Interface> arm_interface; | 75 | std::unique_ptr<Core::ARM_Interface> arm_interface_32; |
| 76 | std::unique_ptr<Core::ARM_Interface> arm_interface_64; | ||
| 74 | std::unique_ptr<Kernel::Scheduler> scheduler; | 77 | std::unique_ptr<Kernel::Scheduler> scheduler; |
| 78 | Core::ARM_Interface* arm_interface{}; | ||
| 75 | }; | 79 | }; |
| 76 | 80 | ||
| 77 | } // namespace Kernel | 81 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 2fcb7326c..edc414d69 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp | |||
| @@ -42,7 +42,8 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) { | |||
| 42 | 42 | ||
| 43 | // Register 1 must be a handle to the main thread | 43 | // Register 1 must be a handle to the main thread |
| 44 | const Handle thread_handle = owner_process.GetHandleTable().Create(thread).Unwrap(); | 44 | const Handle thread_handle = owner_process.GetHandleTable().Create(thread).Unwrap(); |
| 45 | thread->GetContext().cpu_registers[1] = thread_handle; | 45 | thread->GetContext32().cpu_registers[1] = thread_handle; |
| 46 | thread->GetContext64().cpu_registers[1] = thread_handle; | ||
| 46 | 47 | ||
| 47 | // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires | 48 | // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires |
| 48 | thread->ResumeFromWait(); | 49 | thread->ResumeFromWait(); |
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index c65f82fb7..1140c72a3 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp | |||
| @@ -383,8 +383,8 @@ void GlobalScheduler::Unlock() { | |||
| 383 | // TODO(Blinkhawk): Setup the interrupts and change context on current core. | 383 | // TODO(Blinkhawk): Setup the interrupts and change context on current core. |
| 384 | } | 384 | } |
| 385 | 385 | ||
| 386 | Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, std::size_t core_id) | 386 | Scheduler::Scheduler(Core::System& system, std::size_t core_id) |
| 387 | : system(system), cpu_core(cpu_core), core_id(core_id) {} | 387 | : system{system}, core_id{core_id} {} |
| 388 | 388 | ||
| 389 | Scheduler::~Scheduler() = default; | 389 | Scheduler::~Scheduler() = default; |
| 390 | 390 | ||
| @@ -422,9 +422,10 @@ void Scheduler::UnloadThread() { | |||
| 422 | 422 | ||
| 423 | // Save context for previous thread | 423 | // Save context for previous thread |
| 424 | if (previous_thread) { | 424 | if (previous_thread) { |
| 425 | cpu_core.SaveContext(previous_thread->GetContext()); | 425 | system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32()); |
| 426 | system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64()); | ||
| 426 | // Save the TPIDR_EL0 system register in case it was modified. | 427 | // Save the TPIDR_EL0 system register in case it was modified. |
| 427 | previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); | 428 | previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0()); |
| 428 | 429 | ||
| 429 | if (previous_thread->GetStatus() == ThreadStatus::Running) { | 430 | if (previous_thread->GetStatus() == ThreadStatus::Running) { |
| 430 | // This is only the case when a reschedule is triggered without the current thread | 431 | // This is only the case when a reschedule is triggered without the current thread |
| @@ -451,9 +452,10 @@ void Scheduler::SwitchContext() { | |||
| 451 | 452 | ||
| 452 | // Save context for previous thread | 453 | // Save context for previous thread |
| 453 | if (previous_thread) { | 454 | if (previous_thread) { |
| 454 | cpu_core.SaveContext(previous_thread->GetContext()); | 455 | system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32()); |
| 456 | system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64()); | ||
| 455 | // Save the TPIDR_EL0 system register in case it was modified. | 457 | // Save the TPIDR_EL0 system register in case it was modified. |
| 456 | previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); | 458 | previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0()); |
| 457 | 459 | ||
| 458 | if (previous_thread->GetStatus() == ThreadStatus::Running) { | 460 | if (previous_thread->GetStatus() == ThreadStatus::Running) { |
| 459 | // This is only the case when a reschedule is triggered without the current thread | 461 | // This is only the case when a reschedule is triggered without the current thread |
| @@ -481,9 +483,10 @@ void Scheduler::SwitchContext() { | |||
| 481 | system.Kernel().MakeCurrentProcess(thread_owner_process); | 483 | system.Kernel().MakeCurrentProcess(thread_owner_process); |
| 482 | } | 484 | } |
| 483 | 485 | ||
| 484 | cpu_core.LoadContext(new_thread->GetContext()); | 486 | system.ArmInterface(core_id).LoadContext(new_thread->GetContext32()); |
| 485 | cpu_core.SetTlsAddress(new_thread->GetTLSAddress()); | 487 | system.ArmInterface(core_id).LoadContext(new_thread->GetContext64()); |
| 486 | cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0()); | 488 | system.ArmInterface(core_id).SetTlsAddress(new_thread->GetTLSAddress()); |
| 489 | system.ArmInterface(core_id).SetTPIDR_EL0(new_thread->GetTPIDR_EL0()); | ||
| 487 | } else { | 490 | } else { |
| 488 | current_thread = nullptr; | 491 | current_thread = nullptr; |
| 489 | // Note: We do not reset the current process and current page table when idling because | 492 | // Note: We do not reset the current process and current page table when idling because |
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 1c93a838c..07df33f9c 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h | |||
| @@ -181,7 +181,7 @@ private: | |||
| 181 | 181 | ||
| 182 | class Scheduler final { | 182 | class Scheduler final { |
| 183 | public: | 183 | public: |
| 184 | explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, std::size_t core_id); | 184 | explicit Scheduler(Core::System& system, std::size_t core_id); |
| 185 | ~Scheduler(); | 185 | ~Scheduler(); |
| 186 | 186 | ||
| 187 | /// Returns whether there are any threads that are ready to run. | 187 | /// Returns whether there are any threads that are ready to run. |
| @@ -235,7 +235,6 @@ private: | |||
| 235 | std::shared_ptr<Thread> selected_thread = nullptr; | 235 | std::shared_ptr<Thread> selected_thread = nullptr; |
| 236 | 236 | ||
| 237 | Core::System& system; | 237 | Core::System& system; |
| 238 | Core::ARM_Interface& cpu_core; | ||
| 239 | u64 last_context_switch_time = 0; | 238 | u64 last_context_switch_time = 0; |
| 240 | u64 idle_selection_count = 0; | 239 | u64 idle_selection_count = 0; |
| 241 | const std::size_t core_id; | 240 | const std::size_t core_id; |
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index fd91779a3..4ffc113c2 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -187,6 +187,13 @@ static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_s | |||
| 187 | return RESULT_SUCCESS; | 187 | return RESULT_SUCCESS; |
| 188 | } | 188 | } |
| 189 | 189 | ||
| 190 | static ResultCode SetHeapSize32(Core::System& system, u32* heap_addr, u32 heap_size) { | ||
| 191 | VAddr wide_heap_addr = 0; /* 32-bit SVC shim: SetHeapSize writes its result into this VAddr temporary */ | ||
| 192 | const ResultCode rc = SetHeapSize(system, &wide_heap_addr, heap_size); | ||
| 193 | *heap_addr = static_cast<u32>(wide_heap_addr); /* narrowed to u32; stored regardless of rc */ | ||
| 194 | return rc; | ||
| 195 | } | ||
| 196 | |||
| 190 | static ResultCode SetMemoryPermission(Core::System& system, VAddr addr, u64 size, u32 prot) { | 197 | static ResultCode SetMemoryPermission(Core::System& system, VAddr addr, u64 size, u32 prot) { |
| 191 | LOG_TRACE(Kernel_SVC, "called, addr=0x{:X}, size=0x{:X}, prot=0x{:X}", addr, size, prot); | 198 | LOG_TRACE(Kernel_SVC, "called, addr=0x{:X}, size=0x{:X}, prot=0x{:X}", addr, size, prot); |
| 192 | 199 | ||
| @@ -371,6 +378,12 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle, | |||
| 371 | return RESULT_SUCCESS; | 378 | return RESULT_SUCCESS; |
| 372 | } | 379 | } |
| 373 | 380 | ||
| 381 | static ResultCode ConnectToNamedPort32(Core::System& system, Handle* out_handle, | ||
| 382 | u32 port_name_address) { | ||
| 383 | /* 32-bit SVC entry (SVC_Table_32 slot 0x1f): forwards every argument unchanged to ConnectToNamedPort. */ | ||
| 384 | return ConnectToNamedPort(system, out_handle, port_name_address); | ||
| 385 | } | ||
| 386 | |||
| 374 | /// Makes a blocking IPC call to an OS service. | 387 | /// Makes a blocking IPC call to an OS service. |
| 375 | static ResultCode SendSyncRequest(Core::System& system, Handle handle) { | 388 | static ResultCode SendSyncRequest(Core::System& system, Handle handle) { |
| 376 | const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); | 389 | const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); |
| @@ -390,6 +403,10 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) { | |||
| 390 | return session->SendSyncRequest(SharedFrom(thread), system.Memory()); | 403 | return session->SendSyncRequest(SharedFrom(thread), system.Memory()); |
| 391 | } | 404 | } |
| 392 | 405 | ||
| 406 | static ResultCode SendSyncRequest32(Core::System& system, Handle handle) { /* 32-bit SVC entry (SVC_Table_32 slot 0x21) */ | ||
| 407 | return SendSyncRequest(system, handle); /* same argument types, so this is a plain forward */ | ||
| 408 | } | ||
| 409 | |||
| 393 | /// Get the ID for the specified thread. | 410 | /// Get the ID for the specified thread. |
| 394 | static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle thread_handle) { | 411 | static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle thread_handle) { |
| 395 | LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle); | 412 | LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle); |
| @@ -405,6 +422,17 @@ static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle threa | |||
| 405 | return RESULT_SUCCESS; | 422 | return RESULT_SUCCESS; |
| 406 | } | 423 | } |
| 407 | 424 | ||
| 425 | static ResultCode GetThreadId32(Core::System& system, u32* thread_id_low, u32* thread_id_high, | ||
| 426 | Handle thread_handle) { /* 32-bit SVC shim around GetThreadId; returns GetThreadId's result code */ | ||
| 427 | u64 thread_id{}; /* receives the full 64-bit ID from GetThreadId */ | ||
| 428 | const ResultCode result{GetThreadId(system, &thread_id, thread_handle)}; | ||
| 429 | /* Split for the 32-bit caller: *thread_id_low gets bits 0-31, *thread_id_high bits 32-63 (same layout as GetInfo32). */ | ||
| 430 | *thread_id_low = static_cast<u32>(thread_id & std::numeric_limits<u32>::max()); | ||
| 431 | *thread_id_high = static_cast<u32>(thread_id >> 32); | ||
| 432 | |||
| 433 | return result; | ||
| 434 | } | ||
| 435 | |||
| 408 | /// Gets the ID of the specified process or a specified thread's owning process. | 436 | /// Gets the ID of the specified process or a specified thread's owning process. |
| 409 | static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle handle) { | 437 | static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle handle) { |
| 410 | LOG_DEBUG(Kernel_SVC, "called handle=0x{:08X}", handle); | 438 | LOG_DEBUG(Kernel_SVC, "called handle=0x{:08X}", handle); |
| @@ -479,6 +507,12 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr | |||
| 479 | return result; | 507 | return result; |
| 480 | } | 508 | } |
| 481 | 509 | ||
| 510 | static ResultCode WaitSynchronization32(Core::System& system, u32 timeout_low, u32 handles_address, | ||
| 511 | s32 handle_count, u32 timeout_high, Handle* index) { /* 32-bit SVC shim around WaitSynchronization */ | ||
| 512 | const auto timeout_ns = static_cast<s64>((static_cast<u64>(timeout_high) << 32) | timeout_low); /* high:low words -> s64 */ | ||
| 513 | return WaitSynchronization(system, index, handles_address, handle_count, timeout_ns); | ||
| 514 | } | ||
| 514 | } | ||
| 515 | |||
| 482 | /// Resumes a thread waiting on WaitSynchronization | 516 | /// Resumes a thread waiting on WaitSynchronization |
| 483 | static ResultCode CancelSynchronization(Core::System& system, Handle thread_handle) { | 517 | static ResultCode CancelSynchronization(Core::System& system, Handle thread_handle) { |
| 484 | LOG_TRACE(Kernel_SVC, "called thread=0x{:X}", thread_handle); | 518 | LOG_TRACE(Kernel_SVC, "called thread=0x{:X}", thread_handle); |
| @@ -917,6 +951,18 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 917 | } | 951 | } |
| 918 | } | 952 | } |
| 919 | 953 | ||
| 954 | static ResultCode GetInfo32(Core::System& system, u32* result_low, u32* result_high, u32 sub_id_low, | ||
| 955 | u32 info_id, u32 handle, u32 sub_id_high) { | ||
| 956 | const u64 sub_id = (static_cast<u64>(sub_id_high) << 32) | sub_id_low; /* rebuild the 64-bit sub id from its two words */ | ||
| 957 | u64 info_value = 0; | ||
| 958 | /* Run the 64-bit handler, then hand its value back as two 32-bit halves. */ | ||
| 959 | const ResultCode rc = GetInfo(system, &info_value, info_id, handle, sub_id); | ||
| 960 | *result_high = static_cast<u32>(info_value >> 32); | ||
| 961 | *result_low = static_cast<u32>(info_value); /* truncation keeps bits 0-31 */ | ||
| 962 | |||
| 963 | return rc; | ||
| 964 | } | ||
| 965 | |||
| 920 | /// Maps memory at a desired address | 966 | /// Maps memory at a desired address |
| 921 | static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { | 967 | static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { |
| 922 | LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); | 968 | LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); |
| @@ -1058,7 +1104,7 @@ static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, H | |||
| 1058 | return ERR_BUSY; | 1104 | return ERR_BUSY; |
| 1059 | } | 1105 | } |
| 1060 | 1106 | ||
| 1061 | Core::ARM_Interface::ThreadContext ctx = thread->GetContext(); | 1107 | Core::ARM_Interface::ThreadContext64 ctx = thread->GetContext64(); |
| 1062 | // Mask away mode bits, interrupt bits, IL bit, and other reserved bits. | 1108 | // Mask away mode bits, interrupt bits, IL bit, and other reserved bits. |
| 1063 | ctx.pstate &= 0xFF0FFE20; | 1109 | ctx.pstate &= 0xFF0FFE20; |
| 1064 | 1110 | ||
| @@ -1088,6 +1134,10 @@ static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle | |||
| 1088 | return RESULT_SUCCESS; | 1134 | return RESULT_SUCCESS; |
| 1089 | } | 1135 | } |
| 1090 | 1136 | ||
| 1137 | static ResultCode GetThreadPriority32(Core::System& system, u32* priority, Handle handle) { /* 32-bit SVC entry (SVC_Table_32 slot 0x0c) */ | ||
| 1138 | return GetThreadPriority(system, priority, handle); /* same argument types, so this is a plain forward */ | ||
| 1139 | } | ||
| 1140 | |||
| 1091 | /// Sets the priority for the specified thread | 1141 | /// Sets the priority for the specified thread |
| 1092 | static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 priority) { | 1142 | static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 priority) { |
| 1093 | LOG_TRACE(Kernel_SVC, "called"); | 1143 | LOG_TRACE(Kernel_SVC, "called"); |
| @@ -1259,6 +1309,11 @@ static ResultCode QueryMemory(Core::System& system, VAddr memory_info_address, | |||
| 1259 | query_address); | 1309 | query_address); |
| 1260 | } | 1310 | } |
| 1261 | 1311 | ||
| 1312 | static ResultCode QueryMemory32(Core::System& system, u32 memory_info_address, | ||
| 1313 | u32 page_info_address, u32 query_address) { /* 32-bit SVC entry (SVC_Table_32 slot 0x06) */ | ||
| 1314 | return QueryMemory(system, memory_info_address, page_info_address, query_address); /* u32 arguments are implicitly converted to QueryMemory's parameter types */ | ||
| 1315 | } | ||
| 1316 | |||
| 1262 | static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address, | 1317 | static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address, |
| 1263 | u64 src_address, u64 size) { | 1318 | u64 src_address, u64 size) { |
| 1264 | LOG_DEBUG(Kernel_SVC, | 1319 | LOG_DEBUG(Kernel_SVC, |
| @@ -1675,6 +1730,10 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_ | |||
| 1675 | } | 1730 | } |
| 1676 | } | 1731 | } |
| 1677 | 1732 | ||
| 1733 | static void SignalProcessWideKey32(Core::System& system, u32 condition_variable_addr, s32 target) { /* 32-bit SVC entry (SVC_Table_32 slot 0x1d) */ | ||
| 1734 | SignalProcessWideKey(system, condition_variable_addr, target); /* the u32 address is implicitly converted to the handler's VAddr parameter */ | ||
| 1735 | } | ||
| 1736 | |||
| 1678 | // Wait for an address (via Address Arbiter) | 1737 | // Wait for an address (via Address Arbiter) |
| 1679 | static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, | 1738 | static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, |
| 1680 | s64 timeout) { | 1739 | s64 timeout) { |
| @@ -1760,6 +1819,10 @@ static ResultCode CloseHandle(Core::System& system, Handle handle) { | |||
| 1760 | return handle_table.Close(handle); | 1819 | return handle_table.Close(handle); |
| 1761 | } | 1820 | } |
| 1762 | 1821 | ||
| 1822 | static ResultCode CloseHandle32(Core::System& system, Handle handle) { /* 32-bit SVC entry (SVC_Table_32 slot 0x16) */ | ||
| 1823 | return CloseHandle(system, handle); /* same argument types, so this is a plain forward */ | ||
| 1824 | } | ||
| 1825 | |||
| 1763 | /// Clears the signaled state of an event or process. | 1826 | /// Clears the signaled state of an event or process. |
| 1764 | static ResultCode ResetSignal(Core::System& system, Handle handle) { | 1827 | static ResultCode ResetSignal(Core::System& system, Handle handle) { |
| 1765 | LOG_DEBUG(Kernel_SVC, "called handle 0x{:08X}", handle); | 1828 | LOG_DEBUG(Kernel_SVC, "called handle 0x{:08X}", handle); |
| @@ -2317,69 +2380,196 @@ struct FunctionDef { | |||
| 2317 | }; | 2380 | }; |
| 2318 | } // namespace | 2381 | } // namespace |
| 2319 | 2382 | ||
| 2320 | static const FunctionDef SVC_Table[] = { | 2383 | static const FunctionDef SVC_Table_32[] = { |
| 2321 | {0x00, nullptr, "Unknown"}, | 2384 | {0x00, nullptr, "Unknown"}, |
| 2322 | {0x01, SvcWrap<SetHeapSize>, "SetHeapSize"}, | 2385 | {0x01, SvcWrap32<SetHeapSize32>, "SetHeapSize32"}, |
| 2323 | {0x02, SvcWrap<SetMemoryPermission>, "SetMemoryPermission"}, | 2386 | {0x02, nullptr, "Unknown"}, |
| 2324 | {0x03, SvcWrap<SetMemoryAttribute>, "SetMemoryAttribute"}, | 2387 | {0x03, nullptr, "SetMemoryAttribute32"}, |
| 2325 | {0x04, SvcWrap<MapMemory>, "MapMemory"}, | 2388 | {0x04, nullptr, "MapMemory32"}, |
| 2326 | {0x05, SvcWrap<UnmapMemory>, "UnmapMemory"}, | 2389 | {0x05, nullptr, "UnmapMemory32"}, |
| 2327 | {0x06, SvcWrap<QueryMemory>, "QueryMemory"}, | 2390 | {0x06, SvcWrap32<QueryMemory32>, "QueryMemory32"}, |
| 2328 | {0x07, SvcWrap<ExitProcess>, "ExitProcess"}, | 2391 | {0x07, nullptr, "ExitProcess32"}, |
| 2329 | {0x08, SvcWrap<CreateThread>, "CreateThread"}, | 2392 | {0x08, nullptr, "CreateThread32"}, |
| 2330 | {0x09, SvcWrap<StartThread>, "StartThread"}, | 2393 | {0x09, nullptr, "StartThread32"}, |
| 2331 | {0x0A, SvcWrap<ExitThread>, "ExitThread"}, | 2394 | {0x0a, nullptr, "ExitThread32"}, |
| 2332 | {0x0B, SvcWrap<SleepThread>, "SleepThread"}, | 2395 | {0x0b, nullptr, "SleepThread32"}, |
| 2333 | {0x0C, SvcWrap<GetThreadPriority>, "GetThreadPriority"}, | 2396 | {0x0c, SvcWrap32<GetThreadPriority32>, "GetThreadPriority32"}, |
| 2334 | {0x0D, SvcWrap<SetThreadPriority>, "SetThreadPriority"}, | 2397 | {0x0d, nullptr, "SetThreadPriority32"}, |
| 2335 | {0x0E, SvcWrap<GetThreadCoreMask>, "GetThreadCoreMask"}, | 2398 | {0x0e, nullptr, "GetThreadCoreMask32"}, |
| 2336 | {0x0F, SvcWrap<SetThreadCoreMask>, "SetThreadCoreMask"}, | 2399 | {0x0f, nullptr, "SetThreadCoreMask32"}, |
| 2337 | {0x10, SvcWrap<GetCurrentProcessorNumber>, "GetCurrentProcessorNumber"}, | 2400 | {0x10, nullptr, "GetCurrentProcessorNumber32"}, |
| 2338 | {0x11, SvcWrap<SignalEvent>, "SignalEvent"}, | 2401 | {0x11, nullptr, "SignalEvent32"}, |
| 2339 | {0x12, SvcWrap<ClearEvent>, "ClearEvent"}, | 2402 | {0x12, nullptr, "ClearEvent32"}, |
| 2340 | {0x13, SvcWrap<MapSharedMemory>, "MapSharedMemory"}, | 2403 | {0x13, nullptr, "MapSharedMemory32"}, |
| 2341 | {0x14, SvcWrap<UnmapSharedMemory>, "UnmapSharedMemory"}, | 2404 | {0x14, nullptr, "UnmapSharedMemory32"}, |
| 2342 | {0x15, SvcWrap<CreateTransferMemory>, "CreateTransferMemory"}, | 2405 | {0x15, nullptr, "CreateTransferMemory32"}, |
| 2343 | {0x16, SvcWrap<CloseHandle>, "CloseHandle"}, | 2406 | {0x16, SvcWrap32<CloseHandle32>, "CloseHandle32"}, |
| 2344 | {0x17, SvcWrap<ResetSignal>, "ResetSignal"}, | 2407 | {0x17, nullptr, "ResetSignal32"}, |
| 2345 | {0x18, SvcWrap<WaitSynchronization>, "WaitSynchronization"}, | 2408 | {0x18, SvcWrap32<WaitSynchronization32>, "WaitSynchronization32"}, |
| 2346 | {0x19, SvcWrap<CancelSynchronization>, "CancelSynchronization"}, | 2409 | {0x19, nullptr, "CancelSynchronization32"}, |
| 2347 | {0x1A, SvcWrap<ArbitrateLock>, "ArbitrateLock"}, | 2410 | {0x1a, nullptr, "ArbitrateLock32"}, |
| 2348 | {0x1B, SvcWrap<ArbitrateUnlock>, "ArbitrateUnlock"}, | 2411 | {0x1b, nullptr, "ArbitrateUnlock32"}, |
| 2349 | {0x1C, SvcWrap<WaitProcessWideKeyAtomic>, "WaitProcessWideKeyAtomic"}, | 2412 | {0x1c, nullptr, "WaitProcessWideKeyAtomic32"}, |
| 2350 | {0x1D, SvcWrap<SignalProcessWideKey>, "SignalProcessWideKey"}, | 2413 | {0x1d, SvcWrap32<SignalProcessWideKey32>, "SignalProcessWideKey32"}, |
| 2351 | {0x1E, SvcWrap<GetSystemTick>, "GetSystemTick"}, | 2414 | {0x1e, nullptr, "GetSystemTick32"}, |
| 2352 | {0x1F, SvcWrap<ConnectToNamedPort>, "ConnectToNamedPort"}, | 2415 | {0x1f, SvcWrap32<ConnectToNamedPort32>, "ConnectToNamedPort32"}, |
| 2416 | {0x20, nullptr, "Unknown"}, | ||
| 2417 | {0x21, SvcWrap32<SendSyncRequest32>, "SendSyncRequest32"}, | ||
| 2418 | {0x22, nullptr, "SendSyncRequestWithUserBuffer32"}, | ||
| 2419 | {0x23, nullptr, "Unknown"}, | ||
| 2420 | {0x24, nullptr, "GetProcessId32"}, | ||
| 2421 | {0x25, SvcWrap32<GetThreadId32>, "GetThreadId32"}, | ||
| 2422 | {0x26, nullptr, "Break32"}, | ||
| 2423 | {0x27, nullptr, "OutputDebugString32"}, | ||
| 2424 | {0x28, nullptr, "Unknown"}, | ||
| 2425 | {0x29, SvcWrap32<GetInfo32>, "GetInfo32"}, | ||
| 2426 | {0x2a, nullptr, "Unknown"}, | ||
| 2427 | {0x2b, nullptr, "Unknown"}, | ||
| 2428 | {0x2c, nullptr, "MapPhysicalMemory32"}, | ||
| 2429 | {0x2d, nullptr, "UnmapPhysicalMemory32"}, | ||
| 2430 | {0x2e, nullptr, "Unknown"}, | ||
| 2431 | {0x2f, nullptr, "Unknown"}, | ||
| 2432 | {0x30, nullptr, "Unknown"}, | ||
| 2433 | {0x31, nullptr, "Unknown"}, | ||
| 2434 | {0x32, nullptr, "SetThreadActivity32"}, | ||
| 2435 | {0x33, nullptr, "GetThreadContext32"}, | ||
| 2436 | {0x34, nullptr, "WaitForAddress32"}, | ||
| 2437 | {0x35, nullptr, "SignalToAddress32"}, | ||
| 2438 | {0x36, nullptr, "Unknown"}, | ||
| 2439 | {0x37, nullptr, "Unknown"}, | ||
| 2440 | {0x38, nullptr, "Unknown"}, | ||
| 2441 | {0x39, nullptr, "Unknown"}, | ||
| 2442 | {0x3a, nullptr, "Unknown"}, | ||
| 2443 | {0x3b, nullptr, "Unknown"}, | ||
| 2444 | {0x3c, nullptr, "Unknown"}, | ||
| 2445 | {0x3d, nullptr, "Unknown"}, | ||
| 2446 | {0x3e, nullptr, "Unknown"}, | ||
| 2447 | {0x3f, nullptr, "Unknown"}, | ||
| 2448 | {0x40, nullptr, "CreateSession32"}, | ||
| 2449 | {0x41, nullptr, "AcceptSession32"}, | ||
| 2450 | {0x42, nullptr, "Unknown"}, | ||
| 2451 | {0x43, nullptr, "ReplyAndReceive32"}, | ||
| 2452 | {0x44, nullptr, "Unknown"}, | ||
| 2453 | {0x45, nullptr, "CreateEvent32"}, | ||
| 2454 | {0x46, nullptr, "Unknown"}, | ||
| 2455 | {0x47, nullptr, "Unknown"}, | ||
| 2456 | {0x48, nullptr, "Unknown"}, | ||
| 2457 | {0x49, nullptr, "Unknown"}, | ||
| 2458 | {0x4a, nullptr, "Unknown"}, | ||
| 2459 | {0x4b, nullptr, "Unknown"}, | ||
| 2460 | {0x4c, nullptr, "Unknown"}, | ||
| 2461 | {0x4d, nullptr, "Unknown"}, | ||
| 2462 | {0x4e, nullptr, "Unknown"}, | ||
| 2463 | {0x4f, nullptr, "Unknown"}, | ||
| 2464 | {0x50, nullptr, "Unknown"}, | ||
| 2465 | {0x51, nullptr, "Unknown"}, | ||
| 2466 | {0x52, nullptr, "Unknown"}, | ||
| 2467 | {0x53, nullptr, "Unknown"}, | ||
| 2468 | {0x54, nullptr, "Unknown"}, | ||
| 2469 | {0x55, nullptr, "Unknown"}, | ||
| 2470 | {0x56, nullptr, "Unknown"}, | ||
| 2471 | {0x57, nullptr, "Unknown"}, | ||
| 2472 | {0x58, nullptr, "Unknown"}, | ||
| 2473 | {0x59, nullptr, "Unknown"}, | ||
| 2474 | {0x5a, nullptr, "Unknown"}, | ||
| 2475 | {0x5b, nullptr, "Unknown"}, | ||
| 2476 | {0x5c, nullptr, "Unknown"}, | ||
| 2477 | {0x5d, nullptr, "Unknown"}, | ||
| 2478 | {0x5e, nullptr, "Unknown"}, | ||
| 2479 | {0x5F, nullptr, "FlushProcessDataCache32"}, | ||
| 2480 | {0x60, nullptr, "Unknown"}, | ||
| 2481 | {0x61, nullptr, "Unknown"}, | ||
| 2482 | {0x62, nullptr, "Unknown"}, | ||
| 2483 | {0x63, nullptr, "Unknown"}, | ||
| 2484 | {0x64, nullptr, "Unknown"}, | ||
| 2485 | {0x65, nullptr, "GetProcessList32"}, | ||
| 2486 | {0x66, nullptr, "Unknown"}, | ||
| 2487 | {0x67, nullptr, "Unknown"}, | ||
| 2488 | {0x68, nullptr, "Unknown"}, | ||
| 2489 | {0x69, nullptr, "Unknown"}, | ||
| 2490 | {0x6A, nullptr, "Unknown"}, | ||
| 2491 | {0x6B, nullptr, "Unknown"}, | ||
| 2492 | {0x6C, nullptr, "Unknown"}, | ||
| 2493 | {0x6D, nullptr, "Unknown"}, | ||
| 2494 | {0x6E, nullptr, "Unknown"}, | ||
| 2495 | {0x6f, nullptr, "GetSystemInfo32"}, | ||
| 2496 | {0x70, nullptr, "CreatePort32"}, | ||
| 2497 | {0x71, nullptr, "ManageNamedPort32"}, | ||
| 2498 | {0x72, nullptr, "ConnectToPort32"}, | ||
| 2499 | {0x73, nullptr, "SetProcessMemoryPermission32"}, | ||
| 2500 | {0x74, nullptr, "Unknown"}, | ||
| 2501 | {0x75, nullptr, "Unknown"}, | ||
| 2502 | {0x76, nullptr, "Unknown"}, | ||
| 2503 | {0x77, nullptr, "MapProcessCodeMemory32"}, | ||
| 2504 | {0x78, nullptr, "UnmapProcessCodeMemory32"}, | ||
| 2505 | {0x79, nullptr, "Unknown"}, | ||
| 2506 | {0x7A, nullptr, "Unknown"}, | ||
| 2507 | {0x7B, nullptr, "TerminateProcess32"}, | ||
| 2508 | }; | ||
| 2509 | |||
| 2510 | static const FunctionDef SVC_Table_64[] = { | ||
| 2511 | {0x00, nullptr, "Unknown"}, | ||
| 2512 | {0x01, SvcWrap64<SetHeapSize>, "SetHeapSize"}, | ||
| 2513 | {0x02, SvcWrap64<SetMemoryPermission>, "SetMemoryPermission"}, | ||
| 2514 | {0x03, SvcWrap64<SetMemoryAttribute>, "SetMemoryAttribute"}, | ||
| 2515 | {0x04, SvcWrap64<MapMemory>, "MapMemory"}, | ||
| 2516 | {0x05, SvcWrap64<UnmapMemory>, "UnmapMemory"}, | ||
| 2517 | {0x06, SvcWrap64<QueryMemory>, "QueryMemory"}, | ||
| 2518 | {0x07, SvcWrap64<ExitProcess>, "ExitProcess"}, | ||
| 2519 | {0x08, SvcWrap64<CreateThread>, "CreateThread"}, | ||
| 2520 | {0x09, SvcWrap64<StartThread>, "StartThread"}, | ||
| 2521 | {0x0A, SvcWrap64<ExitThread>, "ExitThread"}, | ||
| 2522 | {0x0B, SvcWrap64<SleepThread>, "SleepThread"}, | ||
| 2523 | {0x0C, SvcWrap64<GetThreadPriority>, "GetThreadPriority"}, | ||
| 2524 | {0x0D, SvcWrap64<SetThreadPriority>, "SetThreadPriority"}, | ||
| 2525 | {0x0E, SvcWrap64<GetThreadCoreMask>, "GetThreadCoreMask"}, | ||
| 2526 | {0x0F, SvcWrap64<SetThreadCoreMask>, "SetThreadCoreMask"}, | ||
| 2527 | {0x10, SvcWrap64<GetCurrentProcessorNumber>, "GetCurrentProcessorNumber"}, | ||
| 2528 | {0x11, SvcWrap64<SignalEvent>, "SignalEvent"}, | ||
| 2529 | {0x12, SvcWrap64<ClearEvent>, "ClearEvent"}, | ||
| 2530 | {0x13, SvcWrap64<MapSharedMemory>, "MapSharedMemory"}, | ||
| 2531 | {0x14, SvcWrap64<UnmapSharedMemory>, "UnmapSharedMemory"}, | ||
| 2532 | {0x15, SvcWrap64<CreateTransferMemory>, "CreateTransferMemory"}, | ||
| 2533 | {0x16, SvcWrap64<CloseHandle>, "CloseHandle"}, | ||
| 2534 | {0x17, SvcWrap64<ResetSignal>, "ResetSignal"}, | ||
| 2535 | {0x18, SvcWrap64<WaitSynchronization>, "WaitSynchronization"}, | ||
| 2536 | {0x19, SvcWrap64<CancelSynchronization>, "CancelSynchronization"}, | ||
| 2537 | {0x1A, SvcWrap64<ArbitrateLock>, "ArbitrateLock"}, | ||
| 2538 | {0x1B, SvcWrap64<ArbitrateUnlock>, "ArbitrateUnlock"}, | ||
| 2539 | {0x1C, SvcWrap64<WaitProcessWideKeyAtomic>, "WaitProcessWideKeyAtomic"}, | ||
| 2540 | {0x1D, SvcWrap64<SignalProcessWideKey>, "SignalProcessWideKey"}, | ||
| 2541 | {0x1E, SvcWrap64<GetSystemTick>, "GetSystemTick"}, | ||
| 2542 | {0x1F, SvcWrap64<ConnectToNamedPort>, "ConnectToNamedPort"}, | ||
| 2353 | {0x20, nullptr, "SendSyncRequestLight"}, | 2543 | {0x20, nullptr, "SendSyncRequestLight"}, |
| 2354 | {0x21, SvcWrap<SendSyncRequest>, "SendSyncRequest"}, | 2544 | {0x21, SvcWrap64<SendSyncRequest>, "SendSyncRequest"}, |
| 2355 | {0x22, nullptr, "SendSyncRequestWithUserBuffer"}, | 2545 | {0x22, nullptr, "SendSyncRequestWithUserBuffer"}, |
| 2356 | {0x23, nullptr, "SendAsyncRequestWithUserBuffer"}, | 2546 | {0x23, nullptr, "SendAsyncRequestWithUserBuffer"}, |
| 2357 | {0x24, SvcWrap<GetProcessId>, "GetProcessId"}, | 2547 | {0x24, SvcWrap64<GetProcessId>, "GetProcessId"}, |
| 2358 | {0x25, SvcWrap<GetThreadId>, "GetThreadId"}, | 2548 | {0x25, SvcWrap64<GetThreadId>, "GetThreadId"}, |
| 2359 | {0x26, SvcWrap<Break>, "Break"}, | 2549 | {0x26, SvcWrap64<Break>, "Break"}, |
| 2360 | {0x27, SvcWrap<OutputDebugString>, "OutputDebugString"}, | 2550 | {0x27, SvcWrap64<OutputDebugString>, "OutputDebugString"}, |
| 2361 | {0x28, nullptr, "ReturnFromException"}, | 2551 | {0x28, nullptr, "ReturnFromException"}, |
| 2362 | {0x29, SvcWrap<GetInfo>, "GetInfo"}, | 2552 | {0x29, SvcWrap64<GetInfo>, "GetInfo"}, |
| 2363 | {0x2A, nullptr, "FlushEntireDataCache"}, | 2553 | {0x2A, nullptr, "FlushEntireDataCache"}, |
| 2364 | {0x2B, nullptr, "FlushDataCache"}, | 2554 | {0x2B, nullptr, "FlushDataCache"}, |
| 2365 | {0x2C, SvcWrap<MapPhysicalMemory>, "MapPhysicalMemory"}, | 2555 | {0x2C, SvcWrap64<MapPhysicalMemory>, "MapPhysicalMemory"}, |
| 2366 | {0x2D, SvcWrap<UnmapPhysicalMemory>, "UnmapPhysicalMemory"}, | 2556 | {0x2D, SvcWrap64<UnmapPhysicalMemory>, "UnmapPhysicalMemory"}, |
| 2367 | {0x2E, nullptr, "GetFutureThreadInfo"}, | 2557 | {0x2E, nullptr, "GetFutureThreadInfo"}, |
| 2368 | {0x2F, nullptr, "GetLastThreadInfo"}, | 2558 | {0x2F, nullptr, "GetLastThreadInfo"}, |
| 2369 | {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, | 2559 | {0x30, SvcWrap64<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, |
| 2370 | {0x31, SvcWrap<GetResourceLimitCurrentValue>, "GetResourceLimitCurrentValue"}, | 2560 | {0x31, SvcWrap64<GetResourceLimitCurrentValue>, "GetResourceLimitCurrentValue"}, |
| 2371 | {0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"}, | 2561 | {0x32, SvcWrap64<SetThreadActivity>, "SetThreadActivity"}, |
| 2372 | {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"}, | 2562 | {0x33, SvcWrap64<GetThreadContext>, "GetThreadContext"}, |
| 2373 | {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"}, | 2563 | {0x34, SvcWrap64<WaitForAddress>, "WaitForAddress"}, |
| 2374 | {0x35, SvcWrap<SignalToAddress>, "SignalToAddress"}, | 2564 | {0x35, SvcWrap64<SignalToAddress>, "SignalToAddress"}, |
| 2375 | {0x36, nullptr, "SynchronizePreemptionState"}, | 2565 | {0x36, nullptr, "SynchronizePreemptionState"}, |
| 2376 | {0x37, nullptr, "Unknown"}, | 2566 | {0x37, nullptr, "Unknown"}, |
| 2377 | {0x38, nullptr, "Unknown"}, | 2567 | {0x38, nullptr, "Unknown"}, |
| 2378 | {0x39, nullptr, "Unknown"}, | 2568 | {0x39, nullptr, "Unknown"}, |
| 2379 | {0x3A, nullptr, "Unknown"}, | 2569 | {0x3A, nullptr, "Unknown"}, |
| 2380 | {0x3B, nullptr, "Unknown"}, | 2570 | {0x3B, nullptr, "Unknown"}, |
| 2381 | {0x3C, SvcWrap<KernelDebug>, "KernelDebug"}, | 2571 | {0x3C, SvcWrap64<KernelDebug>, "KernelDebug"}, |
| 2382 | {0x3D, SvcWrap<ChangeKernelTraceState>, "ChangeKernelTraceState"}, | 2572 | {0x3D, SvcWrap64<ChangeKernelTraceState>, "ChangeKernelTraceState"}, |
| 2383 | {0x3E, nullptr, "Unknown"}, | 2573 | {0x3E, nullptr, "Unknown"}, |
| 2384 | {0x3F, nullptr, "Unknown"}, | 2574 | {0x3F, nullptr, "Unknown"}, |
| 2385 | {0x40, nullptr, "CreateSession"}, | 2575 | {0x40, nullptr, "CreateSession"}, |
| @@ -2387,7 +2577,7 @@ static const FunctionDef SVC_Table[] = { | |||
| 2387 | {0x42, nullptr, "ReplyAndReceiveLight"}, | 2577 | {0x42, nullptr, "ReplyAndReceiveLight"}, |
| 2388 | {0x43, nullptr, "ReplyAndReceive"}, | 2578 | {0x43, nullptr, "ReplyAndReceive"}, |
| 2389 | {0x44, nullptr, "ReplyAndReceiveWithUserBuffer"}, | 2579 | {0x44, nullptr, "ReplyAndReceiveWithUserBuffer"}, |
| 2390 | {0x45, SvcWrap<CreateEvent>, "CreateEvent"}, | 2580 | {0x45, SvcWrap64<CreateEvent>, "CreateEvent"}, |
| 2391 | {0x46, nullptr, "Unknown"}, | 2581 | {0x46, nullptr, "Unknown"}, |
| 2392 | {0x47, nullptr, "Unknown"}, | 2582 | {0x47, nullptr, "Unknown"}, |
| 2393 | {0x48, nullptr, "MapPhysicalMemoryUnsafe"}, | 2583 | {0x48, nullptr, "MapPhysicalMemoryUnsafe"}, |
| @@ -2398,9 +2588,9 @@ static const FunctionDef SVC_Table[] = { | |||
| 2398 | {0x4D, nullptr, "SleepSystem"}, | 2588 | {0x4D, nullptr, "SleepSystem"}, |
| 2399 | {0x4E, nullptr, "ReadWriteRegister"}, | 2589 | {0x4E, nullptr, "ReadWriteRegister"}, |
| 2400 | {0x4F, nullptr, "SetProcessActivity"}, | 2590 | {0x4F, nullptr, "SetProcessActivity"}, |
| 2401 | {0x50, SvcWrap<CreateSharedMemory>, "CreateSharedMemory"}, | 2591 | {0x50, SvcWrap64<CreateSharedMemory>, "CreateSharedMemory"}, |
| 2402 | {0x51, SvcWrap<MapTransferMemory>, "MapTransferMemory"}, | 2592 | {0x51, SvcWrap64<MapTransferMemory>, "MapTransferMemory"}, |
| 2403 | {0x52, SvcWrap<UnmapTransferMemory>, "UnmapTransferMemory"}, | 2593 | {0x52, SvcWrap64<UnmapTransferMemory>, "UnmapTransferMemory"}, |
| 2404 | {0x53, nullptr, "CreateInterruptEvent"}, | 2594 | {0x53, nullptr, "CreateInterruptEvent"}, |
| 2405 | {0x54, nullptr, "QueryPhysicalAddress"}, | 2595 | {0x54, nullptr, "QueryPhysicalAddress"}, |
| 2406 | {0x55, nullptr, "QueryIoMapping"}, | 2596 | {0x55, nullptr, "QueryIoMapping"}, |
| @@ -2419,8 +2609,8 @@ static const FunctionDef SVC_Table[] = { | |||
| 2419 | {0x62, nullptr, "TerminateDebugProcess"}, | 2609 | {0x62, nullptr, "TerminateDebugProcess"}, |
| 2420 | {0x63, nullptr, "GetDebugEvent"}, | 2610 | {0x63, nullptr, "GetDebugEvent"}, |
| 2421 | {0x64, nullptr, "ContinueDebugEvent"}, | 2611 | {0x64, nullptr, "ContinueDebugEvent"}, |
| 2422 | {0x65, SvcWrap<GetProcessList>, "GetProcessList"}, | 2612 | {0x65, SvcWrap64<GetProcessList>, "GetProcessList"}, |
| 2423 | {0x66, SvcWrap<GetThreadList>, "GetThreadList"}, | 2613 | {0x66, SvcWrap64<GetThreadList>, "GetThreadList"}, |
| 2424 | {0x67, nullptr, "GetDebugThreadContext"}, | 2614 | {0x67, nullptr, "GetDebugThreadContext"}, |
| 2425 | {0x68, nullptr, "SetDebugThreadContext"}, | 2615 | {0x68, nullptr, "SetDebugThreadContext"}, |
| 2426 | {0x69, nullptr, "QueryDebugProcessMemory"}, | 2616 | {0x69, nullptr, "QueryDebugProcessMemory"}, |
| @@ -2436,24 +2626,32 @@ static const FunctionDef SVC_Table[] = { | |||
| 2436 | {0x73, nullptr, "SetProcessMemoryPermission"}, | 2626 | {0x73, nullptr, "SetProcessMemoryPermission"}, |
| 2437 | {0x74, nullptr, "MapProcessMemory"}, | 2627 | {0x74, nullptr, "MapProcessMemory"}, |
| 2438 | {0x75, nullptr, "UnmapProcessMemory"}, | 2628 | {0x75, nullptr, "UnmapProcessMemory"}, |
| 2439 | {0x76, SvcWrap<QueryProcessMemory>, "QueryProcessMemory"}, | 2629 | {0x76, SvcWrap64<QueryProcessMemory>, "QueryProcessMemory"}, |
| 2440 | {0x77, SvcWrap<MapProcessCodeMemory>, "MapProcessCodeMemory"}, | 2630 | {0x77, SvcWrap64<MapProcessCodeMemory>, "MapProcessCodeMemory"}, |
| 2441 | {0x78, SvcWrap<UnmapProcessCodeMemory>, "UnmapProcessCodeMemory"}, | 2631 | {0x78, SvcWrap64<UnmapProcessCodeMemory>, "UnmapProcessCodeMemory"}, |
| 2442 | {0x79, nullptr, "CreateProcess"}, | 2632 | {0x79, nullptr, "CreateProcess"}, |
| 2443 | {0x7A, nullptr, "StartProcess"}, | 2633 | {0x7A, nullptr, "StartProcess"}, |
| 2444 | {0x7B, nullptr, "TerminateProcess"}, | 2634 | {0x7B, nullptr, "TerminateProcess"}, |
| 2445 | {0x7C, SvcWrap<GetProcessInfo>, "GetProcessInfo"}, | 2635 | {0x7C, SvcWrap64<GetProcessInfo>, "GetProcessInfo"}, |
| 2446 | {0x7D, SvcWrap<CreateResourceLimit>, "CreateResourceLimit"}, | 2636 | {0x7D, SvcWrap64<CreateResourceLimit>, "CreateResourceLimit"}, |
| 2447 | {0x7E, SvcWrap<SetResourceLimitLimitValue>, "SetResourceLimitLimitValue"}, | 2637 | {0x7E, SvcWrap64<SetResourceLimitLimitValue>, "SetResourceLimitLimitValue"}, |
| 2448 | {0x7F, nullptr, "CallSecureMonitor"}, | 2638 | {0x7F, nullptr, "CallSecureMonitor"}, |
| 2449 | }; | 2639 | }; |
| 2450 | 2640 | ||
| 2451 | static const FunctionDef* GetSVCInfo(u32 func_num) { | 2641 | static const FunctionDef* GetSVCInfo32(u32 func_num) { |
| 2452 | if (func_num >= std::size(SVC_Table)) { | 2642 | if (func_num >= std::size(SVC_Table_32)) { |
| 2643 | LOG_ERROR(Kernel_SVC, "Unknown svc=0x{:02X}", func_num); | ||
| 2644 | return nullptr; | ||
| 2645 | } | ||
| 2646 | return &SVC_Table_32[func_num]; | ||
| 2647 | } | ||
| 2648 | |||
| 2649 | static const FunctionDef* GetSVCInfo64(u32 func_num) { | ||
| 2650 | if (func_num >= std::size(SVC_Table_64)) { | ||
| 2453 | LOG_ERROR(Kernel_SVC, "Unknown svc=0x{:02X}", func_num); | 2651 | LOG_ERROR(Kernel_SVC, "Unknown svc=0x{:02X}", func_num); |
| 2454 | return nullptr; | 2652 | return nullptr; |
| 2455 | } | 2653 | } |
| 2456 | return &SVC_Table[func_num]; | 2654 | return &SVC_Table_64[func_num]; |
| 2457 | } | 2655 | } |
| 2458 | 2656 | ||
| 2459 | MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); | 2657 | MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); |
| @@ -2464,7 +2662,8 @@ void CallSVC(Core::System& system, u32 immediate) { | |||
| 2464 | // Lock the global kernel mutex when we enter the kernel HLE. | 2662 | // Lock the global kernel mutex when we enter the kernel HLE. |
| 2465 | std::lock_guard lock{HLE::g_hle_lock}; | 2663 | std::lock_guard lock{HLE::g_hle_lock}; |
| 2466 | 2664 | ||
| 2467 | const FunctionDef* info = GetSVCInfo(immediate); | 2665 | const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate) |
| 2666 | : GetSVCInfo32(immediate); | ||
| 2468 | if (info) { | 2667 | if (info) { |
| 2469 | if (info->func) { | 2668 | if (info->func) { |
| 2470 | info->func(system); | 2669 | info->func(system); |
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index 29a2cfa9d..7d735e3fa 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h | |||
| @@ -15,6 +15,10 @@ static inline u64 Param(const Core::System& system, int n) { | |||
| 15 | return system.CurrentArmInterface().GetReg(n); | 15 | return system.CurrentArmInterface().GetReg(n); |
| 16 | } | 16 | } |
| 17 | 17 | ||
| 18 | static inline u32 Param32(const Core::System& system, int n) { | ||
| 19 | return static_cast<u32>(system.CurrentArmInterface().GetReg(n)); | ||
| 20 | } | ||
| 21 | |||
| 18 | /** | 22 | /** |
| 19 | * HLE a function return from the current ARM userland process | 23 | * HLE a function return from the current ARM userland process |
| 20 | * @param system System context | 24 | * @param system System context |
| @@ -24,40 +28,44 @@ static inline void FuncReturn(Core::System& system, u64 result) { | |||
| 24 | system.CurrentArmInterface().SetReg(0, result); | 28 | system.CurrentArmInterface().SetReg(0, result); |
| 25 | } | 29 | } |
| 26 | 30 | ||
| 31 | static inline void FuncReturn32(Core::System& system, u32 result) { | ||
| 32 | system.CurrentArmInterface().SetReg(0, (u64)result); | ||
| 33 | } | ||
| 34 | |||
| 27 | //////////////////////////////////////////////////////////////////////////////////////////////////// | 35 | //////////////////////////////////////////////////////////////////////////////////////////////////// |
| 28 | // Function wrappers that return type ResultCode | 36 | // Function wrappers that return type ResultCode |
| 29 | 37 | ||
| 30 | template <ResultCode func(Core::System&, u64)> | 38 | template <ResultCode func(Core::System&, u64)> |
| 31 | void SvcWrap(Core::System& system) { | 39 | void SvcWrap64(Core::System& system) { |
| 32 | FuncReturn(system, func(system, Param(system, 0)).raw); | 40 | FuncReturn(system, func(system, Param(system, 0)).raw); |
| 33 | } | 41 | } |
| 34 | 42 | ||
| 35 | template <ResultCode func(Core::System&, u64, u64)> | 43 | template <ResultCode func(Core::System&, u64, u64)> |
| 36 | void SvcWrap(Core::System& system) { | 44 | void SvcWrap64(Core::System& system) { |
| 37 | FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw); | 45 | FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw); |
| 38 | } | 46 | } |
| 39 | 47 | ||
| 40 | template <ResultCode func(Core::System&, u32)> | 48 | template <ResultCode func(Core::System&, u32)> |
| 41 | void SvcWrap(Core::System& system) { | 49 | void SvcWrap64(Core::System& system) { |
| 42 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); | 50 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); |
| 43 | } | 51 | } |
| 44 | 52 | ||
| 45 | template <ResultCode func(Core::System&, u32, u32)> | 53 | template <ResultCode func(Core::System&, u32, u32)> |
| 46 | void SvcWrap(Core::System& system) { | 54 | void SvcWrap64(Core::System& system) { |
| 47 | FuncReturn( | 55 | FuncReturn( |
| 48 | system, | 56 | system, |
| 49 | func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw); | 57 | func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw); |
| 50 | } | 58 | } |
| 51 | 59 | ||
| 52 | template <ResultCode func(Core::System&, u32, u64, u64, u64)> | 60 | template <ResultCode func(Core::System&, u32, u64, u64, u64)> |
| 53 | void SvcWrap(Core::System& system) { | 61 | void SvcWrap64(Core::System& system) { |
| 54 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), | 62 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), |
| 55 | Param(system, 2), Param(system, 3)) | 63 | Param(system, 2), Param(system, 3)) |
| 56 | .raw); | 64 | .raw); |
| 57 | } | 65 | } |
| 58 | 66 | ||
| 59 | template <ResultCode func(Core::System&, u32*)> | 67 | template <ResultCode func(Core::System&, u32*)> |
| 60 | void SvcWrap(Core::System& system) { | 68 | void SvcWrap64(Core::System& system) { |
| 61 | u32 param = 0; | 69 | u32 param = 0; |
| 62 | const u32 retval = func(system, &param).raw; | 70 | const u32 retval = func(system, &param).raw; |
| 63 | system.CurrentArmInterface().SetReg(1, param); | 71 | system.CurrentArmInterface().SetReg(1, param); |
| @@ -65,7 +73,7 @@ void SvcWrap(Core::System& system) { | |||
| 65 | } | 73 | } |
| 66 | 74 | ||
| 67 | template <ResultCode func(Core::System&, u32*, u32)> | 75 | template <ResultCode func(Core::System&, u32*, u32)> |
| 68 | void SvcWrap(Core::System& system) { | 76 | void SvcWrap64(Core::System& system) { |
| 69 | u32 param_1 = 0; | 77 | u32 param_1 = 0; |
| 70 | const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw; | 78 | const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw; |
| 71 | system.CurrentArmInterface().SetReg(1, param_1); | 79 | system.CurrentArmInterface().SetReg(1, param_1); |
| @@ -73,7 +81,7 @@ void SvcWrap(Core::System& system) { | |||
| 73 | } | 81 | } |
| 74 | 82 | ||
| 75 | template <ResultCode func(Core::System&, u32*, u32*)> | 83 | template <ResultCode func(Core::System&, u32*, u32*)> |
| 76 | void SvcWrap(Core::System& system) { | 84 | void SvcWrap64(Core::System& system) { |
| 77 | u32 param_1 = 0; | 85 | u32 param_1 = 0; |
| 78 | u32 param_2 = 0; | 86 | u32 param_2 = 0; |
| 79 | const u32 retval = func(system, &param_1, &param_2).raw; | 87 | const u32 retval = func(system, &param_1, &param_2).raw; |
| @@ -86,7 +94,7 @@ void SvcWrap(Core::System& system) { | |||
| 86 | } | 94 | } |
| 87 | 95 | ||
| 88 | template <ResultCode func(Core::System&, u32*, u64)> | 96 | template <ResultCode func(Core::System&, u32*, u64)> |
| 89 | void SvcWrap(Core::System& system) { | 97 | void SvcWrap64(Core::System& system) { |
| 90 | u32 param_1 = 0; | 98 | u32 param_1 = 0; |
| 91 | const u32 retval = func(system, &param_1, Param(system, 1)).raw; | 99 | const u32 retval = func(system, &param_1, Param(system, 1)).raw; |
| 92 | system.CurrentArmInterface().SetReg(1, param_1); | 100 | system.CurrentArmInterface().SetReg(1, param_1); |
| @@ -94,7 +102,7 @@ void SvcWrap(Core::System& system) { | |||
| 94 | } | 102 | } |
| 95 | 103 | ||
| 96 | template <ResultCode func(Core::System&, u32*, u64, u32)> | 104 | template <ResultCode func(Core::System&, u32*, u64, u32)> |
| 97 | void SvcWrap(Core::System& system) { | 105 | void SvcWrap64(Core::System& system) { |
| 98 | u32 param_1 = 0; | 106 | u32 param_1 = 0; |
| 99 | const u32 retval = | 107 | const u32 retval = |
| 100 | func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2))).raw; | 108 | func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2))).raw; |
| @@ -104,7 +112,7 @@ void SvcWrap(Core::System& system) { | |||
| 104 | } | 112 | } |
| 105 | 113 | ||
| 106 | template <ResultCode func(Core::System&, u64*, u32)> | 114 | template <ResultCode func(Core::System&, u64*, u32)> |
| 107 | void SvcWrap(Core::System& system) { | 115 | void SvcWrap64(Core::System& system) { |
| 108 | u64 param_1 = 0; | 116 | u64 param_1 = 0; |
| 109 | const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw; | 117 | const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw; |
| 110 | 118 | ||
| @@ -113,12 +121,12 @@ void SvcWrap(Core::System& system) { | |||
| 113 | } | 121 | } |
| 114 | 122 | ||
| 115 | template <ResultCode func(Core::System&, u64, u32)> | 123 | template <ResultCode func(Core::System&, u64, u32)> |
| 116 | void SvcWrap(Core::System& system) { | 124 | void SvcWrap64(Core::System& system) { |
| 117 | FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw); | 125 | FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw); |
| 118 | } | 126 | } |
| 119 | 127 | ||
| 120 | template <ResultCode func(Core::System&, u64*, u64)> | 128 | template <ResultCode func(Core::System&, u64*, u64)> |
| 121 | void SvcWrap(Core::System& system) { | 129 | void SvcWrap64(Core::System& system) { |
| 122 | u64 param_1 = 0; | 130 | u64 param_1 = 0; |
| 123 | const u32 retval = func(system, &param_1, Param(system, 1)).raw; | 131 | const u32 retval = func(system, &param_1, Param(system, 1)).raw; |
| 124 | 132 | ||
| @@ -127,7 +135,7 @@ void SvcWrap(Core::System& system) { | |||
| 127 | } | 135 | } |
| 128 | 136 | ||
| 129 | template <ResultCode func(Core::System&, u64*, u32, u32)> | 137 | template <ResultCode func(Core::System&, u64*, u32, u32)> |
| 130 | void SvcWrap(Core::System& system) { | 138 | void SvcWrap64(Core::System& system) { |
| 131 | u64 param_1 = 0; | 139 | u64 param_1 = 0; |
| 132 | const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1)), | 140 | const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1)), |
| 133 | static_cast<u32>(Param(system, 2))) | 141 | static_cast<u32>(Param(system, 2))) |
| @@ -138,19 +146,19 @@ void SvcWrap(Core::System& system) { | |||
| 138 | } | 146 | } |
| 139 | 147 | ||
| 140 | template <ResultCode func(Core::System&, u32, u64)> | 148 | template <ResultCode func(Core::System&, u32, u64)> |
| 141 | void SvcWrap(Core::System& system) { | 149 | void SvcWrap64(Core::System& system) { |
| 142 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1)).raw); | 150 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1)).raw); |
| 143 | } | 151 | } |
| 144 | 152 | ||
| 145 | template <ResultCode func(Core::System&, u32, u32, u64)> | 153 | template <ResultCode func(Core::System&, u32, u32, u64)> |
| 146 | void SvcWrap(Core::System& system) { | 154 | void SvcWrap64(Core::System& system) { |
| 147 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), | 155 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), |
| 148 | static_cast<u32>(Param(system, 1)), Param(system, 2)) | 156 | static_cast<u32>(Param(system, 1)), Param(system, 2)) |
| 149 | .raw); | 157 | .raw); |
| 150 | } | 158 | } |
| 151 | 159 | ||
| 152 | template <ResultCode func(Core::System&, u32, u32*, u64*)> | 160 | template <ResultCode func(Core::System&, u32, u32*, u64*)> |
| 153 | void SvcWrap(Core::System& system) { | 161 | void SvcWrap64(Core::System& system) { |
| 154 | u32 param_1 = 0; | 162 | u32 param_1 = 0; |
| 155 | u64 param_2 = 0; | 163 | u64 param_2 = 0; |
| 156 | const ResultCode retval = func(system, static_cast<u32>(Param(system, 2)), &param_1, &param_2); | 164 | const ResultCode retval = func(system, static_cast<u32>(Param(system, 2)), &param_1, &param_2); |
| @@ -161,54 +169,54 @@ void SvcWrap(Core::System& system) { | |||
| 161 | } | 169 | } |
| 162 | 170 | ||
| 163 | template <ResultCode func(Core::System&, u64, u64, u32, u32)> | 171 | template <ResultCode func(Core::System&, u64, u64, u32, u32)> |
| 164 | void SvcWrap(Core::System& system) { | 172 | void SvcWrap64(Core::System& system) { |
| 165 | FuncReturn(system, func(system, Param(system, 0), Param(system, 1), | 173 | FuncReturn(system, func(system, Param(system, 0), Param(system, 1), |
| 166 | static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3))) | 174 | static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3))) |
| 167 | .raw); | 175 | .raw); |
| 168 | } | 176 | } |
| 169 | 177 | ||
| 170 | template <ResultCode func(Core::System&, u64, u64, u32, u64)> | 178 | template <ResultCode func(Core::System&, u64, u64, u32, u64)> |
| 171 | void SvcWrap(Core::System& system) { | 179 | void SvcWrap64(Core::System& system) { |
| 172 | FuncReturn(system, func(system, Param(system, 0), Param(system, 1), | 180 | FuncReturn(system, func(system, Param(system, 0), Param(system, 1), |
| 173 | static_cast<u32>(Param(system, 2)), Param(system, 3)) | 181 | static_cast<u32>(Param(system, 2)), Param(system, 3)) |
| 174 | .raw); | 182 | .raw); |
| 175 | } | 183 | } |
| 176 | 184 | ||
| 177 | template <ResultCode func(Core::System&, u32, u64, u32)> | 185 | template <ResultCode func(Core::System&, u32, u64, u32)> |
| 178 | void SvcWrap(Core::System& system) { | 186 | void SvcWrap64(Core::System& system) { |
| 179 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), | 187 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), |
| 180 | static_cast<u32>(Param(system, 2))) | 188 | static_cast<u32>(Param(system, 2))) |
| 181 | .raw); | 189 | .raw); |
| 182 | } | 190 | } |
| 183 | 191 | ||
| 184 | template <ResultCode func(Core::System&, u64, u64, u64)> | 192 | template <ResultCode func(Core::System&, u64, u64, u64)> |
| 185 | void SvcWrap(Core::System& system) { | 193 | void SvcWrap64(Core::System& system) { |
| 186 | FuncReturn(system, func(system, Param(system, 0), Param(system, 1), Param(system, 2)).raw); | 194 | FuncReturn(system, func(system, Param(system, 0), Param(system, 1), Param(system, 2)).raw); |
| 187 | } | 195 | } |
| 188 | 196 | ||
| 189 | template <ResultCode func(Core::System&, u64, u64, u32)> | 197 | template <ResultCode func(Core::System&, u64, u64, u32)> |
| 190 | void SvcWrap(Core::System& system) { | 198 | void SvcWrap64(Core::System& system) { |
| 191 | FuncReturn( | 199 | FuncReturn( |
| 192 | system, | 200 | system, |
| 193 | func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2))).raw); | 201 | func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2))).raw); |
| 194 | } | 202 | } |
| 195 | 203 | ||
| 196 | template <ResultCode func(Core::System&, u32, u64, u64, u32)> | 204 | template <ResultCode func(Core::System&, u32, u64, u64, u32)> |
| 197 | void SvcWrap(Core::System& system) { | 205 | void SvcWrap64(Core::System& system) { |
| 198 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), | 206 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), |
| 199 | Param(system, 2), static_cast<u32>(Param(system, 3))) | 207 | Param(system, 2), static_cast<u32>(Param(system, 3))) |
| 200 | .raw); | 208 | .raw); |
| 201 | } | 209 | } |
| 202 | 210 | ||
| 203 | template <ResultCode func(Core::System&, u32, u64, u64)> | 211 | template <ResultCode func(Core::System&, u32, u64, u64)> |
| 204 | void SvcWrap(Core::System& system) { | 212 | void SvcWrap64(Core::System& system) { |
| 205 | FuncReturn( | 213 | FuncReturn( |
| 206 | system, | 214 | system, |
| 207 | func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)).raw); | 215 | func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)).raw); |
| 208 | } | 216 | } |
| 209 | 217 | ||
| 210 | template <ResultCode func(Core::System&, u32*, u64, u64, s64)> | 218 | template <ResultCode func(Core::System&, u32*, u64, u64, s64)> |
| 211 | void SvcWrap(Core::System& system) { | 219 | void SvcWrap64(Core::System& system) { |
| 212 | u32 param_1 = 0; | 220 | u32 param_1 = 0; |
| 213 | const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)), | 221 | const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)), |
| 214 | static_cast<s64>(Param(system, 3))) | 222 | static_cast<s64>(Param(system, 3))) |
| @@ -219,14 +227,14 @@ void SvcWrap(Core::System& system) { | |||
| 219 | } | 227 | } |
| 220 | 228 | ||
| 221 | template <ResultCode func(Core::System&, u64, u64, u32, s64)> | 229 | template <ResultCode func(Core::System&, u64, u64, u32, s64)> |
| 222 | void SvcWrap(Core::System& system) { | 230 | void SvcWrap64(Core::System& system) { |
| 223 | FuncReturn(system, func(system, Param(system, 0), Param(system, 1), | 231 | FuncReturn(system, func(system, Param(system, 0), Param(system, 1), |
| 224 | static_cast<u32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) | 232 | static_cast<u32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) |
| 225 | .raw); | 233 | .raw); |
| 226 | } | 234 | } |
| 227 | 235 | ||
| 228 | template <ResultCode func(Core::System&, u64*, u64, u64, u64)> | 236 | template <ResultCode func(Core::System&, u64*, u64, u64, u64)> |
| 229 | void SvcWrap(Core::System& system) { | 237 | void SvcWrap64(Core::System& system) { |
| 230 | u64 param_1 = 0; | 238 | u64 param_1 = 0; |
| 231 | const u32 retval = | 239 | const u32 retval = |
| 232 | func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3)).raw; | 240 | func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3)).raw; |
| @@ -236,7 +244,7 @@ void SvcWrap(Core::System& system) { | |||
| 236 | } | 244 | } |
| 237 | 245 | ||
| 238 | template <ResultCode func(Core::System&, u32*, u64, u64, u64, u32, s32)> | 246 | template <ResultCode func(Core::System&, u32*, u64, u64, u64, u32, s32)> |
| 239 | void SvcWrap(Core::System& system) { | 247 | void SvcWrap64(Core::System& system) { |
| 240 | u32 param_1 = 0; | 248 | u32 param_1 = 0; |
| 241 | const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3), | 249 | const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3), |
| 242 | static_cast<u32>(Param(system, 4)), static_cast<s32>(Param(system, 5))) | 250 | static_cast<u32>(Param(system, 4)), static_cast<s32>(Param(system, 5))) |
| @@ -247,7 +255,7 @@ void SvcWrap(Core::System& system) { | |||
| 247 | } | 255 | } |
| 248 | 256 | ||
| 249 | template <ResultCode func(Core::System&, u32*, u64, u64, u32)> | 257 | template <ResultCode func(Core::System&, u32*, u64, u64, u32)> |
| 250 | void SvcWrap(Core::System& system) { | 258 | void SvcWrap64(Core::System& system) { |
| 251 | u32 param_1 = 0; | 259 | u32 param_1 = 0; |
| 252 | const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), | 260 | const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), |
| 253 | static_cast<u32>(Param(system, 3))) | 261 | static_cast<u32>(Param(system, 3))) |
| @@ -258,7 +266,7 @@ void SvcWrap(Core::System& system) { | |||
| 258 | } | 266 | } |
| 259 | 267 | ||
| 260 | template <ResultCode func(Core::System&, Handle*, u64, u32, u32)> | 268 | template <ResultCode func(Core::System&, Handle*, u64, u32, u32)> |
| 261 | void SvcWrap(Core::System& system) { | 269 | void SvcWrap64(Core::System& system) { |
| 262 | u32 param_1 = 0; | 270 | u32 param_1 = 0; |
| 263 | const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)), | 271 | const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)), |
| 264 | static_cast<u32>(Param(system, 3))) | 272 | static_cast<u32>(Param(system, 3))) |
| @@ -269,14 +277,14 @@ void SvcWrap(Core::System& system) { | |||
| 269 | } | 277 | } |
| 270 | 278 | ||
| 271 | template <ResultCode func(Core::System&, u64, u32, s32, s64)> | 279 | template <ResultCode func(Core::System&, u64, u32, s32, s64)> |
| 272 | void SvcWrap(Core::System& system) { | 280 | void SvcWrap64(Core::System& system) { |
| 273 | FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)), | 281 | FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)), |
| 274 | static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) | 282 | static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) |
| 275 | .raw); | 283 | .raw); |
| 276 | } | 284 | } |
| 277 | 285 | ||
| 278 | template <ResultCode func(Core::System&, u64, u32, s32, s32)> | 286 | template <ResultCode func(Core::System&, u64, u32, s32, s32)> |
| 279 | void SvcWrap(Core::System& system) { | 287 | void SvcWrap64(Core::System& system) { |
| 280 | FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)), | 288 | FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)), |
| 281 | static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3))) | 289 | static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3))) |
| 282 | .raw); | 290 | .raw); |
| @@ -286,7 +294,7 @@ void SvcWrap(Core::System& system) { | |||
| 286 | // Function wrappers that return type u32 | 294 | // Function wrappers that return type u32 |
| 287 | 295 | ||
| 288 | template <u32 func(Core::System&)> | 296 | template <u32 func(Core::System&)> |
| 289 | void SvcWrap(Core::System& system) { | 297 | void SvcWrap64(Core::System& system) { |
| 290 | FuncReturn(system, func(system)); | 298 | FuncReturn(system, func(system)); |
| 291 | } | 299 | } |
| 292 | 300 | ||
| @@ -294,7 +302,7 @@ void SvcWrap(Core::System& system) { | |||
| 294 | // Function wrappers that return type u64 | 302 | // Function wrappers that return type u64 |
| 295 | 303 | ||
| 296 | template <u64 func(Core::System&)> | 304 | template <u64 func(Core::System&)> |
| 297 | void SvcWrap(Core::System& system) { | 305 | void SvcWrap64(Core::System& system) { |
| 298 | FuncReturn(system, func(system)); | 306 | FuncReturn(system, func(system)); |
| 299 | } | 307 | } |
| 300 | 308 | ||
| @@ -302,44 +310,110 @@ void SvcWrap(Core::System& system) { | |||
| 302 | /// Function wrappers that return type void | 310 | /// Function wrappers that return type void |
| 303 | 311 | ||
| 304 | template <void func(Core::System&)> | 312 | template <void func(Core::System&)> |
| 305 | void SvcWrap(Core::System& system) { | 313 | void SvcWrap64(Core::System& system) { |
| 306 | func(system); | 314 | func(system); |
| 307 | } | 315 | } |
| 308 | 316 | ||
| 309 | template <void func(Core::System&, u32)> | 317 | template <void func(Core::System&, u32)> |
| 310 | void SvcWrap(Core::System& system) { | 318 | void SvcWrap64(Core::System& system) { |
| 311 | func(system, static_cast<u32>(Param(system, 0))); | 319 | func(system, static_cast<u32>(Param(system, 0))); |
| 312 | } | 320 | } |
| 313 | 321 | ||
| 314 | template <void func(Core::System&, u32, u64, u64, u64)> | 322 | template <void func(Core::System&, u32, u64, u64, u64)> |
| 315 | void SvcWrap(Core::System& system) { | 323 | void SvcWrap64(Core::System& system) { |
| 316 | func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2), | 324 | func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2), |
| 317 | Param(system, 3)); | 325 | Param(system, 3)); |
| 318 | } | 326 | } |
| 319 | 327 | ||
| 320 | template <void func(Core::System&, s64)> | 328 | template <void func(Core::System&, s64)> |
| 321 | void SvcWrap(Core::System& system) { | 329 | void SvcWrap64(Core::System& system) { |
| 322 | func(system, static_cast<s64>(Param(system, 0))); | 330 | func(system, static_cast<s64>(Param(system, 0))); |
| 323 | } | 331 | } |
| 324 | 332 | ||
| 325 | template <void func(Core::System&, u64, s32)> | 333 | template <void func(Core::System&, u64, s32)> |
| 326 | void SvcWrap(Core::System& system) { | 334 | void SvcWrap64(Core::System& system) { |
| 327 | func(system, Param(system, 0), static_cast<s32>(Param(system, 1))); | 335 | func(system, Param(system, 0), static_cast<s32>(Param(system, 1))); |
| 328 | } | 336 | } |
| 329 | 337 | ||
| 330 | template <void func(Core::System&, u64, u64)> | 338 | template <void func(Core::System&, u64, u64)> |
| 331 | void SvcWrap(Core::System& system) { | 339 | void SvcWrap64(Core::System& system) { |
| 332 | func(system, Param(system, 0), Param(system, 1)); | 340 | func(system, Param(system, 0), Param(system, 1)); |
| 333 | } | 341 | } |
| 334 | 342 | ||
| 335 | template <void func(Core::System&, u64, u64, u64)> | 343 | template <void func(Core::System&, u64, u64, u64)> |
| 336 | void SvcWrap(Core::System& system) { | 344 | void SvcWrap64(Core::System& system) { |
| 337 | func(system, Param(system, 0), Param(system, 1), Param(system, 2)); | 345 | func(system, Param(system, 0), Param(system, 1), Param(system, 2)); |
| 338 | } | 346 | } |
| 339 | 347 | ||
| 340 | template <void func(Core::System&, u32, u64, u64)> | 348 | template <void func(Core::System&, u32, u64, u64)> |
| 341 | void SvcWrap(Core::System& system) { | 349 | void SvcWrap64(Core::System& system) { |
| 342 | func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)); | 350 | func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)); |
| 343 | } | 351 | } |
| 344 | 352 | ||
| 353 | // Used by QueryMemory32 | ||
| 354 | template <ResultCode func(Core::System&, u32, u32, u32)> | ||
| 355 | void SvcWrap32(Core::System& system) { | ||
| 356 | FuncReturn32(system, | ||
| 357 | func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2)).raw); | ||
| 358 | } | ||
| 359 | |||
| 360 | // Used by GetInfo32 | ||
| 361 | template <ResultCode func(Core::System&, u32*, u32*, u32, u32, u32, u32)> | ||
| 362 | void SvcWrap32(Core::System& system) { | ||
| 363 | u32 param_1 = 0; | ||
| 364 | u32 param_2 = 0; | ||
| 365 | |||
| 366 | const u32 retval = func(system, &param_1, &param_2, Param32(system, 0), Param32(system, 1), | ||
| 367 | Param32(system, 2), Param32(system, 3)) | ||
| 368 | .raw; | ||
| 369 | |||
| 370 | system.CurrentArmInterface().SetReg(1, param_1); | ||
| 371 | system.CurrentArmInterface().SetReg(2, param_2); | ||
| 372 | FuncReturn(system, retval); | ||
| 373 | } | ||
| 374 | |||
| 375 | // Used by GetThreadPriority32, ConnectToNamedPort32 | ||
| 376 | template <ResultCode func(Core::System&, u32*, u32)> | ||
| 377 | void SvcWrap32(Core::System& system) { | ||
| 378 | u32 param_1 = 0; | ||
| 379 | const u32 retval = func(system, &param_1, Param32(system, 1)).raw; | ||
| 380 | system.CurrentArmInterface().SetReg(1, param_1); | ||
| 381 | FuncReturn(system, retval); | ||
| 382 | } | ||
| 383 | |||
| 384 | // Used by GetThreadId32 | ||
| 385 | template <ResultCode func(Core::System&, u32*, u32*, u32)> | ||
| 386 | void SvcWrap32(Core::System& system) { | ||
| 387 | u32 param_1 = 0; | ||
| 388 | u32 param_2 = 0; | ||
| 389 | |||
| 390 | const u32 retval = func(system, &param_1, &param_2, Param32(system, 1)).raw; | ||
| 391 | system.CurrentArmInterface().SetReg(1, param_1); | ||
| 392 | system.CurrentArmInterface().SetReg(2, param_2); | ||
| 393 | FuncReturn(system, retval); | ||
| 394 | } | ||
| 395 | |||
| 396 | // Used by SignalProcessWideKey32 | ||
| 397 | template <void func(Core::System&, u32, s32)> | ||
| 398 | void SvcWrap32(Core::System& system) { | ||
| 399 | func(system, static_cast<u32>(Param(system, 0)), static_cast<s32>(Param(system, 1))); | ||
| 400 | } | ||
| 401 | |||
| 402 | // Used by SendSyncRequest32 | ||
| 403 | template <ResultCode func(Core::System&, u32)> | ||
| 404 | void SvcWrap32(Core::System& system) { | ||
| 405 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); | ||
| 406 | } | ||
| 407 | |||
| 408 | // Used by WaitSynchronization32 | ||
| 409 | template <ResultCode func(Core::System&, u32, u32, s32, u32, Handle*)> | ||
| 410 | void SvcWrap32(Core::System& system) { | ||
| 411 | u32 param_1 = 0; | ||
| 412 | const u32 retval = func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2), | ||
| 413 | Param32(system, 3), &param_1) | ||
| 414 | .raw; | ||
| 415 | system.CurrentArmInterface().SetReg(1, param_1); | ||
| 416 | FuncReturn(system, retval); | ||
| 417 | } | ||
| 418 | |||
| 345 | } // namespace Kernel | 419 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index bf850e0b2..83e956036 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp | |||
| @@ -133,15 +133,16 @@ void Thread::CancelWait() { | |||
| 133 | ResumeFromWait(); | 133 | ResumeFromWait(); |
| 134 | } | 134 | } |
| 135 | 135 | ||
| 136 | /** | 136 | static void ResetThreadContext32(Core::ARM_Interface::ThreadContext32& context, u32 stack_top, |
| 137 | * Resets a thread context, making it ready to be scheduled and run by the CPU | 137 | u32 entry_point, u32 arg) { |
| 138 | * @param context Thread context to reset | 138 | context = {}; |
| 139 | * @param stack_top Address of the top of the stack | 139 | context.cpu_registers[0] = arg; |
| 140 | * @param entry_point Address of entry point for execution | 140 | context.cpu_registers[15] = entry_point; |
| 141 | * @param arg User argument for thread | 141 | context.cpu_registers[13] = stack_top; |
| 142 | */ | 142 | } |
| 143 | static void ResetThreadContext(Core::ARM_Interface::ThreadContext& context, VAddr stack_top, | 143 | |
| 144 | VAddr entry_point, u64 arg) { | 144 | static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context, VAddr stack_top, |
| 145 | VAddr entry_point, u64 arg) { | ||
| 145 | context = {}; | 146 | context = {}; |
| 146 | context.cpu_registers[0] = arg; | 147 | context.cpu_registers[0] = arg; |
| 147 | context.pc = entry_point; | 148 | context.pc = entry_point; |
| @@ -198,9 +199,9 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin | |||
| 198 | 199 | ||
| 199 | thread->owner_process->RegisterThread(thread.get()); | 200 | thread->owner_process->RegisterThread(thread.get()); |
| 200 | 201 | ||
| 201 | // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used | 202 | ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top), |
| 202 | // to initialize the context | 203 | static_cast<u32>(entry_point), static_cast<u32>(arg)); |
| 203 | ResetThreadContext(thread->context, stack_top, entry_point, arg); | 204 | ResetThreadContext64(thread->context_64, stack_top, entry_point, arg); |
| 204 | 205 | ||
| 205 | return MakeResult<std::shared_ptr<Thread>>(std::move(thread)); | 206 | return MakeResult<std::shared_ptr<Thread>>(std::move(thread)); |
| 206 | } | 207 | } |
| @@ -213,11 +214,13 @@ void Thread::SetPriority(u32 priority) { | |||
| 213 | } | 214 | } |
| 214 | 215 | ||
| 215 | void Thread::SetWaitSynchronizationResult(ResultCode result) { | 216 | void Thread::SetWaitSynchronizationResult(ResultCode result) { |
| 216 | context.cpu_registers[0] = result.raw; | 217 | context_32.cpu_registers[0] = result.raw; |
| 218 | context_64.cpu_registers[0] = result.raw; | ||
| 217 | } | 219 | } |
| 218 | 220 | ||
| 219 | void Thread::SetWaitSynchronizationOutput(s32 output) { | 221 | void Thread::SetWaitSynchronizationOutput(s32 output) { |
| 220 | context.cpu_registers[1] = output; | 222 | context_32.cpu_registers[1] = output; |
| 223 | context_64.cpu_registers[1] = output; | ||
| 221 | } | 224 | } |
| 222 | 225 | ||
| 223 | s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const { | 226 | s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const { |
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 129e7858a..23fdef8a4 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h | |||
| @@ -102,7 +102,8 @@ public: | |||
| 102 | 102 | ||
| 103 | using MutexWaitingThreads = std::vector<std::shared_ptr<Thread>>; | 103 | using MutexWaitingThreads = std::vector<std::shared_ptr<Thread>>; |
| 104 | 104 | ||
| 105 | using ThreadContext = Core::ARM_Interface::ThreadContext; | 105 | using ThreadContext32 = Core::ARM_Interface::ThreadContext32; |
| 106 | using ThreadContext64 = Core::ARM_Interface::ThreadContext64; | ||
| 106 | 107 | ||
| 107 | using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>; | 108 | using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>; |
| 108 | 109 | ||
| @@ -273,12 +274,20 @@ public: | |||
| 273 | return status == ThreadStatus::WaitSynch; | 274 | return status == ThreadStatus::WaitSynch; |
| 274 | } | 275 | } |
| 275 | 276 | ||
| 276 | ThreadContext& GetContext() { | 277 | ThreadContext32& GetContext32() { |
| 277 | return context; | 278 | return context_32; |
| 278 | } | 279 | } |
| 279 | 280 | ||
| 280 | const ThreadContext& GetContext() const { | 281 | const ThreadContext32& GetContext32() const { |
| 281 | return context; | 282 | return context_32; |
| 283 | } | ||
| 284 | |||
| 285 | ThreadContext64& GetContext64() { | ||
| 286 | return context_64; | ||
| 287 | } | ||
| 288 | |||
| 289 | const ThreadContext64& GetContext64() const { | ||
| 290 | return context_64; | ||
| 282 | } | 291 | } |
| 283 | 292 | ||
| 284 | ThreadStatus GetStatus() const { | 293 | ThreadStatus GetStatus() const { |
| @@ -466,7 +475,8 @@ private: | |||
| 466 | void AdjustSchedulingOnPriority(u32 old_priority); | 475 | void AdjustSchedulingOnPriority(u32 old_priority); |
| 467 | void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core); | 476 | void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core); |
| 468 | 477 | ||
| 469 | Core::ARM_Interface::ThreadContext context{}; | 478 | ThreadContext32 context_32{}; |
| 479 | ThreadContext64 context_64{}; | ||
| 470 | 480 | ||
| 471 | u64 thread_id = 0; | 481 | u64 thread_id = 0; |
| 472 | 482 | ||
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index cc978713b..d1bf13c89 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp | |||
| @@ -607,7 +607,7 @@ ICommonStateGetter::ICommonStateGetter(Core::System& system, | |||
| 607 | {40, nullptr, "GetCradleFwVersion"}, | 607 | {40, nullptr, "GetCradleFwVersion"}, |
| 608 | {50, nullptr, "IsVrModeEnabled"}, | 608 | {50, nullptr, "IsVrModeEnabled"}, |
| 609 | {51, nullptr, "SetVrModeEnabled"}, | 609 | {51, nullptr, "SetVrModeEnabled"}, |
| 610 | {52, nullptr, "SwitchLcdBacklight"}, | 610 | {52, &ICommonStateGetter::SetLcdBacklighOffEnabled, "SetLcdBacklighOffEnabled"}, |
| 611 | {53, nullptr, "BeginVrModeEx"}, | 611 | {53, nullptr, "BeginVrModeEx"}, |
| 612 | {54, nullptr, "EndVrModeEx"}, | 612 | {54, nullptr, "EndVrModeEx"}, |
| 613 | {55, nullptr, "IsInControllerFirmwareUpdateSection"}, | 613 | {55, nullptr, "IsInControllerFirmwareUpdateSection"}, |
| @@ -636,7 +636,6 @@ void ICommonStateGetter::GetBootMode(Kernel::HLERequestContext& ctx) { | |||
| 636 | 636 | ||
| 637 | IPC::ResponseBuilder rb{ctx, 3}; | 637 | IPC::ResponseBuilder rb{ctx, 3}; |
| 638 | rb.Push(RESULT_SUCCESS); | 638 | rb.Push(RESULT_SUCCESS); |
| 639 | |||
| 640 | rb.Push<u8>(static_cast<u8>(Service::PM::SystemBootMode::Normal)); // Normal boot mode | 639 | rb.Push<u8>(static_cast<u8>(Service::PM::SystemBootMode::Normal)); // Normal boot mode |
| 641 | } | 640 | } |
| 642 | 641 | ||
| @@ -660,6 +659,7 @@ void ICommonStateGetter::ReceiveMessage(Kernel::HLERequestContext& ctx) { | |||
| 660 | rb.PushEnum<AppletMessageQueue::AppletMessage>(message); | 659 | rb.PushEnum<AppletMessageQueue::AppletMessage>(message); |
| 661 | return; | 660 | return; |
| 662 | } | 661 | } |
| 662 | |||
| 663 | rb.Push(RESULT_SUCCESS); | 663 | rb.Push(RESULT_SUCCESS); |
| 664 | rb.PushEnum<AppletMessageQueue::AppletMessage>(message); | 664 | rb.PushEnum<AppletMessageQueue::AppletMessage>(message); |
| 665 | } | 665 | } |
| @@ -672,6 +672,17 @@ void ICommonStateGetter::GetCurrentFocusState(Kernel::HLERequestContext& ctx) { | |||
| 672 | rb.Push(static_cast<u8>(FocusState::InFocus)); | 672 | rb.Push(static_cast<u8>(FocusState::InFocus)); |
| 673 | } | 673 | } |
| 674 | 674 | ||
| 675 | void ICommonStateGetter::SetLcdBacklighOffEnabled(Kernel::HLERequestContext& ctx) { | ||
| 676 | IPC::RequestParser rp{ctx}; | ||
| 677 | const auto is_lcd_backlight_off_enabled = rp.Pop<bool>(); | ||
| 678 | |||
| 679 | LOG_WARNING(Service_AM, "(STUBBED) called. is_lcd_backlight_off_enabled={}", | ||
| 680 | is_lcd_backlight_off_enabled); | ||
| 681 | |||
| 682 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 683 | rb.Push(RESULT_SUCCESS); | ||
| 684 | } | ||
| 685 | |||
| 675 | void ICommonStateGetter::GetDefaultDisplayResolutionChangeEvent(Kernel::HLERequestContext& ctx) { | 686 | void ICommonStateGetter::GetDefaultDisplayResolutionChangeEvent(Kernel::HLERequestContext& ctx) { |
| 676 | LOG_DEBUG(Service_AM, "called"); | 687 | LOG_DEBUG(Service_AM, "called"); |
| 677 | 688 | ||
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h index 0b9a4332d..0843de781 100644 --- a/src/core/hle/service/am/am.h +++ b/src/core/hle/service/am/am.h | |||
| @@ -182,6 +182,7 @@ private: | |||
| 182 | void GetOperationMode(Kernel::HLERequestContext& ctx); | 182 | void GetOperationMode(Kernel::HLERequestContext& ctx); |
| 183 | void GetPerformanceMode(Kernel::HLERequestContext& ctx); | 183 | void GetPerformanceMode(Kernel::HLERequestContext& ctx); |
| 184 | void GetBootMode(Kernel::HLERequestContext& ctx); | 184 | void GetBootMode(Kernel::HLERequestContext& ctx); |
| 185 | void SetLcdBacklighOffEnabled(Kernel::HLERequestContext& ctx); | ||
| 185 | void GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx); | 186 | void GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx); |
| 186 | void SetCpuBoostMode(Kernel::HLERequestContext& ctx); | 187 | void SetCpuBoostMode(Kernel::HLERequestContext& ctx); |
| 187 | 188 | ||
diff --git a/src/core/hle/service/am/applets/web_browser.cpp b/src/core/hle/service/am/applets/web_browser.cpp index 12443c910..9f30e167d 100644 --- a/src/core/hle/service/am/applets/web_browser.cpp +++ b/src/core/hle/service/am/applets/web_browser.cpp | |||
| @@ -254,6 +254,12 @@ void WebBrowser::Execute() { | |||
| 254 | 254 | ||
| 255 | if (status != RESULT_SUCCESS) { | 255 | if (status != RESULT_SUCCESS) { |
| 256 | complete = true; | 256 | complete = true; |
| 257 | |||
| 258 | // This is a workaround in order not to softlock yuzu when an error happens during the | ||
| 259 | // webapplet init. In order to avoid an svcBreak, the status is set to RESULT_SUCCESS | ||
| 260 | Finalize(); | ||
| 261 | status = RESULT_SUCCESS; | ||
| 262 | |||
| 257 | return; | 263 | return; |
| 258 | } | 264 | } |
| 259 | 265 | ||
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index 15c09f04c..c1e32b28c 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp | |||
| @@ -287,13 +287,13 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) { | |||
| 287 | analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus( | 287 | analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus( |
| 288 | Input::AnalogDirection::DOWN)); | 288 | Input::AnalogDirection::DOWN)); |
| 289 | 289 | ||
| 290 | pad_state.r_stick_up.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] | ||
| 291 | ->GetAnalogDirectionStatus(Input::AnalogDirection::RIGHT)); | ||
| 292 | pad_state.r_stick_left.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] | ||
| 293 | ->GetAnalogDirectionStatus(Input::AnalogDirection::LEFT)); | ||
| 294 | pad_state.r_stick_right.Assign( | 290 | pad_state.r_stick_right.Assign( |
| 295 | analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] | 291 | analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] |
| 296 | ->GetAnalogDirectionStatus(Input::AnalogDirection::UP)); | 292 | ->GetAnalogDirectionStatus(Input::AnalogDirection::RIGHT)); |
| 293 | pad_state.r_stick_left.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] | ||
| 294 | ->GetAnalogDirectionStatus(Input::AnalogDirection::LEFT)); | ||
| 295 | pad_state.r_stick_up.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] | ||
| 296 | ->GetAnalogDirectionStatus(Input::AnalogDirection::UP)); | ||
| 297 | pad_state.r_stick_down.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] | 297 | pad_state.r_stick_down.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] |
| 298 | ->GetAnalogDirectionStatus(Input::AnalogDirection::DOWN)); | 298 | ->GetAnalogDirectionStatus(Input::AnalogDirection::DOWN)); |
| 299 | 299 | ||
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 134152210..437bc5dee 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp | |||
| @@ -191,8 +191,6 @@ void NVFlinger::Compose() { | |||
| 191 | // Search for a queued buffer and acquire it | 191 | // Search for a queued buffer and acquire it |
| 192 | auto buffer = buffer_queue.AcquireBuffer(); | 192 | auto buffer = buffer_queue.AcquireBuffer(); |
| 193 | 193 | ||
| 194 | MicroProfileFlip(); | ||
| 195 | |||
| 196 | if (!buffer) { | 194 | if (!buffer) { |
| 197 | continue; | 195 | continue; |
| 198 | } | 196 | } |
| @@ -206,6 +204,8 @@ void NVFlinger::Compose() { | |||
| 206 | gpu.WaitFence(fence.id, fence.value); | 204 | gpu.WaitFence(fence.id, fence.value); |
| 207 | } | 205 | } |
| 208 | 206 | ||
| 207 | MicroProfileFlip(); | ||
| 208 | |||
| 209 | // Now send the buffer to the GPU for drawing. | 209 | // Now send the buffer to the GPU for drawing. |
| 210 | // TODO(Subv): Support more than just disp0. The display device selection is probably based | 210 | // TODO(Subv): Support more than just disp0. The display device selection is probably based |
| 211 | // on which display we're drawing (Default, Internal, External, etc) | 211 | // on which display we're drawing (Default, Internal, External, etc) |
diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp index 5bcc0b588..9e12c76fc 100644 --- a/src/core/hle/service/set/set.cpp +++ b/src/core/hle/service/set/set.cpp | |||
| @@ -111,6 +111,14 @@ void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) { | |||
| 111 | rb.PushEnum(available_language_codes[Settings::values.language_index]); | 111 | rb.PushEnum(available_language_codes[Settings::values.language_index]); |
| 112 | } | 112 | } |
| 113 | 113 | ||
| 114 | void SET::GetRegionCode(Kernel::HLERequestContext& ctx) { | ||
| 115 | LOG_DEBUG(Service_SET, "called"); | ||
| 116 | |||
| 117 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 118 | rb.Push(RESULT_SUCCESS); | ||
| 119 | rb.Push(Settings::values.region_index); | ||
| 120 | } | ||
| 121 | |||
| 114 | SET::SET() : ServiceFramework("set") { | 122 | SET::SET() : ServiceFramework("set") { |
| 115 | // clang-format off | 123 | // clang-format off |
| 116 | static const FunctionInfo functions[] = { | 124 | static const FunctionInfo functions[] = { |
| @@ -118,7 +126,7 @@ SET::SET() : ServiceFramework("set") { | |||
| 118 | {1, &SET::GetAvailableLanguageCodes, "GetAvailableLanguageCodes"}, | 126 | {1, &SET::GetAvailableLanguageCodes, "GetAvailableLanguageCodes"}, |
| 119 | {2, &SET::MakeLanguageCode, "MakeLanguageCode"}, | 127 | {2, &SET::MakeLanguageCode, "MakeLanguageCode"}, |
| 120 | {3, &SET::GetAvailableLanguageCodeCount, "GetAvailableLanguageCodeCount"}, | 128 | {3, &SET::GetAvailableLanguageCodeCount, "GetAvailableLanguageCodeCount"}, |
| 121 | {4, nullptr, "GetRegionCode"}, | 129 | {4, &SET::GetRegionCode, "GetRegionCode"}, |
| 122 | {5, &SET::GetAvailableLanguageCodes2, "GetAvailableLanguageCodes2"}, | 130 | {5, &SET::GetAvailableLanguageCodes2, "GetAvailableLanguageCodes2"}, |
| 123 | {6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"}, | 131 | {6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"}, |
| 124 | {7, nullptr, "GetKeyCodeMap"}, | 132 | {7, nullptr, "GetKeyCodeMap"}, |
diff --git a/src/core/hle/service/set/set.h b/src/core/hle/service/set/set.h index b154e08aa..6084b345d 100644 --- a/src/core/hle/service/set/set.h +++ b/src/core/hle/service/set/set.h | |||
| @@ -43,6 +43,7 @@ private: | |||
| 43 | void GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx); | 43 | void GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx); |
| 44 | void GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx); | 44 | void GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx); |
| 45 | void GetQuestFlag(Kernel::HLERequestContext& ctx); | 45 | void GetQuestFlag(Kernel::HLERequestContext& ctx); |
| 46 | void GetRegionCode(Kernel::HLERequestContext& ctx); | ||
| 46 | }; | 47 | }; |
| 47 | 48 | ||
| 48 | } // namespace Service::Set | 49 | } // namespace Service::Set |
diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp index c45b285f8..9cca84b31 100644 --- a/src/core/hle/service/sm/controller.cpp +++ b/src/core/hle/service/sm/controller.cpp | |||
| @@ -44,7 +44,7 @@ void Controller::QueryPointerBufferSize(Kernel::HLERequestContext& ctx) { | |||
| 44 | 44 | ||
| 45 | IPC::ResponseBuilder rb{ctx, 3}; | 45 | IPC::ResponseBuilder rb{ctx, 3}; |
| 46 | rb.Push(RESULT_SUCCESS); | 46 | rb.Push(RESULT_SUCCESS); |
| 47 | rb.Push<u16>(0x500); | 47 | rb.Push<u16>(0x1000); |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | Controller::Controller() : ServiceFramework("IpcController") { | 50 | Controller::Controller() : ServiceFramework("IpcController") { |
diff --git a/src/core/hle/service/time/time_zone_content_manager.cpp b/src/core/hle/service/time/time_zone_content_manager.cpp index 57b1a2bca..78d4acd95 100644 --- a/src/core/hle/service/time/time_zone_content_manager.cpp +++ b/src/core/hle/service/time/time_zone_content_manager.cpp | |||
| @@ -53,7 +53,7 @@ static std::vector<std::string> BuildLocationNameCache(Core::System& system) { | |||
| 53 | return {}; | 53 | return {}; |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | std::vector<char> raw_data(binary_list->GetSize()); | 56 | std::vector<char> raw_data(binary_list->GetSize() + 1); |
| 57 | binary_list->ReadBytes<char>(raw_data.data(), binary_list->GetSize()); | 57 | binary_list->ReadBytes<char>(raw_data.data(), binary_list->GetSize()); |
| 58 | 58 | ||
| 59 | std::stringstream data_stream{raw_data.data()}; | 59 | std::stringstream data_stream{raw_data.data()}; |
diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index d19c3623c..53559e8b1 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp | |||
| @@ -129,12 +129,6 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect | |||
| 129 | } | 129 | } |
| 130 | metadata.Print(); | 130 | metadata.Print(); |
| 131 | 131 | ||
| 132 | const FileSys::ProgramAddressSpaceType arch_bits{metadata.GetAddressSpaceType()}; | ||
| 133 | if (arch_bits == FileSys::ProgramAddressSpaceType::Is32Bit || | ||
| 134 | arch_bits == FileSys::ProgramAddressSpaceType::Is32BitNoMap) { | ||
| 135 | return {ResultStatus::Error32BitISA, {}}; | ||
| 136 | } | ||
| 137 | |||
| 138 | if (process.LoadFromMetadata(metadata).IsError()) { | 132 | if (process.LoadFromMetadata(metadata).IsError()) { |
| 139 | return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; | 133 | return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; |
| 140 | } | 134 | } |
diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index f95eee3b1..85ac81ef7 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp | |||
| @@ -111,7 +111,7 @@ json GetProcessorStateDataAuto(Core::System& system) { | |||
| 111 | const auto& vm_manager{process->VMManager()}; | 111 | const auto& vm_manager{process->VMManager()}; |
| 112 | auto& arm{system.CurrentArmInterface()}; | 112 | auto& arm{system.CurrentArmInterface()}; |
| 113 | 113 | ||
| 114 | Core::ARM_Interface::ThreadContext context{}; | 114 | Core::ARM_Interface::ThreadContext64 context{}; |
| 115 | arm.SaveContext(context); | 115 | arm.SaveContext(context); |
| 116 | 116 | ||
| 117 | return GetProcessorStateData(process->Is64BitProcess() ? "AArch64" : "AArch32", | 117 | return GetProcessorStateData(process->Is64BitProcess() ? "AArch64" : "AArch32", |
diff --git a/src/core/settings.cpp b/src/core/settings.cpp index d1fc94060..c1282cb80 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp | |||
| @@ -86,6 +86,7 @@ void LogSettings() { | |||
| 86 | LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0)); | 86 | LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0)); |
| 87 | LogSetting("System_CurrentUser", Settings::values.current_user); | 87 | LogSetting("System_CurrentUser", Settings::values.current_user); |
| 88 | LogSetting("System_LanguageIndex", Settings::values.language_index); | 88 | LogSetting("System_LanguageIndex", Settings::values.language_index); |
| 89 | LogSetting("System_RegionIndex", Settings::values.region_index); | ||
| 89 | LogSetting("Core_UseMultiCore", Settings::values.use_multi_core); | 90 | LogSetting("Core_UseMultiCore", Settings::values.use_multi_core); |
| 90 | LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); | 91 | LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); |
| 91 | LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); | 92 | LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); |
| @@ -94,6 +95,7 @@ void LogSettings() { | |||
| 94 | LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); | 95 | LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); |
| 95 | LogSetting("Renderer_UseAsynchronousGpuEmulation", | 96 | LogSetting("Renderer_UseAsynchronousGpuEmulation", |
| 96 | Settings::values.use_asynchronous_gpu_emulation); | 97 | Settings::values.use_asynchronous_gpu_emulation); |
| 98 | LogSetting("Renderer_UseVsync", Settings::values.use_vsync); | ||
| 97 | LogSetting("Audio_OutputEngine", Settings::values.sink_id); | 99 | LogSetting("Audio_OutputEngine", Settings::values.sink_id); |
| 98 | LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); | 100 | LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); |
| 99 | LogSetting("Audio_OutputDevice", Settings::values.audio_device_id); | 101 | LogSetting("Audio_OutputDevice", Settings::values.audio_device_id); |
diff --git a/src/core/settings.h b/src/core/settings.h index f837d3fbc..79ec01731 100644 --- a/src/core/settings.h +++ b/src/core/settings.h | |||
| @@ -387,6 +387,8 @@ struct Values { | |||
| 387 | 387 | ||
| 388 | s32 current_user; | 388 | s32 current_user; |
| 389 | s32 language_index; | 389 | s32 language_index; |
| 390 | s32 region_index; | ||
| 391 | s32 sound_index; | ||
| 390 | 392 | ||
| 391 | // Controls | 393 | // Controls |
| 392 | std::array<PlayerInput, 10> players; | 394 | std::array<PlayerInput, 10> players; |
| @@ -430,11 +432,13 @@ struct Values { | |||
| 430 | 432 | ||
| 431 | float resolution_factor; | 433 | float resolution_factor; |
| 432 | int aspect_ratio; | 434 | int aspect_ratio; |
| 435 | int max_anisotropy; | ||
| 433 | bool use_frame_limit; | 436 | bool use_frame_limit; |
| 434 | u16 frame_limit; | 437 | u16 frame_limit; |
| 435 | bool use_disk_shader_cache; | 438 | bool use_disk_shader_cache; |
| 436 | bool use_accurate_gpu_emulation; | 439 | bool use_accurate_gpu_emulation; |
| 437 | bool use_asynchronous_gpu_emulation; | 440 | bool use_asynchronous_gpu_emulation; |
| 441 | bool use_vsync; | ||
| 438 | bool force_30fps_mode; | 442 | bool force_30fps_mode; |
| 439 | 443 | ||
| 440 | float bg_red; | 444 | float bg_red; |
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 0e72d31cd..0f3685d1c 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp | |||
| @@ -188,6 +188,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { | |||
| 188 | Settings::values.use_accurate_gpu_emulation); | 188 | Settings::values.use_accurate_gpu_emulation); |
| 189 | AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", | 189 | AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", |
| 190 | Settings::values.use_asynchronous_gpu_emulation); | 190 | Settings::values.use_asynchronous_gpu_emulation); |
| 191 | AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync); | ||
| 191 | AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode); | 192 | AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode); |
| 192 | } | 193 | } |
| 193 | 194 | ||
diff --git a/src/input_common/analog_from_button.cpp b/src/input_common/analog_from_button.cpp index e1a260762..6cabdaa3c 100755 --- a/src/input_common/analog_from_button.cpp +++ b/src/input_common/analog_from_button.cpp | |||
| @@ -34,6 +34,20 @@ public: | |||
| 34 | y * coef * (x == 0 ? 1.0f : SQRT_HALF)); | 34 | y * coef * (x == 0 ? 1.0f : SQRT_HALF)); |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | bool GetAnalogDirectionStatus(Input::AnalogDirection direction) const override { | ||
| 38 | switch (direction) { | ||
| 39 | case Input::AnalogDirection::RIGHT: | ||
| 40 | return right->GetStatus(); | ||
| 41 | case Input::AnalogDirection::LEFT: | ||
| 42 | return left->GetStatus(); | ||
| 43 | case Input::AnalogDirection::UP: | ||
| 44 | return up->GetStatus(); | ||
| 45 | case Input::AnalogDirection::DOWN: | ||
| 46 | return down->GetStatus(); | ||
| 47 | } | ||
| 48 | return false; | ||
| 49 | } | ||
| 50 | |||
| 37 | private: | 51 | private: |
| 38 | Button up; | 52 | Button up; |
| 39 | Button down; | 53 | Button down; |
diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp index 2228571a6..da5227058 100644 --- a/src/input_common/udp/client.cpp +++ b/src/input_common/udp/client.cpp | |||
| @@ -32,8 +32,16 @@ public: | |||
| 32 | SocketCallback callback) | 32 | SocketCallback callback) |
| 33 | : callback(std::move(callback)), timer(io_service), | 33 | : callback(std::move(callback)), timer(io_service), |
| 34 | socket(io_service, udp::endpoint(udp::v4(), 0)), client_id(client_id), | 34 | socket(io_service, udp::endpoint(udp::v4(), 0)), client_id(client_id), |
| 35 | pad_index(pad_index), | 35 | pad_index(pad_index) { |
| 36 | send_endpoint(udp::endpoint(boost::asio::ip::make_address_v4(host), port)) {} | 36 | boost::system::error_code ec{}; |
| 37 | auto ipv4 = boost::asio::ip::make_address_v4(host, ec); | ||
| 38 | if (ec.value() != boost::system::errc::success) { | ||
| 39 | LOG_ERROR(Input, "Invalid IPv4 address \"{}\" provided to socket", host); | ||
| 40 | ipv4 = boost::asio::ip::address_v4{}; | ||
| 41 | } | ||
| 42 | |||
| 43 | send_endpoint = {udp::endpoint(ipv4, port)}; | ||
| 44 | } | ||
| 37 | 45 | ||
| 38 | void Stop() { | 46 | void Stop() { |
| 39 | io_service.stop(); | 47 | io_service.stop(); |
| @@ -85,17 +93,18 @@ private: | |||
| 85 | } | 93 | } |
| 86 | 94 | ||
| 87 | void HandleSend(const boost::system::error_code& error) { | 95 | void HandleSend(const boost::system::error_code& error) { |
| 96 | boost::system::error_code _ignored{}; | ||
| 88 | // Send a request for getting port info for the pad | 97 | // Send a request for getting port info for the pad |
| 89 | Request::PortInfo port_info{1, {pad_index, 0, 0, 0}}; | 98 | Request::PortInfo port_info{1, {pad_index, 0, 0, 0}}; |
| 90 | const auto port_message = Request::Create(port_info, client_id); | 99 | const auto port_message = Request::Create(port_info, client_id); |
| 91 | std::memcpy(&send_buffer1, &port_message, PORT_INFO_SIZE); | 100 | std::memcpy(&send_buffer1, &port_message, PORT_INFO_SIZE); |
| 92 | socket.send_to(boost::asio::buffer(send_buffer1), send_endpoint); | 101 | socket.send_to(boost::asio::buffer(send_buffer1), send_endpoint, {}, _ignored); |
| 93 | 102 | ||
| 94 | // Send a request for getting pad data for the pad | 103 | // Send a request for getting pad data for the pad |
| 95 | Request::PadData pad_data{Request::PadData::Flags::Id, pad_index, EMPTY_MAC_ADDRESS}; | 104 | Request::PadData pad_data{Request::PadData::Flags::Id, pad_index, EMPTY_MAC_ADDRESS}; |
| 96 | const auto pad_message = Request::Create(pad_data, client_id); | 105 | const auto pad_message = Request::Create(pad_data, client_id); |
| 97 | std::memcpy(send_buffer2.data(), &pad_message, PAD_DATA_SIZE); | 106 | std::memcpy(send_buffer2.data(), &pad_message, PAD_DATA_SIZE); |
| 98 | socket.send_to(boost::asio::buffer(send_buffer2), send_endpoint); | 107 | socket.send_to(boost::asio::buffer(send_buffer2), send_endpoint, {}, _ignored); |
| 99 | StartSend(timer.expiry()); | 108 | StartSend(timer.expiry()); |
| 100 | } | 109 | } |
| 101 | 110 | ||
diff --git a/src/input_common/udp/protocol.cpp b/src/input_common/udp/protocol.cpp index a982ac49d..5e50bd612 100644 --- a/src/input_common/udp/protocol.cpp +++ b/src/input_common/udp/protocol.cpp | |||
| @@ -31,7 +31,6 @@ namespace Response { | |||
| 31 | */ | 31 | */ |
| 32 | std::optional<Type> Validate(u8* data, std::size_t size) { | 32 | std::optional<Type> Validate(u8* data, std::size_t size) { |
| 33 | if (size < sizeof(Header)) { | 33 | if (size < sizeof(Header)) { |
| 34 | LOG_DEBUG(Input, "Invalid UDP packet received"); | ||
| 35 | return std::nullopt; | 34 | return std::nullopt; |
| 36 | } | 35 | } |
| 37 | Header header{}; | 36 | Header header{}; |
diff --git a/src/input_common/udp/udp.cpp b/src/input_common/udp/udp.cpp index ca99cc22f..8c6ef1394 100644 --- a/src/input_common/udp/udp.cpp +++ b/src/input_common/udp/udp.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <mutex> | 5 | #include <mutex> |
| 6 | #include <optional> | ||
| 6 | #include <tuple> | 7 | #include <tuple> |
| 7 | 8 | ||
| 8 | #include "common/param_package.h" | 9 | #include "common/param_package.h" |
| @@ -44,7 +45,7 @@ public: | |||
| 44 | std::unique_ptr<Input::TouchDevice> Create(const Common::ParamPackage& params) override { | 45 | std::unique_ptr<Input::TouchDevice> Create(const Common::ParamPackage& params) override { |
| 45 | { | 46 | { |
| 46 | std::lock_guard guard(status->update_mutex); | 47 | std::lock_guard guard(status->update_mutex); |
| 47 | status->touch_calibration.emplace(); | 48 | status->touch_calibration = DeviceStatus::CalibrationData{}; |
| 48 | // These default values work well for DS4 but probably not other touch inputs | 49 | // These default values work well for DS4 but probably not other touch inputs |
| 49 | status->touch_calibration->min_x = params.Get("min_x", 100); | 50 | status->touch_calibration->min_x = params.Get("min_x", 100); |
| 50 | status->touch_calibration->min_y = params.Get("min_y", 50); | 51 | status->touch_calibration->min_y = params.Get("min_y", 50); |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 4b0c6346f..91df062d7 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -2,6 +2,8 @@ add_library(video_core STATIC | |||
| 2 | buffer_cache/buffer_block.h | 2 | buffer_cache/buffer_block.h |
| 3 | buffer_cache/buffer_cache.h | 3 | buffer_cache/buffer_cache.h |
| 4 | buffer_cache/map_interval.h | 4 | buffer_cache/map_interval.h |
| 5 | dirty_flags.cpp | ||
| 6 | dirty_flags.h | ||
| 5 | dma_pusher.cpp | 7 | dma_pusher.cpp |
| 6 | dma_pusher.h | 8 | dma_pusher.h |
| 7 | engines/const_buffer_engine_interface.h | 9 | engines/const_buffer_engine_interface.h |
| @@ -63,14 +65,12 @@ add_library(video_core STATIC | |||
| 63 | renderer_opengl/gl_shader_decompiler.h | 65 | renderer_opengl/gl_shader_decompiler.h |
| 64 | renderer_opengl/gl_shader_disk_cache.cpp | 66 | renderer_opengl/gl_shader_disk_cache.cpp |
| 65 | renderer_opengl/gl_shader_disk_cache.h | 67 | renderer_opengl/gl_shader_disk_cache.h |
| 66 | renderer_opengl/gl_shader_gen.cpp | ||
| 67 | renderer_opengl/gl_shader_gen.h | ||
| 68 | renderer_opengl/gl_shader_manager.cpp | 68 | renderer_opengl/gl_shader_manager.cpp |
| 69 | renderer_opengl/gl_shader_manager.h | 69 | renderer_opengl/gl_shader_manager.h |
| 70 | renderer_opengl/gl_shader_util.cpp | 70 | renderer_opengl/gl_shader_util.cpp |
| 71 | renderer_opengl/gl_shader_util.h | 71 | renderer_opengl/gl_shader_util.h |
| 72 | renderer_opengl/gl_state.cpp | 72 | renderer_opengl/gl_state_tracker.cpp |
| 73 | renderer_opengl/gl_state.h | 73 | renderer_opengl/gl_state_tracker.h |
| 74 | renderer_opengl/gl_stream_buffer.cpp | 74 | renderer_opengl/gl_stream_buffer.cpp |
| 75 | renderer_opengl/gl_stream_buffer.h | 75 | renderer_opengl/gl_stream_buffer.h |
| 76 | renderer_opengl/gl_texture_cache.cpp | 76 | renderer_opengl/gl_texture_cache.cpp |
| @@ -116,8 +116,6 @@ add_library(video_core STATIC | |||
| 116 | shader/ast.h | 116 | shader/ast.h |
| 117 | shader/compiler_settings.cpp | 117 | shader/compiler_settings.cpp |
| 118 | shader/compiler_settings.h | 118 | shader/compiler_settings.h |
| 119 | shader/const_buffer_locker.cpp | ||
| 120 | shader/const_buffer_locker.h | ||
| 121 | shader/control_flow.cpp | 119 | shader/control_flow.cpp |
| 122 | shader/control_flow.h | 120 | shader/control_flow.h |
| 123 | shader/decode.cpp | 121 | shader/decode.cpp |
| @@ -126,9 +124,13 @@ add_library(video_core STATIC | |||
| 126 | shader/node_helper.cpp | 124 | shader/node_helper.cpp |
| 127 | shader/node_helper.h | 125 | shader/node_helper.h |
| 128 | shader/node.h | 126 | shader/node.h |
| 127 | shader/registry.cpp | ||
| 128 | shader/registry.h | ||
| 129 | shader/shader_ir.cpp | 129 | shader/shader_ir.cpp |
| 130 | shader/shader_ir.h | 130 | shader/shader_ir.h |
| 131 | shader/track.cpp | 131 | shader/track.cpp |
| 132 | shader/transform_feedback.cpp | ||
| 133 | shader/transform_feedback.h | ||
| 132 | surface.cpp | 134 | surface.cpp |
| 133 | surface.h | 135 | surface.h |
| 134 | texture_cache/format_lookup_table.cpp | 136 | texture_cache/format_lookup_table.cpp |
| @@ -198,6 +200,8 @@ if (ENABLE_VULKAN) | |||
| 198 | renderer_vulkan/vk_shader_util.h | 200 | renderer_vulkan/vk_shader_util.h |
| 199 | renderer_vulkan/vk_staging_buffer_pool.cpp | 201 | renderer_vulkan/vk_staging_buffer_pool.cpp |
| 200 | renderer_vulkan/vk_staging_buffer_pool.h | 202 | renderer_vulkan/vk_staging_buffer_pool.h |
| 203 | renderer_vulkan/vk_state_tracker.cpp | ||
| 204 | renderer_vulkan/vk_state_tracker.h | ||
| 201 | renderer_vulkan/vk_stream_buffer.cpp | 205 | renderer_vulkan/vk_stream_buffer.cpp |
| 202 | renderer_vulkan/vk_stream_buffer.h | 206 | renderer_vulkan/vk_stream_buffer.h |
| 203 | renderer_vulkan/vk_swapchain.cpp | 207 | renderer_vulkan/vk_swapchain.cpp |
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp new file mode 100644 index 000000000..e16075993 --- /dev/null +++ b/src/video_core/dirty_flags.cpp | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <cstddef> | ||
| 7 | |||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/dirty_flags.h" | ||
| 10 | |||
| 11 | #define OFF(field_name) MAXWELL3D_REG_INDEX(field_name) | ||
| 12 | #define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / sizeof(u32)) | ||
| 13 | |||
| 14 | namespace VideoCommon::Dirty { | ||
| 15 | |||
| 16 | using Tegra::Engines::Maxwell3D; | ||
| 17 | |||
| 18 | void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { | ||
| 19 | static constexpr std::size_t num_per_rt = NUM(rt[0]); | ||
| 20 | static constexpr std::size_t begin = OFF(rt); | ||
| 21 | static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets; | ||
| 22 | for (std::size_t rt = 0; rt < Maxwell3D::Regs::NumRenderTargets; ++rt) { | ||
| 23 | FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt); | ||
| 24 | } | ||
| 25 | FillBlock(tables[1], begin, num, RenderTargets); | ||
| 26 | |||
| 27 | static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets}; | ||
| 28 | for (std::size_t i = 0; i < std::size(zeta_flags); ++i) { | ||
| 29 | const u8 flag = zeta_flags[i]; | ||
| 30 | auto& table = tables[i]; | ||
| 31 | table[OFF(zeta_enable)] = flag; | ||
| 32 | table[OFF(zeta_width)] = flag; | ||
| 33 | table[OFF(zeta_height)] = flag; | ||
| 34 | FillBlock(table, OFF(zeta), NUM(zeta), flag); | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | } // namespace VideoCommon::Dirty | ||
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h new file mode 100644 index 000000000..3f6c1d83a --- /dev/null +++ b/src/video_core/dirty_flags.h | |||
| @@ -0,0 +1,49 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <iterator> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Dirty { | ||
| 15 | |||
| 16 | enum : u8 { | ||
| 17 | NullEntry = 0, | ||
| 18 | |||
| 19 | RenderTargets, | ||
| 20 | ColorBuffer0, | ||
| 21 | ColorBuffer1, | ||
| 22 | ColorBuffer2, | ||
| 23 | ColorBuffer3, | ||
| 24 | ColorBuffer4, | ||
| 25 | ColorBuffer5, | ||
| 26 | ColorBuffer6, | ||
| 27 | ColorBuffer7, | ||
| 28 | ZetaBuffer, | ||
| 29 | |||
| 30 | LastCommonEntry, | ||
| 31 | }; | ||
| 32 | |||
| 33 | template <typename Integer> | ||
| 34 | void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Table& table, std::size_t begin, | ||
| 35 | std::size_t num, Integer dirty_index) { | ||
| 36 | const auto it = std::begin(table) + begin; | ||
| 37 | std::fill(it, it + num, static_cast<u8>(dirty_index)); | ||
| 38 | } | ||
| 39 | |||
| 40 | template <typename Integer1, typename Integer2> | ||
| 41 | void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_t begin, | ||
| 42 | std::size_t num, Integer1 index_a, Integer2 index_b) { | ||
| 43 | FillBlock(tables[0], begin, num, index_a); | ||
| 44 | FillBlock(tables[1], begin, num, index_b); | ||
| 45 | } | ||
| 46 | |||
| 47 | void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables); | ||
| 48 | |||
| 49 | } // namespace VideoCommon::Dirty | ||
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 0094fd715..713c14182 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() { | |||
| 22 | MICROPROFILE_SCOPE(DispatchCalls); | 22 | MICROPROFILE_SCOPE(DispatchCalls); |
| 23 | 23 | ||
| 24 | // On entering GPU code, assume all memory may be touched by the ARM core. | 24 | // On entering GPU code, assume all memory may be touched by the ARM core. |
| 25 | gpu.Maxwell3D().dirty.OnMemoryWrite(); | 25 | gpu.Maxwell3D().OnMemoryWrite(); |
| 26 | 26 | ||
| 27 | dma_pushbuffer_subindex = 0; | 27 | dma_pushbuffer_subindex = 0; |
| 28 | 28 | ||
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index d56a47710..724ee0fd6 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h | |||
| @@ -16,11 +16,12 @@ namespace Tegra::Engines { | |||
| 16 | 16 | ||
| 17 | struct SamplerDescriptor { | 17 | struct SamplerDescriptor { |
| 18 | union { | 18 | union { |
| 19 | BitField<0, 20, Tegra::Shader::TextureType> texture_type; | 19 | u32 raw = 0; |
| 20 | BitField<20, 1, u32> is_array; | 20 | BitField<0, 2, Tegra::Shader::TextureType> texture_type; |
| 21 | BitField<21, 1, u32> is_buffer; | 21 | BitField<2, 3, Tegra::Texture::ComponentType> component_type; |
| 22 | BitField<22, 1, u32> is_shadow; | 22 | BitField<5, 1, u32> is_array; |
| 23 | u32 raw{}; | 23 | BitField<6, 1, u32> is_buffer; |
| 24 | BitField<7, 1, u32> is_shadow; | ||
| 24 | }; | 25 | }; |
| 25 | 26 | ||
| 26 | bool operator==(const SamplerDescriptor& rhs) const noexcept { | 27 | bool operator==(const SamplerDescriptor& rhs) const noexcept { |
| @@ -31,68 +32,48 @@ struct SamplerDescriptor { | |||
| 31 | return !operator==(rhs); | 32 | return !operator==(rhs); |
| 32 | } | 33 | } |
| 33 | 34 | ||
| 34 | static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) { | 35 | static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) { |
| 36 | using Tegra::Shader::TextureType; | ||
| 35 | SamplerDescriptor result; | 37 | SamplerDescriptor result; |
| 36 | switch (tic_texture_type) { | 38 | |
| 39 | // This is going to be used to determine the shading language type. | ||
| 40 | // Because of that we don't care about all component types on color textures. | ||
| 41 | result.component_type.Assign(tic.r_type.Value()); | ||
| 42 | |||
| 43 | switch (tic.texture_type.Value()) { | ||
| 37 | case Tegra::Texture::TextureType::Texture1D: | 44 | case Tegra::Texture::TextureType::Texture1D: |
| 38 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); | 45 | result.texture_type.Assign(TextureType::Texture1D); |
| 39 | result.is_array.Assign(0); | ||
| 40 | result.is_buffer.Assign(0); | ||
| 41 | result.is_shadow.Assign(0); | ||
| 42 | return result; | 46 | return result; |
| 43 | case Tegra::Texture::TextureType::Texture2D: | 47 | case Tegra::Texture::TextureType::Texture2D: |
| 44 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | 48 | result.texture_type.Assign(TextureType::Texture2D); |
| 45 | result.is_array.Assign(0); | ||
| 46 | result.is_buffer.Assign(0); | ||
| 47 | result.is_shadow.Assign(0); | ||
| 48 | return result; | 49 | return result; |
| 49 | case Tegra::Texture::TextureType::Texture3D: | 50 | case Tegra::Texture::TextureType::Texture3D: |
| 50 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D); | 51 | result.texture_type.Assign(TextureType::Texture3D); |
| 51 | result.is_array.Assign(0); | ||
| 52 | result.is_buffer.Assign(0); | ||
| 53 | result.is_shadow.Assign(0); | ||
| 54 | return result; | 52 | return result; |
| 55 | case Tegra::Texture::TextureType::TextureCubemap: | 53 | case Tegra::Texture::TextureType::TextureCubemap: |
| 56 | result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); | 54 | result.texture_type.Assign(TextureType::TextureCube); |
| 57 | result.is_array.Assign(0); | ||
| 58 | result.is_buffer.Assign(0); | ||
| 59 | result.is_shadow.Assign(0); | ||
| 60 | return result; | 55 | return result; |
| 61 | case Tegra::Texture::TextureType::Texture1DArray: | 56 | case Tegra::Texture::TextureType::Texture1DArray: |
| 62 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); | 57 | result.texture_type.Assign(TextureType::Texture1D); |
| 63 | result.is_array.Assign(1); | 58 | result.is_array.Assign(1); |
| 64 | result.is_buffer.Assign(0); | ||
| 65 | result.is_shadow.Assign(0); | ||
| 66 | return result; | 59 | return result; |
| 67 | case Tegra::Texture::TextureType::Texture2DArray: | 60 | case Tegra::Texture::TextureType::Texture2DArray: |
| 68 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | 61 | result.texture_type.Assign(TextureType::Texture2D); |
| 69 | result.is_array.Assign(1); | 62 | result.is_array.Assign(1); |
| 70 | result.is_buffer.Assign(0); | ||
| 71 | result.is_shadow.Assign(0); | ||
| 72 | return result; | 63 | return result; |
| 73 | case Tegra::Texture::TextureType::Texture1DBuffer: | 64 | case Tegra::Texture::TextureType::Texture1DBuffer: |
| 74 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); | 65 | result.texture_type.Assign(TextureType::Texture1D); |
| 75 | result.is_array.Assign(0); | ||
| 76 | result.is_buffer.Assign(1); | 66 | result.is_buffer.Assign(1); |
| 77 | result.is_shadow.Assign(0); | ||
| 78 | return result; | 67 | return result; |
| 79 | case Tegra::Texture::TextureType::Texture2DNoMipmap: | 68 | case Tegra::Texture::TextureType::Texture2DNoMipmap: |
| 80 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | 69 | result.texture_type.Assign(TextureType::Texture2D); |
| 81 | result.is_array.Assign(0); | ||
| 82 | result.is_buffer.Assign(0); | ||
| 83 | result.is_shadow.Assign(0); | ||
| 84 | return result; | 70 | return result; |
| 85 | case Tegra::Texture::TextureType::TextureCubeArray: | 71 | case Tegra::Texture::TextureType::TextureCubeArray: |
| 86 | result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); | 72 | result.texture_type.Assign(TextureType::TextureCube); |
| 87 | result.is_array.Assign(1); | 73 | result.is_array.Assign(1); |
| 88 | result.is_buffer.Assign(0); | ||
| 89 | result.is_shadow.Assign(0); | ||
| 90 | return result; | 74 | return result; |
| 91 | default: | 75 | default: |
| 92 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | 76 | result.texture_type.Assign(TextureType::Texture2D); |
| 93 | result.is_array.Assign(0); | ||
| 94 | result.is_buffer.Assign(0); | ||
| 95 | result.is_shadow.Assign(0); | ||
| 96 | return result; | 77 | return result; |
| 97 | } | 78 | } |
| 98 | } | 79 | } |
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 4b824aa4e..368c75a66 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -39,7 +39,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 39 | const bool is_last_call = method_call.IsLastCall(); | 39 | const bool is_last_call = method_call.IsLastCall(); |
| 40 | upload_state.ProcessData(method_call.argument, is_last_call); | 40 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 41 | if (is_last_call) { | 41 | if (is_last_call) { |
| 42 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); | 42 | system.GPU().Maxwell3D().OnMemoryWrite(); |
| 43 | } | 43 | } |
| 44 | break; | 44 | break; |
| 45 | } | 45 | } |
| @@ -89,7 +89,7 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con | |||
| 89 | 89 | ||
| 90 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | 90 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; |
| 91 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); | 91 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); |
| 92 | SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); | 92 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); |
| 93 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | 93 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); |
| 94 | return result; | 94 | return result; |
| 95 | } | 95 | } |
| @@ -119,14 +119,6 @@ Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { | |||
| 119 | Texture::TICEntry tic_entry; | 119 | Texture::TICEntry tic_entry; |
| 120 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); | 120 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); |
| 121 | 121 | ||
| 122 | const auto r_type{tic_entry.r_type.Value()}; | ||
| 123 | const auto g_type{tic_entry.g_type.Value()}; | ||
| 124 | const auto b_type{tic_entry.b_type.Value()}; | ||
| 125 | const auto a_type{tic_entry.a_type.Value()}; | ||
| 126 | |||
| 127 | // TODO(Subv): Different data types for separate components are not supported | ||
| 128 | DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); | ||
| 129 | |||
| 130 | return tic_entry; | 122 | return tic_entry; |
| 131 | } | 123 | } |
| 132 | 124 | ||
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index fa4a7c5c1..597872e43 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 34 | const bool is_last_call = method_call.IsLastCall(); | 34 | const bool is_last_call = method_call.IsLastCall(); |
| 35 | upload_state.ProcessData(method_call.argument, is_last_call); | 35 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 36 | if (is_last_call) { | 36 | if (is_last_call) { |
| 37 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); | 37 | system.GPU().Maxwell3D().OnMemoryWrite(); |
| 38 | } | 38 | } |
| 39 | break; | 39 | break; |
| 40 | } | 40 | } |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index b28de1092..ba63b44b4 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -26,7 +26,8 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste | |||
| 26 | MemoryManager& memory_manager) | 26 | MemoryManager& memory_manager) |
| 27 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, | 27 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, |
| 28 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { | 28 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { |
| 29 | InitDirtySettings(); | 29 | dirty.flags.flip(); |
| 30 | |||
| 30 | InitializeRegisterDefaults(); | 31 | InitializeRegisterDefaults(); |
| 31 | } | 32 | } |
| 32 | 33 | ||
| @@ -75,8 +76,8 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 75 | regs.stencil_back_mask = 0xFFFFFFFF; | 76 | regs.stencil_back_mask = 0xFFFFFFFF; |
| 76 | 77 | ||
| 77 | regs.depth_test_func = Regs::ComparisonOp::Always; | 78 | regs.depth_test_func = Regs::ComparisonOp::Always; |
| 78 | regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise; | 79 | regs.front_face = Regs::FrontFace::CounterClockWise; |
| 79 | regs.cull.cull_face = Regs::Cull::CullFace::Back; | 80 | regs.cull_face = Regs::CullFace::Back; |
| 80 | 81 | ||
| 81 | // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a | 82 | // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a |
| 82 | // register carrying a default value. Assume it's OpenGL's default (1). | 83 | // register carrying a default value. Assume it's OpenGL's default (1). |
| @@ -95,7 +96,9 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 95 | regs.rasterize_enable = 1; | 96 | regs.rasterize_enable = 1; |
| 96 | regs.rt_separate_frag_data = 1; | 97 | regs.rt_separate_frag_data = 1; |
| 97 | regs.framebuffer_srgb = 1; | 98 | regs.framebuffer_srgb = 1; |
| 98 | regs.cull.front_face = Maxwell3D::Regs::Cull::FrontFace::ClockWise; | 99 | regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise; |
| 100 | |||
| 101 | shadow_state = regs; | ||
| 99 | 102 | ||
| 100 | mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true; | 103 | mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true; |
| 101 | mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true; | 104 | mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true; |
| @@ -103,164 +106,6 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 103 | mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; | 106 | mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; |
| 104 | } | 107 | } |
| 105 | 108 | ||
| 106 | #define DIRTY_REGS_POS(field_name) static_cast<u8>(offsetof(Maxwell3D::DirtyRegs, field_name)) | ||
| 107 | |||
| 108 | void Maxwell3D::InitDirtySettings() { | ||
| 109 | const auto set_block = [this](std::size_t start, std::size_t range, u8 position) { | ||
| 110 | const auto start_itr = dirty_pointers.begin() + start; | ||
| 111 | const auto end_itr = start_itr + range; | ||
| 112 | std::fill(start_itr, end_itr, position); | ||
| 113 | }; | ||
| 114 | dirty.regs.fill(true); | ||
| 115 | |||
| 116 | // Init Render Targets | ||
| 117 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | ||
| 118 | constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); | ||
| 119 | constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; | ||
| 120 | u8 rt_dirty_reg = DIRTY_REGS_POS(render_target); | ||
| 121 | for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { | ||
| 122 | set_block(rt_reg, registers_per_rt, rt_dirty_reg); | ||
| 123 | ++rt_dirty_reg; | ||
| 124 | } | ||
| 125 | constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); | ||
| 126 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; | ||
| 127 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag; | ||
| 128 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag; | ||
| 129 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | ||
| 130 | constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta); | ||
| 131 | set_block(zeta_reg, registers_in_zeta, depth_buffer_flag); | ||
| 132 | |||
| 133 | // Init Vertex Arrays | ||
| 134 | constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); | ||
| 135 | constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); | ||
| 136 | constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; | ||
| 137 | u8 va_dirty_reg = DIRTY_REGS_POS(vertex_array); | ||
| 138 | u8 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 139 | for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; | ||
| 140 | vertex_reg += vertex_array_size) { | ||
| 141 | set_block(vertex_reg, 3, va_dirty_reg); | ||
| 142 | // The divisor concerns vertex array instances | ||
| 143 | dirty_pointers[static_cast<std::size_t>(vertex_reg) + 3] = vi_dirty_reg; | ||
| 144 | ++va_dirty_reg; | ||
| 145 | ++vi_dirty_reg; | ||
| 146 | } | ||
| 147 | constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); | ||
| 148 | constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); | ||
| 149 | constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; | ||
| 150 | va_dirty_reg = DIRTY_REGS_POS(vertex_array); | ||
| 151 | for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; | ||
| 152 | vertex_reg += vertex_limit_size) { | ||
| 153 | set_block(vertex_reg, vertex_limit_size, va_dirty_reg); | ||
| 154 | va_dirty_reg++; | ||
| 155 | } | ||
| 156 | constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); | ||
| 157 | constexpr u32 vertex_instance_size = | ||
| 158 | sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); | ||
| 159 | constexpr u32 vertex_instance_end = | ||
| 160 | vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; | ||
| 161 | vi_dirty_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 162 | for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; | ||
| 163 | vertex_reg += vertex_instance_size) { | ||
| 164 | set_block(vertex_reg, vertex_instance_size, vi_dirty_reg); | ||
| 165 | vi_dirty_reg++; | ||
| 166 | } | ||
| 167 | set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), | ||
| 168 | DIRTY_REGS_POS(vertex_attrib_format)); | ||
| 169 | |||
| 170 | // Init Shaders | ||
| 171 | constexpr u32 shader_registers_count = | ||
| 172 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 173 | set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count, | ||
| 174 | DIRTY_REGS_POS(shaders)); | ||
| 175 | |||
| 176 | // State | ||
| 177 | |||
| 178 | // Viewport | ||
| 179 | constexpr u8 viewport_dirty_reg = DIRTY_REGS_POS(viewport); | ||
| 180 | constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); | ||
| 181 | constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); | ||
| 182 | set_block(viewport_start, viewport_size, viewport_dirty_reg); | ||
| 183 | constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control); | ||
| 184 | constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32); | ||
| 185 | set_block(view_volume_start, view_volume_size, viewport_dirty_reg); | ||
| 186 | |||
| 187 | // Viewport transformation | ||
| 188 | constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform); | ||
| 189 | constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32); | ||
| 190 | set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform)); | ||
| 191 | |||
| 192 | // Cullmode | ||
| 193 | constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull); | ||
| 194 | constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32); | ||
| 195 | set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode)); | ||
| 196 | |||
| 197 | // Screen y control | ||
| 198 | dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control); | ||
| 199 | |||
| 200 | // Primitive Restart | ||
| 201 | constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart); | ||
| 202 | constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32); | ||
| 203 | set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); | ||
| 204 | |||
| 205 | // Depth Test | ||
| 206 | constexpr u8 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); | ||
| 207 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; | ||
| 208 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; | ||
| 209 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; | ||
| 210 | |||
| 211 | // Stencil Test | ||
| 212 | constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test); | ||
| 213 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg; | ||
| 214 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg; | ||
| 215 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg; | ||
| 216 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg; | ||
| 217 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg; | ||
| 218 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg; | ||
| 219 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg; | ||
| 220 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg; | ||
| 221 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg; | ||
| 222 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg; | ||
| 223 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg; | ||
| 224 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg; | ||
| 225 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg; | ||
| 226 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg; | ||
| 227 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg; | ||
| 228 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg; | ||
| 229 | |||
| 230 | // Color Mask | ||
| 231 | constexpr u8 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); | ||
| 232 | dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; | ||
| 233 | set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), | ||
| 234 | color_mask_dirty_reg); | ||
| 235 | // Blend State | ||
| 236 | constexpr u8 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); | ||
| 237 | set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), | ||
| 238 | blend_state_dirty_reg); | ||
| 239 | dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; | ||
| 240 | set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg); | ||
| 241 | set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32), | ||
| 242 | blend_state_dirty_reg); | ||
| 243 | |||
| 244 | // Scissor State | ||
| 245 | constexpr u8 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); | ||
| 246 | set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), | ||
| 247 | scissor_test_dirty_reg); | ||
| 248 | |||
| 249 | // Polygon Offset | ||
| 250 | constexpr u8 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); | ||
| 251 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; | ||
| 252 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; | ||
| 253 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; | ||
| 254 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg; | ||
| 255 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg; | ||
| 256 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; | ||
| 257 | |||
| 258 | // Depth bounds | ||
| 259 | constexpr u8 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values); | ||
| 260 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg; | ||
| 261 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg; | ||
| 262 | } | ||
| 263 | |||
| 264 | void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) { | 109 | void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) { |
| 265 | // Reset the current macro. | 110 | // Reset the current macro. |
| 266 | executing_macro = 0; | 111 | executing_macro = 0; |
| @@ -317,31 +162,34 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 317 | ASSERT_MSG(method < Regs::NUM_REGS, | 162 | ASSERT_MSG(method < Regs::NUM_REGS, |
| 318 | "Invalid Maxwell3D register, increase the size of the Regs structure"); | 163 | "Invalid Maxwell3D register, increase the size of the Regs structure"); |
| 319 | 164 | ||
| 320 | if (regs.reg_array[method] != method_call.argument) { | 165 | u32 arg = method_call.argument; |
| 321 | regs.reg_array[method] = method_call.argument; | 166 | // Keep track of the register value in shadow_state when requested. |
| 322 | const std::size_t dirty_reg = dirty_pointers[method]; | 167 | if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track || |
| 323 | if (dirty_reg) { | 168 | shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) { |
| 324 | dirty.regs[dirty_reg] = true; | 169 | shadow_state.reg_array[method] = arg; |
| 325 | if (dirty_reg >= DIRTY_REGS_POS(vertex_array) && | 170 | } else if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Replay) { |
| 326 | dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) { | 171 | arg = shadow_state.reg_array[method]; |
| 327 | dirty.vertex_array_buffers = true; | 172 | } |
| 328 | } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) && | 173 | |
| 329 | dirty_reg < DIRTY_REGS_POS(vertex_instances)) { | 174 | if (regs.reg_array[method] != arg) { |
| 330 | dirty.vertex_instances = true; | 175 | regs.reg_array[method] = arg; |
| 331 | } else if (dirty_reg >= DIRTY_REGS_POS(render_target) && | 176 | |
| 332 | dirty_reg < DIRTY_REGS_POS(render_settings)) { | 177 | for (const auto& table : dirty.tables) { |
| 333 | dirty.render_settings = true; | 178 | dirty.flags[table[method]] = true; |
| 334 | } | ||
| 335 | } | 179 | } |
| 336 | } | 180 | } |
| 337 | 181 | ||
| 338 | switch (method) { | 182 | switch (method) { |
| 183 | case MAXWELL3D_REG_INDEX(shadow_ram_control): { | ||
| 184 | shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_call.argument); | ||
| 185 | break; | ||
| 186 | } | ||
| 339 | case MAXWELL3D_REG_INDEX(macros.data): { | 187 | case MAXWELL3D_REG_INDEX(macros.data): { |
| 340 | ProcessMacroUpload(method_call.argument); | 188 | ProcessMacroUpload(arg); |
| 341 | break; | 189 | break; |
| 342 | } | 190 | } |
| 343 | case MAXWELL3D_REG_INDEX(macros.bind): { | 191 | case MAXWELL3D_REG_INDEX(macros.bind): { |
| 344 | ProcessMacroBind(method_call.argument); | 192 | ProcessMacroBind(arg); |
| 345 | break; | 193 | break; |
| 346 | } | 194 | } |
| 347 | case MAXWELL3D_REG_INDEX(firmware[4]): { | 195 | case MAXWELL3D_REG_INDEX(firmware[4]): { |
| @@ -417,9 +265,9 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 417 | } | 265 | } |
| 418 | case MAXWELL3D_REG_INDEX(data_upload): { | 266 | case MAXWELL3D_REG_INDEX(data_upload): { |
| 419 | const bool is_last_call = method_call.IsLastCall(); | 267 | const bool is_last_call = method_call.IsLastCall(); |
| 420 | upload_state.ProcessData(method_call.argument, is_last_call); | 268 | upload_state.ProcessData(arg, is_last_call); |
| 421 | if (is_last_call) { | 269 | if (is_last_call) { |
| 422 | dirty.OnMemoryWrite(); | 270 | OnMemoryWrite(); |
| 423 | } | 271 | } |
| 424 | break; | 272 | break; |
| 425 | } | 273 | } |
| @@ -727,7 +575,7 @@ void Maxwell3D::FinishCBData() { | |||
| 727 | 575 | ||
| 728 | const u32 id = cb_data_state.id; | 576 | const u32 id = cb_data_state.id; |
| 729 | memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); | 577 | memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); |
| 730 | dirty.OnMemoryWrite(); | 578 | OnMemoryWrite(); |
| 731 | 579 | ||
| 732 | cb_data_state.id = null_cb_data; | 580 | cb_data_state.id = null_cb_data; |
| 733 | cb_data_state.current = null_cb_data; | 581 | cb_data_state.current = null_cb_data; |
| @@ -805,7 +653,7 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b | |||
| 805 | 653 | ||
| 806 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | 654 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; |
| 807 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); | 655 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); |
| 808 | SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); | 656 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); |
| 809 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | 657 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); |
| 810 | return result; | 658 | return result; |
| 811 | } | 659 | } |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 26939be3f..d24c9f657 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <bitset> | 8 | #include <bitset> |
| 9 | #include <limits> | ||
| 9 | #include <optional> | 10 | #include <optional> |
| 10 | #include <type_traits> | 11 | #include <type_traits> |
| 11 | #include <unordered_map> | 12 | #include <unordered_map> |
| @@ -66,6 +67,7 @@ public: | |||
| 66 | static constexpr std::size_t NumVaryings = 31; | 67 | static constexpr std::size_t NumVaryings = 31; |
| 67 | static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number | 68 | static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number |
| 68 | static constexpr std::size_t NumClipDistances = 8; | 69 | static constexpr std::size_t NumClipDistances = 8; |
| 70 | static constexpr std::size_t NumTransformFeedbackBuffers = 4; | ||
| 69 | static constexpr std::size_t MaxShaderProgram = 6; | 71 | static constexpr std::size_t MaxShaderProgram = 6; |
| 70 | static constexpr std::size_t MaxShaderStage = 5; | 72 | static constexpr std::size_t MaxShaderStage = 5; |
| 71 | // Maximum number of const buffers per shader stage. | 73 | // Maximum number of const buffers per shader stage. |
| @@ -431,21 +433,15 @@ public: | |||
| 431 | GeneratedPrimitives = 0x1F, | 433 | GeneratedPrimitives = 0x1F, |
| 432 | }; | 434 | }; |
| 433 | 435 | ||
| 434 | struct Cull { | 436 | enum class FrontFace : u32 { |
| 435 | enum class FrontFace : u32 { | 437 | ClockWise = 0x0900, |
| 436 | ClockWise = 0x0900, | 438 | CounterClockWise = 0x0901, |
| 437 | CounterClockWise = 0x0901, | 439 | }; |
| 438 | }; | ||
| 439 | |||
| 440 | enum class CullFace : u32 { | ||
| 441 | Front = 0x0404, | ||
| 442 | Back = 0x0405, | ||
| 443 | FrontAndBack = 0x0408, | ||
| 444 | }; | ||
| 445 | 440 | ||
| 446 | u32 enabled; | 441 | enum class CullFace : u32 { |
| 447 | FrontFace front_face; | 442 | Front = 0x0404, |
| 448 | CullFace cull_face; | 443 | Back = 0x0405, |
| 444 | FrontAndBack = 0x0408, | ||
| 449 | }; | 445 | }; |
| 450 | 446 | ||
| 451 | struct Blend { | 447 | struct Blend { |
| @@ -529,6 +525,23 @@ public: | |||
| 529 | FractionalEven = 2, | 525 | FractionalEven = 2, |
| 530 | }; | 526 | }; |
| 531 | 527 | ||
| 528 | enum class PolygonMode : u32 { | ||
| 529 | Point = 0x1b00, | ||
| 530 | Line = 0x1b01, | ||
| 531 | Fill = 0x1b02, | ||
| 532 | }; | ||
| 533 | |||
| 534 | enum class ShadowRamControl : u32 { | ||
| 535 | // write value to shadow ram | ||
| 536 | Track = 0, | ||
| 537 | // write value to shadow ram ( with validation ??? ) | ||
| 538 | TrackWithFilter = 1, | ||
| 539 | // only write to real hw register | ||
| 540 | Passthrough = 2, | ||
| 541 | // write value from shadow ram to real hw register | ||
| 542 | Replay = 3, | ||
| 543 | }; | ||
| 544 | |||
| 532 | struct RenderTargetConfig { | 545 | struct RenderTargetConfig { |
| 533 | u32 address_high; | 546 | u32 address_high; |
| 534 | u32 address_low; | 547 | u32 address_low; |
| @@ -542,7 +555,7 @@ public: | |||
| 542 | BitField<12, 1, InvMemoryLayout> type; | 555 | BitField<12, 1, InvMemoryLayout> type; |
| 543 | } memory_layout; | 556 | } memory_layout; |
| 544 | union { | 557 | union { |
| 545 | BitField<0, 16, u32> array_mode; | 558 | BitField<0, 16, u32> layers; |
| 546 | BitField<16, 1, u32> volume; | 559 | BitField<16, 1, u32> volume; |
| 547 | }; | 560 | }; |
| 548 | u32 layer_stride; | 561 | u32 layer_stride; |
| @@ -574,7 +587,7 @@ public: | |||
| 574 | f32 translate_z; | 587 | f32 translate_z; |
| 575 | INSERT_UNION_PADDING_WORDS(2); | 588 | INSERT_UNION_PADDING_WORDS(2); |
| 576 | 589 | ||
| 577 | Common::Rectangle<s32> GetRect() const { | 590 | Common::Rectangle<f32> GetRect() const { |
| 578 | return { | 591 | return { |
| 579 | GetX(), // left | 592 | GetX(), // left |
| 580 | GetY() + GetHeight(), // top | 593 | GetY() + GetHeight(), // top |
| @@ -583,20 +596,20 @@ public: | |||
| 583 | }; | 596 | }; |
| 584 | }; | 597 | }; |
| 585 | 598 | ||
| 586 | s32 GetX() const { | 599 | f32 GetX() const { |
| 587 | return static_cast<s32>(std::max(0.0f, translate_x - std::fabs(scale_x))); | 600 | return std::max(0.0f, translate_x - std::fabs(scale_x)); |
| 588 | } | 601 | } |
| 589 | 602 | ||
| 590 | s32 GetY() const { | 603 | f32 GetY() const { |
| 591 | return static_cast<s32>(std::max(0.0f, translate_y - std::fabs(scale_y))); | 604 | return std::max(0.0f, translate_y - std::fabs(scale_y)); |
| 592 | } | 605 | } |
| 593 | 606 | ||
| 594 | s32 GetWidth() const { | 607 | f32 GetWidth() const { |
| 595 | return static_cast<s32>(translate_x + std::fabs(scale_x)) - GetX(); | 608 | return translate_x + std::fabs(scale_x) - GetX(); |
| 596 | } | 609 | } |
| 597 | 610 | ||
| 598 | s32 GetHeight() const { | 611 | f32 GetHeight() const { |
| 599 | return static_cast<s32>(translate_y + std::fabs(scale_y)) - GetY(); | 612 | return translate_y + std::fabs(scale_y) - GetY(); |
| 600 | } | 613 | } |
| 601 | }; | 614 | }; |
| 602 | 615 | ||
| @@ -626,6 +639,29 @@ public: | |||
| 626 | float depth_range_far; | 639 | float depth_range_far; |
| 627 | }; | 640 | }; |
| 628 | 641 | ||
| 642 | struct TransformFeedbackBinding { | ||
| 643 | u32 buffer_enable; | ||
| 644 | u32 address_high; | ||
| 645 | u32 address_low; | ||
| 646 | s32 buffer_size; | ||
| 647 | s32 buffer_offset; | ||
| 648 | INSERT_UNION_PADDING_WORDS(3); | ||
| 649 | |||
| 650 | GPUVAddr Address() const { | ||
| 651 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 652 | address_low); | ||
| 653 | } | ||
| 654 | }; | ||
| 655 | static_assert(sizeof(TransformFeedbackBinding) == 32); | ||
| 656 | |||
| 657 | struct TransformFeedbackLayout { | ||
| 658 | u32 stream; | ||
| 659 | u32 varying_count; | ||
| 660 | u32 stride; | ||
| 661 | INSERT_UNION_PADDING_WORDS(1); | ||
| 662 | }; | ||
| 663 | static_assert(sizeof(TransformFeedbackLayout) == 16); | ||
| 664 | |||
| 629 | bool IsShaderConfigEnabled(std::size_t index) const { | 665 | bool IsShaderConfigEnabled(std::size_t index) const { |
| 630 | // The VertexB is always enabled. | 666 | // The VertexB is always enabled. |
| 631 | if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) { | 667 | if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) { |
| @@ -634,6 +670,10 @@ public: | |||
| 634 | return shader_config[index].enable != 0; | 670 | return shader_config[index].enable != 0; |
| 635 | } | 671 | } |
| 636 | 672 | ||
| 673 | bool IsShaderConfigEnabled(Regs::ShaderProgram type) const { | ||
| 674 | return IsShaderConfigEnabled(static_cast<std::size_t>(type)); | ||
| 675 | } | ||
| 676 | |||
| 637 | union { | 677 | union { |
| 638 | struct { | 678 | struct { |
| 639 | INSERT_UNION_PADDING_WORDS(0x45); | 679 | INSERT_UNION_PADDING_WORDS(0x45); |
| @@ -645,7 +685,9 @@ public: | |||
| 645 | u32 bind; | 685 | u32 bind; |
| 646 | } macros; | 686 | } macros; |
| 647 | 687 | ||
| 648 | INSERT_UNION_PADDING_WORDS(0x17); | 688 | ShadowRamControl shadow_ram_control; |
| 689 | |||
| 690 | INSERT_UNION_PADDING_WORDS(0x16); | ||
| 649 | 691 | ||
| 650 | Upload::Registers upload; | 692 | Upload::Registers upload; |
| 651 | struct { | 693 | struct { |
| @@ -682,7 +724,13 @@ public: | |||
| 682 | 724 | ||
| 683 | u32 rasterize_enable; | 725 | u32 rasterize_enable; |
| 684 | 726 | ||
| 685 | INSERT_UNION_PADDING_WORDS(0xF1); | 727 | std::array<TransformFeedbackBinding, NumTransformFeedbackBuffers> tfb_bindings; |
| 728 | |||
| 729 | INSERT_UNION_PADDING_WORDS(0xC0); | ||
| 730 | |||
| 731 | std::array<TransformFeedbackLayout, NumTransformFeedbackBuffers> tfb_layouts; | ||
| 732 | |||
| 733 | INSERT_UNION_PADDING_WORDS(0x1); | ||
| 686 | 734 | ||
| 687 | u32 tfb_enabled; | 735 | u32 tfb_enabled; |
| 688 | 736 | ||
| @@ -710,7 +758,12 @@ public: | |||
| 710 | 758 | ||
| 711 | s32 clear_stencil; | 759 | s32 clear_stencil; |
| 712 | 760 | ||
| 713 | INSERT_UNION_PADDING_WORDS(0x7); | 761 | INSERT_UNION_PADDING_WORDS(0x2); |
| 762 | |||
| 763 | PolygonMode polygon_mode_front; | ||
| 764 | PolygonMode polygon_mode_back; | ||
| 765 | |||
| 766 | INSERT_UNION_PADDING_WORDS(0x3); | ||
| 714 | 767 | ||
| 715 | u32 polygon_offset_point_enable; | 768 | u32 polygon_offset_point_enable; |
| 716 | u32 polygon_offset_line_enable; | 769 | u32 polygon_offset_line_enable; |
| @@ -769,7 +822,11 @@ public: | |||
| 769 | BitField<12, 4, u32> viewport; | 822 | BitField<12, 4, u32> viewport; |
| 770 | } clear_flags; | 823 | } clear_flags; |
| 771 | 824 | ||
| 772 | INSERT_UNION_PADDING_WORDS(0x19); | 825 | INSERT_UNION_PADDING_WORDS(0x10); |
| 826 | |||
| 827 | u32 fill_rectangle; | ||
| 828 | |||
| 829 | INSERT_UNION_PADDING_WORDS(0x8); | ||
| 773 | 830 | ||
| 774 | std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; | 831 | std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; |
| 775 | 832 | ||
| @@ -800,8 +857,12 @@ public: | |||
| 800 | 857 | ||
| 801 | u32 zeta_width; | 858 | u32 zeta_width; |
| 802 | u32 zeta_height; | 859 | u32 zeta_height; |
| 860 | union { | ||
| 861 | BitField<0, 16, u32> zeta_layers; | ||
| 862 | BitField<16, 1, u32> zeta_volume; | ||
| 863 | }; | ||
| 803 | 864 | ||
| 804 | INSERT_UNION_PADDING_WORDS(0x27); | 865 | INSERT_UNION_PADDING_WORDS(0x26); |
| 805 | 866 | ||
| 806 | u32 depth_test_enable; | 867 | u32 depth_test_enable; |
| 807 | 868 | ||
| @@ -868,16 +929,7 @@ public: | |||
| 868 | 929 | ||
| 869 | INSERT_UNION_PADDING_WORDS(0x35); | 930 | INSERT_UNION_PADDING_WORDS(0x35); |
| 870 | 931 | ||
| 871 | union { | 932 | u32 clip_distance_enabled; |
| 872 | BitField<0, 1, u32> c0; | ||
| 873 | BitField<1, 1, u32> c1; | ||
| 874 | BitField<2, 1, u32> c2; | ||
| 875 | BitField<3, 1, u32> c3; | ||
| 876 | BitField<4, 1, u32> c4; | ||
| 877 | BitField<5, 1, u32> c5; | ||
| 878 | BitField<6, 1, u32> c6; | ||
| 879 | BitField<7, 1, u32> c7; | ||
| 880 | } clip_distance_enabled; | ||
| 881 | 933 | ||
| 882 | u32 samplecnt_enable; | 934 | u32 samplecnt_enable; |
| 883 | 935 | ||
| @@ -1056,7 +1108,9 @@ public: | |||
| 1056 | 1108 | ||
| 1057 | INSERT_UNION_PADDING_WORDS(1); | 1109 | INSERT_UNION_PADDING_WORDS(1); |
| 1058 | 1110 | ||
| 1059 | Cull cull; | 1111 | u32 cull_test_enabled; |
| 1112 | FrontFace front_face; | ||
| 1113 | CullFace cull_face; | ||
| 1060 | 1114 | ||
| 1061 | u32 pixel_center_integer; | 1115 | u32 pixel_center_integer; |
| 1062 | 1116 | ||
| @@ -1195,7 +1249,11 @@ public: | |||
| 1195 | 1249 | ||
| 1196 | u32 tex_cb_index; | 1250 | u32 tex_cb_index; |
| 1197 | 1251 | ||
| 1198 | INSERT_UNION_PADDING_WORDS(0x395); | 1252 | INSERT_UNION_PADDING_WORDS(0x7D); |
| 1253 | |||
| 1254 | std::array<std::array<u8, 128>, NumTransformFeedbackBuffers> tfb_varying_locs; | ||
| 1255 | |||
| 1256 | INSERT_UNION_PADDING_WORDS(0x298); | ||
| 1199 | 1257 | ||
| 1200 | struct { | 1258 | struct { |
| 1201 | /// Compressed address of a buffer that holds information about bound SSBOs. | 1259 | /// Compressed address of a buffer that holds information about bound SSBOs. |
| @@ -1218,7 +1276,12 @@ public: | |||
| 1218 | }; | 1276 | }; |
| 1219 | std::array<u32, NUM_REGS> reg_array; | 1277 | std::array<u32, NUM_REGS> reg_array; |
| 1220 | }; | 1278 | }; |
| 1221 | } regs{}; | 1279 | }; |
| 1280 | |||
| 1281 | Regs regs{}; | ||
| 1282 | |||
| 1283 | /// Store temporary hw register values, used by some calls to restore state after a operation | ||
| 1284 | Regs shadow_state; | ||
| 1222 | 1285 | ||
| 1223 | static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); | 1286 | static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); |
| 1224 | static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable"); | 1287 | static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable"); |
| @@ -1234,79 +1297,6 @@ public: | |||
| 1234 | 1297 | ||
| 1235 | State state{}; | 1298 | State state{}; |
| 1236 | 1299 | ||
| 1237 | struct DirtyRegs { | ||
| 1238 | static constexpr std::size_t NUM_REGS = 256; | ||
| 1239 | static_assert(NUM_REGS - 1 <= std::numeric_limits<u8>::max()); | ||
| 1240 | |||
| 1241 | union { | ||
| 1242 | struct { | ||
| 1243 | bool null_dirty; | ||
| 1244 | |||
| 1245 | // Vertex Attributes | ||
| 1246 | bool vertex_attrib_format; | ||
| 1247 | |||
| 1248 | // Vertex Arrays | ||
| 1249 | std::array<bool, 32> vertex_array; | ||
| 1250 | |||
| 1251 | bool vertex_array_buffers; | ||
| 1252 | |||
| 1253 | // Vertex Instances | ||
| 1254 | std::array<bool, 32> vertex_instance; | ||
| 1255 | |||
| 1256 | bool vertex_instances; | ||
| 1257 | |||
| 1258 | // Render Targets | ||
| 1259 | std::array<bool, 8> render_target; | ||
| 1260 | bool depth_buffer; | ||
| 1261 | |||
| 1262 | bool render_settings; | ||
| 1263 | |||
| 1264 | // Shaders | ||
| 1265 | bool shaders; | ||
| 1266 | |||
| 1267 | // Rasterizer State | ||
| 1268 | bool viewport; | ||
| 1269 | bool clip_coefficient; | ||
| 1270 | bool cull_mode; | ||
| 1271 | bool primitive_restart; | ||
| 1272 | bool depth_test; | ||
| 1273 | bool stencil_test; | ||
| 1274 | bool blend_state; | ||
| 1275 | bool scissor_test; | ||
| 1276 | bool transform_feedback; | ||
| 1277 | bool color_mask; | ||
| 1278 | bool polygon_offset; | ||
| 1279 | bool depth_bounds_values; | ||
| 1280 | |||
| 1281 | // Complementary | ||
| 1282 | bool viewport_transform; | ||
| 1283 | bool screen_y_control; | ||
| 1284 | |||
| 1285 | bool memory_general; | ||
| 1286 | }; | ||
| 1287 | std::array<bool, NUM_REGS> regs; | ||
| 1288 | }; | ||
| 1289 | |||
| 1290 | void ResetVertexArrays() { | ||
| 1291 | vertex_array.fill(true); | ||
| 1292 | vertex_array_buffers = true; | ||
| 1293 | } | ||
| 1294 | |||
| 1295 | void ResetRenderTargets() { | ||
| 1296 | depth_buffer = true; | ||
| 1297 | render_target.fill(true); | ||
| 1298 | render_settings = true; | ||
| 1299 | } | ||
| 1300 | |||
| 1301 | void OnMemoryWrite() { | ||
| 1302 | shaders = true; | ||
| 1303 | memory_general = true; | ||
| 1304 | ResetRenderTargets(); | ||
| 1305 | ResetVertexArrays(); | ||
| 1306 | } | ||
| 1307 | |||
| 1308 | } dirty{}; | ||
| 1309 | |||
| 1310 | /// Reads a register value located at the input method address | 1300 | /// Reads a register value located at the input method address |
| 1311 | u32 GetRegisterValue(u32 method) const; | 1301 | u32 GetRegisterValue(u32 method) const; |
| 1312 | 1302 | ||
| @@ -1352,6 +1342,11 @@ public: | |||
| 1352 | return execute_on; | 1342 | return execute_on; |
| 1353 | } | 1343 | } |
| 1354 | 1344 | ||
| 1345 | /// Notify a memory write has happened. | ||
| 1346 | void OnMemoryWrite() { | ||
| 1347 | dirty.flags |= dirty.on_write_stores; | ||
| 1348 | } | ||
| 1349 | |||
| 1355 | enum class MMEDrawMode : u32 { | 1350 | enum class MMEDrawMode : u32 { |
| 1356 | Undefined, | 1351 | Undefined, |
| 1357 | Array, | 1352 | Array, |
| @@ -1367,6 +1362,16 @@ public: | |||
| 1367 | u32 gl_end_count{}; | 1362 | u32 gl_end_count{}; |
| 1368 | } mme_draw; | 1363 | } mme_draw; |
| 1369 | 1364 | ||
| 1365 | struct DirtyState { | ||
| 1366 | using Flags = std::bitset<std::numeric_limits<u8>::max()>; | ||
| 1367 | using Table = std::array<u8, Regs::NUM_REGS>; | ||
| 1368 | using Tables = std::array<Table, 2>; | ||
| 1369 | |||
| 1370 | Flags flags; | ||
| 1371 | Flags on_write_stores; | ||
| 1372 | Tables tables{}; | ||
| 1373 | } dirty; | ||
| 1374 | |||
| 1370 | private: | 1375 | private: |
| 1371 | void InitializeRegisterDefaults(); | 1376 | void InitializeRegisterDefaults(); |
| 1372 | 1377 | ||
| @@ -1413,8 +1418,6 @@ private: | |||
| 1413 | /// Retrieves information about a specific TSC entry from the TSC buffer. | 1418 | /// Retrieves information about a specific TSC entry from the TSC buffer. |
| 1414 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; | 1419 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; |
| 1415 | 1420 | ||
| 1416 | void InitDirtySettings(); | ||
| 1417 | |||
| 1418 | /** | 1421 | /** |
| 1419 | * Call a macro on this engine. | 1422 | * Call a macro on this engine. |
| 1420 | * @param method Method to call | 1423 | * @param method Method to call |
| @@ -1473,6 +1476,7 @@ private: | |||
| 1473 | "Field " #field_name " has invalid position") | 1476 | "Field " #field_name " has invalid position") |
| 1474 | 1477 | ||
| 1475 | ASSERT_REG_POSITION(macros, 0x45); | 1478 | ASSERT_REG_POSITION(macros, 0x45); |
| 1479 | ASSERT_REG_POSITION(shadow_ram_control, 0x49); | ||
| 1476 | ASSERT_REG_POSITION(upload, 0x60); | 1480 | ASSERT_REG_POSITION(upload, 0x60); |
| 1477 | ASSERT_REG_POSITION(exec_upload, 0x6C); | 1481 | ASSERT_REG_POSITION(exec_upload, 0x6C); |
| 1478 | ASSERT_REG_POSITION(data_upload, 0x6D); | 1482 | ASSERT_REG_POSITION(data_upload, 0x6D); |
| @@ -1481,6 +1485,8 @@ ASSERT_REG_POSITION(tess_mode, 0xC8); | |||
| 1481 | ASSERT_REG_POSITION(tess_level_outer, 0xC9); | 1485 | ASSERT_REG_POSITION(tess_level_outer, 0xC9); |
| 1482 | ASSERT_REG_POSITION(tess_level_inner, 0xCD); | 1486 | ASSERT_REG_POSITION(tess_level_inner, 0xCD); |
| 1483 | ASSERT_REG_POSITION(rasterize_enable, 0xDF); | 1487 | ASSERT_REG_POSITION(rasterize_enable, 0xDF); |
| 1488 | ASSERT_REG_POSITION(tfb_bindings, 0xE0); | ||
| 1489 | ASSERT_REG_POSITION(tfb_layouts, 0x1C0); | ||
| 1484 | ASSERT_REG_POSITION(tfb_enabled, 0x1D1); | 1490 | ASSERT_REG_POSITION(tfb_enabled, 0x1D1); |
| 1485 | ASSERT_REG_POSITION(rt, 0x200); | 1491 | ASSERT_REG_POSITION(rt, 0x200); |
| 1486 | ASSERT_REG_POSITION(viewport_transform, 0x280); | 1492 | ASSERT_REG_POSITION(viewport_transform, 0x280); |
| @@ -1490,6 +1496,8 @@ ASSERT_REG_POSITION(depth_mode, 0x35F); | |||
| 1490 | ASSERT_REG_POSITION(clear_color[0], 0x360); | 1496 | ASSERT_REG_POSITION(clear_color[0], 0x360); |
| 1491 | ASSERT_REG_POSITION(clear_depth, 0x364); | 1497 | ASSERT_REG_POSITION(clear_depth, 0x364); |
| 1492 | ASSERT_REG_POSITION(clear_stencil, 0x368); | 1498 | ASSERT_REG_POSITION(clear_stencil, 0x368); |
| 1499 | ASSERT_REG_POSITION(polygon_mode_front, 0x36B); | ||
| 1500 | ASSERT_REG_POSITION(polygon_mode_back, 0x36C); | ||
| 1493 | ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370); | 1501 | ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370); |
| 1494 | ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); | 1502 | ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); |
| 1495 | ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); | 1503 | ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); |
| @@ -1503,10 +1511,12 @@ ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); | |||
| 1503 | ASSERT_REG_POSITION(depth_bounds, 0x3E7); | 1511 | ASSERT_REG_POSITION(depth_bounds, 0x3E7); |
| 1504 | ASSERT_REG_POSITION(zeta, 0x3F8); | 1512 | ASSERT_REG_POSITION(zeta, 0x3F8); |
| 1505 | ASSERT_REG_POSITION(clear_flags, 0x43E); | 1513 | ASSERT_REG_POSITION(clear_flags, 0x43E); |
| 1514 | ASSERT_REG_POSITION(fill_rectangle, 0x44F); | ||
| 1506 | ASSERT_REG_POSITION(vertex_attrib_format, 0x458); | 1515 | ASSERT_REG_POSITION(vertex_attrib_format, 0x458); |
| 1507 | ASSERT_REG_POSITION(rt_control, 0x487); | 1516 | ASSERT_REG_POSITION(rt_control, 0x487); |
| 1508 | ASSERT_REG_POSITION(zeta_width, 0x48a); | 1517 | ASSERT_REG_POSITION(zeta_width, 0x48a); |
| 1509 | ASSERT_REG_POSITION(zeta_height, 0x48b); | 1518 | ASSERT_REG_POSITION(zeta_height, 0x48b); |
| 1519 | ASSERT_REG_POSITION(zeta_layers, 0x48c); | ||
| 1510 | ASSERT_REG_POSITION(depth_test_enable, 0x4B3); | 1520 | ASSERT_REG_POSITION(depth_test_enable, 0x4B3); |
| 1511 | ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); | 1521 | ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); |
| 1512 | ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); | 1522 | ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); |
| @@ -1556,7 +1566,9 @@ ASSERT_REG_POSITION(index_array, 0x5F2); | |||
| 1556 | ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); | 1566 | ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); |
| 1557 | ASSERT_REG_POSITION(instanced_arrays, 0x620); | 1567 | ASSERT_REG_POSITION(instanced_arrays, 0x620); |
| 1558 | ASSERT_REG_POSITION(vp_point_size, 0x644); | 1568 | ASSERT_REG_POSITION(vp_point_size, 0x644); |
| 1559 | ASSERT_REG_POSITION(cull, 0x646); | 1569 | ASSERT_REG_POSITION(cull_test_enabled, 0x646); |
| 1570 | ASSERT_REG_POSITION(front_face, 0x647); | ||
| 1571 | ASSERT_REG_POSITION(cull_face, 0x648); | ||
| 1560 | ASSERT_REG_POSITION(pixel_center_integer, 0x649); | 1572 | ASSERT_REG_POSITION(pixel_center_integer, 0x649); |
| 1561 | ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); | 1573 | ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); |
| 1562 | ASSERT_REG_POSITION(view_volume_clip_control, 0x64F); | 1574 | ASSERT_REG_POSITION(view_volume_clip_control, 0x64F); |
| @@ -1573,6 +1585,7 @@ ASSERT_REG_POSITION(firmware, 0x8C0); | |||
| 1573 | ASSERT_REG_POSITION(const_buffer, 0x8E0); | 1585 | ASSERT_REG_POSITION(const_buffer, 0x8E0); |
| 1574 | ASSERT_REG_POSITION(cb_bind[0], 0x904); | 1586 | ASSERT_REG_POSITION(cb_bind[0], 0x904); |
| 1575 | ASSERT_REG_POSITION(tex_cb_index, 0x982); | 1587 | ASSERT_REG_POSITION(tex_cb_index, 0x982); |
| 1588 | ASSERT_REG_POSITION(tfb_varying_locs, 0xA00); | ||
| 1576 | ASSERT_REG_POSITION(ssbo_info, 0xD18); | 1589 | ASSERT_REG_POSITION(ssbo_info, 0xD18); |
| 1577 | ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A); | 1590 | ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A); |
| 1578 | ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F); | 1591 | ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index ad8453c5f..c2610f992 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -57,7 +57,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 57 | } | 57 | } |
| 58 | 58 | ||
| 59 | // All copies here update the main memory, so mark all rasterizer states as invalid. | 59 | // All copies here update the main memory, so mark all rasterizer states as invalid. |
| 60 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); | 60 | system.GPU().Maxwell3D().OnMemoryWrite(); |
| 61 | 61 | ||
| 62 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { | 62 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { |
| 63 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D | 63 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index c9bc83cd7..49dc5abe0 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -82,6 +82,10 @@ union Attribute { | |||
| 82 | Position = 7, | 82 | Position = 7, |
| 83 | Attribute_0 = 8, | 83 | Attribute_0 = 8, |
| 84 | Attribute_31 = 39, | 84 | Attribute_31 = 39, |
| 85 | FrontColor = 40, | ||
| 86 | FrontSecondaryColor = 41, | ||
| 87 | BackColor = 42, | ||
| 88 | BackSecondaryColor = 43, | ||
| 85 | ClipDistances0123 = 44, | 89 | ClipDistances0123 = 44, |
| 86 | ClipDistances4567 = 45, | 90 | ClipDistances4567 = 45, |
| 87 | PointCoord = 46, | 91 | PointCoord = 46, |
| @@ -89,6 +93,8 @@ union Attribute { | |||
| 89 | // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval | 93 | // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval |
| 90 | // shader. | 94 | // shader. |
| 91 | TessCoordInstanceIDVertexID = 47, | 95 | TessCoordInstanceIDVertexID = 47, |
| 96 | TexCoord_0 = 48, | ||
| 97 | TexCoord_7 = 55, | ||
| 92 | // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment | 98 | // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment |
| 93 | // shader. It is unknown what the other values contain. | 99 | // shader. It is unknown what the other values contain. |
| 94 | FrontFacing = 63, | 100 | FrontFacing = 63, |
| @@ -911,14 +917,9 @@ union Instruction { | |||
| 911 | } fadd32i; | 917 | } fadd32i; |
| 912 | 918 | ||
| 913 | union { | 919 | union { |
| 914 | BitField<20, 8, u64> shift_position; | 920 | BitField<40, 1, u64> brev; |
| 915 | BitField<28, 8, u64> shift_length; | 921 | BitField<47, 1, u64> rd_cc; |
| 916 | BitField<48, 1, u64> negate_b; | 922 | BitField<48, 1, u64> is_signed; |
| 917 | BitField<49, 1, u64> negate_a; | ||
| 918 | |||
| 919 | u64 GetLeftShiftValue() const { | ||
| 920 | return 32 - (shift_position + shift_length); | ||
| 921 | } | ||
| 922 | } bfe; | 923 | } bfe; |
| 923 | 924 | ||
| 924 | union { | 925 | union { |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 7d7137109..e8f763ce9 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -140,71 +140,6 @@ void GPU::FlushCommands() { | |||
| 140 | renderer.Rasterizer().FlushCommands(); | 140 | renderer.Rasterizer().FlushCommands(); |
| 141 | } | 141 | } |
| 142 | 142 | ||
| 143 | u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { | ||
| 144 | ASSERT(format != RenderTargetFormat::NONE); | ||
| 145 | |||
| 146 | switch (format) { | ||
| 147 | case RenderTargetFormat::RGBA32_FLOAT: | ||
| 148 | case RenderTargetFormat::RGBA32_UINT: | ||
| 149 | return 16; | ||
| 150 | case RenderTargetFormat::RGBA16_UINT: | ||
| 151 | case RenderTargetFormat::RGBA16_UNORM: | ||
| 152 | case RenderTargetFormat::RGBA16_FLOAT: | ||
| 153 | case RenderTargetFormat::RGBX16_FLOAT: | ||
| 154 | case RenderTargetFormat::RG32_FLOAT: | ||
| 155 | case RenderTargetFormat::RG32_UINT: | ||
| 156 | return 8; | ||
| 157 | case RenderTargetFormat::RGBA8_UNORM: | ||
| 158 | case RenderTargetFormat::RGBA8_SNORM: | ||
| 159 | case RenderTargetFormat::RGBA8_SRGB: | ||
| 160 | case RenderTargetFormat::RGBA8_UINT: | ||
| 161 | case RenderTargetFormat::RGB10_A2_UNORM: | ||
| 162 | case RenderTargetFormat::BGRA8_UNORM: | ||
| 163 | case RenderTargetFormat::BGRA8_SRGB: | ||
| 164 | case RenderTargetFormat::RG16_UNORM: | ||
| 165 | case RenderTargetFormat::RG16_SNORM: | ||
| 166 | case RenderTargetFormat::RG16_UINT: | ||
| 167 | case RenderTargetFormat::RG16_SINT: | ||
| 168 | case RenderTargetFormat::RG16_FLOAT: | ||
| 169 | case RenderTargetFormat::R32_FLOAT: | ||
| 170 | case RenderTargetFormat::R11G11B10_FLOAT: | ||
| 171 | case RenderTargetFormat::R32_UINT: | ||
| 172 | return 4; | ||
| 173 | case RenderTargetFormat::R16_UNORM: | ||
| 174 | case RenderTargetFormat::R16_SNORM: | ||
| 175 | case RenderTargetFormat::R16_UINT: | ||
| 176 | case RenderTargetFormat::R16_SINT: | ||
| 177 | case RenderTargetFormat::R16_FLOAT: | ||
| 178 | case RenderTargetFormat::RG8_UNORM: | ||
| 179 | case RenderTargetFormat::RG8_SNORM: | ||
| 180 | return 2; | ||
| 181 | case RenderTargetFormat::R8_UNORM: | ||
| 182 | case RenderTargetFormat::R8_UINT: | ||
| 183 | return 1; | ||
| 184 | default: | ||
| 185 | UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format)); | ||
| 186 | return 1; | ||
| 187 | } | ||
| 188 | } | ||
| 189 | |||
| 190 | u32 DepthFormatBytesPerPixel(DepthFormat format) { | ||
| 191 | switch (format) { | ||
| 192 | case DepthFormat::Z32_S8_X24_FLOAT: | ||
| 193 | return 8; | ||
| 194 | case DepthFormat::Z32_FLOAT: | ||
| 195 | case DepthFormat::S8_Z24_UNORM: | ||
| 196 | case DepthFormat::Z24_X8_UNORM: | ||
| 197 | case DepthFormat::Z24_S8_UNORM: | ||
| 198 | case DepthFormat::Z24_C8_UNORM: | ||
| 199 | return 4; | ||
| 200 | case DepthFormat::Z16_UNORM: | ||
| 201 | return 2; | ||
| 202 | default: | ||
| 203 | UNIMPLEMENTED_MSG("Unimplemented Depth format {}", static_cast<u32>(format)); | ||
| 204 | return 1; | ||
| 205 | } | ||
| 206 | } | ||
| 207 | |||
| 208 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence | 143 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence |
| 209 | // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. | 144 | // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. |
| 210 | // So the values you see in docs might be multiplied by 4. | 145 | // So the values you see in docs might be multiplied by 4. |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 07727210c..64acb17df 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -39,6 +39,7 @@ enum class RenderTargetFormat : u32 { | |||
| 39 | RGBA32_FLOAT = 0xC0, | 39 | RGBA32_FLOAT = 0xC0, |
| 40 | RGBA32_UINT = 0xC2, | 40 | RGBA32_UINT = 0xC2, |
| 41 | RGBA16_UNORM = 0xC6, | 41 | RGBA16_UNORM = 0xC6, |
| 42 | RGBA16_SNORM = 0xC7, | ||
| 42 | RGBA16_UINT = 0xC9, | 43 | RGBA16_UINT = 0xC9, |
| 43 | RGBA16_FLOAT = 0xCA, | 44 | RGBA16_FLOAT = 0xCA, |
| 44 | RG32_FLOAT = 0xCB, | 45 | RG32_FLOAT = 0xCB, |
| @@ -57,6 +58,7 @@ enum class RenderTargetFormat : u32 { | |||
| 57 | RG16_UINT = 0xDD, | 58 | RG16_UINT = 0xDD, |
| 58 | RG16_FLOAT = 0xDE, | 59 | RG16_FLOAT = 0xDE, |
| 59 | R11G11B10_FLOAT = 0xE0, | 60 | R11G11B10_FLOAT = 0xE0, |
| 61 | R32_SINT = 0xE3, | ||
| 60 | R32_UINT = 0xE4, | 62 | R32_UINT = 0xE4, |
| 61 | R32_FLOAT = 0xE5, | 63 | R32_FLOAT = 0xE5, |
| 62 | B5G6R5_UNORM = 0xE8, | 64 | B5G6R5_UNORM = 0xE8, |
| @@ -82,12 +84,6 @@ enum class DepthFormat : u32 { | |||
| 82 | Z32_S8_X24_FLOAT = 0x19, | 84 | Z32_S8_X24_FLOAT = 0x19, |
| 83 | }; | 85 | }; |
| 84 | 86 | ||
| 85 | /// Returns the number of bytes per pixel of each rendertarget format. | ||
| 86 | u32 RenderTargetBytesPerPixel(RenderTargetFormat format); | ||
| 87 | |||
| 88 | /// Returns the number of bytes per pixel of each depth format. | ||
| 89 | u32 DepthFormatBytesPerPixel(DepthFormat format); | ||
| 90 | |||
| 91 | struct CommandListHeader; | 87 | struct CommandListHeader; |
| 92 | class DebugContext; | 88 | class DebugContext; |
| 93 | 89 | ||
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 2cdf1aa7f..b1088af3d 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/frontend/scope_acquire_window_context.h" | 8 | #include "core/frontend/scope_acquire_context.h" |
| 9 | #include "video_core/dma_pusher.h" | 9 | #include "video_core/dma_pusher.h" |
| 10 | #include "video_core/gpu.h" | 10 | #include "video_core/gpu.h" |
| 11 | #include "video_core/gpu_thread.h" | 11 | #include "video_core/gpu_thread.h" |
| @@ -27,7 +27,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p | |||
| 27 | return; | 27 | return; |
| 28 | } | 28 | } |
| 29 | 29 | ||
| 30 | Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()}; | 30 | Core::Frontend::ScopeAcquireContext acquire_context{renderer.GetRenderWindow()}; |
| 31 | 31 | ||
| 32 | CommandDataContainer next; | 32 | CommandDataContainer next; |
| 33 | while (state.is_running) { | 33 | while (state.is_running) { |
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp index 6adef459e..f058f2744 100644 --- a/src/video_core/guest_driver.cpp +++ b/src/video_core/guest_driver.cpp | |||
| @@ -4,13 +4,15 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <limits> | 6 | #include <limits> |
| 7 | #include <vector> | ||
| 7 | 8 | ||
| 9 | #include "common/common_types.h" | ||
| 8 | #include "video_core/guest_driver.h" | 10 | #include "video_core/guest_driver.h" |
| 9 | 11 | ||
| 10 | namespace VideoCore { | 12 | namespace VideoCore { |
| 11 | 13 | ||
| 12 | void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) { | 14 | void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) { |
| 13 | if (texture_handler_size_deduced) { | 15 | if (texture_handler_size) { |
| 14 | return; | 16 | return; |
| 15 | } | 17 | } |
| 16 | const std::size_t size = bound_offsets.size(); | 18 | const std::size_t size = bound_offsets.size(); |
| @@ -29,7 +31,6 @@ void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offse | |||
| 29 | if (min_val > 2) { | 31 | if (min_val > 2) { |
| 30 | return; | 32 | return; |
| 31 | } | 33 | } |
| 32 | texture_handler_size_deduced = true; | ||
| 33 | texture_handler_size = min_texture_handler_size * min_val; | 34 | texture_handler_size = min_texture_handler_size * min_val; |
| 34 | } | 35 | } |
| 35 | 36 | ||
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h index fc1917347..99450777e 100644 --- a/src/video_core/guest_driver.h +++ b/src/video_core/guest_driver.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <optional> | ||
| 7 | #include <vector> | 8 | #include <vector> |
| 8 | 9 | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| @@ -17,25 +18,29 @@ namespace VideoCore { | |||
| 17 | */ | 18 | */ |
| 18 | class GuestDriverProfile { | 19 | class GuestDriverProfile { |
| 19 | public: | 20 | public: |
| 20 | void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets); | 21 | explicit GuestDriverProfile() = default; |
| 22 | explicit GuestDriverProfile(std::optional<u32> texture_handler_size) | ||
| 23 | : texture_handler_size{texture_handler_size} {} | ||
| 24 | |||
| 25 | void DeduceTextureHandlerSize(std::vector<u32> bound_offsets); | ||
| 21 | 26 | ||
| 22 | u32 GetTextureHandlerSize() const { | 27 | u32 GetTextureHandlerSize() const { |
| 23 | return texture_handler_size; | 28 | return texture_handler_size.value_or(default_texture_handler_size); |
| 24 | } | 29 | } |
| 25 | 30 | ||
| 26 | bool TextureHandlerSizeKnown() const { | 31 | bool IsTextureHandlerSizeKnown() const { |
| 27 | return texture_handler_size_deduced; | 32 | return texture_handler_size.has_value(); |
| 28 | } | 33 | } |
| 29 | 34 | ||
| 30 | private: | 35 | private: |
| 31 | // Minimum size of texture handler any driver can use. | 36 | // Minimum size of texture handler any driver can use. |
| 32 | static constexpr u32 min_texture_handler_size = 4; | 37 | static constexpr u32 min_texture_handler_size = 4; |
| 33 | // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily | 38 | |
| 34 | // use 4 bytes instead. Thus, certain drivers may squish the size. | 39 | // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead. |
| 40 | // Thus, certain drivers may squish the size. | ||
| 35 | static constexpr u32 default_texture_handler_size = 8; | 41 | static constexpr u32 default_texture_handler_size = 8; |
| 36 | 42 | ||
| 37 | u32 texture_handler_size = default_texture_handler_size; | 43 | std::optional<u32> texture_handler_size = default_texture_handler_size; |
| 38 | bool texture_handler_size_deduced = false; | ||
| 39 | }; | 44 | }; |
| 40 | 45 | ||
| 41 | } // namespace VideoCore | 46 | } // namespace VideoCore |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index aea010087..073bdb491 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -174,7 +174,7 @@ private: | |||
| 174 | /// End of address space, based on address space in bits. | 174 | /// End of address space, based on address space in bits. |
| 175 | static constexpr GPUVAddr address_space_end{1ULL << address_space_width}; | 175 | static constexpr GPUVAddr address_space_end{1ULL << address_space_width}; |
| 176 | 176 | ||
| 177 | Common::PageTable page_table{page_bits}; | 177 | Common::BackingPageTable page_table{page_bits}; |
| 178 | VMAMap vma_map; | 178 | VMAMap vma_map; |
| 179 | VideoCore::RasterizerInterface& rasterizer; | 179 | VideoCore::RasterizerInterface& rasterizer; |
| 180 | 180 | ||
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 2f2fe6859..6d522c318 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp | |||
| @@ -51,6 +51,7 @@ static constexpr ConversionArray morton_to_linear_fns = { | |||
| 51 | MortonCopy<true, PixelFormat::R8UI>, | 51 | MortonCopy<true, PixelFormat::R8UI>, |
| 52 | MortonCopy<true, PixelFormat::RGBA16F>, | 52 | MortonCopy<true, PixelFormat::RGBA16F>, |
| 53 | MortonCopy<true, PixelFormat::RGBA16U>, | 53 | MortonCopy<true, PixelFormat::RGBA16U>, |
| 54 | MortonCopy<true, PixelFormat::RGBA16S>, | ||
| 54 | MortonCopy<true, PixelFormat::RGBA16UI>, | 55 | MortonCopy<true, PixelFormat::RGBA16UI>, |
| 55 | MortonCopy<true, PixelFormat::R11FG11FB10F>, | 56 | MortonCopy<true, PixelFormat::R11FG11FB10F>, |
| 56 | MortonCopy<true, PixelFormat::RGBA32UI>, | 57 | MortonCopy<true, PixelFormat::RGBA32UI>, |
| @@ -85,6 +86,7 @@ static constexpr ConversionArray morton_to_linear_fns = { | |||
| 85 | MortonCopy<true, PixelFormat::RG32UI>, | 86 | MortonCopy<true, PixelFormat::RG32UI>, |
| 86 | MortonCopy<true, PixelFormat::RGBX16F>, | 87 | MortonCopy<true, PixelFormat::RGBX16F>, |
| 87 | MortonCopy<true, PixelFormat::R32UI>, | 88 | MortonCopy<true, PixelFormat::R32UI>, |
| 89 | MortonCopy<true, PixelFormat::R32I>, | ||
| 88 | MortonCopy<true, PixelFormat::ASTC_2D_8X8>, | 90 | MortonCopy<true, PixelFormat::ASTC_2D_8X8>, |
| 89 | MortonCopy<true, PixelFormat::ASTC_2D_8X5>, | 91 | MortonCopy<true, PixelFormat::ASTC_2D_8X5>, |
| 90 | MortonCopy<true, PixelFormat::ASTC_2D_5X4>, | 92 | MortonCopy<true, PixelFormat::ASTC_2D_5X4>, |
| @@ -130,6 +132,7 @@ static constexpr ConversionArray linear_to_morton_fns = { | |||
| 130 | MortonCopy<false, PixelFormat::R8U>, | 132 | MortonCopy<false, PixelFormat::R8U>, |
| 131 | MortonCopy<false, PixelFormat::R8UI>, | 133 | MortonCopy<false, PixelFormat::R8UI>, |
| 132 | MortonCopy<false, PixelFormat::RGBA16F>, | 134 | MortonCopy<false, PixelFormat::RGBA16F>, |
| 135 | MortonCopy<false, PixelFormat::RGBA16S>, | ||
| 133 | MortonCopy<false, PixelFormat::RGBA16U>, | 136 | MortonCopy<false, PixelFormat::RGBA16U>, |
| 134 | MortonCopy<false, PixelFormat::RGBA16UI>, | 137 | MortonCopy<false, PixelFormat::RGBA16UI>, |
| 135 | MortonCopy<false, PixelFormat::R11FG11FB10F>, | 138 | MortonCopy<false, PixelFormat::R11FG11FB10F>, |
| @@ -166,6 +169,7 @@ static constexpr ConversionArray linear_to_morton_fns = { | |||
| 166 | MortonCopy<false, PixelFormat::RG32UI>, | 169 | MortonCopy<false, PixelFormat::RG32UI>, |
| 167 | MortonCopy<false, PixelFormat::RGBX16F>, | 170 | MortonCopy<false, PixelFormat::RGBX16F>, |
| 168 | MortonCopy<false, PixelFormat::R32UI>, | 171 | MortonCopy<false, PixelFormat::R32UI>, |
| 172 | MortonCopy<false, PixelFormat::R32I>, | ||
| 169 | nullptr, | 173 | nullptr, |
| 170 | nullptr, | 174 | nullptr, |
| 171 | nullptr, | 175 | nullptr, |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index f18eaf4bc..1a68e3caa 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -25,7 +25,6 @@ constexpr std::size_t NumQueryTypes = 1; | |||
| 25 | 25 | ||
| 26 | enum class LoadCallbackStage { | 26 | enum class LoadCallbackStage { |
| 27 | Prepare, | 27 | Prepare, |
| 28 | Decompile, | ||
| 29 | Build, | 28 | Build, |
| 30 | Complete, | 29 | Complete, |
| 31 | }; | 30 | }; |
| @@ -89,6 +88,9 @@ public: | |||
| 89 | virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, | 88 | virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, |
| 90 | const DiskResourceLoadCallback& callback = {}) {} | 89 | const DiskResourceLoadCallback& callback = {}) {} |
| 91 | 90 | ||
| 91 | /// Initializes renderer dirty flags | ||
| 92 | virtual void SetupDirtyFlags() {} | ||
| 93 | |||
| 92 | /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. | 94 | /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. |
| 93 | GuestDriverProfile& AccessGuestDriverProfile() { | 95 | GuestDriverProfile& AccessGuestDriverProfile() { |
| 94 | return guest_driver_profile; | 96 | return guest_driver_profile; |
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index af1bebc4f..5ec99a126 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h | |||
| @@ -35,15 +35,19 @@ public: | |||
| 35 | explicit RendererBase(Core::Frontend::EmuWindow& window); | 35 | explicit RendererBase(Core::Frontend::EmuWindow& window); |
| 36 | virtual ~RendererBase(); | 36 | virtual ~RendererBase(); |
| 37 | 37 | ||
| 38 | /// Swap buffers (render frame) | ||
| 39 | virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; | ||
| 40 | |||
| 41 | /// Initialize the renderer | 38 | /// Initialize the renderer |
| 42 | virtual bool Init() = 0; | 39 | virtual bool Init() = 0; |
| 43 | 40 | ||
| 44 | /// Shutdown the renderer | 41 | /// Shutdown the renderer |
| 45 | virtual void ShutDown() = 0; | 42 | virtual void ShutDown() = 0; |
| 46 | 43 | ||
| 44 | /// Finalize rendering the guest frame and draw into the presentation texture | ||
| 45 | virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; | ||
| 46 | |||
| 47 | /// Draws the latest frame to the window waiting timeout_ms for a frame to arrive (Renderer | ||
| 48 | /// specific implementation) | ||
| 49 | virtual void TryPresent(int timeout_ms) = 0; | ||
| 50 | |||
| 47 | // Getter/setter functions: | 51 | // Getter/setter functions: |
| 48 | // ------------------------ | 52 | // ------------------------ |
| 49 | 53 | ||
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp index 874ed3c6e..b8a512cb6 100644 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp | |||
| @@ -11,7 +11,6 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" | 13 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" |
| 14 | #include "video_core/renderer_opengl/gl_state.h" | ||
| 15 | 14 | ||
| 16 | namespace OpenGL { | 15 | namespace OpenGL { |
| 17 | 16 | ||
| @@ -36,8 +35,7 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK | |||
| 36 | framebuffer.Create(); | 35 | framebuffer.Create(); |
| 37 | 36 | ||
| 38 | // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. | 37 | // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. |
| 39 | local_state.draw.draw_framebuffer = framebuffer.handle; | 38 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); |
| 40 | local_state.ApplyFramebufferState(); | ||
| 41 | 39 | ||
| 42 | if (key.zeta) { | 40 | if (key.zeta) { |
| 43 | const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil; | 41 | const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil; |
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h index 02ec80ae9..8f698fee0 100644 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h +++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.h | |||
| @@ -13,7 +13,6 @@ | |||
| 13 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "video_core/engines/maxwell_3d.h" | 14 | #include "video_core/engines/maxwell_3d.h" |
| 15 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 15 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 16 | #include "video_core/renderer_opengl/gl_state.h" | ||
| 17 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 16 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 18 | 17 | ||
| 19 | namespace OpenGL { | 18 | namespace OpenGL { |
| @@ -63,7 +62,6 @@ public: | |||
| 63 | private: | 62 | private: |
| 64 | OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); | 63 | OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); |
| 65 | 64 | ||
| 66 | OpenGLState local_state; | ||
| 67 | std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache; | 65 | std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache; |
| 68 | }; | 66 | }; |
| 69 | 67 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e1965fb21..826eee7df 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -28,7 +28,6 @@ | |||
| 28 | #include "video_core/renderer_opengl/gl_query_cache.h" | 28 | #include "video_core/renderer_opengl/gl_query_cache.h" |
| 29 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 29 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 31 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 32 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | 31 | #include "video_core/renderer_opengl/maxwell_to_gl.h" |
| 33 | #include "video_core/renderer_opengl/renderer_opengl.h" | 32 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 34 | 33 | ||
| @@ -36,6 +35,7 @@ namespace OpenGL { | |||
| 36 | 35 | ||
| 37 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 36 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 38 | 37 | ||
| 38 | using Tegra::Engines::ShaderType; | ||
| 39 | using VideoCore::Surface::PixelFormat; | 39 | using VideoCore::Surface::PixelFormat; |
| 40 | using VideoCore::Surface::SurfaceTarget; | 40 | using VideoCore::Surface::SurfaceTarget; |
| 41 | using VideoCore::Surface::SurfaceType; | 41 | using VideoCore::Surface::SurfaceType; |
| @@ -54,10 +54,11 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255 | |||
| 54 | 54 | ||
| 55 | namespace { | 55 | namespace { |
| 56 | 56 | ||
| 57 | constexpr std::size_t NumSupportedVertexAttributes = 16; | ||
| 58 | |||
| 57 | template <typename Engine, typename Entry> | 59 | template <typename Engine, typename Entry> |
| 58 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | 60 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, |
| 59 | Tegra::Engines::ShaderType shader_type, | 61 | ShaderType shader_type, std::size_t index = 0) { |
| 60 | std::size_t index = 0) { | ||
| 61 | if (entry.IsBindless()) { | 62 | if (entry.IsBindless()) { |
| 62 | const Tegra::Texture::TextureHandle tex_handle = | 63 | const Tegra::Texture::TextureHandle tex_handle = |
| 63 | engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); | 64 | engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); |
| @@ -74,7 +75,7 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry | |||
| 74 | } | 75 | } |
| 75 | 76 | ||
| 76 | std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | 77 | std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, |
| 77 | const GLShader::ConstBufferEntry& entry) { | 78 | const ConstBufferEntry& entry) { |
| 78 | if (!entry.IsIndirect()) { | 79 | if (!entry.IsIndirect()) { |
| 79 | return entry.GetSize(); | 80 | return entry.GetSize(); |
| 80 | } | 81 | } |
| @@ -88,18 +89,19 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | |||
| 88 | return buffer.size; | 89 | return buffer.size; |
| 89 | } | 90 | } |
| 90 | 91 | ||
| 92 | void oglEnable(GLenum cap, bool state) { | ||
| 93 | (state ? glEnable : glDisable)(cap); | ||
| 94 | } | ||
| 95 | |||
| 91 | } // Anonymous namespace | 96 | } // Anonymous namespace |
| 92 | 97 | ||
| 93 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 98 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| 94 | ScreenInfo& info) | 99 | ScreenInfo& info, GLShader::ProgramManager& program_manager, |
| 95 | : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device}, | 100 | StateTracker& state_tracker) |
| 101 | : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, | ||
| 96 | shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, | 102 | shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, |
| 97 | screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { | 103 | screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker}, |
| 98 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); | 104 | buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { |
| 99 | state.draw.shader_program = 0; | ||
| 100 | state.Apply(); | ||
| 101 | |||
| 102 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); | ||
| 103 | CheckExtensions(); | 105 | CheckExtensions(); |
| 104 | } | 106 | } |
| 105 | 107 | ||
| @@ -113,93 +115,72 @@ void RasterizerOpenGL::CheckExtensions() { | |||
| 113 | } | 115 | } |
| 114 | } | 116 | } |
| 115 | 117 | ||
| 116 | GLuint RasterizerOpenGL::SetupVertexFormat() { | 118 | void RasterizerOpenGL::SetupVertexFormat() { |
| 117 | auto& gpu = system.GPU().Maxwell3D(); | 119 | auto& gpu = system.GPU().Maxwell3D(); |
| 118 | const auto& regs = gpu.regs; | 120 | auto& flags = gpu.dirty.flags; |
| 119 | 121 | if (!flags[Dirty::VertexFormats]) { | |
| 120 | if (!gpu.dirty.vertex_attrib_format) { | 122 | return; |
| 121 | return state.draw.vertex_array; | ||
| 122 | } | 123 | } |
| 123 | gpu.dirty.vertex_attrib_format = false; | 124 | flags[Dirty::VertexFormats] = false; |
| 124 | 125 | ||
| 125 | MICROPROFILE_SCOPE(OpenGL_VAO); | 126 | MICROPROFILE_SCOPE(OpenGL_VAO); |
| 126 | 127 | ||
| 127 | auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format); | 128 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables |
| 128 | auto& vao_entry = iter->second; | 129 | // the first 16 vertex attributes always, as we don't know which ones are actually used until |
| 129 | 130 | // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to | |
| 130 | if (is_cache_miss) { | 131 | // avoid OpenGL errors. |
| 131 | vao_entry.Create(); | 132 | // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't |
| 132 | const GLuint vao = vao_entry.handle; | 133 | // assume every shader uses them all. |
| 133 | 134 | for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { | |
| 134 | // Eventhough we are using DSA to create this vertex array, there is a bug on Intel's blob | 135 | if (!flags[Dirty::VertexFormat0 + index]) { |
| 135 | // that fails to properly create the vertex array if it's not bound even after creating it | 136 | continue; |
| 136 | // with glCreateVertexArrays | ||
| 137 | state.draw.vertex_array = vao; | ||
| 138 | state.ApplyVertexArrayState(); | ||
| 139 | |||
| 140 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. | ||
| 141 | // Enables the first 16 vertex attributes always, as we don't know which ones are actually | ||
| 142 | // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 | ||
| 143 | // for now to avoid OpenGL errors. | ||
| 144 | // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't | ||
| 145 | // assume every shader uses them all. | ||
| 146 | for (u32 index = 0; index < 16; ++index) { | ||
| 147 | const auto& attrib = regs.vertex_attrib_format[index]; | ||
| 148 | |||
| 149 | // Ignore invalid attributes. | ||
| 150 | if (!attrib.IsValid()) | ||
| 151 | continue; | ||
| 152 | |||
| 153 | const auto& buffer = regs.vertex_array[attrib.buffer]; | ||
| 154 | LOG_TRACE(Render_OpenGL, | ||
| 155 | "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", | ||
| 156 | index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), | ||
| 157 | attrib.offset.Value(), attrib.IsNormalized()); | ||
| 158 | |||
| 159 | ASSERT(buffer.IsEnabled()); | ||
| 160 | |||
| 161 | glEnableVertexArrayAttrib(vao, index); | ||
| 162 | if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt || | ||
| 163 | attrib.type == | ||
| 164 | Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) { | ||
| 165 | glVertexArrayAttribIFormat(vao, index, attrib.ComponentCount(), | ||
| 166 | MaxwellToGL::VertexType(attrib), attrib.offset); | ||
| 167 | } else { | ||
| 168 | glVertexArrayAttribFormat( | ||
| 169 | vao, index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), | ||
| 170 | attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); | ||
| 171 | } | ||
| 172 | glVertexArrayAttribBinding(vao, index, attrib.buffer); | ||
| 173 | } | 137 | } |
| 174 | } | 138 | flags[Dirty::VertexFormat0 + index] = false; |
| 139 | |||
| 140 | const auto attrib = gpu.regs.vertex_attrib_format[index]; | ||
| 141 | const auto gl_index = static_cast<GLuint>(index); | ||
| 175 | 142 | ||
| 176 | // Rebinding the VAO invalidates the vertex buffer bindings. | 143 | // Ignore invalid attributes. |
| 177 | gpu.dirty.ResetVertexArrays(); | 144 | if (!attrib.IsValid()) { |
| 145 | glDisableVertexAttribArray(gl_index); | ||
| 146 | continue; | ||
| 147 | } | ||
| 148 | glEnableVertexAttribArray(gl_index); | ||
| 178 | 149 | ||
| 179 | state.draw.vertex_array = vao_entry.handle; | 150 | if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt || |
| 180 | return vao_entry.handle; | 151 | attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) { |
| 152 | glVertexAttribIFormat(gl_index, attrib.ComponentCount(), | ||
| 153 | MaxwellToGL::VertexType(attrib), attrib.offset); | ||
| 154 | } else { | ||
| 155 | glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), | ||
| 156 | attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); | ||
| 157 | } | ||
| 158 | glVertexAttribBinding(gl_index, attrib.buffer); | ||
| 159 | } | ||
| 181 | } | 160 | } |
| 182 | 161 | ||
| 183 | void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | 162 | void RasterizerOpenGL::SetupVertexBuffer() { |
| 184 | auto& gpu = system.GPU().Maxwell3D(); | 163 | auto& gpu = system.GPU().Maxwell3D(); |
| 185 | if (!gpu.dirty.vertex_array_buffers) | 164 | auto& flags = gpu.dirty.flags; |
| 165 | if (!flags[Dirty::VertexBuffers]) { | ||
| 186 | return; | 166 | return; |
| 187 | gpu.dirty.vertex_array_buffers = false; | 167 | } |
| 188 | 168 | flags[Dirty::VertexBuffers] = false; | |
| 189 | const auto& regs = gpu.regs; | ||
| 190 | 169 | ||
| 191 | MICROPROFILE_SCOPE(OpenGL_VB); | 170 | MICROPROFILE_SCOPE(OpenGL_VB); |
| 192 | 171 | ||
| 193 | // Upload all guest vertex arrays sequentially to our buffer | 172 | // Upload all guest vertex arrays sequentially to our buffer |
| 194 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 173 | const auto& regs = gpu.regs; |
| 195 | if (!gpu.dirty.vertex_array[index]) | 174 | for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 175 | if (!flags[Dirty::VertexBuffer0 + index]) { | ||
| 196 | continue; | 176 | continue; |
| 197 | gpu.dirty.vertex_array[index] = false; | 177 | } |
| 198 | gpu.dirty.vertex_instance[index] = false; | 178 | flags[Dirty::VertexBuffer0 + index] = false; |
| 199 | 179 | ||
| 200 | const auto& vertex_array = regs.vertex_array[index]; | 180 | const auto& vertex_array = regs.vertex_array[index]; |
| 201 | if (!vertex_array.IsEnabled()) | 181 | if (!vertex_array.IsEnabled()) { |
| 202 | continue; | 182 | continue; |
| 183 | } | ||
| 203 | 184 | ||
| 204 | const GPUVAddr start = vertex_array.StartAddress(); | 185 | const GPUVAddr start = vertex_array.StartAddress(); |
| 205 | const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | 186 | const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); |
| @@ -209,42 +190,30 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 209 | const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); | 190 | const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); |
| 210 | 191 | ||
| 211 | // Bind the vertex array to the buffer at the current offset. | 192 | // Bind the vertex array to the buffer at the current offset. |
| 212 | vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset, | 193 | vertex_array_pushbuffer.SetVertexBuffer(static_cast<GLuint>(index), vertex_buffer, |
| 213 | vertex_array.stride); | 194 | vertex_buffer_offset, vertex_array.stride); |
| 214 | |||
| 215 | if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { | ||
| 216 | // Enable vertex buffer instancing with the specified divisor. | ||
| 217 | glVertexArrayBindingDivisor(vao, index, vertex_array.divisor); | ||
| 218 | } else { | ||
| 219 | // Disable the vertex buffer instancing. | ||
| 220 | glVertexArrayBindingDivisor(vao, index, 0); | ||
| 221 | } | ||
| 222 | } | 195 | } |
| 223 | } | 196 | } |
| 224 | 197 | ||
| 225 | void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { | 198 | void RasterizerOpenGL::SetupVertexInstances() { |
| 226 | auto& gpu = system.GPU().Maxwell3D(); | 199 | auto& gpu = system.GPU().Maxwell3D(); |
| 227 | 200 | auto& flags = gpu.dirty.flags; | |
| 228 | if (!gpu.dirty.vertex_instances) | 201 | if (!flags[Dirty::VertexInstances]) { |
| 229 | return; | 202 | return; |
| 230 | gpu.dirty.vertex_instances = false; | 203 | } |
| 204 | flags[Dirty::VertexInstances] = false; | ||
| 231 | 205 | ||
| 232 | const auto& regs = gpu.regs; | 206 | const auto& regs = gpu.regs; |
| 233 | // Upload all guest vertex arrays sequentially to our buffer | 207 | for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { |
| 234 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 208 | if (!flags[Dirty::VertexInstance0 + index]) { |
| 235 | if (!gpu.dirty.vertex_instance[index]) | ||
| 236 | continue; | 209 | continue; |
| 237 | |||
| 238 | gpu.dirty.vertex_instance[index] = false; | ||
| 239 | |||
| 240 | if (regs.instanced_arrays.IsInstancingEnabled(index) && | ||
| 241 | regs.vertex_array[index].divisor != 0) { | ||
| 242 | // Enable vertex buffer instancing with the specified divisor. | ||
| 243 | glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor); | ||
| 244 | } else { | ||
| 245 | // Disable the vertex buffer instancing. | ||
| 246 | glVertexArrayBindingDivisor(vao, index, 0); | ||
| 247 | } | 210 | } |
| 211 | flags[Dirty::VertexInstance0 + index] = false; | ||
| 212 | |||
| 213 | const auto gl_index = static_cast<GLuint>(index); | ||
| 214 | const bool instancing_enabled = regs.instanced_arrays.IsInstancingEnabled(gl_index); | ||
| 215 | const GLuint divisor = instancing_enabled ? regs.vertex_array[index].divisor : 0; | ||
| 216 | glVertexBindingDivisor(gl_index, divisor); | ||
| 248 | } | 217 | } |
| 249 | } | 218 | } |
| 250 | 219 | ||
| @@ -260,8 +229,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { | |||
| 260 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | 229 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { |
| 261 | MICROPROFILE_SCOPE(OpenGL_Shader); | 230 | MICROPROFILE_SCOPE(OpenGL_Shader); |
| 262 | auto& gpu = system.GPU().Maxwell3D(); | 231 | auto& gpu = system.GPU().Maxwell3D(); |
| 263 | 232 | u32 clip_distances = 0; | |
| 264 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | ||
| 265 | 233 | ||
| 266 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 234 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 267 | const auto& shader_config = gpu.regs.shader_config[index]; | 235 | const auto& shader_config = gpu.regs.shader_config[index]; |
| @@ -271,10 +239,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 271 | if (!gpu.regs.IsShaderConfigEnabled(index)) { | 239 | if (!gpu.regs.IsShaderConfigEnabled(index)) { |
| 272 | switch (program) { | 240 | switch (program) { |
| 273 | case Maxwell::ShaderProgram::Geometry: | 241 | case Maxwell::ShaderProgram::Geometry: |
| 274 | shader_program_manager->UseTrivialGeometryShader(); | 242 | program_manager.UseGeometryShader(0); |
| 275 | break; | 243 | break; |
| 276 | case Maxwell::ShaderProgram::Fragment: | 244 | case Maxwell::ShaderProgram::Fragment: |
| 277 | shader_program_manager->UseTrivialFragmentShader(); | 245 | program_manager.UseFragmentShader(0); |
| 278 | break; | 246 | break; |
| 279 | default: | 247 | default: |
| 280 | break; | 248 | break; |
| @@ -299,19 +267,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 299 | SetupDrawTextures(stage, shader); | 267 | SetupDrawTextures(stage, shader); |
| 300 | SetupDrawImages(stage, shader); | 268 | SetupDrawImages(stage, shader); |
| 301 | 269 | ||
| 302 | const ProgramVariant variant(primitive_mode); | 270 | const GLuint program_handle = shader->GetHandle(); |
| 303 | const auto program_handle = shader->GetHandle(variant); | ||
| 304 | |||
| 305 | switch (program) { | 271 | switch (program) { |
| 306 | case Maxwell::ShaderProgram::VertexA: | 272 | case Maxwell::ShaderProgram::VertexA: |
| 307 | case Maxwell::ShaderProgram::VertexB: | 273 | case Maxwell::ShaderProgram::VertexB: |
| 308 | shader_program_manager->UseProgrammableVertexShader(program_handle); | 274 | program_manager.UseVertexShader(program_handle); |
| 309 | break; | 275 | break; |
| 310 | case Maxwell::ShaderProgram::Geometry: | 276 | case Maxwell::ShaderProgram::Geometry: |
| 311 | shader_program_manager->UseProgrammableGeometryShader(program_handle); | 277 | program_manager.UseGeometryShader(program_handle); |
| 312 | break; | 278 | break; |
| 313 | case Maxwell::ShaderProgram::Fragment: | 279 | case Maxwell::ShaderProgram::Fragment: |
| 314 | shader_program_manager->UseProgrammableFragmentShader(program_handle); | 280 | program_manager.UseFragmentShader(program_handle); |
| 315 | break; | 281 | break; |
| 316 | default: | 282 | default: |
| 317 | UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, | 283 | UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, |
| @@ -322,9 +288,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 322 | // When a clip distance is enabled but not set in the shader it crops parts of the screen | 288 | // When a clip distance is enabled but not set in the shader it crops parts of the screen |
| 323 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the | 289 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the |
| 324 | // clip distances only when it's written by a shader stage. | 290 | // clip distances only when it's written by a shader stage. |
| 325 | for (std::size_t i = 0; i < Maxwell::NumClipDistances; ++i) { | 291 | clip_distances |= shader->GetEntries().clip_distances; |
| 326 | clip_distances[i] = clip_distances[i] || shader->GetShaderEntries().clip_distances[i]; | ||
| 327 | } | ||
| 328 | 292 | ||
| 329 | // When VertexA is enabled, we have dual vertex shaders | 293 | // When VertexA is enabled, we have dual vertex shaders |
| 330 | if (program == Maxwell::ShaderProgram::VertexA) { | 294 | if (program == Maxwell::ShaderProgram::VertexA) { |
| @@ -334,8 +298,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 334 | } | 298 | } |
| 335 | 299 | ||
| 336 | SyncClipEnabled(clip_distances); | 300 | SyncClipEnabled(clip_distances); |
| 337 | 301 | gpu.dirty.flags[Dirty::Shaders] = false; | |
| 338 | gpu.dirty.shaders = false; | ||
| 339 | } | 302 | } |
| 340 | 303 | ||
| 341 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | 304 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { |
| @@ -368,20 +331,23 @@ void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading, | |||
| 368 | shader_cache.LoadDiskCache(stop_loading, callback); | 331 | shader_cache.LoadDiskCache(stop_loading, callback); |
| 369 | } | 332 | } |
| 370 | 333 | ||
| 334 | void RasterizerOpenGL::SetupDirtyFlags() { | ||
| 335 | state_tracker.Initialize(); | ||
| 336 | } | ||
| 337 | |||
| 371 | void RasterizerOpenGL::ConfigureFramebuffers() { | 338 | void RasterizerOpenGL::ConfigureFramebuffers() { |
| 372 | MICROPROFILE_SCOPE(OpenGL_Framebuffer); | 339 | MICROPROFILE_SCOPE(OpenGL_Framebuffer); |
| 373 | auto& gpu = system.GPU().Maxwell3D(); | 340 | auto& gpu = system.GPU().Maxwell3D(); |
| 374 | if (!gpu.dirty.render_settings) { | 341 | if (!gpu.dirty.flags[VideoCommon::Dirty::RenderTargets]) { |
| 375 | return; | 342 | return; |
| 376 | } | 343 | } |
| 377 | gpu.dirty.render_settings = false; | 344 | gpu.dirty.flags[VideoCommon::Dirty::RenderTargets] = false; |
| 378 | 345 | ||
| 379 | texture_cache.GuardRenderTargets(true); | 346 | texture_cache.GuardRenderTargets(true); |
| 380 | 347 | ||
| 381 | View depth_surface = texture_cache.GetDepthBufferSurface(true); | 348 | View depth_surface = texture_cache.GetDepthBufferSurface(true); |
| 382 | 349 | ||
| 383 | const auto& regs = gpu.regs; | 350 | const auto& regs = gpu.regs; |
| 384 | state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0; | ||
| 385 | UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); | 351 | UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); |
| 386 | 352 | ||
| 387 | // Bind the framebuffer surfaces | 353 | // Bind the framebuffer surfaces |
| @@ -409,14 +375,11 @@ void RasterizerOpenGL::ConfigureFramebuffers() { | |||
| 409 | 375 | ||
| 410 | texture_cache.GuardRenderTargets(false); | 376 | texture_cache.GuardRenderTargets(false); |
| 411 | 377 | ||
| 412 | state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key); | 378 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); |
| 413 | SyncViewport(state); | ||
| 414 | } | 379 | } |
| 415 | 380 | ||
| 416 | void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, | 381 | void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, |
| 417 | bool using_depth_fb, bool using_stencil_fb) { | 382 | bool using_stencil_fb) { |
| 418 | using VideoCore::Surface::SurfaceType; | ||
| 419 | |||
| 420 | auto& gpu = system.GPU().Maxwell3D(); | 383 | auto& gpu = system.GPU().Maxwell3D(); |
| 421 | const auto& regs = gpu.regs; | 384 | const auto& regs = gpu.regs; |
| 422 | 385 | ||
| @@ -435,80 +398,44 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, boo | |||
| 435 | key.colors[0] = color_surface; | 398 | key.colors[0] = color_surface; |
| 436 | key.zeta = depth_surface; | 399 | key.zeta = depth_surface; |
| 437 | 400 | ||
| 438 | current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key); | 401 | state_tracker.NotifyFramebuffer(); |
| 439 | current_state.ApplyFramebufferState(); | 402 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); |
| 440 | } | 403 | } |
| 441 | 404 | ||
| 442 | void RasterizerOpenGL::Clear() { | 405 | void RasterizerOpenGL::Clear() { |
| 443 | const auto& maxwell3d = system.GPU().Maxwell3D(); | 406 | const auto& gpu = system.GPU().Maxwell3D(); |
| 444 | 407 | if (!gpu.ShouldExecute()) { | |
| 445 | if (!maxwell3d.ShouldExecute()) { | ||
| 446 | return; | 408 | return; |
| 447 | } | 409 | } |
| 448 | 410 | ||
| 449 | const auto& regs = maxwell3d.regs; | 411 | const auto& regs = gpu.regs; |
| 450 | bool use_color{}; | 412 | bool use_color{}; |
| 451 | bool use_depth{}; | 413 | bool use_depth{}; |
| 452 | bool use_stencil{}; | 414 | bool use_stencil{}; |
| 453 | 415 | ||
| 454 | OpenGLState prev_state{OpenGLState::GetCurState()}; | ||
| 455 | SCOPE_EXIT({ | ||
| 456 | prev_state.AllDirty(); | ||
| 457 | prev_state.Apply(); | ||
| 458 | }); | ||
| 459 | |||
| 460 | OpenGLState clear_state{OpenGLState::GetCurState()}; | ||
| 461 | clear_state.SetDefaultViewports(); | ||
| 462 | if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || | 416 | if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || |
| 463 | regs.clear_buffers.A) { | 417 | regs.clear_buffers.A) { |
| 464 | use_color = true; | 418 | use_color = true; |
| 465 | } | 419 | } |
| 466 | if (use_color) { | 420 | if (use_color) { |
| 467 | clear_state.color_mask[0].red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE; | 421 | state_tracker.NotifyColorMask0(); |
| 468 | clear_state.color_mask[0].green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE; | 422 | glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, |
| 469 | clear_state.color_mask[0].blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE; | 423 | regs.clear_buffers.B != 0, regs.clear_buffers.A != 0); |
| 470 | clear_state.color_mask[0].alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE; | 424 | |
| 425 | // TODO(Rodrigo): Determine if clamping is used on clears | ||
| 426 | SyncFragmentColorClampState(); | ||
| 427 | SyncFramebufferSRGB(); | ||
| 471 | } | 428 | } |
| 472 | if (regs.clear_buffers.Z) { | 429 | if (regs.clear_buffers.Z) { |
| 473 | ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!"); | 430 | ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!"); |
| 474 | use_depth = true; | 431 | use_depth = true; |
| 475 | 432 | ||
| 476 | // Always enable the depth write when clearing the depth buffer. The depth write mask is | 433 | state_tracker.NotifyDepthMask(); |
| 477 | // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to | 434 | glDepthMask(GL_TRUE); |
| 478 | // true. | ||
| 479 | clear_state.depth.test_enabled = true; | ||
| 480 | clear_state.depth.test_func = GL_ALWAYS; | ||
| 481 | clear_state.depth.write_mask = GL_TRUE; | ||
| 482 | } | 435 | } |
| 483 | if (regs.clear_buffers.S) { | 436 | if (regs.clear_buffers.S) { |
| 484 | ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); | 437 | ASSERT_MSG(regs.zeta_enable, "Tried to clear stencil but buffer is not enabled!"); |
| 485 | use_stencil = true; | 438 | use_stencil = true; |
| 486 | clear_state.stencil.test_enabled = true; | ||
| 487 | |||
| 488 | if (regs.clear_flags.stencil) { | ||
| 489 | // Stencil affects the clear so fill it with the used masks | ||
| 490 | clear_state.stencil.front.test_func = GL_ALWAYS; | ||
| 491 | clear_state.stencil.front.test_mask = regs.stencil_front_func_mask; | ||
| 492 | clear_state.stencil.front.action_stencil_fail = GL_KEEP; | ||
| 493 | clear_state.stencil.front.action_depth_fail = GL_KEEP; | ||
| 494 | clear_state.stencil.front.action_depth_pass = GL_KEEP; | ||
| 495 | clear_state.stencil.front.write_mask = regs.stencil_front_mask; | ||
| 496 | if (regs.stencil_two_side_enable) { | ||
| 497 | clear_state.stencil.back.test_func = GL_ALWAYS; | ||
| 498 | clear_state.stencil.back.test_mask = regs.stencil_back_func_mask; | ||
| 499 | clear_state.stencil.back.action_stencil_fail = GL_KEEP; | ||
| 500 | clear_state.stencil.back.action_depth_fail = GL_KEEP; | ||
| 501 | clear_state.stencil.back.action_depth_pass = GL_KEEP; | ||
| 502 | clear_state.stencil.back.write_mask = regs.stencil_back_mask; | ||
| 503 | } else { | ||
| 504 | clear_state.stencil.back.test_func = GL_ALWAYS; | ||
| 505 | clear_state.stencil.back.test_mask = 0xFFFFFFFF; | ||
| 506 | clear_state.stencil.back.write_mask = 0xFFFFFFFF; | ||
| 507 | clear_state.stencil.back.action_stencil_fail = GL_KEEP; | ||
| 508 | clear_state.stencil.back.action_depth_fail = GL_KEEP; | ||
| 509 | clear_state.stencil.back.action_depth_pass = GL_KEEP; | ||
| 510 | } | ||
| 511 | } | ||
| 512 | } | 439 | } |
| 513 | 440 | ||
| 514 | if (!use_color && !use_depth && !use_stencil) { | 441 | if (!use_color && !use_depth && !use_stencil) { |
| @@ -516,20 +443,18 @@ void RasterizerOpenGL::Clear() { | |||
| 516 | return; | 443 | return; |
| 517 | } | 444 | } |
| 518 | 445 | ||
| 519 | ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil); | 446 | SyncRasterizeEnable(); |
| 520 | 447 | ||
| 521 | SyncViewport(clear_state); | ||
| 522 | SyncRasterizeEnable(clear_state); | ||
| 523 | if (regs.clear_flags.scissor) { | 448 | if (regs.clear_flags.scissor) { |
| 524 | SyncScissorTest(clear_state); | 449 | SyncScissorTest(); |
| 450 | } else { | ||
| 451 | state_tracker.NotifyScissor0(); | ||
| 452 | glDisablei(GL_SCISSOR_TEST, 0); | ||
| 525 | } | 453 | } |
| 526 | 454 | ||
| 527 | if (regs.clear_flags.viewport) { | 455 | UNIMPLEMENTED_IF(regs.clear_flags.viewport); |
| 528 | clear_state.EmulateViewportWithScissor(); | ||
| 529 | } | ||
| 530 | 456 | ||
| 531 | clear_state.AllDirty(); | 457 | ConfigureClearFramebuffer(use_color, use_depth, use_stencil); |
| 532 | clear_state.Apply(); | ||
| 533 | 458 | ||
| 534 | if (use_color) { | 459 | if (use_color) { |
| 535 | glClearBufferfv(GL_COLOR, 0, regs.clear_color); | 460 | glClearBufferfv(GL_COLOR, 0, regs.clear_color); |
| @@ -549,25 +474,27 @@ void RasterizerOpenGL::Clear() { | |||
| 549 | void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | 474 | void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { |
| 550 | MICROPROFILE_SCOPE(OpenGL_Drawing); | 475 | MICROPROFILE_SCOPE(OpenGL_Drawing); |
| 551 | auto& gpu = system.GPU().Maxwell3D(); | 476 | auto& gpu = system.GPU().Maxwell3D(); |
| 552 | const auto& regs = gpu.regs; | ||
| 553 | 477 | ||
| 554 | query_cache.UpdateCounters(); | 478 | query_cache.UpdateCounters(); |
| 555 | 479 | ||
| 556 | SyncRasterizeEnable(state); | 480 | SyncViewport(); |
| 481 | SyncRasterizeEnable(); | ||
| 482 | SyncPolygonModes(); | ||
| 557 | SyncColorMask(); | 483 | SyncColorMask(); |
| 558 | SyncFragmentColorClampState(); | 484 | SyncFragmentColorClampState(); |
| 559 | SyncMultiSampleState(); | 485 | SyncMultiSampleState(); |
| 560 | SyncDepthTestState(); | 486 | SyncDepthTestState(); |
| 487 | SyncDepthClamp(); | ||
| 561 | SyncStencilTestState(); | 488 | SyncStencilTestState(); |
| 562 | SyncBlendState(); | 489 | SyncBlendState(); |
| 563 | SyncLogicOpState(); | 490 | SyncLogicOpState(); |
| 564 | SyncCullMode(); | 491 | SyncCullMode(); |
| 565 | SyncPrimitiveRestart(); | 492 | SyncPrimitiveRestart(); |
| 566 | SyncScissorTest(state); | 493 | SyncScissorTest(); |
| 567 | SyncTransformFeedback(); | ||
| 568 | SyncPointState(); | 494 | SyncPointState(); |
| 569 | SyncPolygonOffset(); | 495 | SyncPolygonOffset(); |
| 570 | SyncAlphaTest(); | 496 | SyncAlphaTest(); |
| 497 | SyncFramebufferSRGB(); | ||
| 571 | 498 | ||
| 572 | buffer_cache.Acquire(); | 499 | buffer_cache.Acquire(); |
| 573 | 500 | ||
| @@ -591,14 +518,13 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 591 | buffer_cache.Map(buffer_size); | 518 | buffer_cache.Map(buffer_size); |
| 592 | 519 | ||
| 593 | // Prepare vertex array format. | 520 | // Prepare vertex array format. |
| 594 | const GLuint vao = SetupVertexFormat(); | 521 | SetupVertexFormat(); |
| 595 | vertex_array_pushbuffer.Setup(vao); | 522 | vertex_array_pushbuffer.Setup(); |
| 596 | 523 | ||
| 597 | // Upload vertex and index data. | 524 | // Upload vertex and index data. |
| 598 | SetupVertexBuffer(vao); | 525 | SetupVertexBuffer(); |
| 599 | SetupVertexInstances(vao); | 526 | SetupVertexInstances(); |
| 600 | 527 | GLintptr index_buffer_offset = 0; | |
| 601 | GLintptr index_buffer_offset; | ||
| 602 | if (is_indexed) { | 528 | if (is_indexed) { |
| 603 | index_buffer_offset = SetupIndexBuffer(); | 529 | index_buffer_offset = SetupIndexBuffer(); |
| 604 | } | 530 | } |
| @@ -624,27 +550,20 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 624 | ConfigureFramebuffers(); | 550 | ConfigureFramebuffers(); |
| 625 | 551 | ||
| 626 | // Signal the buffer cache that we are not going to upload more things. | 552 | // Signal the buffer cache that we are not going to upload more things. |
| 627 | const bool invalidate = buffer_cache.Unmap(); | 553 | buffer_cache.Unmap(); |
| 628 | 554 | ||
| 629 | // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. | 555 | // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. |
| 630 | vertex_array_pushbuffer.Bind(); | 556 | vertex_array_pushbuffer.Bind(); |
| 631 | bind_ubo_pushbuffer.Bind(); | 557 | bind_ubo_pushbuffer.Bind(); |
| 632 | bind_ssbo_pushbuffer.Bind(); | 558 | bind_ssbo_pushbuffer.Bind(); |
| 633 | 559 | ||
| 634 | if (invalidate) { | 560 | program_manager.BindGraphicsPipeline(); |
| 635 | // As all cached buffers are invalidated, we need to recheck their state. | ||
| 636 | gpu.dirty.ResetVertexArrays(); | ||
| 637 | } | ||
| 638 | gpu.dirty.memory_general = false; | ||
| 639 | |||
| 640 | shader_program_manager->ApplyTo(state); | ||
| 641 | state.Apply(); | ||
| 642 | 561 | ||
| 643 | if (texture_cache.TextureBarrier()) { | 562 | if (texture_cache.TextureBarrier()) { |
| 644 | glTextureBarrier(); | 563 | glTextureBarrier(); |
| 645 | } | 564 | } |
| 646 | 565 | ||
| 647 | ++num_queued_commands; | 566 | BeginTransformFeedback(primitive_mode); |
| 648 | 567 | ||
| 649 | const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); | 568 | const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); |
| 650 | const GLsizei num_instances = | 569 | const GLsizei num_instances = |
| @@ -683,6 +602,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 683 | num_instances, base_instance); | 602 | num_instances, base_instance); |
| 684 | } | 603 | } |
| 685 | } | 604 | } |
| 605 | |||
| 606 | EndTransformFeedback(); | ||
| 607 | |||
| 608 | ++num_queued_commands; | ||
| 686 | } | 609 | } |
| 687 | 610 | ||
| 688 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | 611 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { |
| @@ -695,13 +618,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 695 | auto kernel = shader_cache.GetComputeKernel(code_addr); | 618 | auto kernel = shader_cache.GetComputeKernel(code_addr); |
| 696 | SetupComputeTextures(kernel); | 619 | SetupComputeTextures(kernel); |
| 697 | SetupComputeImages(kernel); | 620 | SetupComputeImages(kernel); |
| 698 | 621 | program_manager.BindComputeShader(kernel->GetHandle()); | |
| 699 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 700 | const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, | ||
| 701 | launch_desc.block_dim_z, launch_desc.shared_alloc, | ||
| 702 | launch_desc.local_pos_alloc); | ||
| 703 | state.draw.shader_program = kernel->GetHandle(variant); | ||
| 704 | state.draw.program_pipeline = 0; | ||
| 705 | 622 | ||
| 706 | const std::size_t buffer_size = | 623 | const std::size_t buffer_size = |
| 707 | Tegra::Engines::KeplerCompute::NumConstBuffers * | 624 | Tegra::Engines::KeplerCompute::NumConstBuffers * |
| @@ -719,11 +636,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 719 | bind_ubo_pushbuffer.Bind(); | 636 | bind_ubo_pushbuffer.Bind(); |
| 720 | bind_ssbo_pushbuffer.Bind(); | 637 | bind_ssbo_pushbuffer.Bind(); |
| 721 | 638 | ||
| 722 | state.ApplyTextures(); | 639 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 723 | state.ApplyImages(); | ||
| 724 | state.ApplyShaderProgram(); | ||
| 725 | state.ApplyProgramPipeline(); | ||
| 726 | |||
| 727 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | 640 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); |
| 728 | ++num_queued_commands; | 641 | ++num_queued_commands; |
| 729 | } | 642 | } |
| @@ -828,7 +741,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad | |||
| 828 | const auto& shader_stage = stages[stage_index]; | 741 | const auto& shader_stage = stages[stage_index]; |
| 829 | 742 | ||
| 830 | u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; | 743 | u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; |
| 831 | for (const auto& entry : shader->GetShaderEntries().const_buffers) { | 744 | for (const auto& entry : shader->GetEntries().const_buffers) { |
| 832 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; | 745 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; |
| 833 | SetupConstBuffer(binding++, buffer, entry); | 746 | SetupConstBuffer(binding++, buffer, entry); |
| 834 | } | 747 | } |
| @@ -839,7 +752,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | |||
| 839 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | 752 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 840 | 753 | ||
| 841 | u32 binding = 0; | 754 | u32 binding = 0; |
| 842 | for (const auto& entry : kernel->GetShaderEntries().const_buffers) { | 755 | for (const auto& entry : kernel->GetEntries().const_buffers) { |
| 843 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | 756 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; |
| 844 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | 757 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); |
| 845 | Tegra::Engines::ConstBufferInfo buffer; | 758 | Tegra::Engines::ConstBufferInfo buffer; |
| @@ -851,7 +764,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | |||
| 851 | } | 764 | } |
| 852 | 765 | ||
| 853 | void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | 766 | void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, |
| 854 | const GLShader::ConstBufferEntry& entry) { | 767 | const ConstBufferEntry& entry) { |
| 855 | if (!buffer.enabled) { | 768 | if (!buffer.enabled) { |
| 856 | // Set values to zero to unbind buffers | 769 | // Set values to zero to unbind buffers |
| 857 | bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, | 770 | bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, |
| @@ -875,7 +788,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad | |||
| 875 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; | 788 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; |
| 876 | 789 | ||
| 877 | u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; | 790 | u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; |
| 878 | for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { | 791 | for (const auto& entry : shader->GetEntries().global_memory_entries) { |
| 879 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; | 792 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; |
| 880 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; | 793 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; |
| 881 | const auto size{memory_manager.Read<u32>(addr + 8)}; | 794 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| @@ -889,7 +802,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { | |||
| 889 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; | 802 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; |
| 890 | 803 | ||
| 891 | u32 binding = 0; | 804 | u32 binding = 0; |
| 892 | for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { | 805 | for (const auto& entry : kernel->GetEntries().global_memory_entries) { |
| 893 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | 806 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; |
| 894 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; | 807 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; |
| 895 | const auto size{memory_manager.Read<u32>(addr + 8)}; | 808 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| @@ -897,7 +810,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { | |||
| 897 | } | 810 | } |
| 898 | } | 811 | } |
| 899 | 812 | ||
| 900 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, | 813 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, |
| 901 | GPUVAddr gpu_addr, std::size_t size) { | 814 | GPUVAddr gpu_addr, std::size_t size) { |
| 902 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | 815 | const auto alignment{device.GetShaderStorageBufferAlignment()}; |
| 903 | const auto [ssbo, buffer_offset] = | 816 | const auto [ssbo, buffer_offset] = |
| @@ -909,16 +822,11 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& | |||
| 909 | MICROPROFILE_SCOPE(OpenGL_Texture); | 822 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 910 | const auto& maxwell3d = system.GPU().Maxwell3D(); | 823 | const auto& maxwell3d = system.GPU().Maxwell3D(); |
| 911 | u32 binding = device.GetBaseBindings(stage_index).sampler; | 824 | u32 binding = device.GetBaseBindings(stage_index).sampler; |
| 912 | for (const auto& entry : shader->GetShaderEntries().samplers) { | 825 | for (const auto& entry : shader->GetEntries().samplers) { |
| 913 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); | 826 | const auto shader_type = static_cast<ShaderType>(stage_index); |
| 914 | if (!entry.IsIndexed()) { | 827 | for (std::size_t i = 0; i < entry.Size(); ++i) { |
| 915 | const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); | 828 | const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); |
| 916 | SetupTexture(binding++, texture, entry); | 829 | SetupTexture(binding++, texture, entry); |
| 917 | } else { | ||
| 918 | for (std::size_t i = 0; i < entry.Size(); ++i) { | ||
| 919 | const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); | ||
| 920 | SetupTexture(binding++, texture, entry); | ||
| 921 | } | ||
| 922 | } | 830 | } |
| 923 | } | 831 | } |
| 924 | } | 832 | } |
| @@ -927,46 +835,39 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { | |||
| 927 | MICROPROFILE_SCOPE(OpenGL_Texture); | 835 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 928 | const auto& compute = system.GPU().KeplerCompute(); | 836 | const auto& compute = system.GPU().KeplerCompute(); |
| 929 | u32 binding = 0; | 837 | u32 binding = 0; |
| 930 | for (const auto& entry : kernel->GetShaderEntries().samplers) { | 838 | for (const auto& entry : kernel->GetEntries().samplers) { |
| 931 | if (!entry.IsIndexed()) { | 839 | for (std::size_t i = 0; i < entry.Size(); ++i) { |
| 932 | const auto texture = | 840 | const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i); |
| 933 | GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); | ||
| 934 | SetupTexture(binding++, texture, entry); | 841 | SetupTexture(binding++, texture, entry); |
| 935 | } else { | ||
| 936 | for (std::size_t i = 0; i < entry.Size(); ++i) { | ||
| 937 | const auto texture = | ||
| 938 | GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i); | ||
| 939 | SetupTexture(binding++, texture, entry); | ||
| 940 | } | ||
| 941 | } | 842 | } |
| 942 | } | 843 | } |
| 943 | } | 844 | } |
| 944 | 845 | ||
| 945 | void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, | 846 | void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, |
| 946 | const GLShader::SamplerEntry& entry) { | 847 | const SamplerEntry& entry) { |
| 947 | const auto view = texture_cache.GetTextureSurface(texture.tic, entry); | 848 | const auto view = texture_cache.GetTextureSurface(texture.tic, entry); |
| 948 | if (!view) { | 849 | if (!view) { |
| 949 | // Can occur when texture addr is null or its memory is unmapped/invalid | 850 | // Can occur when texture addr is null or its memory is unmapped/invalid |
| 950 | state.samplers[binding] = 0; | 851 | glBindSampler(binding, 0); |
| 951 | state.textures[binding] = 0; | 852 | glBindTextureUnit(binding, 0); |
| 952 | return; | 853 | return; |
| 953 | } | 854 | } |
| 954 | state.textures[binding] = view->GetTexture(); | 855 | glBindTextureUnit(binding, view->GetTexture()); |
| 955 | 856 | ||
| 956 | if (view->GetSurfaceParams().IsBuffer()) { | 857 | if (view->GetSurfaceParams().IsBuffer()) { |
| 957 | return; | 858 | return; |
| 958 | } | 859 | } |
| 959 | state.samplers[binding] = sampler_cache.GetSampler(texture.tsc); | ||
| 960 | |||
| 961 | // Apply swizzle to textures that are not buffers. | 860 | // Apply swizzle to textures that are not buffers. |
| 962 | view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, | 861 | view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, |
| 963 | texture.tic.w_source); | 862 | texture.tic.w_source); |
| 863 | |||
| 864 | glBindSampler(binding, sampler_cache.GetSampler(texture.tsc)); | ||
| 964 | } | 865 | } |
| 965 | 866 | ||
| 966 | void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { | 867 | void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { |
| 967 | const auto& maxwell3d = system.GPU().Maxwell3D(); | 868 | const auto& maxwell3d = system.GPU().Maxwell3D(); |
| 968 | u32 binding = device.GetBaseBindings(stage_index).image; | 869 | u32 binding = device.GetBaseBindings(stage_index).image; |
| 969 | for (const auto& entry : shader->GetShaderEntries().images) { | 870 | for (const auto& entry : shader->GetEntries().images) { |
| 970 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); | 871 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); |
| 971 | const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; | 872 | const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; |
| 972 | SetupImage(binding++, tic, entry); | 873 | SetupImage(binding++, tic, entry); |
| @@ -976,17 +877,17 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh | |||
| 976 | void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { | 877 | void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { |
| 977 | const auto& compute = system.GPU().KeplerCompute(); | 878 | const auto& compute = system.GPU().KeplerCompute(); |
| 978 | u32 binding = 0; | 879 | u32 binding = 0; |
| 979 | for (const auto& entry : shader->GetShaderEntries().images) { | 880 | for (const auto& entry : shader->GetEntries().images) { |
| 980 | const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; | 881 | const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; |
| 981 | SetupImage(binding++, tic, entry); | 882 | SetupImage(binding++, tic, entry); |
| 982 | } | 883 | } |
| 983 | } | 884 | } |
| 984 | 885 | ||
| 985 | void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, | 886 | void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, |
| 986 | const GLShader::ImageEntry& entry) { | 887 | const ImageEntry& entry) { |
| 987 | const auto view = texture_cache.GetImageSurface(tic, entry); | 888 | const auto view = texture_cache.GetImageSurface(tic, entry); |
| 988 | if (!view) { | 889 | if (!view) { |
| 989 | state.images[binding] = 0; | 890 | glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); |
| 990 | return; | 891 | return; |
| 991 | } | 892 | } |
| 992 | if (!tic.IsBuffer()) { | 893 | if (!tic.IsBuffer()) { |
| @@ -995,55 +896,87 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t | |||
| 995 | if (entry.IsWritten()) { | 896 | if (entry.IsWritten()) { |
| 996 | view->MarkAsModified(texture_cache.Tick()); | 897 | view->MarkAsModified(texture_cache.Tick()); |
| 997 | } | 898 | } |
| 998 | state.images[binding] = view->GetTexture(); | 899 | glBindImageTexture(binding, view->GetTexture(), 0, GL_TRUE, 0, GL_READ_WRITE, |
| 900 | view->GetFormat()); | ||
| 999 | } | 901 | } |
| 1000 | 902 | ||
| 1001 | void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { | 903 | void RasterizerOpenGL::SyncViewport() { |
| 1002 | const auto& regs = system.GPU().Maxwell3D().regs; | 904 | auto& gpu = system.GPU().Maxwell3D(); |
| 1003 | const bool geometry_shaders_enabled = | 905 | auto& flags = gpu.dirty.flags; |
| 1004 | regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); | 906 | const auto& regs = gpu.regs; |
| 1005 | const std::size_t viewport_count = | 907 | |
| 1006 | geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1; | 908 | const bool dirty_viewport = flags[Dirty::Viewports]; |
| 1007 | for (std::size_t i = 0; i < viewport_count; i++) { | 909 | if (dirty_viewport || flags[Dirty::ClipControl]) { |
| 1008 | auto& viewport = current_state.viewports[i]; | 910 | flags[Dirty::ClipControl] = false; |
| 1009 | const auto& src = regs.viewports[i]; | 911 | |
| 1010 | const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; | 912 | bool flip_y = false; |
| 1011 | viewport.x = viewport_rect.left; | 913 | if (regs.viewport_transform[0].scale_y < 0.0) { |
| 1012 | viewport.y = viewport_rect.bottom; | 914 | flip_y = !flip_y; |
| 1013 | viewport.width = viewport_rect.GetWidth(); | 915 | } |
| 1014 | viewport.height = viewport_rect.GetHeight(); | 916 | if (regs.screen_y_control.y_negate != 0) { |
| 1015 | viewport.depth_range_far = src.depth_range_far; | 917 | flip_y = !flip_y; |
| 1016 | viewport.depth_range_near = src.depth_range_near; | 918 | } |
| 1017 | } | 919 | glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT, |
| 1018 | state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0; | 920 | regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE |
| 1019 | state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0; | 921 | : GL_NEGATIVE_ONE_TO_ONE); |
| 1020 | 922 | } | |
| 1021 | bool flip_y = false; | 923 | |
| 1022 | if (regs.viewport_transform[0].scale_y < 0.0) { | 924 | if (dirty_viewport) { |
| 1023 | flip_y = !flip_y; | 925 | flags[Dirty::Viewports] = false; |
| 1024 | } | 926 | |
| 1025 | if (regs.screen_y_control.y_negate != 0) { | 927 | const bool force = flags[Dirty::ViewportTransform]; |
| 1026 | flip_y = !flip_y; | 928 | flags[Dirty::ViewportTransform] = false; |
| 1027 | } | 929 | |
| 1028 | state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT; | 930 | for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) { |
| 1029 | state.clip_control.depth_mode = | 931 | if (!force && !flags[Dirty::Viewport0 + i]) { |
| 1030 | regs.depth_mode == Tegra::Engines::Maxwell3D::Regs::DepthMode::ZeroToOne | 932 | continue; |
| 1031 | ? GL_ZERO_TO_ONE | 933 | } |
| 1032 | : GL_NEGATIVE_ONE_TO_ONE; | 934 | flags[Dirty::Viewport0 + i] = false; |
| 935 | |||
| 936 | const auto& src = regs.viewport_transform[i]; | ||
| 937 | const Common::Rectangle<f32> rect{src.GetRect()}; | ||
| 938 | glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(), | ||
| 939 | rect.GetHeight()); | ||
| 940 | |||
| 941 | const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; | ||
| 942 | const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z; | ||
| 943 | const GLdouble far_depth = src.translate_z + src.scale_z; | ||
| 944 | glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth); | ||
| 945 | } | ||
| 946 | } | ||
| 1033 | } | 947 | } |
| 1034 | 948 | ||
| 1035 | void RasterizerOpenGL::SyncClipEnabled( | 949 | void RasterizerOpenGL::SyncDepthClamp() { |
| 1036 | const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) { | 950 | auto& gpu = system.GPU().Maxwell3D(); |
| 951 | auto& flags = gpu.dirty.flags; | ||
| 952 | if (!flags[Dirty::DepthClampEnabled]) { | ||
| 953 | return; | ||
| 954 | } | ||
| 955 | flags[Dirty::DepthClampEnabled] = false; | ||
| 1037 | 956 | ||
| 1038 | const auto& regs = system.GPU().Maxwell3D().regs; | 957 | const auto& state = gpu.regs.view_volume_clip_control; |
| 1039 | const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{ | 958 | UNIMPLEMENTED_IF_MSG(state.depth_clamp_far != state.depth_clamp_near, |
| 1040 | regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0, | 959 | "Unimplemented depth clamp separation!"); |
| 1041 | regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0, | 960 | |
| 1042 | regs.clip_distance_enabled.c4 != 0, regs.clip_distance_enabled.c5 != 0, | 961 | oglEnable(GL_DEPTH_CLAMP, state.depth_clamp_far || state.depth_clamp_near); |
| 1043 | regs.clip_distance_enabled.c6 != 0, regs.clip_distance_enabled.c7 != 0}; | 962 | } |
| 963 | |||
| 964 | void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { | ||
| 965 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 966 | auto& flags = gpu.dirty.flags; | ||
| 967 | if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { | ||
| 968 | return; | ||
| 969 | } | ||
| 970 | flags[Dirty::ClipDistances] = false; | ||
| 971 | |||
| 972 | clip_mask &= gpu.regs.clip_distance_enabled; | ||
| 973 | if (clip_mask == last_clip_distance_mask) { | ||
| 974 | return; | ||
| 975 | } | ||
| 976 | last_clip_distance_mask = clip_mask; | ||
| 1044 | 977 | ||
| 1045 | for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) { | 978 | for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) { |
| 1046 | state.clip_distance[i] = reg_state[i] && clip_mask[i]; | 979 | oglEnable(static_cast<GLenum>(GL_CLIP_DISTANCE0 + i), (clip_mask >> i) & 1); |
| 1047 | } | 980 | } |
| 1048 | } | 981 | } |
| 1049 | 982 | ||
| @@ -1052,247 +985,442 @@ void RasterizerOpenGL::SyncClipCoef() { | |||
| 1052 | } | 985 | } |
| 1053 | 986 | ||
| 1054 | void RasterizerOpenGL::SyncCullMode() { | 987 | void RasterizerOpenGL::SyncCullMode() { |
| 1055 | const auto& regs = system.GPU().Maxwell3D().regs; | 988 | auto& gpu = system.GPU().Maxwell3D(); |
| 989 | auto& flags = gpu.dirty.flags; | ||
| 990 | const auto& regs = gpu.regs; | ||
| 1056 | 991 | ||
| 1057 | state.cull.enabled = regs.cull.enabled != 0; | 992 | if (flags[Dirty::CullTest]) { |
| 1058 | if (state.cull.enabled) { | 993 | flags[Dirty::CullTest] = false; |
| 1059 | state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); | 994 | |
| 995 | if (regs.cull_test_enabled) { | ||
| 996 | glEnable(GL_CULL_FACE); | ||
| 997 | glCullFace(MaxwellToGL::CullFace(regs.cull_face)); | ||
| 998 | } else { | ||
| 999 | glDisable(GL_CULL_FACE); | ||
| 1000 | } | ||
| 1060 | } | 1001 | } |
| 1061 | 1002 | ||
| 1062 | state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); | 1003 | if (flags[Dirty::FrontFace]) { |
| 1004 | flags[Dirty::FrontFace] = false; | ||
| 1005 | glFrontFace(MaxwellToGL::FrontFace(regs.front_face)); | ||
| 1006 | } | ||
| 1063 | } | 1007 | } |
| 1064 | 1008 | ||
| 1065 | void RasterizerOpenGL::SyncPrimitiveRestart() { | 1009 | void RasterizerOpenGL::SyncPrimitiveRestart() { |
| 1066 | const auto& regs = system.GPU().Maxwell3D().regs; | 1010 | auto& gpu = system.GPU().Maxwell3D(); |
| 1011 | auto& flags = gpu.dirty.flags; | ||
| 1012 | if (!flags[Dirty::PrimitiveRestart]) { | ||
| 1013 | return; | ||
| 1014 | } | ||
| 1015 | flags[Dirty::PrimitiveRestart] = false; | ||
| 1067 | 1016 | ||
| 1068 | state.primitive_restart.enabled = regs.primitive_restart.enabled; | 1017 | if (gpu.regs.primitive_restart.enabled) { |
| 1069 | state.primitive_restart.index = regs.primitive_restart.index; | 1018 | glEnable(GL_PRIMITIVE_RESTART); |
| 1019 | glPrimitiveRestartIndex(gpu.regs.primitive_restart.index); | ||
| 1020 | } else { | ||
| 1021 | glDisable(GL_PRIMITIVE_RESTART); | ||
| 1022 | } | ||
| 1070 | } | 1023 | } |
| 1071 | 1024 | ||
| 1072 | void RasterizerOpenGL::SyncDepthTestState() { | 1025 | void RasterizerOpenGL::SyncDepthTestState() { |
| 1073 | const auto& regs = system.GPU().Maxwell3D().regs; | 1026 | auto& gpu = system.GPU().Maxwell3D(); |
| 1074 | 1027 | auto& flags = gpu.dirty.flags; | |
| 1075 | state.depth.test_enabled = regs.depth_test_enable != 0; | ||
| 1076 | state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; | ||
| 1077 | 1028 | ||
| 1078 | if (!state.depth.test_enabled) { | 1029 | const auto& regs = gpu.regs; |
| 1079 | return; | 1030 | if (flags[Dirty::DepthMask]) { |
| 1031 | flags[Dirty::DepthMask] = false; | ||
| 1032 | glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE); | ||
| 1080 | } | 1033 | } |
| 1081 | 1034 | ||
| 1082 | state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); | 1035 | if (flags[Dirty::DepthTest]) { |
| 1036 | flags[Dirty::DepthTest] = false; | ||
| 1037 | if (regs.depth_test_enable) { | ||
| 1038 | glEnable(GL_DEPTH_TEST); | ||
| 1039 | glDepthFunc(MaxwellToGL::ComparisonOp(regs.depth_test_func)); | ||
| 1040 | } else { | ||
| 1041 | glDisable(GL_DEPTH_TEST); | ||
| 1042 | } | ||
| 1043 | } | ||
| 1083 | } | 1044 | } |
| 1084 | 1045 | ||
| 1085 | void RasterizerOpenGL::SyncStencilTestState() { | 1046 | void RasterizerOpenGL::SyncStencilTestState() { |
| 1086 | auto& maxwell3d = system.GPU().Maxwell3D(); | 1047 | auto& gpu = system.GPU().Maxwell3D(); |
| 1087 | if (!maxwell3d.dirty.stencil_test) { | 1048 | auto& flags = gpu.dirty.flags; |
| 1049 | if (!flags[Dirty::StencilTest]) { | ||
| 1088 | return; | 1050 | return; |
| 1089 | } | 1051 | } |
| 1090 | maxwell3d.dirty.stencil_test = false; | 1052 | flags[Dirty::StencilTest] = false; |
| 1091 | |||
| 1092 | const auto& regs = maxwell3d.regs; | ||
| 1093 | state.stencil.test_enabled = regs.stencil_enable != 0; | ||
| 1094 | state.MarkDirtyStencilState(); | ||
| 1095 | 1053 | ||
| 1054 | const auto& regs = gpu.regs; | ||
| 1096 | if (!regs.stencil_enable) { | 1055 | if (!regs.stencil_enable) { |
| 1056 | glDisable(GL_STENCIL_TEST); | ||
| 1097 | return; | 1057 | return; |
| 1098 | } | 1058 | } |
| 1099 | 1059 | ||
| 1100 | state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func); | 1060 | glEnable(GL_STENCIL_TEST); |
| 1101 | state.stencil.front.test_ref = regs.stencil_front_func_ref; | 1061 | glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func), |
| 1102 | state.stencil.front.test_mask = regs.stencil_front_func_mask; | 1062 | regs.stencil_front_func_ref, regs.stencil_front_func_mask); |
| 1103 | state.stencil.front.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_fail); | 1063 | glStencilOpSeparate(GL_FRONT, MaxwellToGL::StencilOp(regs.stencil_front_op_fail), |
| 1104 | state.stencil.front.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_zfail); | 1064 | MaxwellToGL::StencilOp(regs.stencil_front_op_zfail), |
| 1105 | state.stencil.front.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_front_op_zpass); | 1065 | MaxwellToGL::StencilOp(regs.stencil_front_op_zpass)); |
| 1106 | state.stencil.front.write_mask = regs.stencil_front_mask; | 1066 | glStencilMaskSeparate(GL_FRONT, regs.stencil_front_mask); |
| 1067 | |||
| 1107 | if (regs.stencil_two_side_enable) { | 1068 | if (regs.stencil_two_side_enable) { |
| 1108 | state.stencil.back.test_func = MaxwellToGL::ComparisonOp(regs.stencil_back_func_func); | 1069 | glStencilFuncSeparate(GL_BACK, MaxwellToGL::ComparisonOp(regs.stencil_back_func_func), |
| 1109 | state.stencil.back.test_ref = regs.stencil_back_func_ref; | 1070 | regs.stencil_back_func_ref, regs.stencil_back_func_mask); |
| 1110 | state.stencil.back.test_mask = regs.stencil_back_func_mask; | 1071 | glStencilOpSeparate(GL_BACK, MaxwellToGL::StencilOp(regs.stencil_back_op_fail), |
| 1111 | state.stencil.back.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_fail); | 1072 | MaxwellToGL::StencilOp(regs.stencil_back_op_zfail), |
| 1112 | state.stencil.back.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_zfail); | 1073 | MaxwellToGL::StencilOp(regs.stencil_back_op_zpass)); |
| 1113 | state.stencil.back.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_back_op_zpass); | 1074 | glStencilMaskSeparate(GL_BACK, regs.stencil_back_mask); |
| 1114 | state.stencil.back.write_mask = regs.stencil_back_mask; | ||
| 1115 | } else { | 1075 | } else { |
| 1116 | state.stencil.back.test_func = GL_ALWAYS; | 1076 | glStencilFuncSeparate(GL_BACK, GL_ALWAYS, 0, 0xFFFFFFFF); |
| 1117 | state.stencil.back.test_ref = 0; | 1077 | glStencilOpSeparate(GL_BACK, GL_KEEP, GL_KEEP, GL_KEEP); |
| 1118 | state.stencil.back.test_mask = 0xFFFFFFFF; | 1078 | glStencilMaskSeparate(GL_BACK, 0xFFFFFFFF); |
| 1119 | state.stencil.back.write_mask = 0xFFFFFFFF; | ||
| 1120 | state.stencil.back.action_stencil_fail = GL_KEEP; | ||
| 1121 | state.stencil.back.action_depth_fail = GL_KEEP; | ||
| 1122 | state.stencil.back.action_depth_pass = GL_KEEP; | ||
| 1123 | } | 1079 | } |
| 1124 | } | 1080 | } |
| 1125 | 1081 | ||
| 1126 | void RasterizerOpenGL::SyncRasterizeEnable(OpenGLState& current_state) { | 1082 | void RasterizerOpenGL::SyncRasterizeEnable() { |
| 1127 | const auto& regs = system.GPU().Maxwell3D().regs; | 1083 | auto& gpu = system.GPU().Maxwell3D(); |
| 1128 | current_state.rasterizer_discard = regs.rasterize_enable == 0; | 1084 | auto& flags = gpu.dirty.flags; |
| 1085 | if (!flags[Dirty::RasterizeEnable]) { | ||
| 1086 | return; | ||
| 1087 | } | ||
| 1088 | flags[Dirty::RasterizeEnable] = false; | ||
| 1089 | |||
| 1090 | oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0); | ||
| 1091 | } | ||
| 1092 | |||
| 1093 | void RasterizerOpenGL::SyncPolygonModes() { | ||
| 1094 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 1095 | auto& flags = gpu.dirty.flags; | ||
| 1096 | if (!flags[Dirty::PolygonModes]) { | ||
| 1097 | return; | ||
| 1098 | } | ||
| 1099 | flags[Dirty::PolygonModes] = false; | ||
| 1100 | |||
| 1101 | if (gpu.regs.fill_rectangle) { | ||
| 1102 | if (!GLAD_GL_NV_fill_rectangle) { | ||
| 1103 | LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported"); | ||
| 1104 | glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); | ||
| 1105 | return; | ||
| 1106 | } | ||
| 1107 | |||
| 1108 | flags[Dirty::PolygonModeFront] = true; | ||
| 1109 | flags[Dirty::PolygonModeBack] = true; | ||
| 1110 | glPolygonMode(GL_FRONT_AND_BACK, GL_FILL_RECTANGLE_NV); | ||
| 1111 | return; | ||
| 1112 | } | ||
| 1113 | |||
| 1114 | if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) { | ||
| 1115 | flags[Dirty::PolygonModeFront] = false; | ||
| 1116 | flags[Dirty::PolygonModeBack] = false; | ||
| 1117 | glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front)); | ||
| 1118 | return; | ||
| 1119 | } | ||
| 1120 | |||
| 1121 | if (flags[Dirty::PolygonModeFront]) { | ||
| 1122 | flags[Dirty::PolygonModeFront] = false; | ||
| 1123 | glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front)); | ||
| 1124 | } | ||
| 1125 | |||
| 1126 | if (flags[Dirty::PolygonModeBack]) { | ||
| 1127 | flags[Dirty::PolygonModeBack] = false; | ||
| 1128 | glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back)); | ||
| 1129 | } | ||
| 1129 | } | 1130 | } |
| 1130 | 1131 | ||
| 1131 | void RasterizerOpenGL::SyncColorMask() { | 1132 | void RasterizerOpenGL::SyncColorMask() { |
| 1132 | auto& maxwell3d = system.GPU().Maxwell3D(); | 1133 | auto& gpu = system.GPU().Maxwell3D(); |
| 1133 | if (!maxwell3d.dirty.color_mask) { | 1134 | auto& flags = gpu.dirty.flags; |
| 1135 | if (!flags[Dirty::ColorMasks]) { | ||
| 1134 | return; | 1136 | return; |
| 1135 | } | 1137 | } |
| 1136 | const auto& regs = maxwell3d.regs; | 1138 | flags[Dirty::ColorMasks] = false; |
| 1139 | |||
| 1140 | const bool force = flags[Dirty::ColorMaskCommon]; | ||
| 1141 | flags[Dirty::ColorMaskCommon] = false; | ||
| 1142 | |||
| 1143 | const auto& regs = gpu.regs; | ||
| 1144 | if (regs.color_mask_common) { | ||
| 1145 | if (!force && !flags[Dirty::ColorMask0]) { | ||
| 1146 | return; | ||
| 1147 | } | ||
| 1148 | flags[Dirty::ColorMask0] = false; | ||
| 1137 | 1149 | ||
| 1138 | const std::size_t count = | 1150 | auto& mask = regs.color_mask[0]; |
| 1139 | regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; | 1151 | glColorMask(mask.R != 0, mask.B != 0, mask.G != 0, mask.A != 0); |
| 1140 | for (std::size_t i = 0; i < count; i++) { | 1152 | return; |
| 1141 | const auto& source = regs.color_mask[regs.color_mask_common ? 0 : i]; | ||
| 1142 | auto& dest = state.color_mask[i]; | ||
| 1143 | dest.red_enabled = (source.R == 0) ? GL_FALSE : GL_TRUE; | ||
| 1144 | dest.green_enabled = (source.G == 0) ? GL_FALSE : GL_TRUE; | ||
| 1145 | dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; | ||
| 1146 | dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; | ||
| 1147 | } | 1153 | } |
| 1148 | 1154 | ||
| 1149 | state.MarkDirtyColorMask(); | 1155 | // Path without color_mask_common set |
| 1150 | maxwell3d.dirty.color_mask = false; | 1156 | for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) { |
| 1157 | if (!force && !flags[Dirty::ColorMask0 + i]) { | ||
| 1158 | continue; | ||
| 1159 | } | ||
| 1160 | flags[Dirty::ColorMask0 + i] = false; | ||
| 1161 | |||
| 1162 | const auto& mask = regs.color_mask[i]; | ||
| 1163 | glColorMaski(static_cast<GLuint>(i), mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0); | ||
| 1164 | } | ||
| 1151 | } | 1165 | } |
| 1152 | 1166 | ||
| 1153 | void RasterizerOpenGL::SyncMultiSampleState() { | 1167 | void RasterizerOpenGL::SyncMultiSampleState() { |
| 1168 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 1169 | auto& flags = gpu.dirty.flags; | ||
| 1170 | if (!flags[Dirty::MultisampleControl]) { | ||
| 1171 | return; | ||
| 1172 | } | ||
| 1173 | flags[Dirty::MultisampleControl] = false; | ||
| 1174 | |||
| 1154 | const auto& regs = system.GPU().Maxwell3D().regs; | 1175 | const auto& regs = system.GPU().Maxwell3D().regs; |
| 1155 | state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0; | 1176 | oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage); |
| 1156 | state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0; | 1177 | oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one); |
| 1157 | } | 1178 | } |
| 1158 | 1179 | ||
| 1159 | void RasterizerOpenGL::SyncFragmentColorClampState() { | 1180 | void RasterizerOpenGL::SyncFragmentColorClampState() { |
| 1160 | const auto& regs = system.GPU().Maxwell3D().regs; | 1181 | auto& gpu = system.GPU().Maxwell3D(); |
| 1161 | state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0; | 1182 | auto& flags = gpu.dirty.flags; |
| 1183 | if (!flags[Dirty::FragmentClampColor]) { | ||
| 1184 | return; | ||
| 1185 | } | ||
| 1186 | flags[Dirty::FragmentClampColor] = false; | ||
| 1187 | |||
| 1188 | glClampColor(GL_CLAMP_FRAGMENT_COLOR, gpu.regs.frag_color_clamp ? GL_TRUE : GL_FALSE); | ||
| 1162 | } | 1189 | } |
| 1163 | 1190 | ||
| 1164 | void RasterizerOpenGL::SyncBlendState() { | 1191 | void RasterizerOpenGL::SyncBlendState() { |
| 1165 | auto& maxwell3d = system.GPU().Maxwell3D(); | 1192 | auto& gpu = system.GPU().Maxwell3D(); |
| 1166 | if (!maxwell3d.dirty.blend_state) { | 1193 | auto& flags = gpu.dirty.flags; |
| 1194 | const auto& regs = gpu.regs; | ||
| 1195 | |||
| 1196 | if (flags[Dirty::BlendColor]) { | ||
| 1197 | flags[Dirty::BlendColor] = false; | ||
| 1198 | glBlendColor(regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, | ||
| 1199 | regs.blend_color.a); | ||
| 1200 | } | ||
| 1201 | |||
| 1202 | // TODO(Rodrigo): Revisit blending, there are several registers we are not reading | ||
| 1203 | |||
| 1204 | if (!flags[Dirty::BlendStates]) { | ||
| 1167 | return; | 1205 | return; |
| 1168 | } | 1206 | } |
| 1169 | const auto& regs = maxwell3d.regs; | 1207 | flags[Dirty::BlendStates] = false; |
| 1170 | 1208 | ||
| 1171 | state.blend_color.red = regs.blend_color.r; | 1209 | if (!regs.independent_blend_enable) { |
| 1172 | state.blend_color.green = regs.blend_color.g; | 1210 | if (!regs.blend.enable[0]) { |
| 1173 | state.blend_color.blue = regs.blend_color.b; | 1211 | glDisable(GL_BLEND); |
| 1174 | state.blend_color.alpha = regs.blend_color.a; | 1212 | return; |
| 1175 | |||
| 1176 | state.independant_blend.enabled = regs.independent_blend_enable; | ||
| 1177 | if (!state.independant_blend.enabled) { | ||
| 1178 | auto& blend = state.blend[0]; | ||
| 1179 | const auto& src = regs.blend; | ||
| 1180 | blend.enabled = src.enable[0] != 0; | ||
| 1181 | if (blend.enabled) { | ||
| 1182 | blend.rgb_equation = MaxwellToGL::BlendEquation(src.equation_rgb); | ||
| 1183 | blend.src_rgb_func = MaxwellToGL::BlendFunc(src.factor_source_rgb); | ||
| 1184 | blend.dst_rgb_func = MaxwellToGL::BlendFunc(src.factor_dest_rgb); | ||
| 1185 | blend.a_equation = MaxwellToGL::BlendEquation(src.equation_a); | ||
| 1186 | blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); | ||
| 1187 | blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); | ||
| 1188 | } | ||
| 1189 | for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { | ||
| 1190 | state.blend[i].enabled = false; | ||
| 1191 | } | 1213 | } |
| 1192 | maxwell3d.dirty.blend_state = false; | 1214 | glEnable(GL_BLEND); |
| 1193 | state.MarkDirtyBlendState(); | 1215 | glBlendFuncSeparate(MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb), |
| 1216 | MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb), | ||
| 1217 | MaxwellToGL::BlendFunc(regs.blend.factor_source_a), | ||
| 1218 | MaxwellToGL::BlendFunc(regs.blend.factor_dest_a)); | ||
| 1219 | glBlendEquationSeparate(MaxwellToGL::BlendEquation(regs.blend.equation_rgb), | ||
| 1220 | MaxwellToGL::BlendEquation(regs.blend.equation_a)); | ||
| 1194 | return; | 1221 | return; |
| 1195 | } | 1222 | } |
| 1196 | 1223 | ||
| 1197 | for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { | 1224 | const bool force = flags[Dirty::BlendIndependentEnabled]; |
| 1198 | auto& blend = state.blend[i]; | 1225 | flags[Dirty::BlendIndependentEnabled] = false; |
| 1199 | const auto& src = regs.independent_blend[i]; | 1226 | |
| 1200 | blend.enabled = regs.blend.enable[i] != 0; | 1227 | for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) { |
| 1201 | if (!blend.enabled) | 1228 | if (!force && !flags[Dirty::BlendState0 + i]) { |
| 1202 | continue; | 1229 | continue; |
| 1203 | blend.rgb_equation = MaxwellToGL::BlendEquation(src.equation_rgb); | 1230 | } |
| 1204 | blend.src_rgb_func = MaxwellToGL::BlendFunc(src.factor_source_rgb); | 1231 | flags[Dirty::BlendState0 + i] = false; |
| 1205 | blend.dst_rgb_func = MaxwellToGL::BlendFunc(src.factor_dest_rgb); | 1232 | |
| 1206 | blend.a_equation = MaxwellToGL::BlendEquation(src.equation_a); | 1233 | if (!regs.blend.enable[i]) { |
| 1207 | blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); | 1234 | glDisablei(GL_BLEND, static_cast<GLuint>(i)); |
| 1208 | blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); | 1235 | continue; |
| 1209 | } | 1236 | } |
| 1237 | glEnablei(GL_BLEND, static_cast<GLuint>(i)); | ||
| 1210 | 1238 | ||
| 1211 | state.MarkDirtyBlendState(); | 1239 | const auto& src = regs.independent_blend[i]; |
| 1212 | maxwell3d.dirty.blend_state = false; | 1240 | glBlendFuncSeparatei(static_cast<GLuint>(i), MaxwellToGL::BlendFunc(src.factor_source_rgb), |
| 1241 | MaxwellToGL::BlendFunc(src.factor_dest_rgb), | ||
| 1242 | MaxwellToGL::BlendFunc(src.factor_source_a), | ||
| 1243 | MaxwellToGL::BlendFunc(src.factor_dest_a)); | ||
| 1244 | glBlendEquationSeparatei(static_cast<GLuint>(i), | ||
| 1245 | MaxwellToGL::BlendEquation(src.equation_rgb), | ||
| 1246 | MaxwellToGL::BlendEquation(src.equation_a)); | ||
| 1247 | } | ||
| 1213 | } | 1248 | } |
| 1214 | 1249 | ||
| 1215 | void RasterizerOpenGL::SyncLogicOpState() { | 1250 | void RasterizerOpenGL::SyncLogicOpState() { |
| 1216 | const auto& regs = system.GPU().Maxwell3D().regs; | 1251 | auto& gpu = system.GPU().Maxwell3D(); |
| 1252 | auto& flags = gpu.dirty.flags; | ||
| 1253 | if (!flags[Dirty::LogicOp]) { | ||
| 1254 | return; | ||
| 1255 | } | ||
| 1256 | flags[Dirty::LogicOp] = false; | ||
| 1217 | 1257 | ||
| 1218 | state.logic_op.enabled = regs.logic_op.enable != 0; | 1258 | const auto& regs = gpu.regs; |
| 1259 | if (regs.logic_op.enable) { | ||
| 1260 | glEnable(GL_COLOR_LOGIC_OP); | ||
| 1261 | glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation)); | ||
| 1262 | } else { | ||
| 1263 | glDisable(GL_COLOR_LOGIC_OP); | ||
| 1264 | } | ||
| 1265 | } | ||
| 1219 | 1266 | ||
| 1220 | if (!state.logic_op.enabled) | 1267 | void RasterizerOpenGL::SyncScissorTest() { |
| 1268 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 1269 | auto& flags = gpu.dirty.flags; | ||
| 1270 | if (!flags[Dirty::Scissors]) { | ||
| 1221 | return; | 1271 | return; |
| 1272 | } | ||
| 1273 | flags[Dirty::Scissors] = false; | ||
| 1222 | 1274 | ||
| 1223 | ASSERT_MSG(regs.blend.enable[0] == 0, | 1275 | const auto& regs = gpu.regs; |
| 1224 | "Blending and logic op can't be enabled at the same time."); | 1276 | for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) { |
| 1225 | 1277 | if (!flags[Dirty::Scissor0 + index]) { | |
| 1226 | state.logic_op.operation = MaxwellToGL::LogicOp(regs.logic_op.operation); | 1278 | continue; |
| 1227 | } | 1279 | } |
| 1280 | flags[Dirty::Scissor0 + index] = false; | ||
| 1228 | 1281 | ||
| 1229 | void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { | 1282 | const auto& src = regs.scissor_test[index]; |
| 1230 | const auto& regs = system.GPU().Maxwell3D().regs; | 1283 | if (src.enable) { |
| 1231 | const bool geometry_shaders_enabled = | 1284 | glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index)); |
| 1232 | regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); | 1285 | glScissorIndexed(static_cast<GLuint>(index), src.min_x, src.min_y, |
| 1233 | const std::size_t viewport_count = | 1286 | src.max_x - src.min_x, src.max_y - src.min_y); |
| 1234 | geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1; | 1287 | } else { |
| 1235 | for (std::size_t i = 0; i < viewport_count; i++) { | 1288 | glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index)); |
| 1236 | const auto& src = regs.scissor_test[i]; | ||
| 1237 | auto& dst = current_state.viewports[i].scissor; | ||
| 1238 | dst.enabled = (src.enable != 0); | ||
| 1239 | if (dst.enabled == 0) { | ||
| 1240 | return; | ||
| 1241 | } | 1289 | } |
| 1242 | const u32 width = src.max_x - src.min_x; | ||
| 1243 | const u32 height = src.max_y - src.min_y; | ||
| 1244 | dst.x = src.min_x; | ||
| 1245 | dst.y = src.min_y; | ||
| 1246 | dst.width = width; | ||
| 1247 | dst.height = height; | ||
| 1248 | } | 1290 | } |
| 1249 | } | 1291 | } |
| 1250 | 1292 | ||
| 1251 | void RasterizerOpenGL::SyncTransformFeedback() { | ||
| 1252 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1253 | UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented"); | ||
| 1254 | } | ||
| 1255 | |||
| 1256 | void RasterizerOpenGL::SyncPointState() { | 1293 | void RasterizerOpenGL::SyncPointState() { |
| 1257 | const auto& regs = system.GPU().Maxwell3D().regs; | 1294 | auto& gpu = system.GPU().Maxwell3D(); |
| 1295 | auto& flags = gpu.dirty.flags; | ||
| 1296 | if (!flags[Dirty::PointSize]) { | ||
| 1297 | return; | ||
| 1298 | } | ||
| 1299 | flags[Dirty::PointSize] = false; | ||
| 1300 | |||
| 1301 | oglEnable(GL_POINT_SPRITE, gpu.regs.point_sprite_enable); | ||
| 1302 | |||
| 1303 | if (gpu.regs.vp_point_size.enable) { | ||
| 1304 | // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled. | ||
| 1305 | glEnable(GL_PROGRAM_POINT_SIZE); | ||
| 1306 | return; | ||
| 1307 | } | ||
| 1308 | |||
| 1258 | // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid | 1309 | // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid |
| 1259 | // in OpenGL). | 1310 | // in OpenGL). |
| 1260 | state.point.program_control = regs.vp_point_size.enable != 0; | 1311 | glPointSize(std::max(1.0f, gpu.regs.point_size)); |
| 1261 | state.point.sprite = regs.point_sprite_enable != 0; | 1312 | glDisable(GL_PROGRAM_POINT_SIZE); |
| 1262 | state.point.size = std::max(1.0f, regs.point_size); | ||
| 1263 | } | 1313 | } |
| 1264 | 1314 | ||
| 1265 | void RasterizerOpenGL::SyncPolygonOffset() { | 1315 | void RasterizerOpenGL::SyncPolygonOffset() { |
| 1266 | auto& maxwell3d = system.GPU().Maxwell3D(); | 1316 | auto& gpu = system.GPU().Maxwell3D(); |
| 1267 | if (!maxwell3d.dirty.polygon_offset) { | 1317 | auto& flags = gpu.dirty.flags; |
| 1318 | if (!flags[Dirty::PolygonOffset]) { | ||
| 1268 | return; | 1319 | return; |
| 1269 | } | 1320 | } |
| 1270 | const auto& regs = maxwell3d.regs; | 1321 | flags[Dirty::PolygonOffset] = false; |
| 1271 | |||
| 1272 | state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; | ||
| 1273 | state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; | ||
| 1274 | state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; | ||
| 1275 | 1322 | ||
| 1276 | // Hardware divides polygon offset units by two | 1323 | const auto& regs = gpu.regs; |
| 1277 | state.polygon_offset.units = regs.polygon_offset_units / 2.0f; | 1324 | oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable); |
| 1278 | state.polygon_offset.factor = regs.polygon_offset_factor; | 1325 | oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable); |
| 1279 | state.polygon_offset.clamp = regs.polygon_offset_clamp; | 1326 | oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable); |
| 1280 | 1327 | ||
| 1281 | state.MarkDirtyPolygonOffset(); | 1328 | if (regs.polygon_offset_fill_enable || regs.polygon_offset_line_enable || |
| 1282 | maxwell3d.dirty.polygon_offset = false; | 1329 | regs.polygon_offset_point_enable) { |
| 1330 | // Hardware divides polygon offset units by two | ||
| 1331 | glPolygonOffsetClamp(regs.polygon_offset_factor, regs.polygon_offset_units / 2.0f, | ||
| 1332 | regs.polygon_offset_clamp); | ||
| 1333 | } | ||
| 1283 | } | 1334 | } |
| 1284 | 1335 | ||
| 1285 | void RasterizerOpenGL::SyncAlphaTest() { | 1336 | void RasterizerOpenGL::SyncAlphaTest() { |
| 1337 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 1338 | auto& flags = gpu.dirty.flags; | ||
| 1339 | if (!flags[Dirty::AlphaTest]) { | ||
| 1340 | return; | ||
| 1341 | } | ||
| 1342 | flags[Dirty::AlphaTest] = false; | ||
| 1343 | |||
| 1344 | const auto& regs = gpu.regs; | ||
| 1345 | if (regs.alpha_test_enabled && regs.rt_control.count > 1) { | ||
| 1346 | LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested"); | ||
| 1347 | } | ||
| 1348 | |||
| 1349 | if (regs.alpha_test_enabled) { | ||
| 1350 | glEnable(GL_ALPHA_TEST); | ||
| 1351 | glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref); | ||
| 1352 | } else { | ||
| 1353 | glDisable(GL_ALPHA_TEST); | ||
| 1354 | } | ||
| 1355 | } | ||
| 1356 | |||
| 1357 | void RasterizerOpenGL::SyncFramebufferSRGB() { | ||
| 1358 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 1359 | auto& flags = gpu.dirty.flags; | ||
| 1360 | if (!flags[Dirty::FramebufferSRGB]) { | ||
| 1361 | return; | ||
| 1362 | } | ||
| 1363 | flags[Dirty::FramebufferSRGB] = false; | ||
| 1364 | |||
| 1365 | oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { | ||
| 1286 | const auto& regs = system.GPU().Maxwell3D().regs; | 1369 | const auto& regs = system.GPU().Maxwell3D().regs; |
| 1287 | UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1, | 1370 | if (regs.tfb_enabled == 0) { |
| 1288 | "Alpha Testing is enabled with more than one rendertarget"); | 1371 | return; |
| 1372 | } | ||
| 1373 | |||
| 1374 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | ||
| 1375 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | ||
| 1376 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | ||
| 1289 | 1377 | ||
| 1290 | state.alpha_test.enabled = regs.alpha_test_enabled; | 1378 | for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { |
| 1291 | if (!state.alpha_test.enabled) { | 1379 | const auto& binding = regs.tfb_bindings[index]; |
| 1380 | if (!binding.buffer_enable) { | ||
| 1381 | if (enabled_transform_feedback_buffers[index]) { | ||
| 1382 | glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0, | ||
| 1383 | 0); | ||
| 1384 | } | ||
| 1385 | enabled_transform_feedback_buffers[index] = false; | ||
| 1386 | continue; | ||
| 1387 | } | ||
| 1388 | enabled_transform_feedback_buffers[index] = true; | ||
| 1389 | |||
| 1390 | auto& tfb_buffer = transform_feedback_buffers[index]; | ||
| 1391 | tfb_buffer.Create(); | ||
| 1392 | |||
| 1393 | const GLuint handle = tfb_buffer.handle; | ||
| 1394 | const std::size_t size = binding.buffer_size; | ||
| 1395 | glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY); | ||
| 1396 | glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0, | ||
| 1397 | static_cast<GLsizeiptr>(size)); | ||
| 1398 | } | ||
| 1399 | |||
| 1400 | glBeginTransformFeedback(GL_POINTS); | ||
| 1401 | } | ||
| 1402 | |||
| 1403 | void RasterizerOpenGL::EndTransformFeedback() { | ||
| 1404 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1405 | if (regs.tfb_enabled == 0) { | ||
| 1292 | return; | 1406 | return; |
| 1293 | } | 1407 | } |
| 1294 | state.alpha_test.func = MaxwellToGL::ComparisonOp(regs.alpha_test_func); | 1408 | |
| 1295 | state.alpha_test.ref = regs.alpha_test_ref; | 1409 | glEndTransformFeedback(); |
| 1410 | |||
| 1411 | for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { | ||
| 1412 | const auto& binding = regs.tfb_bindings[index]; | ||
| 1413 | if (!binding.buffer_enable) { | ||
| 1414 | continue; | ||
| 1415 | } | ||
| 1416 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); | ||
| 1417 | |||
| 1418 | const GLuint handle = transform_feedback_buffers[index].handle; | ||
| 1419 | const GPUVAddr gpu_addr = binding.Address(); | ||
| 1420 | const std::size_t size = binding.buffer_size; | ||
| 1421 | const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | ||
| 1422 | glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); | ||
| 1423 | } | ||
| 1296 | } | 1424 | } |
| 1297 | 1425 | ||
| 1298 | } // namespace OpenGL | 1426 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 68abe9a21..2d3be2437 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -30,7 +30,7 @@ | |||
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 31 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 31 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 32 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 32 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 33 | #include "video_core/renderer_opengl/gl_state.h" | 33 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 34 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 34 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 35 | #include "video_core/renderer_opengl/utils.h" | 35 | #include "video_core/renderer_opengl/utils.h" |
| 36 | #include "video_core/textures/texture.h" | 36 | #include "video_core/textures/texture.h" |
| @@ -55,7 +55,8 @@ struct DrawParameters; | |||
| 55 | class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { | 55 | class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { |
| 56 | public: | 56 | public: |
| 57 | explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 57 | explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| 58 | ScreenInfo& info); | 58 | ScreenInfo& info, GLShader::ProgramManager& program_manager, |
| 59 | StateTracker& state_tracker); | ||
| 59 | ~RasterizerOpenGL() override; | 60 | ~RasterizerOpenGL() override; |
| 60 | 61 | ||
| 61 | void Draw(bool is_indexed, bool is_instanced) override; | 62 | void Draw(bool is_indexed, bool is_instanced) override; |
| @@ -76,6 +77,7 @@ public: | |||
| 76 | u32 pixel_stride) override; | 77 | u32 pixel_stride) override; |
| 77 | void LoadDiskResources(const std::atomic_bool& stop_loading, | 78 | void LoadDiskResources(const std::atomic_bool& stop_loading, |
| 78 | const VideoCore::DiskResourceLoadCallback& callback) override; | 79 | const VideoCore::DiskResourceLoadCallback& callback) override; |
| 80 | void SetupDirtyFlags() override; | ||
| 79 | 81 | ||
| 80 | /// Returns true when there are commands queued to the OpenGL server. | 82 | /// Returns true when there are commands queued to the OpenGL server. |
| 81 | bool AnyCommandQueued() const { | 83 | bool AnyCommandQueued() const { |
| @@ -86,8 +88,7 @@ private: | |||
| 86 | /// Configures the color and depth framebuffer states. | 88 | /// Configures the color and depth framebuffer states. |
| 87 | void ConfigureFramebuffers(); | 89 | void ConfigureFramebuffers(); |
| 88 | 90 | ||
| 89 | void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, | 91 | void ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, bool using_stencil_fb); |
| 90 | bool using_depth_fb, bool using_stencil_fb); | ||
| 91 | 92 | ||
| 92 | /// Configures the current constbuffers to use for the draw command. | 93 | /// Configures the current constbuffers to use for the draw command. |
| 93 | void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader); | 94 | void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader); |
| @@ -97,7 +98,7 @@ private: | |||
| 97 | 98 | ||
| 98 | /// Configures a constant buffer. | 99 | /// Configures a constant buffer. |
| 99 | void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | 100 | void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, |
| 100 | const GLShader::ConstBufferEntry& entry); | 101 | const ConstBufferEntry& entry); |
| 101 | 102 | ||
| 102 | /// Configures the current global memory entries to use for the draw command. | 103 | /// Configures the current global memory entries to use for the draw command. |
| 103 | void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); | 104 | void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); |
| @@ -106,7 +107,7 @@ private: | |||
| 106 | void SetupComputeGlobalMemory(const Shader& kernel); | 107 | void SetupComputeGlobalMemory(const Shader& kernel); |
| 107 | 108 | ||
| 108 | /// Configures a constant buffer. | 109 | /// Configures a constant buffer. |
| 109 | void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | 110 | void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, |
| 110 | std::size_t size); | 111 | std::size_t size); |
| 111 | 112 | ||
| 112 | /// Configures the current textures to use for the draw command. | 113 | /// Configures the current textures to use for the draw command. |
| @@ -117,7 +118,7 @@ private: | |||
| 117 | 118 | ||
| 118 | /// Configures a texture. | 119 | /// Configures a texture. |
| 119 | void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, | 120 | void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, |
| 120 | const GLShader::SamplerEntry& entry); | 121 | const SamplerEntry& entry); |
| 121 | 122 | ||
| 122 | /// Configures images in a graphics shader. | 123 | /// Configures images in a graphics shader. |
| 123 | void SetupDrawImages(std::size_t stage_index, const Shader& shader); | 124 | void SetupDrawImages(std::size_t stage_index, const Shader& shader); |
| @@ -126,15 +127,16 @@ private: | |||
| 126 | void SetupComputeImages(const Shader& shader); | 127 | void SetupComputeImages(const Shader& shader); |
| 127 | 128 | ||
| 128 | /// Configures an image. | 129 | /// Configures an image. |
| 129 | void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, | 130 | void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); |
| 130 | const GLShader::ImageEntry& entry); | ||
| 131 | 131 | ||
| 132 | /// Syncs the viewport and depth range to match the guest state | 132 | /// Syncs the viewport and depth range to match the guest state |
| 133 | void SyncViewport(OpenGLState& current_state); | 133 | void SyncViewport(); |
| 134 | |||
| 135 | /// Syncs the depth clamp state | ||
| 136 | void SyncDepthClamp(); | ||
| 134 | 137 | ||
| 135 | /// Syncs the clip enabled status to match the guest state | 138 | /// Syncs the clip enabled status to match the guest state |
| 136 | void SyncClipEnabled( | 139 | void SyncClipEnabled(u32 clip_mask); |
| 137 | const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& clip_mask); | ||
| 138 | 140 | ||
| 139 | /// Syncs the clip coefficients to match the guest state | 141 | /// Syncs the clip coefficients to match the guest state |
| 140 | void SyncClipCoef(); | 142 | void SyncClipCoef(); |
| @@ -164,16 +166,16 @@ private: | |||
| 164 | void SyncMultiSampleState(); | 166 | void SyncMultiSampleState(); |
| 165 | 167 | ||
| 166 | /// Syncs the scissor test state to match the guest state | 168 | /// Syncs the scissor test state to match the guest state |
| 167 | void SyncScissorTest(OpenGLState& current_state); | 169 | void SyncScissorTest(); |
| 168 | |||
| 169 | /// Syncs the transform feedback state to match the guest state | ||
| 170 | void SyncTransformFeedback(); | ||
| 171 | 170 | ||
| 172 | /// Syncs the point state to match the guest state | 171 | /// Syncs the point state to match the guest state |
| 173 | void SyncPointState(); | 172 | void SyncPointState(); |
| 174 | 173 | ||
| 175 | /// Syncs the rasterizer enable state to match the guest state | 174 | /// Syncs the rasterizer enable state to match the guest state |
| 176 | void SyncRasterizeEnable(OpenGLState& current_state); | 175 | void SyncRasterizeEnable(); |
| 176 | |||
| 177 | /// Syncs polygon modes to match the guest state | ||
| 178 | void SyncPolygonModes(); | ||
| 177 | 179 | ||
| 178 | /// Syncs Color Mask | 180 | /// Syncs Color Mask |
| 179 | void SyncColorMask(); | 181 | void SyncColorMask(); |
| @@ -184,6 +186,15 @@ private: | |||
| 184 | /// Syncs the alpha test state to match the guest state | 186 | /// Syncs the alpha test state to match the guest state |
| 185 | void SyncAlphaTest(); | 187 | void SyncAlphaTest(); |
| 186 | 188 | ||
| 189 | /// Syncs the framebuffer sRGB state to match the guest state | ||
| 190 | void SyncFramebufferSRGB(); | ||
| 191 | |||
| 192 | /// Begin a transform feedback | ||
| 193 | void BeginTransformFeedback(GLenum primitive_mode); | ||
| 194 | |||
| 195 | /// End a transform feedback | ||
| 196 | void EndTransformFeedback(); | ||
| 197 | |||
| 187 | /// Check for extension that are not strictly required but are needed for correct emulation | 198 | /// Check for extension that are not strictly required but are needed for correct emulation |
| 188 | void CheckExtensions(); | 199 | void CheckExtensions(); |
| 189 | 200 | ||
| @@ -191,18 +202,17 @@ private: | |||
| 191 | 202 | ||
| 192 | std::size_t CalculateIndexBufferSize() const; | 203 | std::size_t CalculateIndexBufferSize() const; |
| 193 | 204 | ||
| 194 | /// Updates and returns a vertex array object representing current vertex format | 205 | /// Updates the current vertex format |
| 195 | GLuint SetupVertexFormat(); | 206 | void SetupVertexFormat(); |
| 196 | 207 | ||
| 197 | void SetupVertexBuffer(GLuint vao); | 208 | void SetupVertexBuffer(); |
| 198 | void SetupVertexInstances(GLuint vao); | 209 | void SetupVertexInstances(); |
| 199 | 210 | ||
| 200 | GLintptr SetupIndexBuffer(); | 211 | GLintptr SetupIndexBuffer(); |
| 201 | 212 | ||
| 202 | void SetupShaders(GLenum primitive_mode); | 213 | void SetupShaders(GLenum primitive_mode); |
| 203 | 214 | ||
| 204 | const Device device; | 215 | const Device device; |
| 205 | OpenGLState state; | ||
| 206 | 216 | ||
| 207 | TextureCacheOpenGL texture_cache; | 217 | TextureCacheOpenGL texture_cache; |
| 208 | ShaderCacheOpenGL shader_cache; | 218 | ShaderCacheOpenGL shader_cache; |
| @@ -212,22 +222,25 @@ private: | |||
| 212 | 222 | ||
| 213 | Core::System& system; | 223 | Core::System& system; |
| 214 | ScreenInfo& screen_info; | 224 | ScreenInfo& screen_info; |
| 215 | 225 | GLShader::ProgramManager& program_manager; | |
| 216 | std::unique_ptr<GLShader::ProgramManager> shader_program_manager; | 226 | StateTracker& state_tracker; |
| 217 | std::map<std::array<Tegra::Engines::Maxwell3D::Regs::VertexAttribute, | ||
| 218 | Tegra::Engines::Maxwell3D::Regs::NumVertexAttributes>, | ||
| 219 | OGLVertexArray> | ||
| 220 | vertex_array_cache; | ||
| 221 | 227 | ||
| 222 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 228 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; |
| 223 | OGLBufferCache buffer_cache; | 229 | OGLBufferCache buffer_cache; |
| 224 | 230 | ||
| 225 | VertexArrayPushBuffer vertex_array_pushbuffer; | 231 | VertexArrayPushBuffer vertex_array_pushbuffer{state_tracker}; |
| 226 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; | 232 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; |
| 227 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; | 233 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; |
| 228 | 234 | ||
| 235 | std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | ||
| 236 | transform_feedback_buffers; | ||
| 237 | std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | ||
| 238 | enabled_transform_feedback_buffers; | ||
| 239 | |||
| 229 | /// Number of commands queued to the OpenGL driver. Reseted on flush. | 240 | /// Number of commands queued to the OpenGL driver. Reseted on flush. |
| 230 | std::size_t num_queued_commands = 0; | 241 | std::size_t num_queued_commands = 0; |
| 242 | |||
| 243 | u32 last_clip_distance_mask = 0; | ||
| 231 | }; | 244 | }; |
| 232 | 245 | ||
| 233 | } // namespace OpenGL | 246 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index f0ddfb276..97803d480 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp | |||
| @@ -8,13 +8,29 @@ | |||
| 8 | #include "common/microprofile.h" | 8 | #include "common/microprofile.h" |
| 9 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 9 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 10 | #include "video_core/renderer_opengl/gl_shader_util.h" | 10 | #include "video_core/renderer_opengl/gl_shader_util.h" |
| 11 | #include "video_core/renderer_opengl/gl_state.h" | ||
| 12 | 11 | ||
| 13 | MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192)); | 12 | MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192)); |
| 14 | MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192)); | 13 | MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192)); |
| 15 | 14 | ||
| 16 | namespace OpenGL { | 15 | namespace OpenGL { |
| 17 | 16 | ||
| 17 | void OGLRenderbuffer::Create() { | ||
| 18 | if (handle != 0) | ||
| 19 | return; | ||
| 20 | |||
| 21 | MICROPROFILE_SCOPE(OpenGL_ResourceCreation); | ||
| 22 | glCreateRenderbuffers(1, &handle); | ||
| 23 | } | ||
| 24 | |||
| 25 | void OGLRenderbuffer::Release() { | ||
| 26 | if (handle == 0) | ||
| 27 | return; | ||
| 28 | |||
| 29 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); | ||
| 30 | glDeleteRenderbuffers(1, &handle); | ||
| 31 | handle = 0; | ||
| 32 | } | ||
| 33 | |||
| 18 | void OGLTexture::Create(GLenum target) { | 34 | void OGLTexture::Create(GLenum target) { |
| 19 | if (handle != 0) | 35 | if (handle != 0) |
| 20 | return; | 36 | return; |
| @@ -29,7 +45,6 @@ void OGLTexture::Release() { | |||
| 29 | 45 | ||
| 30 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); | 46 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); |
| 31 | glDeleteTextures(1, &handle); | 47 | glDeleteTextures(1, &handle); |
| 32 | OpenGLState::GetCurState().UnbindTexture(handle).Apply(); | ||
| 33 | handle = 0; | 48 | handle = 0; |
| 34 | } | 49 | } |
| 35 | 50 | ||
| @@ -47,7 +62,6 @@ void OGLTextureView::Release() { | |||
| 47 | 62 | ||
| 48 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); | 63 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); |
| 49 | glDeleteTextures(1, &handle); | 64 | glDeleteTextures(1, &handle); |
| 50 | OpenGLState::GetCurState().UnbindTexture(handle).Apply(); | ||
| 51 | handle = 0; | 65 | handle = 0; |
| 52 | } | 66 | } |
| 53 | 67 | ||
| @@ -65,7 +79,6 @@ void OGLSampler::Release() { | |||
| 65 | 79 | ||
| 66 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); | 80 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); |
| 67 | glDeleteSamplers(1, &handle); | 81 | glDeleteSamplers(1, &handle); |
| 68 | OpenGLState::GetCurState().ResetSampler(handle).Apply(); | ||
| 69 | handle = 0; | 82 | handle = 0; |
| 70 | } | 83 | } |
| 71 | 84 | ||
| @@ -109,7 +122,6 @@ void OGLProgram::Release() { | |||
| 109 | 122 | ||
| 110 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); | 123 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); |
| 111 | glDeleteProgram(handle); | 124 | glDeleteProgram(handle); |
| 112 | OpenGLState::GetCurState().ResetProgram(handle).Apply(); | ||
| 113 | handle = 0; | 125 | handle = 0; |
| 114 | } | 126 | } |
| 115 | 127 | ||
| @@ -127,7 +139,6 @@ void OGLPipeline::Release() { | |||
| 127 | 139 | ||
| 128 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); | 140 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); |
| 129 | glDeleteProgramPipelines(1, &handle); | 141 | glDeleteProgramPipelines(1, &handle); |
| 130 | OpenGLState::GetCurState().ResetPipeline(handle).Apply(); | ||
| 131 | handle = 0; | 142 | handle = 0; |
| 132 | } | 143 | } |
| 133 | 144 | ||
| @@ -171,24 +182,6 @@ void OGLSync::Release() { | |||
| 171 | handle = 0; | 182 | handle = 0; |
| 172 | } | 183 | } |
| 173 | 184 | ||
| 174 | void OGLVertexArray::Create() { | ||
| 175 | if (handle != 0) | ||
| 176 | return; | ||
| 177 | |||
| 178 | MICROPROFILE_SCOPE(OpenGL_ResourceCreation); | ||
| 179 | glCreateVertexArrays(1, &handle); | ||
| 180 | } | ||
| 181 | |||
| 182 | void OGLVertexArray::Release() { | ||
| 183 | if (handle == 0) | ||
| 184 | return; | ||
| 185 | |||
| 186 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); | ||
| 187 | glDeleteVertexArrays(1, &handle); | ||
| 188 | OpenGLState::GetCurState().ResetVertexArray(handle).Apply(); | ||
| 189 | handle = 0; | ||
| 190 | } | ||
| 191 | |||
| 192 | void OGLFramebuffer::Create() { | 185 | void OGLFramebuffer::Create() { |
| 193 | if (handle != 0) | 186 | if (handle != 0) |
| 194 | return; | 187 | return; |
| @@ -203,7 +196,6 @@ void OGLFramebuffer::Release() { | |||
| 203 | 196 | ||
| 204 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); | 197 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); |
| 205 | glDeleteFramebuffers(1, &handle); | 198 | glDeleteFramebuffers(1, &handle); |
| 206 | OpenGLState::GetCurState().ResetFramebuffer(handle).Apply(); | ||
| 207 | handle = 0; | 199 | handle = 0; |
| 208 | } | 200 | } |
| 209 | 201 | ||
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 514d1d165..de93f4212 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h | |||
| @@ -11,6 +11,31 @@ | |||
| 11 | 11 | ||
| 12 | namespace OpenGL { | 12 | namespace OpenGL { |
| 13 | 13 | ||
| 14 | class OGLRenderbuffer : private NonCopyable { | ||
| 15 | public: | ||
| 16 | OGLRenderbuffer() = default; | ||
| 17 | |||
| 18 | OGLRenderbuffer(OGLRenderbuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {} | ||
| 19 | |||
| 20 | ~OGLRenderbuffer() { | ||
| 21 | Release(); | ||
| 22 | } | ||
| 23 | |||
| 24 | OGLRenderbuffer& operator=(OGLRenderbuffer&& o) noexcept { | ||
| 25 | Release(); | ||
| 26 | handle = std::exchange(o.handle, 0); | ||
| 27 | return *this; | ||
| 28 | } | ||
| 29 | |||
| 30 | /// Creates a new internal OpenGL resource and stores the handle | ||
| 31 | void Create(); | ||
| 32 | |||
| 33 | /// Deletes the internal OpenGL resource | ||
| 34 | void Release(); | ||
| 35 | |||
| 36 | GLuint handle = 0; | ||
| 37 | }; | ||
| 38 | |||
| 14 | class OGLTexture : private NonCopyable { | 39 | class OGLTexture : private NonCopyable { |
| 15 | public: | 40 | public: |
| 16 | OGLTexture() = default; | 41 | OGLTexture() = default; |
| @@ -216,31 +241,6 @@ public: | |||
| 216 | GLsync handle = 0; | 241 | GLsync handle = 0; |
| 217 | }; | 242 | }; |
| 218 | 243 | ||
| 219 | class OGLVertexArray : private NonCopyable { | ||
| 220 | public: | ||
| 221 | OGLVertexArray() = default; | ||
| 222 | |||
| 223 | OGLVertexArray(OGLVertexArray&& o) noexcept : handle(std::exchange(o.handle, 0)) {} | ||
| 224 | |||
| 225 | ~OGLVertexArray() { | ||
| 226 | Release(); | ||
| 227 | } | ||
| 228 | |||
| 229 | OGLVertexArray& operator=(OGLVertexArray&& o) noexcept { | ||
| 230 | Release(); | ||
| 231 | handle = std::exchange(o.handle, 0); | ||
| 232 | return *this; | ||
| 233 | } | ||
| 234 | |||
| 235 | /// Creates a new internal OpenGL resource and stores the handle | ||
| 236 | void Create(); | ||
| 237 | |||
| 238 | /// Deletes the internal OpenGL resource | ||
| 239 | void Release(); | ||
| 240 | |||
| 241 | GLuint handle = 0; | ||
| 242 | }; | ||
| 243 | |||
| 244 | class OGLFramebuffer : private NonCopyable { | 244 | class OGLFramebuffer : private NonCopyable { |
| 245 | public: | 245 | public: |
| 246 | OGLFramebuffer() = default; | 246 | OGLFramebuffer() = default; |
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp index 3ded5ecea..5c174879a 100644 --- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp +++ b/src/video_core/renderer_opengl/gl_sampler_cache.cpp | |||
| @@ -38,7 +38,7 @@ OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc | |||
| 38 | glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy()); | 38 | glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy()); |
| 39 | } else if (GLAD_GL_EXT_texture_filter_anisotropic) { | 39 | } else if (GLAD_GL_EXT_texture_filter_anisotropic) { |
| 40 | glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy()); | 40 | glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy()); |
| 41 | } else if (tsc.GetMaxAnisotropy() != 1) { | 41 | } else { |
| 42 | LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver"); | 42 | LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver"); |
| 43 | } | 43 | } |
| 44 | 44 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 489eb143c..e3d31c3eb 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -2,12 +2,16 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <atomic> | ||
| 6 | #include <functional> | ||
| 5 | #include <mutex> | 7 | #include <mutex> |
| 6 | #include <optional> | 8 | #include <optional> |
| 7 | #include <string> | 9 | #include <string> |
| 8 | #include <thread> | 10 | #include <thread> |
| 9 | #include <unordered_set> | 11 | #include <unordered_set> |
| 12 | |||
| 10 | #include <boost/functional/hash.hpp> | 13 | #include <boost/functional/hash.hpp> |
| 14 | |||
| 11 | #include "common/alignment.h" | 15 | #include "common/alignment.h" |
| 12 | #include "common/assert.h" | 16 | #include "common/assert.h" |
| 13 | #include "common/logging/log.h" | 17 | #include "common/logging/log.h" |
| @@ -22,14 +26,16 @@ | |||
| 22 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 26 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 23 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 27 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 24 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | 28 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" |
| 29 | #include "video_core/renderer_opengl/gl_state_tracker.h" | ||
| 25 | #include "video_core/renderer_opengl/utils.h" | 30 | #include "video_core/renderer_opengl/utils.h" |
| 31 | #include "video_core/shader/registry.h" | ||
| 26 | #include "video_core/shader/shader_ir.h" | 32 | #include "video_core/shader/shader_ir.h" |
| 27 | 33 | ||
| 28 | namespace OpenGL { | 34 | namespace OpenGL { |
| 29 | 35 | ||
| 30 | using Tegra::Engines::ShaderType; | 36 | using Tegra::Engines::ShaderType; |
| 31 | using VideoCommon::Shader::ConstBufferLocker; | ||
| 32 | using VideoCommon::Shader::ProgramCode; | 37 | using VideoCommon::Shader::ProgramCode; |
| 38 | using VideoCommon::Shader::Registry; | ||
| 33 | using VideoCommon::Shader::ShaderIR; | 39 | using VideoCommon::Shader::ShaderIR; |
| 34 | 40 | ||
| 35 | namespace { | 41 | namespace { |
| @@ -55,7 +61,7 @@ constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { | |||
| 55 | } | 61 | } |
| 56 | 62 | ||
| 57 | /// Calculates the size of a program stream | 63 | /// Calculates the size of a program stream |
| 58 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | 64 | std::size_t CalculateProgramSize(const ProgramCode& program) { |
| 59 | constexpr std::size_t start_offset = 10; | 65 | constexpr std::size_t start_offset = 10; |
| 60 | // This is the encoded version of BRA that jumps to itself. All Nvidia | 66 | // This is the encoded version of BRA that jumps to itself. All Nvidia |
| 61 | // shaders end with one. | 67 | // shaders end with one. |
| @@ -108,32 +114,9 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) { | |||
| 108 | } | 114 | } |
| 109 | } | 115 | } |
| 110 | 116 | ||
| 111 | /// Describes primitive behavior on geometry shaders | ||
| 112 | constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { | ||
| 113 | switch (primitive_mode) { | ||
| 114 | case GL_POINTS: | ||
| 115 | return {"points", 1}; | ||
| 116 | case GL_LINES: | ||
| 117 | case GL_LINE_STRIP: | ||
| 118 | return {"lines", 2}; | ||
| 119 | case GL_LINES_ADJACENCY: | ||
| 120 | case GL_LINE_STRIP_ADJACENCY: | ||
| 121 | return {"lines_adjacency", 4}; | ||
| 122 | case GL_TRIANGLES: | ||
| 123 | case GL_TRIANGLE_STRIP: | ||
| 124 | case GL_TRIANGLE_FAN: | ||
| 125 | return {"triangles", 3}; | ||
| 126 | case GL_TRIANGLES_ADJACENCY: | ||
| 127 | case GL_TRIANGLE_STRIP_ADJACENCY: | ||
| 128 | return {"triangles_adjacency", 6}; | ||
| 129 | default: | ||
| 130 | return {"points", 1}; | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | /// Hashes one (or two) program streams | 117 | /// Hashes one (or two) program streams |
| 135 | u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code, | 118 | u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code, |
| 136 | const ProgramCode& code_b) { | 119 | const ProgramCode& code_b = {}) { |
| 137 | u64 unique_identifier = boost::hash_value(code); | 120 | u64 unique_identifier = boost::hash_value(code); |
| 138 | if (is_a) { | 121 | if (is_a) { |
| 139 | // VertexA programs include two programs | 122 | // VertexA programs include two programs |
| @@ -142,24 +125,6 @@ u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& co | |||
| 142 | return unique_identifier; | 125 | return unique_identifier; |
| 143 | } | 126 | } |
| 144 | 127 | ||
| 145 | /// Creates an unspecialized program from code streams | ||
| 146 | std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir, | ||
| 147 | const std::optional<ShaderIR>& ir_b) { | ||
| 148 | switch (shader_type) { | ||
| 149 | case ShaderType::Vertex: | ||
| 150 | return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); | ||
| 151 | case ShaderType::Geometry: | ||
| 152 | return GLShader::GenerateGeometryShader(device, ir); | ||
| 153 | case ShaderType::Fragment: | ||
| 154 | return GLShader::GenerateFragmentShader(device, ir); | ||
| 155 | case ShaderType::Compute: | ||
| 156 | return GLShader::GenerateComputeShader(device, ir); | ||
| 157 | default: | ||
| 158 | UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast<u32>(shader_type)); | ||
| 159 | return {}; | ||
| 160 | } | ||
| 161 | } | ||
| 162 | |||
| 163 | constexpr const char* GetShaderTypeName(ShaderType shader_type) { | 128 | constexpr const char* GetShaderTypeName(ShaderType shader_type) { |
| 164 | switch (shader_type) { | 129 | switch (shader_type) { |
| 165 | case ShaderType::Vertex: | 130 | case ShaderType::Vertex: |
| @@ -195,102 +160,38 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { | |||
| 195 | return {}; | 160 | return {}; |
| 196 | } | 161 | } |
| 197 | 162 | ||
| 198 | std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) { | 163 | std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { |
| 199 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); | 164 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); |
| 200 | } | 165 | } |
| 201 | 166 | ||
| 202 | Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system, | 167 | std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { |
| 203 | ShaderType shader_type) { | 168 | const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; |
| 204 | if (shader_type == ShaderType::Compute) { | 169 | const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, |
| 205 | return system.GPU().KeplerCompute(); | 170 | entry.graphics_info, entry.compute_info}; |
| 206 | } else { | 171 | const auto registry = std::make_shared<Registry>(entry.type, info); |
| 207 | return system.GPU().Maxwell3D(); | 172 | for (const auto& [address, value] : entry.keys) { |
| 208 | } | 173 | const auto [buffer, offset] = address; |
| 209 | } | 174 | registry->InsertKey(buffer, offset, value); |
| 210 | |||
| 211 | std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType shader_type) { | ||
| 212 | return std::make_unique<ConstBufferLocker>(shader_type, | ||
| 213 | GetConstBufferEngineInterface(system, shader_type)); | ||
| 214 | } | ||
| 215 | |||
| 216 | void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { | ||
| 217 | locker.SetBoundBuffer(usage.bound_buffer); | ||
| 218 | for (const auto& key : usage.keys) { | ||
| 219 | const auto [buffer, offset] = key.first; | ||
| 220 | locker.InsertKey(buffer, offset, key.second); | ||
| 221 | } | 175 | } |
| 222 | for (const auto& [offset, sampler] : usage.bound_samplers) { | 176 | for (const auto& [offset, sampler] : entry.bound_samplers) { |
| 223 | locker.InsertBoundSampler(offset, sampler); | 177 | registry->InsertBoundSampler(offset, sampler); |
| 224 | } | 178 | } |
| 225 | for (const auto& [key, sampler] : usage.bindless_samplers) { | 179 | for (const auto& [key, sampler] : entry.bindless_samplers) { |
| 226 | const auto [buffer, offset] = key; | 180 | const auto [buffer, offset] = key; |
| 227 | locker.InsertBindlessSampler(buffer, offset, sampler); | 181 | registry->InsertBindlessSampler(buffer, offset, sampler); |
| 228 | } | 182 | } |
| 183 | return registry; | ||
| 229 | } | 184 | } |
| 230 | 185 | ||
| 231 | CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type, | 186 | std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, |
| 232 | const ProgramCode& code, const ProgramCode& code_b, | 187 | u64 unique_identifier, const ShaderIR& ir, |
| 233 | ConstBufferLocker& locker, const ProgramVariant& variant, | 188 | const Registry& registry, bool hint_retrievable = false) { |
| 234 | bool hint_retrievable = false) { | 189 | const std::string shader_id = MakeShaderID(unique_identifier, shader_type); |
| 235 | LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type)); | 190 | LOG_INFO(Render_OpenGL, "{}", shader_id); |
| 236 | |||
| 237 | const bool is_compute = shader_type == ShaderType::Compute; | ||
| 238 | const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||
| 239 | const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker); | ||
| 240 | std::optional<ShaderIR> ir_b; | ||
| 241 | if (!code_b.empty()) { | ||
| 242 | ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker); | ||
| 243 | } | ||
| 244 | |||
| 245 | std::string source = fmt::format(R"(// {} | ||
| 246 | #version 430 core | ||
| 247 | #extension GL_ARB_separate_shader_objects : enable | ||
| 248 | )", | ||
| 249 | GetShaderId(unique_identifier, shader_type)); | ||
| 250 | if (device.HasShaderBallot()) { | ||
| 251 | source += "#extension GL_ARB_shader_ballot : require\n"; | ||
| 252 | } | ||
| 253 | if (device.HasVertexViewportLayer()) { | ||
| 254 | source += "#extension GL_ARB_shader_viewport_layer_array : require\n"; | ||
| 255 | } | ||
| 256 | if (device.HasImageLoadFormatted()) { | ||
| 257 | source += "#extension GL_EXT_shader_image_load_formatted : require\n"; | ||
| 258 | } | ||
| 259 | if (device.HasWarpIntrinsics()) { | ||
| 260 | source += "#extension GL_NV_gpu_shader5 : require\n" | ||
| 261 | "#extension GL_NV_shader_thread_group : require\n" | ||
| 262 | "#extension GL_NV_shader_thread_shuffle : require\n"; | ||
| 263 | } | ||
| 264 | // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 operations) | ||
| 265 | // on places where we don't want to. | ||
| 266 | // Thanks to Ryujinx for finding this workaround. | ||
| 267 | source += "#pragma optionNV(fastmath off)\n"; | ||
| 268 | |||
| 269 | if (shader_type == ShaderType::Geometry) { | ||
| 270 | const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode); | ||
| 271 | source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices); | ||
| 272 | source += fmt::format("layout ({}) in;\n", glsl_topology); | ||
| 273 | } | ||
| 274 | if (shader_type == ShaderType::Compute) { | ||
| 275 | if (variant.local_memory_size > 0) { | ||
| 276 | source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n", | ||
| 277 | Common::AlignUp(variant.local_memory_size, 4) / 4); | ||
| 278 | } | ||
| 279 | source += | ||
| 280 | fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n", | ||
| 281 | variant.block_x, variant.block_y, variant.block_z); | ||
| 282 | |||
| 283 | if (variant.shared_memory_size > 0) { | ||
| 284 | // shared_memory_size is described in number of words | ||
| 285 | source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size); | ||
| 286 | } | ||
| 287 | } | ||
| 288 | |||
| 289 | source += '\n'; | ||
| 290 | source += GenerateGLSL(device, shader_type, ir, ir_b); | ||
| 291 | 191 | ||
| 192 | const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); | ||
| 292 | OGLShader shader; | 193 | OGLShader shader; |
| 293 | shader.Create(source.c_str(), GetGLShaderType(shader_type)); | 194 | shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); |
| 294 | 195 | ||
| 295 | auto program = std::make_shared<OGLProgram>(); | 196 | auto program = std::make_shared<OGLProgram>(); |
| 296 | program->Create(true, hint_retrievable, shader.handle); | 197 | program->Create(true, hint_retrievable, shader.handle); |
| @@ -298,7 +199,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp | |||
| 298 | } | 199 | } |
| 299 | 200 | ||
| 300 | std::unordered_set<GLenum> GetSupportedFormats() { | 201 | std::unordered_set<GLenum> GetSupportedFormats() { |
| 301 | GLint num_formats{}; | 202 | GLint num_formats; |
| 302 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); | 203 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); |
| 303 | 204 | ||
| 304 | std::vector<GLint> formats(num_formats); | 205 | std::vector<GLint> formats(num_formats); |
| @@ -313,115 +214,82 @@ std::unordered_set<GLenum> GetSupportedFormats() { | |||
| 313 | 214 | ||
| 314 | } // Anonymous namespace | 215 | } // Anonymous namespace |
| 315 | 216 | ||
| 316 | CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, | 217 | CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, |
| 317 | GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) | 218 | std::shared_ptr<VideoCommon::Shader::Registry> registry, |
| 318 | : RasterizerCacheObject{params.host_ptr}, system{params.system}, | 219 | ShaderEntries entries, std::shared_ptr<OGLProgram> program) |
| 319 | disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, | 220 | : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, |
| 320 | unique_identifier{params.unique_identifier}, shader_type{shader_type}, | 221 | cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} |
| 321 | entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} { | 222 | |
| 322 | if (!params.precompiled_variants) { | 223 | CachedShader::~CachedShader() = default; |
| 323 | return; | 224 | |
| 324 | } | 225 | GLuint CachedShader::GetHandle() const { |
| 325 | for (const auto& pair : *params.precompiled_variants) { | 226 | DEBUG_ASSERT(registry->IsConsistent()); |
| 326 | auto locker = MakeLocker(system, shader_type); | 227 | return program->handle; |
| 327 | const auto& usage = pair->first; | ||
| 328 | FillLocker(*locker, usage); | ||
| 329 | |||
| 330 | std::unique_ptr<LockerVariant>* locker_variant = nullptr; | ||
| 331 | const auto it = | ||
| 332 | std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) { | ||
| 333 | return variant->locker->HasEqualKeys(*locker); | ||
| 334 | }); | ||
| 335 | if (it == locker_variants.end()) { | ||
| 336 | locker_variant = &locker_variants.emplace_back(); | ||
| 337 | *locker_variant = std::make_unique<LockerVariant>(); | ||
| 338 | locker_variant->get()->locker = std::move(locker); | ||
| 339 | } else { | ||
| 340 | locker_variant = &*it; | ||
| 341 | } | ||
| 342 | locker_variant->get()->programs.emplace(usage.variant, pair->second); | ||
| 343 | } | ||
| 344 | } | 228 | } |
| 345 | 229 | ||
| 346 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | 230 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, |
| 347 | Maxwell::ShaderProgram program_type, ProgramCode code, | 231 | Maxwell::ShaderProgram program_type, ProgramCode code, |
| 348 | ProgramCode code_b) { | 232 | ProgramCode code_b) { |
| 349 | const auto shader_type = GetShaderType(program_type); | 233 | const auto shader_type = GetShaderType(program_type); |
| 350 | params.disk_cache.SaveRaw( | 234 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 351 | ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b)); | ||
| 352 | 235 | ||
| 353 | ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D()); | 236 | auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D()); |
| 354 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); | 237 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); |
| 355 | // TODO(Rodrigo): Handle VertexA shaders | 238 | // TODO(Rodrigo): Handle VertexA shaders |
| 356 | // std::optional<ShaderIR> ir_b; | 239 | // std::optional<ShaderIR> ir_b; |
| 357 | // if (!code_b.empty()) { | 240 | // if (!code_b.empty()) { |
| 358 | // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); | 241 | // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); |
| 359 | // } | 242 | // } |
| 360 | return std::shared_ptr<CachedShader>(new CachedShader( | 243 | auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); |
| 361 | params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b))); | 244 | |
| 245 | ShaderDiskCacheEntry entry; | ||
| 246 | entry.type = shader_type; | ||
| 247 | entry.code = std::move(code); | ||
| 248 | entry.code_b = std::move(code_b); | ||
| 249 | entry.unique_identifier = params.unique_identifier; | ||
| 250 | entry.bound_buffer = registry->GetBoundBuffer(); | ||
| 251 | entry.graphics_info = registry->GetGraphicsInfo(); | ||
| 252 | entry.keys = registry->GetKeys(); | ||
| 253 | entry.bound_samplers = registry->GetBoundSamplers(); | ||
| 254 | entry.bindless_samplers = registry->GetBindlessSamplers(); | ||
| 255 | params.disk_cache.SaveEntry(std::move(entry)); | ||
| 256 | |||
| 257 | return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, | ||
| 258 | size_in_bytes, std::move(registry), | ||
| 259 | MakeEntries(ir), std::move(program))); | ||
| 362 | } | 260 | } |
| 363 | 261 | ||
| 364 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { | 262 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { |
| 365 | params.disk_cache.SaveRaw( | 263 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 366 | ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code)); | 264 | |
| 367 | 265 | auto& engine = params.system.GPU().KeplerCompute(); | |
| 368 | ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute, | 266 | auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); |
| 369 | params.system.GPU().KeplerCompute()); | 267 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); |
| 370 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); | 268 | const u64 uid = params.unique_identifier; |
| 371 | return std::shared_ptr<CachedShader>(new CachedShader( | 269 | auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); |
| 372 | params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {})); | 270 | |
| 271 | ShaderDiskCacheEntry entry; | ||
| 272 | entry.type = ShaderType::Compute; | ||
| 273 | entry.code = std::move(code); | ||
| 274 | entry.unique_identifier = uid; | ||
| 275 | entry.bound_buffer = registry->GetBoundBuffer(); | ||
| 276 | entry.compute_info = registry->GetComputeInfo(); | ||
| 277 | entry.keys = registry->GetKeys(); | ||
| 278 | entry.bound_samplers = registry->GetBoundSamplers(); | ||
| 279 | entry.bindless_samplers = registry->GetBindlessSamplers(); | ||
| 280 | params.disk_cache.SaveEntry(std::move(entry)); | ||
| 281 | |||
| 282 | return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, | ||
| 283 | size_in_bytes, std::move(registry), | ||
| 284 | MakeEntries(ir), std::move(program))); | ||
| 373 | } | 285 | } |
| 374 | 286 | ||
| 375 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, | 287 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, |
| 376 | const UnspecializedShader& unspecialized) { | 288 | const PrecompiledShader& precompiled_shader, |
| 377 | return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.type, | 289 | std::size_t size_in_bytes) { |
| 378 | unspecialized.entries, unspecialized.code, | 290 | return std::shared_ptr<CachedShader>(new CachedShader( |
| 379 | unspecialized.code_b)); | 291 | params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, |
| 380 | } | 292 | precompiled_shader.entries, precompiled_shader.program)); |
| 381 | |||
| 382 | GLuint CachedShader::GetHandle(const ProgramVariant& variant) { | ||
| 383 | EnsureValidLockerVariant(); | ||
| 384 | |||
| 385 | const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant); | ||
| 386 | auto& program = entry->second; | ||
| 387 | if (!is_cache_miss) { | ||
| 388 | return program->handle; | ||
| 389 | } | ||
| 390 | |||
| 391 | program = BuildShader(device, unique_identifier, shader_type, code, code_b, | ||
| 392 | *curr_locker_variant->locker, variant); | ||
| 393 | disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker)); | ||
| 394 | |||
| 395 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); | ||
| 396 | return program->handle; | ||
| 397 | } | ||
| 398 | |||
| 399 | bool CachedShader::EnsureValidLockerVariant() { | ||
| 400 | const auto previous_variant = curr_locker_variant; | ||
| 401 | if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) { | ||
| 402 | curr_locker_variant = nullptr; | ||
| 403 | } | ||
| 404 | if (!curr_locker_variant) { | ||
| 405 | for (auto& variant : locker_variants) { | ||
| 406 | if (variant->locker->IsConsistent()) { | ||
| 407 | curr_locker_variant = variant.get(); | ||
| 408 | } | ||
| 409 | } | ||
| 410 | } | ||
| 411 | if (!curr_locker_variant) { | ||
| 412 | auto& new_variant = locker_variants.emplace_back(); | ||
| 413 | new_variant = std::make_unique<LockerVariant>(); | ||
| 414 | new_variant->locker = MakeLocker(system, shader_type); | ||
| 415 | curr_locker_variant = new_variant.get(); | ||
| 416 | } | ||
| 417 | return previous_variant == curr_locker_variant; | ||
| 418 | } | ||
| 419 | |||
| 420 | ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, | ||
| 421 | const ConstBufferLocker& locker) const { | ||
| 422 | return ShaderDiskCacheUsage{unique_identifier, variant, | ||
| 423 | locker.GetBoundBuffer(), locker.GetKeys(), | ||
| 424 | locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; | ||
| 425 | } | 293 | } |
| 426 | 294 | ||
| 427 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, | 295 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, |
| @@ -431,16 +299,12 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& | |||
| 431 | 299 | ||
| 432 | void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | 300 | void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, |
| 433 | const VideoCore::DiskResourceLoadCallback& callback) { | 301 | const VideoCore::DiskResourceLoadCallback& callback) { |
| 434 | const auto transferable = disk_cache.LoadTransferable(); | 302 | const std::optional transferable = disk_cache.LoadTransferable(); |
| 435 | if (!transferable) { | 303 | if (!transferable) { |
| 436 | return; | 304 | return; |
| 437 | } | 305 | } |
| 438 | const auto [raws, shader_usages] = *transferable; | ||
| 439 | if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) { | ||
| 440 | return; | ||
| 441 | } | ||
| 442 | 306 | ||
| 443 | const auto dumps = disk_cache.LoadPrecompiled(); | 307 | const std::vector gl_cache = disk_cache.LoadPrecompiled(); |
| 444 | const auto supported_formats = GetSupportedFormats(); | 308 | const auto supported_formats = GetSupportedFormats(); |
| 445 | 309 | ||
| 446 | // Track if precompiled cache was altered during loading to know if we have to | 310 | // Track if precompiled cache was altered during loading to know if we have to |
| @@ -449,77 +313,82 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 449 | 313 | ||
| 450 | // Inform the frontend about shader build initialization | 314 | // Inform the frontend about shader build initialization |
| 451 | if (callback) { | 315 | if (callback) { |
| 452 | callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size()); | 316 | callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); |
| 453 | } | 317 | } |
| 454 | 318 | ||
| 455 | std::mutex mutex; | 319 | std::mutex mutex; |
| 456 | std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex | 320 | std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex |
| 457 | std::atomic_bool compilation_failed = false; | 321 | std::atomic_bool gl_cache_failed = false; |
| 458 | 322 | ||
| 459 | const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, | 323 | const auto find_precompiled = [&gl_cache](u64 id) { |
| 460 | std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages, | 324 | return std::find_if(gl_cache.begin(), gl_cache.end(), |
| 461 | const ShaderDumpsMap& dumps) { | 325 | [id](const auto& entry) { return entry.unique_identifier == id; }); |
| 326 | }; | ||
| 327 | |||
| 328 | const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, | ||
| 329 | std::size_t end) { | ||
| 462 | context->MakeCurrent(); | 330 | context->MakeCurrent(); |
| 463 | SCOPE_EXIT({ return context->DoneCurrent(); }); | 331 | SCOPE_EXIT({ return context->DoneCurrent(); }); |
| 464 | 332 | ||
| 465 | for (std::size_t i = begin; i < end; ++i) { | 333 | for (std::size_t i = begin; i < end; ++i) { |
| 466 | if (stop_loading || compilation_failed) { | 334 | if (stop_loading) { |
| 467 | return; | 335 | return; |
| 468 | } | 336 | } |
| 469 | const auto& usage{shader_usages[i]}; | 337 | const auto& entry = (*transferable)[i]; |
| 470 | const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; | 338 | const u64 uid = entry.unique_identifier; |
| 471 | const auto dump{dumps.find(usage)}; | 339 | const auto it = find_precompiled(uid); |
| 472 | 340 | const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; | |
| 473 | CachedProgram shader; | 341 | |
| 474 | if (dump != dumps.end()) { | 342 | const bool is_compute = entry.type == ShaderType::Compute; |
| 475 | // If the shader is dumped, attempt to load it with | 343 | const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; |
| 476 | shader = GeneratePrecompiledProgram(dump->second, supported_formats); | 344 | auto registry = MakeRegistry(entry); |
| 477 | if (!shader) { | 345 | const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); |
| 478 | compilation_failed = true; | 346 | |
| 479 | return; | 347 | std::shared_ptr<OGLProgram> program; |
| 348 | if (precompiled_entry) { | ||
| 349 | // If the shader is precompiled, attempt to load it with | ||
| 350 | program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); | ||
| 351 | if (!program) { | ||
| 352 | gl_cache_failed = true; | ||
| 480 | } | 353 | } |
| 481 | } | 354 | } |
| 482 | if (!shader) { | 355 | if (!program) { |
| 483 | auto locker{MakeLocker(system, unspecialized.type)}; | 356 | // Otherwise compile it from GLSL |
| 484 | FillLocker(*locker, usage); | 357 | program = BuildShader(device, entry.type, uid, ir, *registry, true); |
| 485 | |||
| 486 | shader = BuildShader(device, usage.unique_identifier, unspecialized.type, | ||
| 487 | unspecialized.code, unspecialized.code_b, *locker, | ||
| 488 | usage.variant, true); | ||
| 489 | } | 358 | } |
| 490 | 359 | ||
| 360 | PrecompiledShader shader; | ||
| 361 | shader.program = std::move(program); | ||
| 362 | shader.registry = std::move(registry); | ||
| 363 | shader.entries = MakeEntries(ir); | ||
| 364 | |||
| 491 | std::scoped_lock lock{mutex}; | 365 | std::scoped_lock lock{mutex}; |
| 492 | if (callback) { | 366 | if (callback) { |
| 493 | callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, | 367 | callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, |
| 494 | shader_usages.size()); | 368 | transferable->size()); |
| 495 | } | 369 | } |
| 496 | 370 | runtime_cache.emplace(entry.unique_identifier, std::move(shader)); | |
| 497 | precompiled_programs.emplace(usage, std::move(shader)); | ||
| 498 | |||
| 499 | // TODO(Rodrigo): Is there a better way to do this? | ||
| 500 | precompiled_variants[usage.unique_identifier].push_back( | ||
| 501 | precompiled_programs.find(usage)); | ||
| 502 | } | 371 | } |
| 503 | }; | 372 | }; |
| 504 | 373 | ||
| 505 | const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; | 374 | const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; |
| 506 | const std::size_t bucket_size{shader_usages.size() / num_workers}; | 375 | const std::size_t bucket_size{transferable->size() / num_workers}; |
| 507 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); | 376 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); |
| 508 | std::vector<std::thread> threads(num_workers); | 377 | std::vector<std::thread> threads(num_workers); |
| 509 | for (std::size_t i = 0; i < num_workers; ++i) { | 378 | for (std::size_t i = 0; i < num_workers; ++i) { |
| 510 | const bool is_last_worker = i + 1 == num_workers; | 379 | const bool is_last_worker = i + 1 == num_workers; |
| 511 | const std::size_t start{bucket_size * i}; | 380 | const std::size_t start{bucket_size * i}; |
| 512 | const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size}; | 381 | const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size}; |
| 513 | 382 | ||
| 514 | // On some platforms the shared context has to be created from the GUI thread | 383 | // On some platforms the shared context has to be created from the GUI thread |
| 515 | contexts[i] = emu_window.CreateSharedContext(); | 384 | contexts[i] = emu_window.CreateSharedContext(); |
| 516 | threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps); | 385 | threads[i] = std::thread(worker, contexts[i].get(), start, end); |
| 517 | } | 386 | } |
| 518 | for (auto& thread : threads) { | 387 | for (auto& thread : threads) { |
| 519 | thread.join(); | 388 | thread.join(); |
| 520 | } | 389 | } |
| 521 | 390 | ||
| 522 | if (compilation_failed) { | 391 | if (gl_cache_failed) { |
| 523 | // Invalidate the precompiled cache if a shader dumped shader was rejected | 392 | // Invalidate the precompiled cache if a shader dumped shader was rejected |
| 524 | disk_cache.InvalidatePrecompiled(); | 393 | disk_cache.InvalidatePrecompiled(); |
| 525 | precompiled_cache_altered = true; | 394 | precompiled_cache_altered = true; |
| @@ -532,11 +401,12 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 532 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw | 401 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw |
| 533 | // before precompiling them | 402 | // before precompiling them |
| 534 | 403 | ||
| 535 | for (std::size_t i = 0; i < shader_usages.size(); ++i) { | 404 | for (std::size_t i = 0; i < transferable->size(); ++i) { |
| 536 | const auto& usage{shader_usages[i]}; | 405 | const u64 id = (*transferable)[i].unique_identifier; |
| 537 | if (dumps.find(usage) == dumps.end()) { | 406 | const auto it = find_precompiled(id); |
| 538 | const auto& program{precompiled_programs.at(usage)}; | 407 | if (it == gl_cache.end()) { |
| 539 | disk_cache.SaveDump(usage, program->handle); | 408 | const GLuint program = runtime_cache.at(id).program->handle; |
| 409 | disk_cache.SavePrecompiled(id, program); | ||
| 540 | precompiled_cache_altered = true; | 410 | precompiled_cache_altered = true; |
| 541 | } | 411 | } |
| 542 | } | 412 | } |
| @@ -546,84 +416,33 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 546 | } | 416 | } |
| 547 | } | 417 | } |
| 548 | 418 | ||
| 549 | const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { | 419 | std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( |
| 550 | const auto it = precompiled_variants.find(unique_identifier); | 420 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, |
| 551 | return it == precompiled_variants.end() ? nullptr : &it->second; | 421 | const std::unordered_set<GLenum>& supported_formats) { |
| 552 | } | 422 | if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { |
| 553 | 423 | LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); | |
| 554 | CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( | ||
| 555 | const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) { | ||
| 556 | if (supported_formats.find(dump.binary_format) == supported_formats.end()) { | ||
| 557 | LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); | ||
| 558 | return {}; | 424 | return {}; |
| 559 | } | 425 | } |
| 560 | 426 | ||
| 561 | CachedProgram shader = std::make_shared<OGLProgram>(); | 427 | auto program = std::make_shared<OGLProgram>(); |
| 562 | shader->handle = glCreateProgram(); | 428 | program->handle = glCreateProgram(); |
| 563 | glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); | 429 | glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); |
| 564 | glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(), | 430 | glProgramBinary(program->handle, precompiled_entry.binary_format, |
| 565 | static_cast<GLsizei>(dump.binary.size())); | 431 | precompiled_entry.binary.data(), |
| 566 | 432 | static_cast<GLsizei>(precompiled_entry.binary.size())); | |
| 567 | GLint link_status{}; | 433 | |
| 568 | glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status); | 434 | GLint link_status; |
| 435 | glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status); | ||
| 569 | if (link_status == GL_FALSE) { | 436 | if (link_status == GL_FALSE) { |
| 570 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing"); | 437 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); |
| 571 | return {}; | 438 | return {}; |
| 572 | } | 439 | } |
| 573 | 440 | ||
| 574 | return shader; | 441 | return program; |
| 575 | } | ||
| 576 | |||
| 577 | bool ShaderCacheOpenGL::GenerateUnspecializedShaders( | ||
| 578 | const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, | ||
| 579 | const std::vector<ShaderDiskCacheRaw>& raws) { | ||
| 580 | if (callback) { | ||
| 581 | callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); | ||
| 582 | } | ||
| 583 | |||
| 584 | for (std::size_t i = 0; i < raws.size(); ++i) { | ||
| 585 | if (stop_loading) { | ||
| 586 | return false; | ||
| 587 | } | ||
| 588 | const auto& raw{raws[i]}; | ||
| 589 | const u64 unique_identifier{raw.GetUniqueIdentifier()}; | ||
| 590 | const u64 calculated_hash{ | ||
| 591 | GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())}; | ||
| 592 | if (unique_identifier != calculated_hash) { | ||
| 593 | LOG_ERROR(Render_OpenGL, | ||
| 594 | "Invalid hash in entry={:016x} (obtained hash={:016x}) - " | ||
| 595 | "removing shader cache", | ||
| 596 | raw.GetUniqueIdentifier(), calculated_hash); | ||
| 597 | disk_cache.InvalidateTransferable(); | ||
| 598 | return false; | ||
| 599 | } | ||
| 600 | |||
| 601 | const u32 main_offset = | ||
| 602 | raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||
| 603 | ConstBufferLocker locker(raw.GetType()); | ||
| 604 | const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker); | ||
| 605 | // TODO(Rodrigo): Handle VertexA shaders | ||
| 606 | // std::optional<ShaderIR> ir_b; | ||
| 607 | // if (raw.HasProgramA()) { | ||
| 608 | // ir_b.emplace(raw.GetProgramCodeB(), main_offset); | ||
| 609 | // } | ||
| 610 | |||
| 611 | UnspecializedShader unspecialized; | ||
| 612 | unspecialized.entries = GLShader::GetEntries(ir); | ||
| 613 | unspecialized.type = raw.GetType(); | ||
| 614 | unspecialized.code = raw.GetCode(); | ||
| 615 | unspecialized.code_b = raw.GetCodeB(); | ||
| 616 | unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); | ||
| 617 | |||
| 618 | if (callback) { | ||
| 619 | callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); | ||
| 620 | } | ||
| 621 | } | ||
| 622 | return true; | ||
| 623 | } | 442 | } |
| 624 | 443 | ||
| 625 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 444 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| 626 | if (!system.GPU().Maxwell3D().dirty.shaders) { | 445 | if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { |
| 627 | return last_shaders[static_cast<std::size_t>(program)]; | 446 | return last_shaders[static_cast<std::size_t>(program)]; |
| 628 | } | 447 | } |
| 629 | 448 | ||
| @@ -647,17 +466,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 647 | 466 | ||
| 648 | const auto unique_identifier = GetUniqueIdentifier( | 467 | const auto unique_identifier = GetUniqueIdentifier( |
| 649 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); | 468 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); |
| 650 | const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); | ||
| 651 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; | 469 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; |
| 652 | const ShaderParameters params{system, disk_cache, precompiled_variants, device, | 470 | const ShaderParameters params{system, disk_cache, device, |
| 653 | cpu_addr, host_ptr, unique_identifier}; | 471 | cpu_addr, host_ptr, unique_identifier}; |
| 654 | 472 | ||
| 655 | const auto found = unspecialized_shaders.find(unique_identifier); | 473 | const auto found = runtime_cache.find(unique_identifier); |
| 656 | if (found == unspecialized_shaders.end()) { | 474 | if (found == runtime_cache.end()) { |
| 657 | shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), | 475 | shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), |
| 658 | std::move(code_b)); | 476 | std::move(code_b)); |
| 659 | } else { | 477 | } else { |
| 660 | shader = CachedShader::CreateFromCache(params, found->second); | 478 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 479 | shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | ||
| 661 | } | 480 | } |
| 662 | Register(shader); | 481 | Register(shader); |
| 663 | 482 | ||
| @@ -672,19 +491,19 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | |||
| 672 | return kernel; | 491 | return kernel; |
| 673 | } | 492 | } |
| 674 | 493 | ||
| 675 | // No kernel found - create a new one | 494 | // No kernel found, create a new one |
| 676 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | 495 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; |
| 677 | const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})}; | 496 | const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; |
| 678 | const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); | ||
| 679 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | 497 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; |
| 680 | const ShaderParameters params{system, disk_cache, precompiled_variants, device, | 498 | const ShaderParameters params{system, disk_cache, device, |
| 681 | cpu_addr, host_ptr, unique_identifier}; | 499 | cpu_addr, host_ptr, unique_identifier}; |
| 682 | 500 | ||
| 683 | const auto found = unspecialized_shaders.find(unique_identifier); | 501 | const auto found = runtime_cache.find(unique_identifier); |
| 684 | if (found == unspecialized_shaders.end()) { | 502 | if (found == runtime_cache.end()) { |
| 685 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); | 503 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); |
| 686 | } else { | 504 | } else { |
| 687 | kernel = CachedShader::CreateFromCache(params, found->second); | 505 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 506 | kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | ||
| 688 | } | 507 | } |
| 689 | 508 | ||
| 690 | Register(kernel); | 509 | Register(kernel); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 7b1470db3..4935019fc 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 22 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 23 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 23 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 24 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | 24 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" |
| 25 | #include "video_core/shader/const_buffer_locker.h" | 25 | #include "video_core/shader/registry.h" |
| 26 | #include "video_core/shader/shader_ir.h" | 26 | #include "video_core/shader/shader_ir.h" |
| 27 | 27 | ||
| 28 | namespace Core { | 28 | namespace Core { |
| @@ -41,22 +41,17 @@ class RasterizerOpenGL; | |||
| 41 | struct UnspecializedShader; | 41 | struct UnspecializedShader; |
| 42 | 42 | ||
| 43 | using Shader = std::shared_ptr<CachedShader>; | 43 | using Shader = std::shared_ptr<CachedShader>; |
| 44 | using CachedProgram = std::shared_ptr<OGLProgram>; | ||
| 45 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 46 | using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; | 45 | |
| 47 | using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>; | 46 | struct PrecompiledShader { |
| 48 | 47 | std::shared_ptr<OGLProgram> program; | |
| 49 | struct UnspecializedShader { | 48 | std::shared_ptr<VideoCommon::Shader::Registry> registry; |
| 50 | GLShader::ShaderEntries entries; | 49 | ShaderEntries entries; |
| 51 | Tegra::Engines::ShaderType type; | ||
| 52 | ProgramCode code; | ||
| 53 | ProgramCode code_b; | ||
| 54 | }; | 50 | }; |
| 55 | 51 | ||
| 56 | struct ShaderParameters { | 52 | struct ShaderParameters { |
| 57 | Core::System& system; | 53 | Core::System& system; |
| 58 | ShaderDiskCacheOpenGL& disk_cache; | 54 | ShaderDiskCacheOpenGL& disk_cache; |
| 59 | const PrecompiledVariants* precompiled_variants; | ||
| 60 | const Device& device; | 55 | const Device& device; |
| 61 | VAddr cpu_addr; | 56 | VAddr cpu_addr; |
| 62 | u8* host_ptr; | 57 | u8* host_ptr; |
| @@ -65,61 +60,45 @@ struct ShaderParameters { | |||
| 65 | 60 | ||
| 66 | class CachedShader final : public RasterizerCacheObject { | 61 | class CachedShader final : public RasterizerCacheObject { |
| 67 | public: | 62 | public: |
| 68 | static Shader CreateStageFromMemory(const ShaderParameters& params, | 63 | ~CachedShader(); |
| 69 | Maxwell::ShaderProgram program_type, | ||
| 70 | ProgramCode program_code, ProgramCode program_code_b); | ||
| 71 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); | ||
| 72 | 64 | ||
| 73 | static Shader CreateFromCache(const ShaderParameters& params, | 65 | /// Gets the GL program handle for the shader |
| 74 | const UnspecializedShader& unspecialized); | 66 | GLuint GetHandle() const; |
| 75 | 67 | ||
| 68 | /// Returns the guest CPU address of the shader | ||
| 76 | VAddr GetCpuAddr() const override { | 69 | VAddr GetCpuAddr() const override { |
| 77 | return cpu_addr; | 70 | return cpu_addr; |
| 78 | } | 71 | } |
| 79 | 72 | ||
| 73 | /// Returns the size in bytes of the shader | ||
| 80 | std::size_t GetSizeInBytes() const override { | 74 | std::size_t GetSizeInBytes() const override { |
| 81 | return code.size() * sizeof(u64); | 75 | return size_in_bytes; |
| 82 | } | 76 | } |
| 83 | 77 | ||
| 84 | /// Gets the shader entries for the shader | 78 | /// Gets the shader entries for the shader |
| 85 | const GLShader::ShaderEntries& GetShaderEntries() const { | 79 | const ShaderEntries& GetEntries() const { |
| 86 | return entries; | 80 | return entries; |
| 87 | } | 81 | } |
| 88 | 82 | ||
| 89 | /// Gets the GL program handle for the shader | 83 | static Shader CreateStageFromMemory(const ShaderParameters& params, |
| 90 | GLuint GetHandle(const ProgramVariant& variant); | 84 | Maxwell::ShaderProgram program_type, |
| 91 | 85 | ProgramCode program_code, ProgramCode program_code_b); | |
| 92 | private: | 86 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); |
| 93 | struct LockerVariant { | ||
| 94 | std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker; | ||
| 95 | std::unordered_map<ProgramVariant, CachedProgram> programs; | ||
| 96 | }; | ||
| 97 | |||
| 98 | explicit CachedShader(const ShaderParameters& params, Tegra::Engines::ShaderType shader_type, | ||
| 99 | GLShader::ShaderEntries entries, ProgramCode program_code, | ||
| 100 | ProgramCode program_code_b); | ||
| 101 | |||
| 102 | bool EnsureValidLockerVariant(); | ||
| 103 | |||
| 104 | ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, | ||
| 105 | const VideoCommon::Shader::ConstBufferLocker& locker) const; | ||
| 106 | |||
| 107 | Core::System& system; | ||
| 108 | ShaderDiskCacheOpenGL& disk_cache; | ||
| 109 | const Device& device; | ||
| 110 | |||
| 111 | VAddr cpu_addr{}; | ||
| 112 | |||
| 113 | u64 unique_identifier{}; | ||
| 114 | Tegra::Engines::ShaderType shader_type{}; | ||
| 115 | |||
| 116 | GLShader::ShaderEntries entries; | ||
| 117 | 87 | ||
| 118 | ProgramCode code; | 88 | static Shader CreateFromCache(const ShaderParameters& params, |
| 119 | ProgramCode code_b; | 89 | const PrecompiledShader& precompiled_shader, |
| 90 | std::size_t size_in_bytes); | ||
| 120 | 91 | ||
| 121 | LockerVariant* curr_locker_variant = nullptr; | 92 | private: |
| 122 | std::vector<std::unique_ptr<LockerVariant>> locker_variants; | 93 | explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, |
| 94 | std::shared_ptr<VideoCommon::Shader::Registry> registry, | ||
| 95 | ShaderEntries entries, std::shared_ptr<OGLProgram> program); | ||
| 96 | |||
| 97 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | ||
| 98 | ShaderEntries entries; | ||
| 99 | VAddr cpu_addr = 0; | ||
| 100 | std::size_t size_in_bytes = 0; | ||
| 101 | std::shared_ptr<OGLProgram> program; | ||
| 123 | }; | 102 | }; |
| 124 | 103 | ||
| 125 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { | 104 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { |
| @@ -142,25 +121,15 @@ protected: | |||
| 142 | void FlushObjectInner(const Shader& object) override {} | 121 | void FlushObjectInner(const Shader& object) override {} |
| 143 | 122 | ||
| 144 | private: | 123 | private: |
| 145 | bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading, | 124 | std::shared_ptr<OGLProgram> GeneratePrecompiledProgram( |
| 146 | const VideoCore::DiskResourceLoadCallback& callback, | 125 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, |
| 147 | const std::vector<ShaderDiskCacheRaw>& raws); | 126 | const std::unordered_set<GLenum>& supported_formats); |
| 148 | |||
| 149 | CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, | ||
| 150 | const std::unordered_set<GLenum>& supported_formats); | ||
| 151 | |||
| 152 | const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const; | ||
| 153 | 127 | ||
| 154 | Core::System& system; | 128 | Core::System& system; |
| 155 | Core::Frontend::EmuWindow& emu_window; | 129 | Core::Frontend::EmuWindow& emu_window; |
| 156 | const Device& device; | 130 | const Device& device; |
| 157 | |||
| 158 | ShaderDiskCacheOpenGL disk_cache; | 131 | ShaderDiskCacheOpenGL disk_cache; |
| 159 | 132 | std::unordered_map<u64, PrecompiledShader> runtime_cache; | |
| 160 | PrecompiledPrograms precompiled_programs; | ||
| 161 | std::unordered_map<u64, PrecompiledVariants> precompiled_variants; | ||
| 162 | |||
| 163 | std::unordered_map<u64, UnspecializedShader> unspecialized_shaders; | ||
| 164 | 133 | ||
| 165 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 134 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; |
| 166 | }; | 135 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 4735000b5..8aa4a7ac9 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -23,8 +23,9 @@ | |||
| 23 | #include "video_core/shader/ast.h" | 23 | #include "video_core/shader/ast.h" |
| 24 | #include "video_core/shader/node.h" | 24 | #include "video_core/shader/node.h" |
| 25 | #include "video_core/shader/shader_ir.h" | 25 | #include "video_core/shader/shader_ir.h" |
| 26 | #include "video_core/shader/transform_feedback.h" | ||
| 26 | 27 | ||
| 27 | namespace OpenGL::GLShader { | 28 | namespace OpenGL { |
| 28 | 29 | ||
| 29 | namespace { | 30 | namespace { |
| 30 | 31 | ||
| @@ -36,6 +37,8 @@ using Tegra::Shader::IpaInterpMode; | |||
| 36 | using Tegra::Shader::IpaMode; | 37 | using Tegra::Shader::IpaMode; |
| 37 | using Tegra::Shader::IpaSampleMode; | 38 | using Tegra::Shader::IpaSampleMode; |
| 38 | using Tegra::Shader::Register; | 39 | using Tegra::Shader::Register; |
| 40 | using VideoCommon::Shader::BuildTransformFeedback; | ||
| 41 | using VideoCommon::Shader::Registry; | ||
| 39 | 42 | ||
| 40 | using namespace std::string_literals; | 43 | using namespace std::string_literals; |
| 41 | using namespace VideoCommon::Shader; | 44 | using namespace VideoCommon::Shader; |
| @@ -48,6 +51,11 @@ class ExprDecompiler; | |||
| 48 | 51 | ||
| 49 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; | 52 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| 50 | 53 | ||
| 54 | constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; | ||
| 55 | |||
| 56 | constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; | ||
| 57 | constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; | ||
| 58 | |||
| 51 | struct TextureOffset {}; | 59 | struct TextureOffset {}; |
| 52 | struct TextureDerivates {}; | 60 | struct TextureDerivates {}; |
| 53 | using TextureArgument = std::pair<Type, Node>; | 61 | using TextureArgument = std::pair<Type, Node>; |
| @@ -56,6 +64,25 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument> | |||
| 56 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | 64 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = |
| 57 | static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); | 65 | static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); |
| 58 | 66 | ||
| 67 | constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt | ||
| 68 | #define ftou floatBitsToUint | ||
| 69 | #define itof intBitsToFloat | ||
| 70 | #define utof uintBitsToFloat | ||
| 71 | |||
| 72 | bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ | ||
| 73 | bvec2 is_nan1 = isnan(pair1); | ||
| 74 | bvec2 is_nan2 = isnan(pair2); | ||
| 75 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); | ||
| 76 | }} | ||
| 77 | |||
| 78 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); | ||
| 79 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); | ||
| 80 | |||
| 81 | layout (std140, binding = {}) uniform vs_config {{ | ||
| 82 | float y_direction; | ||
| 83 | }}; | ||
| 84 | )"; | ||
| 85 | |||
| 59 | class ShaderWriter final { | 86 | class ShaderWriter final { |
| 60 | public: | 87 | public: |
| 61 | void AddExpression(std::string_view text) { | 88 | void AddExpression(std::string_view text) { |
| @@ -269,12 +296,41 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { | |||
| 269 | } | 296 | } |
| 270 | } | 297 | } |
| 271 | 298 | ||
| 299 | /// Describes primitive behavior on geometry shaders | ||
| 300 | std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { | ||
| 301 | switch (topology) { | ||
| 302 | case Maxwell::PrimitiveTopology::Points: | ||
| 303 | return {"points", 1}; | ||
| 304 | case Maxwell::PrimitiveTopology::Lines: | ||
| 305 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 306 | return {"lines", 2}; | ||
| 307 | case Maxwell::PrimitiveTopology::LinesAdjacency: | ||
| 308 | case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||
| 309 | return {"lines_adjacency", 4}; | ||
| 310 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 311 | case Maxwell::PrimitiveTopology::TriangleStrip: | ||
| 312 | case Maxwell::PrimitiveTopology::TriangleFan: | ||
| 313 | return {"triangles", 3}; | ||
| 314 | case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||
| 315 | case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||
| 316 | return {"triangles_adjacency", 6}; | ||
| 317 | default: | ||
| 318 | UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); | ||
| 319 | return {"points", 1}; | ||
| 320 | } | ||
| 321 | } | ||
| 322 | |||
| 272 | /// Generates code to use for a swizzle operation. | 323 | /// Generates code to use for a swizzle operation. |
| 273 | constexpr const char* GetSwizzle(u32 element) { | 324 | constexpr const char* GetSwizzle(std::size_t element) { |
| 274 | constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; | 325 | constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; |
| 275 | return swizzle.at(element); | 326 | return swizzle.at(element); |
| 276 | } | 327 | } |
| 277 | 328 | ||
| 329 | constexpr const char* GetColorSwizzle(std::size_t element) { | ||
| 330 | constexpr std::array swizzle = {".r", ".g", ".b", ".a"}; | ||
| 331 | return swizzle.at(element); | ||
| 332 | } | ||
| 333 | |||
| 278 | /// Translate topology | 334 | /// Translate topology |
| 279 | std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | 335 | std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { |
| 280 | switch (topology) { | 336 | switch (topology) { |
| @@ -310,10 +366,19 @@ constexpr bool IsGenericAttribute(Attribute::Index index) { | |||
| 310 | return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; | 366 | return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; |
| 311 | } | 367 | } |
| 312 | 368 | ||
| 369 | constexpr bool IsLegacyTexCoord(Attribute::Index index) { | ||
| 370 | return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) && | ||
| 371 | static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7); | ||
| 372 | } | ||
| 373 | |||
| 313 | constexpr Attribute::Index ToGenericAttribute(u64 value) { | 374 | constexpr Attribute::Index ToGenericAttribute(u64 value) { |
| 314 | return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0)); | 375 | return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0)); |
| 315 | } | 376 | } |
| 316 | 377 | ||
| 378 | constexpr int GetLegacyTexCoordIndex(Attribute::Index index) { | ||
| 379 | return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0); | ||
| 380 | } | ||
| 381 | |||
| 317 | u32 GetGenericAttributeIndex(Attribute::Index index) { | 382 | u32 GetGenericAttributeIndex(Attribute::Index index) { |
| 318 | ASSERT(IsGenericAttribute(index)); | 383 | ASSERT(IsGenericAttribute(index)); |
| 319 | return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); | 384 | return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); |
| @@ -337,15 +402,66 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 337 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | 402 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); |
| 338 | } | 403 | } |
| 339 | 404 | ||
| 340 | [[deprecated]] constexpr bool IsVertexShader(ShaderType stage) { | 405 | struct GenericVaryingDescription { |
| 341 | return stage == ShaderType::Vertex; | 406 | std::string name; |
| 342 | } | 407 | u8 first_element = 0; |
| 408 | bool is_scalar = false; | ||
| 409 | }; | ||
| 343 | 410 | ||
| 344 | class GLSLDecompiler final { | 411 | class GLSLDecompiler final { |
| 345 | public: | 412 | public: |
| 346 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, | 413 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, |
| 347 | std::string suffix) | 414 | ShaderType stage, std::string_view identifier, std::string_view suffix) |
| 348 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} | 415 | : device{device}, ir{ir}, registry{registry}, stage{stage}, |
| 416 | identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} { | ||
| 417 | if (stage != ShaderType::Compute) { | ||
| 418 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | ||
| 419 | } | ||
| 420 | } | ||
| 421 | |||
| 422 | void Decompile() { | ||
| 423 | DeclareHeader(); | ||
| 424 | DeclareVertex(); | ||
| 425 | DeclareGeometry(); | ||
| 426 | DeclareFragment(); | ||
| 427 | DeclareCompute(); | ||
| 428 | DeclareInputAttributes(); | ||
| 429 | DeclareOutputAttributes(); | ||
| 430 | DeclareImages(); | ||
| 431 | DeclareSamplers(); | ||
| 432 | DeclareGlobalMemory(); | ||
| 433 | DeclareConstantBuffers(); | ||
| 434 | DeclareLocalMemory(); | ||
| 435 | DeclareRegisters(); | ||
| 436 | DeclarePredicates(); | ||
| 437 | DeclareInternalFlags(); | ||
| 438 | DeclareCustomVariables(); | ||
| 439 | DeclarePhysicalAttributeReader(); | ||
| 440 | |||
| 441 | code.AddLine("void main() {{"); | ||
| 442 | ++code.scope; | ||
| 443 | |||
| 444 | if (stage == ShaderType::Vertex) { | ||
| 445 | code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); | ||
| 446 | } | ||
| 447 | |||
| 448 | if (ir.IsDecompiled()) { | ||
| 449 | DecompileAST(); | ||
| 450 | } else { | ||
| 451 | DecompileBranchMode(); | ||
| 452 | } | ||
| 453 | |||
| 454 | --code.scope; | ||
| 455 | code.AddLine("}}"); | ||
| 456 | } | ||
| 457 | |||
| 458 | std::string GetResult() { | ||
| 459 | return code.GetResult(); | ||
| 460 | } | ||
| 461 | |||
| 462 | private: | ||
| 463 | friend class ASTDecompiler; | ||
| 464 | friend class ExprDecompiler; | ||
| 349 | 465 | ||
| 350 | void DecompileBranchMode() { | 466 | void DecompileBranchMode() { |
| 351 | // VM's program counter | 467 | // VM's program counter |
| @@ -387,46 +503,40 @@ public: | |||
| 387 | 503 | ||
| 388 | void DecompileAST(); | 504 | void DecompileAST(); |
| 389 | 505 | ||
| 390 | void Decompile() { | 506 | void DeclareHeader() { |
| 391 | DeclareVertex(); | 507 | if (!identifier.empty()) { |
| 392 | DeclareGeometry(); | 508 | code.AddLine("// {}", identifier); |
| 393 | DeclareRegisters(); | 509 | } |
| 394 | DeclareCustomVariables(); | 510 | code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core"); |
| 395 | DeclarePredicates(); | 511 | code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); |
| 396 | DeclareLocalMemory(); | 512 | if (device.HasShaderBallot()) { |
| 397 | DeclareInternalFlags(); | 513 | code.AddLine("#extension GL_ARB_shader_ballot : require"); |
| 398 | DeclareInputAttributes(); | 514 | } |
| 399 | DeclareOutputAttributes(); | 515 | if (device.HasVertexViewportLayer()) { |
| 400 | DeclareConstantBuffers(); | 516 | code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require"); |
| 401 | DeclareGlobalMemory(); | 517 | } |
| 402 | DeclareSamplers(); | 518 | if (device.HasImageLoadFormatted()) { |
| 403 | DeclareImages(); | 519 | code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); |
| 404 | DeclarePhysicalAttributeReader(); | ||
| 405 | |||
| 406 | code.AddLine("void execute_{}() {{", suffix); | ||
| 407 | ++code.scope; | ||
| 408 | |||
| 409 | if (ir.IsDecompiled()) { | ||
| 410 | DecompileAST(); | ||
| 411 | } else { | ||
| 412 | DecompileBranchMode(); | ||
| 413 | } | 520 | } |
| 521 | if (device.HasWarpIntrinsics()) { | ||
| 522 | code.AddLine("#extension GL_NV_gpu_shader5 : require"); | ||
| 523 | code.AddLine("#extension GL_NV_shader_thread_group : require"); | ||
| 524 | code.AddLine("#extension GL_NV_shader_thread_shuffle : require"); | ||
| 525 | } | ||
| 526 | // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 | ||
| 527 | // operations) on places where we don't want to. | ||
| 528 | // Thanks to Ryujinx for finding this workaround. | ||
| 529 | code.AddLine("#pragma optionNV(fastmath off)"); | ||
| 414 | 530 | ||
| 415 | --code.scope; | 531 | code.AddNewLine(); |
| 416 | code.AddLine("}}"); | ||
| 417 | } | ||
| 418 | 532 | ||
| 419 | std::string GetResult() { | 533 | code.AddLine(CommonDeclarations, EmulationUniformBlockBinding); |
| 420 | return code.GetResult(); | ||
| 421 | } | 534 | } |
| 422 | 535 | ||
| 423 | private: | ||
| 424 | friend class ASTDecompiler; | ||
| 425 | friend class ExprDecompiler; | ||
| 426 | |||
| 427 | void DeclareVertex() { | 536 | void DeclareVertex() { |
| 428 | if (!IsVertexShader(stage)) | 537 | if (stage != ShaderType::Vertex) { |
| 429 | return; | 538 | return; |
| 539 | } | ||
| 430 | 540 | ||
| 431 | DeclareVertexRedeclarations(); | 541 | DeclareVertexRedeclarations(); |
| 432 | } | 542 | } |
| @@ -436,9 +546,15 @@ private: | |||
| 436 | return; | 546 | return; |
| 437 | } | 547 | } |
| 438 | 548 | ||
| 549 | const auto& info = registry.GetGraphicsInfo(); | ||
| 550 | const auto input_topology = info.primitive_topology; | ||
| 551 | const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); | ||
| 552 | max_input_vertices = max_vertices; | ||
| 553 | code.AddLine("layout ({}) in;", glsl_topology); | ||
| 554 | |||
| 439 | const auto topology = GetTopologyName(header.common3.output_topology); | 555 | const auto topology = GetTopologyName(header.common3.output_topology); |
| 440 | const auto max_vertices = header.common4.max_output_vertices.Value(); | 556 | const auto max_output_vertices = header.common4.max_output_vertices.Value(); |
| 441 | code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); | 557 | code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); |
| 442 | code.AddNewLine(); | 558 | code.AddNewLine(); |
| 443 | 559 | ||
| 444 | code.AddLine("in gl_PerVertex {{"); | 560 | code.AddLine("in gl_PerVertex {{"); |
| @@ -450,11 +566,50 @@ private: | |||
| 450 | DeclareVertexRedeclarations(); | 566 | DeclareVertexRedeclarations(); |
| 451 | } | 567 | } |
| 452 | 568 | ||
| 569 | void DeclareFragment() { | ||
| 570 | if (stage != ShaderType::Fragment) { | ||
| 571 | return; | ||
| 572 | } | ||
| 573 | if (ir.UsesLegacyVaryings()) { | ||
| 574 | code.AddLine("in gl_PerFragment {{"); | ||
| 575 | ++code.scope; | ||
| 576 | code.AddLine("vec4 gl_TexCoord[8];"); | ||
| 577 | code.AddLine("vec4 gl_Color;"); | ||
| 578 | code.AddLine("vec4 gl_SecondaryColor;"); | ||
| 579 | --code.scope; | ||
| 580 | code.AddLine("}};"); | ||
| 581 | } | ||
| 582 | |||
| 583 | for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | ||
| 584 | code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt); | ||
| 585 | } | ||
| 586 | } | ||
| 587 | |||
| 588 | void DeclareCompute() { | ||
| 589 | if (stage != ShaderType::Compute) { | ||
| 590 | return; | ||
| 591 | } | ||
| 592 | const auto& info = registry.GetComputeInfo(); | ||
| 593 | if (const u32 size = info.shared_memory_size_in_words; size > 0) { | ||
| 594 | code.AddLine("shared uint smem[{}];", size); | ||
| 595 | code.AddNewLine(); | ||
| 596 | } | ||
| 597 | code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", | ||
| 598 | info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); | ||
| 599 | code.AddNewLine(); | ||
| 600 | } | ||
| 601 | |||
| 453 | void DeclareVertexRedeclarations() { | 602 | void DeclareVertexRedeclarations() { |
| 454 | code.AddLine("out gl_PerVertex {{"); | 603 | code.AddLine("out gl_PerVertex {{"); |
| 455 | ++code.scope; | 604 | ++code.scope; |
| 456 | 605 | ||
| 457 | code.AddLine("vec4 gl_Position;"); | 606 | auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); |
| 607 | if (!pos_xfb.empty()) { | ||
| 608 | pos_xfb = fmt::format("layout ({}) ", pos_xfb); | ||
| 609 | } | ||
| 610 | const char* pos_type = | ||
| 611 | FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); | ||
| 612 | code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); | ||
| 458 | 613 | ||
| 459 | for (const auto attribute : ir.GetOutputAttributes()) { | 614 | for (const auto attribute : ir.GetOutputAttributes()) { |
| 460 | if (attribute == Attribute::Index::ClipDistances0123 || | 615 | if (attribute == Attribute::Index::ClipDistances0123 || |
| @@ -463,14 +618,14 @@ private: | |||
| 463 | break; | 618 | break; |
| 464 | } | 619 | } |
| 465 | } | 620 | } |
| 466 | if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) { | 621 | if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) { |
| 467 | if (ir.UsesLayer()) { | 622 | if (ir.UsesLayer()) { |
| 468 | code.AddLine("int gl_Layer;"); | 623 | code.AddLine("int gl_Layer;"); |
| 469 | } | 624 | } |
| 470 | if (ir.UsesViewportIndex()) { | 625 | if (ir.UsesViewportIndex()) { |
| 471 | code.AddLine("int gl_ViewportIndex;"); | 626 | code.AddLine("int gl_ViewportIndex;"); |
| 472 | } | 627 | } |
| 473 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) && | 628 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex && |
| 474 | !device.HasVertexViewportLayer()) { | 629 | !device.HasVertexViewportLayer()) { |
| 475 | LOG_ERROR( | 630 | LOG_ERROR( |
| 476 | Render_OpenGL, | 631 | Render_OpenGL, |
| @@ -481,12 +636,12 @@ private: | |||
| 481 | code.AddLine("float gl_PointSize;"); | 636 | code.AddLine("float gl_PointSize;"); |
| 482 | } | 637 | } |
| 483 | 638 | ||
| 484 | if (ir.UsesInstanceId()) { | 639 | if (ir.UsesLegacyVaryings()) { |
| 485 | code.AddLine("int gl_InstanceID;"); | 640 | code.AddLine("vec4 gl_TexCoord[8];"); |
| 486 | } | 641 | code.AddLine("vec4 gl_FrontColor;"); |
| 487 | 642 | code.AddLine("vec4 gl_FrontSecondaryColor;"); | |
| 488 | if (ir.UsesVertexId()) { | 643 | code.AddLine("vec4 gl_BackColor;"); |
| 489 | code.AddLine("int gl_VertexID;"); | 644 | code.AddLine("vec4 gl_BackSecondaryColor;"); |
| 490 | } | 645 | } |
| 491 | 646 | ||
| 492 | --code.scope; | 647 | --code.scope; |
| @@ -525,18 +680,16 @@ private: | |||
| 525 | } | 680 | } |
| 526 | 681 | ||
| 527 | void DeclareLocalMemory() { | 682 | void DeclareLocalMemory() { |
| 683 | u64 local_memory_size = 0; | ||
| 528 | if (stage == ShaderType::Compute) { | 684 | if (stage == ShaderType::Compute) { |
| 529 | code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); | 685 | local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; |
| 530 | code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); | 686 | } else { |
| 531 | code.AddLine("#endif"); | 687 | local_memory_size = header.GetLocalMemorySize(); |
| 532 | return; | ||
| 533 | } | 688 | } |
| 534 | |||
| 535 | const u64 local_memory_size = header.GetLocalMemorySize(); | ||
| 536 | if (local_memory_size == 0) { | 689 | if (local_memory_size == 0) { |
| 537 | return; | 690 | return; |
| 538 | } | 691 | } |
| 539 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; | 692 | const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; |
| 540 | code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); | 693 | code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); |
| 541 | code.AddNewLine(); | 694 | code.AddNewLine(); |
| 542 | } | 695 | } |
| @@ -589,7 +742,7 @@ private: | |||
| 589 | void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { | 742 | void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { |
| 590 | const u32 location{GetGenericAttributeIndex(index)}; | 743 | const u32 location{GetGenericAttributeIndex(index)}; |
| 591 | 744 | ||
| 592 | std::string name{GetInputAttribute(index)}; | 745 | std::string name{GetGenericInputAttribute(index)}; |
| 593 | if (stage == ShaderType::Geometry) { | 746 | if (stage == ShaderType::Geometry) { |
| 594 | name = "gs_" + name + "[]"; | 747 | name = "gs_" + name + "[]"; |
| 595 | } | 748 | } |
| @@ -626,9 +779,59 @@ private: | |||
| 626 | } | 779 | } |
| 627 | } | 780 | } |
| 628 | 781 | ||
| 782 | std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const { | ||
| 783 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 784 | const auto it = transform_feedback.find(location); | ||
| 785 | if (it == transform_feedback.end()) { | ||
| 786 | return {}; | ||
| 787 | } | ||
| 788 | return it->second.components; | ||
| 789 | } | ||
| 790 | |||
| 791 | std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { | ||
| 792 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 793 | const auto it = transform_feedback.find(location); | ||
| 794 | if (it == transform_feedback.end()) { | ||
| 795 | return {}; | ||
| 796 | } | ||
| 797 | |||
| 798 | const VaryingTFB& tfb = it->second; | ||
| 799 | return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, | ||
| 800 | tfb.offset, tfb.stride); | ||
| 801 | } | ||
| 802 | |||
| 629 | void DeclareOutputAttribute(Attribute::Index index) { | 803 | void DeclareOutputAttribute(Attribute::Index index) { |
| 630 | const u32 location{GetGenericAttributeIndex(index)}; | 804 | static constexpr std::string_view swizzle = "xyzw"; |
| 631 | code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); | 805 | u8 element = 0; |
| 806 | while (element < 4) { | ||
| 807 | auto xfb = GetTransformFeedbackDecoration(index, element); | ||
| 808 | if (!xfb.empty()) { | ||
| 809 | xfb = fmt::format(", {}", xfb); | ||
| 810 | } | ||
| 811 | const std::size_t remainder = 4 - element; | ||
| 812 | const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); | ||
| 813 | const char* const type = FLOAT_TYPES.at(num_components - 1); | ||
| 814 | |||
| 815 | const u32 location = GetGenericAttributeIndex(index); | ||
| 816 | |||
| 817 | GenericVaryingDescription description; | ||
| 818 | description.first_element = static_cast<u8>(element); | ||
| 819 | description.is_scalar = num_components == 1; | ||
| 820 | description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); | ||
| 821 | if (element != 0 || num_components != 4) { | ||
| 822 | const std::string_view name_swizzle = swizzle.substr(element, num_components); | ||
| 823 | description.name = fmt::format("{}_{}", description.name, name_swizzle); | ||
| 824 | } | ||
| 825 | for (std::size_t i = 0; i < num_components; ++i) { | ||
| 826 | const u8 offset = static_cast<u8>(location * 4 + element + i); | ||
| 827 | varying_description.insert({offset, description}); | ||
| 828 | } | ||
| 829 | |||
| 830 | code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, | ||
| 831 | xfb, type, description.name); | ||
| 832 | |||
| 833 | element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); | ||
| 834 | } | ||
| 632 | } | 835 | } |
| 633 | 836 | ||
| 634 | void DeclareConstantBuffers() { | 837 | void DeclareConstantBuffers() { |
| @@ -925,7 +1128,8 @@ private: | |||
| 925 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games | 1128 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games |
| 926 | // set an 0x80000000 index for those and the shader fails to build. Find out why | 1129 | // set an 0x80000000 index for those and the shader fails to build. Find out why |
| 927 | // this happens and what's its intent. | 1130 | // this happens and what's its intent. |
| 928 | return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint()); | 1131 | return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), |
| 1132 | max_input_vertices.value()); | ||
| 929 | } | 1133 | } |
| 930 | return std::string(name); | 1134 | return std::string(name); |
| 931 | }; | 1135 | }; |
| @@ -943,6 +1147,10 @@ private: | |||
| 943 | default: | 1147 | default: |
| 944 | UNREACHABLE(); | 1148 | UNREACHABLE(); |
| 945 | } | 1149 | } |
| 1150 | case Attribute::Index::FrontColor: | ||
| 1151 | return {"gl_Color"s + GetSwizzle(element), Type::Float}; | ||
| 1152 | case Attribute::Index::FrontSecondaryColor: | ||
| 1153 | return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float}; | ||
| 946 | case Attribute::Index::PointCoord: | 1154 | case Attribute::Index::PointCoord: |
| 947 | switch (element) { | 1155 | switch (element) { |
| 948 | case 0: | 1156 | case 0: |
| @@ -959,7 +1167,7 @@ private: | |||
| 959 | // TODO(Subv): Find out what the values are for the first two elements when inside a | 1167 | // TODO(Subv): Find out what the values are for the first two elements when inside a |
| 960 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | 1168 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval |
| 961 | // shader. | 1169 | // shader. |
| 962 | ASSERT(IsVertexShader(stage)); | 1170 | ASSERT(stage == ShaderType::Vertex); |
| 963 | switch (element) { | 1171 | switch (element) { |
| 964 | case 2: | 1172 | case 2: |
| 965 | // Config pack's first value is instance_id. | 1173 | // Config pack's first value is instance_id. |
| @@ -980,7 +1188,13 @@ private: | |||
| 980 | return {"0", Type::Int}; | 1188 | return {"0", Type::Int}; |
| 981 | default: | 1189 | default: |
| 982 | if (IsGenericAttribute(attribute)) { | 1190 | if (IsGenericAttribute(attribute)) { |
| 983 | return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element), | 1191 | return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), |
| 1192 | Type::Float}; | ||
| 1193 | } | ||
| 1194 | if (IsLegacyTexCoord(attribute)) { | ||
| 1195 | UNIMPLEMENTED_IF(stage == ShaderType::Geometry); | ||
| 1196 | return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), | ||
| 1197 | GetSwizzle(element)), | ||
| 984 | Type::Float}; | 1198 | Type::Float}; |
| 985 | } | 1199 | } |
| 986 | break; | 1200 | break; |
| @@ -1021,21 +1235,22 @@ private: | |||
| 1021 | } | 1235 | } |
| 1022 | 1236 | ||
| 1023 | std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) { | 1237 | std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) { |
| 1238 | const u32 element = abuf->GetElement(); | ||
| 1024 | switch (const auto attribute = abuf->GetIndex()) { | 1239 | switch (const auto attribute = abuf->GetIndex()) { |
| 1025 | case Attribute::Index::Position: | 1240 | case Attribute::Index::Position: |
| 1026 | return {{"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float}}; | 1241 | return {{"gl_Position"s + GetSwizzle(element), Type::Float}}; |
| 1027 | case Attribute::Index::LayerViewportPointSize: | 1242 | case Attribute::Index::LayerViewportPointSize: |
| 1028 | switch (abuf->GetElement()) { | 1243 | switch (element) { |
| 1029 | case 0: | 1244 | case 0: |
| 1030 | UNIMPLEMENTED(); | 1245 | UNIMPLEMENTED(); |
| 1031 | return {}; | 1246 | return {}; |
| 1032 | case 1: | 1247 | case 1: |
| 1033 | if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { | 1248 | if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { |
| 1034 | return {}; | 1249 | return {}; |
| 1035 | } | 1250 | } |
| 1036 | return {{"gl_Layer", Type::Int}}; | 1251 | return {{"gl_Layer", Type::Int}}; |
| 1037 | case 2: | 1252 | case 2: |
| 1038 | if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { | 1253 | if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { |
| 1039 | return {}; | 1254 | return {}; |
| 1040 | } | 1255 | } |
| 1041 | return {{"gl_ViewportIndex", Type::Int}}; | 1256 | return {{"gl_ViewportIndex", Type::Int}}; |
| @@ -1043,14 +1258,26 @@ private: | |||
| 1043 | return {{"gl_PointSize", Type::Float}}; | 1258 | return {{"gl_PointSize", Type::Float}}; |
| 1044 | } | 1259 | } |
| 1045 | return {}; | 1260 | return {}; |
| 1261 | case Attribute::Index::FrontColor: | ||
| 1262 | return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}}; | ||
| 1263 | case Attribute::Index::FrontSecondaryColor: | ||
| 1264 | return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}}; | ||
| 1265 | case Attribute::Index::BackColor: | ||
| 1266 | return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}}; | ||
| 1267 | case Attribute::Index::BackSecondaryColor: | ||
| 1268 | return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}}; | ||
| 1046 | case Attribute::Index::ClipDistances0123: | 1269 | case Attribute::Index::ClipDistances0123: |
| 1047 | return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float}}; | 1270 | return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}}; |
| 1048 | case Attribute::Index::ClipDistances4567: | 1271 | case Attribute::Index::ClipDistances4567: |
| 1049 | return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}}; | 1272 | return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}}; |
| 1050 | default: | 1273 | default: |
| 1051 | if (IsGenericAttribute(attribute)) { | 1274 | if (IsGenericAttribute(attribute)) { |
| 1052 | return { | 1275 | return {{GetGenericOutputAttribute(attribute, element), Type::Float}}; |
| 1053 | {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}}; | 1276 | } |
| 1277 | if (IsLegacyTexCoord(attribute)) { | ||
| 1278 | return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), | ||
| 1279 | GetSwizzle(element)), | ||
| 1280 | Type::Float}}; | ||
| 1054 | } | 1281 | } |
| 1055 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); | 1282 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); |
| 1056 | return {}; | 1283 | return {}; |
| @@ -1822,16 +2049,19 @@ private: | |||
| 1822 | expr += GetSampler(meta->sampler); | 2049 | expr += GetSampler(meta->sampler); |
| 1823 | expr += ", "; | 2050 | expr += ", "; |
| 1824 | 2051 | ||
| 1825 | expr += constructors.at(operation.GetOperandsCount() - 1); | 2052 | expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1); |
| 1826 | expr += '('; | 2053 | expr += '('; |
| 1827 | for (std::size_t i = 0; i < count; ++i) { | 2054 | for (std::size_t i = 0; i < count; ++i) { |
| 1828 | expr += VisitOperand(operation, i).AsInt(); | 2055 | if (i > 0) { |
| 1829 | const std::size_t next = i + 1; | ||
| 1830 | if (next == count) | ||
| 1831 | expr += ')'; | ||
| 1832 | else if (next < count) | ||
| 1833 | expr += ", "; | 2056 | expr += ", "; |
| 2057 | } | ||
| 2058 | expr += VisitOperand(operation, i).AsInt(); | ||
| 2059 | } | ||
| 2060 | if (meta->array) { | ||
| 2061 | expr += ", "; | ||
| 2062 | expr += Visit(meta->array).AsInt(); | ||
| 1834 | } | 2063 | } |
| 2064 | expr += ')'; | ||
| 1835 | 2065 | ||
| 1836 | if (meta->lod && !meta->sampler.IsBuffer()) { | 2066 | if (meta->lod && !meta->sampler.IsBuffer()) { |
| 1837 | expr += ", "; | 2067 | expr += ", "; |
| @@ -1945,7 +2175,7 @@ private: | |||
| 1945 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. | 2175 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. |
| 1946 | for (u32 component = 0; component < 4; ++component) { | 2176 | for (u32 component = 0; component < 4; ++component) { |
| 1947 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | 2177 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { |
| 1948 | code.AddLine("FragColor{}[{}] = {};", render_target, component, | 2178 | code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component), |
| 1949 | SafeGetRegister(current_reg).AsFloat()); | 2179 | SafeGetRegister(current_reg).AsFloat()); |
| 1950 | ++current_reg; | 2180 | ++current_reg; |
| 1951 | } | 2181 | } |
| @@ -2261,27 +2491,34 @@ private: | |||
| 2261 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2491 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 2262 | 2492 | ||
| 2263 | std::string GetRegister(u32 index) const { | 2493 | std::string GetRegister(u32 index) const { |
| 2264 | return GetDeclarationWithSuffix(index, "gpr"); | 2494 | return AppendSuffix(index, "gpr"); |
| 2265 | } | 2495 | } |
| 2266 | 2496 | ||
| 2267 | std::string GetCustomVariable(u32 index) const { | 2497 | std::string GetCustomVariable(u32 index) const { |
| 2268 | return GetDeclarationWithSuffix(index, "custom_var"); | 2498 | return AppendSuffix(index, "custom_var"); |
| 2269 | } | 2499 | } |
| 2270 | 2500 | ||
| 2271 | std::string GetPredicate(Tegra::Shader::Pred pred) const { | 2501 | std::string GetPredicate(Tegra::Shader::Pred pred) const { |
| 2272 | return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); | 2502 | return AppendSuffix(static_cast<u32>(pred), "pred"); |
| 2273 | } | 2503 | } |
| 2274 | 2504 | ||
| 2275 | std::string GetInputAttribute(Attribute::Index attribute) const { | 2505 | std::string GetGenericInputAttribute(Attribute::Index attribute) const { |
| 2276 | return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr"); | 2506 | return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); |
| 2277 | } | 2507 | } |
| 2278 | 2508 | ||
| 2279 | std::string GetOutputAttribute(Attribute::Index attribute) const { | 2509 | std::unordered_map<u8, GenericVaryingDescription> varying_description; |
| 2280 | return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr"); | 2510 | |
| 2511 | std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { | ||
| 2512 | const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element); | ||
| 2513 | const auto& description = varying_description.at(offset); | ||
| 2514 | if (description.is_scalar) { | ||
| 2515 | return description.name; | ||
| 2516 | } | ||
| 2517 | return fmt::format("{}[{}]", description.name, element - description.first_element); | ||
| 2281 | } | 2518 | } |
| 2282 | 2519 | ||
| 2283 | std::string GetConstBuffer(u32 index) const { | 2520 | std::string GetConstBuffer(u32 index) const { |
| 2284 | return GetDeclarationWithSuffix(index, "cbuf"); | 2521 | return AppendSuffix(index, "cbuf"); |
| 2285 | } | 2522 | } |
| 2286 | 2523 | ||
| 2287 | std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { | 2524 | std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { |
| @@ -2294,11 +2531,15 @@ private: | |||
| 2294 | } | 2531 | } |
| 2295 | 2532 | ||
| 2296 | std::string GetConstBufferBlock(u32 index) const { | 2533 | std::string GetConstBufferBlock(u32 index) const { |
| 2297 | return GetDeclarationWithSuffix(index, "cbuf_block"); | 2534 | return AppendSuffix(index, "cbuf_block"); |
| 2298 | } | 2535 | } |
| 2299 | 2536 | ||
| 2300 | std::string GetLocalMemory() const { | 2537 | std::string GetLocalMemory() const { |
| 2301 | return "lmem_" + suffix; | 2538 | if (suffix.empty()) { |
| 2539 | return "lmem"; | ||
| 2540 | } else { | ||
| 2541 | return "lmem_" + std::string{suffix}; | ||
| 2542 | } | ||
| 2302 | } | 2543 | } |
| 2303 | 2544 | ||
| 2304 | std::string GetInternalFlag(InternalFlag flag) const { | 2545 | std::string GetInternalFlag(InternalFlag flag) const { |
| @@ -2307,23 +2548,31 @@ private: | |||
| 2307 | const auto index = static_cast<u32>(flag); | 2548 | const auto index = static_cast<u32>(flag); |
| 2308 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); | 2549 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); |
| 2309 | 2550 | ||
| 2310 | return fmt::format("{}_{}", InternalFlagNames[index], suffix); | 2551 | if (suffix.empty()) { |
| 2552 | return InternalFlagNames[index]; | ||
| 2553 | } else { | ||
| 2554 | return fmt::format("{}_{}", InternalFlagNames[index], suffix); | ||
| 2555 | } | ||
| 2311 | } | 2556 | } |
| 2312 | 2557 | ||
| 2313 | std::string GetSampler(const Sampler& sampler) const { | 2558 | std::string GetSampler(const Sampler& sampler) const { |
| 2314 | return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); | 2559 | return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); |
| 2315 | } | 2560 | } |
| 2316 | 2561 | ||
| 2317 | std::string GetImage(const Image& image) const { | 2562 | std::string GetImage(const Image& image) const { |
| 2318 | return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image"); | 2563 | return AppendSuffix(static_cast<u32>(image.GetIndex()), "image"); |
| 2319 | } | 2564 | } |
| 2320 | 2565 | ||
| 2321 | std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { | 2566 | std::string AppendSuffix(u32 index, std::string_view name) const { |
| 2322 | return fmt::format("{}_{}_{}", name, index, suffix); | 2567 | if (suffix.empty()) { |
| 2568 | return fmt::format("{}{}", name, index); | ||
| 2569 | } else { | ||
| 2570 | return fmt::format("{}{}_{}", name, index, suffix); | ||
| 2571 | } | ||
| 2323 | } | 2572 | } |
| 2324 | 2573 | ||
| 2325 | u32 GetNumPhysicalInputAttributes() const { | 2574 | u32 GetNumPhysicalInputAttributes() const { |
| 2326 | return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); | 2575 | return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); |
| 2327 | } | 2576 | } |
| 2328 | 2577 | ||
| 2329 | u32 GetNumPhysicalAttributes() const { | 2578 | u32 GetNumPhysicalAttributes() const { |
| @@ -2334,17 +2583,31 @@ private: | |||
| 2334 | return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); | 2583 | return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); |
| 2335 | } | 2584 | } |
| 2336 | 2585 | ||
| 2586 | bool IsRenderTargetEnabled(u32 render_target) const { | ||
| 2587 | for (u32 component = 0; component < 4; ++component) { | ||
| 2588 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | ||
| 2589 | return true; | ||
| 2590 | } | ||
| 2591 | } | ||
| 2592 | return false; | ||
| 2593 | } | ||
| 2594 | |||
| 2337 | const Device& device; | 2595 | const Device& device; |
| 2338 | const ShaderIR& ir; | 2596 | const ShaderIR& ir; |
| 2597 | const Registry& registry; | ||
| 2339 | const ShaderType stage; | 2598 | const ShaderType stage; |
| 2340 | const std::string suffix; | 2599 | const std::string_view identifier; |
| 2600 | const std::string_view suffix; | ||
| 2341 | const Header header; | 2601 | const Header header; |
| 2602 | std::unordered_map<u8, VaryingTFB> transform_feedback; | ||
| 2342 | 2603 | ||
| 2343 | ShaderWriter code; | 2604 | ShaderWriter code; |
| 2605 | |||
| 2606 | std::optional<u32> max_input_vertices; | ||
| 2344 | }; | 2607 | }; |
| 2345 | 2608 | ||
| 2346 | std::string GetFlowVariable(u32 i) { | 2609 | std::string GetFlowVariable(u32 index) { |
| 2347 | return fmt::format("flow_var_{}", i); | 2610 | return fmt::format("flow_var{}", index); |
| 2348 | } | 2611 | } |
| 2349 | 2612 | ||
| 2350 | class ExprDecompiler { | 2613 | class ExprDecompiler { |
| @@ -2531,7 +2794,7 @@ void GLSLDecompiler::DecompileAST() { | |||
| 2531 | 2794 | ||
| 2532 | } // Anonymous namespace | 2795 | } // Anonymous namespace |
| 2533 | 2796 | ||
| 2534 | ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { | 2797 | ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { |
| 2535 | ShaderEntries entries; | 2798 | ShaderEntries entries; |
| 2536 | for (const auto& cbuf : ir.GetConstantBuffers()) { | 2799 | for (const auto& cbuf : ir.GetConstantBuffers()) { |
| 2537 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), | 2800 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), |
| @@ -2547,33 +2810,20 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 2547 | for (const auto& image : ir.GetImages()) { | 2810 | for (const auto& image : ir.GetImages()) { |
| 2548 | entries.images.emplace_back(image); | 2811 | entries.images.emplace_back(image); |
| 2549 | } | 2812 | } |
| 2550 | entries.clip_distances = ir.GetClipDistances(); | 2813 | const auto clip_distances = ir.GetClipDistances(); |
| 2814 | for (std::size_t i = 0; i < std::size(clip_distances); ++i) { | ||
| 2815 | entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; | ||
| 2816 | } | ||
| 2551 | entries.shader_length = ir.GetLength(); | 2817 | entries.shader_length = ir.GetLength(); |
| 2552 | return entries; | 2818 | return entries; |
| 2553 | } | 2819 | } |
| 2554 | 2820 | ||
| 2555 | std::string GetCommonDeclarations() { | 2821 | std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, |
| 2556 | return R"(#define ftoi floatBitsToInt | 2822 | ShaderType stage, std::string_view identifier, |
| 2557 | #define ftou floatBitsToUint | 2823 | std::string_view suffix) { |
| 2558 | #define itof intBitsToFloat | 2824 | GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix); |
| 2559 | #define utof uintBitsToFloat | ||
| 2560 | |||
| 2561 | bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { | ||
| 2562 | bvec2 is_nan1 = isnan(pair1); | ||
| 2563 | bvec2 is_nan2 = isnan(pair2); | ||
| 2564 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); | ||
| 2565 | } | ||
| 2566 | |||
| 2567 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); | ||
| 2568 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); | ||
| 2569 | )"; | ||
| 2570 | } | ||
| 2571 | |||
| 2572 | std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage, | ||
| 2573 | const std::string& suffix) { | ||
| 2574 | GLSLDecompiler decompiler(device, ir, stage, suffix); | ||
| 2575 | decompiler.Decompile(); | 2825 | decompiler.Decompile(); |
| 2576 | return decompiler.GetResult(); | 2826 | return decompiler.GetResult(); |
| 2577 | } | 2827 | } |
| 2578 | 2828 | ||
| 2579 | } // namespace OpenGL::GLShader | 2829 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 7876f48d6..e7dbd810c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -6,22 +6,18 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <string> | 8 | #include <string> |
| 9 | #include <string_view> | ||
| 9 | #include <utility> | 10 | #include <utility> |
| 10 | #include <vector> | 11 | #include <vector> |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/maxwell_3d.h" | 13 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/engines/shader_type.h" | 14 | #include "video_core/engines/shader_type.h" |
| 15 | #include "video_core/shader/registry.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 15 | 17 | ||
| 16 | namespace VideoCommon::Shader { | ||
| 17 | class ShaderIR; | ||
| 18 | } | ||
| 19 | |||
| 20 | namespace OpenGL { | 18 | namespace OpenGL { |
| 21 | class Device; | ||
| 22 | } | ||
| 23 | 19 | ||
| 24 | namespace OpenGL::GLShader { | 20 | class Device; |
| 25 | 21 | ||
| 26 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 22 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 27 | using SamplerEntry = VideoCommon::Shader::Sampler; | 23 | using SamplerEntry = VideoCommon::Shader::Sampler; |
| @@ -74,15 +70,15 @@ struct ShaderEntries { | |||
| 74 | std::vector<GlobalMemoryEntry> global_memory_entries; | 70 | std::vector<GlobalMemoryEntry> global_memory_entries; |
| 75 | std::vector<SamplerEntry> samplers; | 71 | std::vector<SamplerEntry> samplers; |
| 76 | std::vector<ImageEntry> images; | 72 | std::vector<ImageEntry> images; |
| 77 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 73 | u32 clip_distances{}; |
| 78 | std::size_t shader_length{}; | 74 | std::size_t shader_length{}; |
| 79 | }; | 75 | }; |
| 80 | 76 | ||
| 81 | ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); | 77 | ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); |
| 82 | |||
| 83 | std::string GetCommonDeclarations(); | ||
| 84 | 78 | ||
| 85 | std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 79 | std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 86 | Tegra::Engines::ShaderType stage, const std::string& suffix); | 80 | const VideoCommon::Shader::Registry& registry, |
| 81 | Tegra::Engines::ShaderType stage, std::string_view identifier, | ||
| 82 | std::string_view suffix = {}); | ||
| 87 | 83 | ||
| 88 | } // namespace OpenGL::GLShader | 84 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 1fc204f6f..9e95a122b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -31,32 +31,24 @@ namespace { | |||
| 31 | 31 | ||
| 32 | using ShaderCacheVersionHash = std::array<u8, 64>; | 32 | using ShaderCacheVersionHash = std::array<u8, 64>; |
| 33 | 33 | ||
| 34 | enum class TransferableEntryKind : u32 { | ||
| 35 | Raw, | ||
| 36 | Usage, | ||
| 37 | }; | ||
| 38 | |||
| 39 | struct ConstBufferKey { | 34 | struct ConstBufferKey { |
| 40 | u32 cbuf{}; | 35 | u32 cbuf = 0; |
| 41 | u32 offset{}; | 36 | u32 offset = 0; |
| 42 | u32 value{}; | 37 | u32 value = 0; |
| 43 | }; | 38 | }; |
| 44 | 39 | ||
| 45 | struct BoundSamplerKey { | 40 | struct BoundSamplerKey { |
| 46 | u32 offset{}; | 41 | u32 offset = 0; |
| 47 | Tegra::Engines::SamplerDescriptor sampler{}; | 42 | Tegra::Engines::SamplerDescriptor sampler; |
| 48 | }; | 43 | }; |
| 49 | 44 | ||
| 50 | struct BindlessSamplerKey { | 45 | struct BindlessSamplerKey { |
| 51 | u32 cbuf{}; | 46 | u32 cbuf = 0; |
| 52 | u32 offset{}; | 47 | u32 offset = 0; |
| 53 | Tegra::Engines::SamplerDescriptor sampler{}; | 48 | Tegra::Engines::SamplerDescriptor sampler; |
| 54 | }; | 49 | }; |
| 55 | 50 | ||
| 56 | constexpr u32 NativeVersion = 12; | 51 | constexpr u32 NativeVersion = 20; |
| 57 | |||
| 58 | // Making sure sizes doesn't change by accident | ||
| 59 | static_assert(sizeof(ProgramVariant) == 20); | ||
| 60 | 52 | ||
| 61 | ShaderCacheVersionHash GetShaderCacheVersionHash() { | 53 | ShaderCacheVersionHash GetShaderCacheVersionHash() { |
| 62 | ShaderCacheVersionHash hash{}; | 54 | ShaderCacheVersionHash hash{}; |
| @@ -67,61 +59,124 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { | |||
| 67 | 59 | ||
| 68 | } // Anonymous namespace | 60 | } // Anonymous namespace |
| 69 | 61 | ||
| 70 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ShaderType type, ProgramCode code, | 62 | ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; |
| 71 | ProgramCode code_b) | ||
| 72 | : unique_identifier{unique_identifier}, type{type}, code{std::move(code)}, code_b{std::move( | ||
| 73 | code_b)} {} | ||
| 74 | 63 | ||
| 75 | ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; | 64 | ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; |
| 76 | 65 | ||
| 77 | ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default; | 66 | bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { |
| 78 | 67 | if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) { | |
| 79 | bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { | ||
| 80 | if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) || | ||
| 81 | file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) { | ||
| 82 | return false; | 68 | return false; |
| 83 | } | 69 | } |
| 84 | u32 code_size{}; | 70 | u32 code_size; |
| 85 | u32 code_size_b{}; | 71 | u32 code_size_b; |
| 86 | if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) || | 72 | if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) || |
| 87 | file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) { | 73 | file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) { |
| 88 | return false; | 74 | return false; |
| 89 | } | 75 | } |
| 90 | |||
| 91 | code.resize(code_size); | 76 | code.resize(code_size); |
| 92 | code_b.resize(code_size_b); | 77 | code_b.resize(code_size_b); |
| 93 | 78 | ||
| 94 | if (file.ReadArray(code.data(), code_size) != code_size) | 79 | if (file.ReadArray(code.data(), code_size) != code_size) { |
| 95 | return false; | 80 | return false; |
| 96 | 81 | } | |
| 97 | if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) { | 82 | if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) { |
| 98 | return false; | 83 | return false; |
| 99 | } | 84 | } |
| 85 | |||
| 86 | u8 is_texture_handler_size_known; | ||
| 87 | u32 texture_handler_size_value; | ||
| 88 | u32 num_keys; | ||
| 89 | u32 num_bound_samplers; | ||
| 90 | u32 num_bindless_samplers; | ||
| 91 | if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || | ||
| 92 | file.ReadArray(&is_texture_handler_size_known, 1) != 1 || | ||
| 93 | file.ReadArray(&texture_handler_size_value, 1) != 1 || | ||
| 94 | file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || | ||
| 95 | file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || | ||
| 96 | file.ReadArray(&num_bindless_samplers, 1) != 1) { | ||
| 97 | return false; | ||
| 98 | } | ||
| 99 | if (is_texture_handler_size_known) { | ||
| 100 | texture_handler_size = texture_handler_size_value; | ||
| 101 | } | ||
| 102 | |||
| 103 | std::vector<ConstBufferKey> flat_keys(num_keys); | ||
| 104 | std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers); | ||
| 105 | std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers); | ||
| 106 | if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || | ||
| 107 | file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != | ||
| 108 | flat_bound_samplers.size() || | ||
| 109 | file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != | ||
| 110 | flat_bindless_samplers.size()) { | ||
| 111 | return false; | ||
| 112 | } | ||
| 113 | for (const auto& key : flat_keys) { | ||
| 114 | keys.insert({{key.cbuf, key.offset}, key.value}); | ||
| 115 | } | ||
| 116 | for (const auto& key : flat_bound_samplers) { | ||
| 117 | bound_samplers.emplace(key.offset, key.sampler); | ||
| 118 | } | ||
| 119 | for (const auto& key : flat_bindless_samplers) { | ||
| 120 | bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | ||
| 121 | } | ||
| 122 | |||
| 100 | return true; | 123 | return true; |
| 101 | } | 124 | } |
| 102 | 125 | ||
| 103 | bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { | 126 | bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { |
| 104 | if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(type)) != 1 || | 127 | if (file.WriteObject(static_cast<u32>(type)) != 1 || |
| 105 | file.WriteObject(static_cast<u32>(code.size())) != 1 || | 128 | file.WriteObject(static_cast<u32>(code.size())) != 1 || |
| 106 | file.WriteObject(static_cast<u32>(code_b.size())) != 1) { | 129 | file.WriteObject(static_cast<u32>(code_b.size())) != 1) { |
| 107 | return false; | 130 | return false; |
| 108 | } | 131 | } |
| 109 | 132 | if (file.WriteArray(code.data(), code.size()) != code.size()) { | |
| 110 | if (file.WriteArray(code.data(), code.size()) != code.size()) | ||
| 111 | return false; | 133 | return false; |
| 112 | 134 | } | |
| 113 | if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) { | 135 | if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) { |
| 114 | return false; | 136 | return false; |
| 115 | } | 137 | } |
| 116 | return true; | 138 | |
| 139 | if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 || | ||
| 140 | file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 || | ||
| 141 | file.WriteObject(texture_handler_size.value_or(0)) != 1 || | ||
| 142 | file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || | ||
| 143 | file.WriteObject(static_cast<u32>(keys.size())) != 1 || | ||
| 144 | file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || | ||
| 145 | file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { | ||
| 146 | return false; | ||
| 147 | } | ||
| 148 | |||
| 149 | std::vector<ConstBufferKey> flat_keys; | ||
| 150 | flat_keys.reserve(keys.size()); | ||
| 151 | for (const auto& [address, value] : keys) { | ||
| 152 | flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); | ||
| 153 | } | ||
| 154 | |||
| 155 | std::vector<BoundSamplerKey> flat_bound_samplers; | ||
| 156 | flat_bound_samplers.reserve(bound_samplers.size()); | ||
| 157 | for (const auto& [address, sampler] : bound_samplers) { | ||
| 158 | flat_bound_samplers.push_back(BoundSamplerKey{address, sampler}); | ||
| 159 | } | ||
| 160 | |||
| 161 | std::vector<BindlessSamplerKey> flat_bindless_samplers; | ||
| 162 | flat_bindless_samplers.reserve(bindless_samplers.size()); | ||
| 163 | for (const auto& [address, sampler] : bindless_samplers) { | ||
| 164 | flat_bindless_samplers.push_back( | ||
| 165 | BindlessSamplerKey{address.first, address.second, sampler}); | ||
| 166 | } | ||
| 167 | |||
| 168 | return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && | ||
| 169 | file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == | ||
| 170 | flat_bound_samplers.size() && | ||
| 171 | file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == | ||
| 172 | flat_bindless_samplers.size(); | ||
| 117 | } | 173 | } |
| 118 | 174 | ||
| 119 | ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} | 175 | ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} |
| 120 | 176 | ||
| 121 | ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; | 177 | ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; |
| 122 | 178 | ||
| 123 | std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> | 179 | std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() { |
| 124 | ShaderDiskCacheOpenGL::LoadTransferable() { | ||
| 125 | // Skip games without title id | 180 | // Skip games without title id |
| 126 | const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; | 181 | const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; |
| 127 | if (!Settings::values.use_disk_shader_cache || !has_title_id) { | 182 | if (!Settings::values.use_disk_shader_cache || !has_title_id) { |
| @@ -130,17 +185,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||
| 130 | 185 | ||
| 131 | FileUtil::IOFile file(GetTransferablePath(), "rb"); | 186 | FileUtil::IOFile file(GetTransferablePath(), "rb"); |
| 132 | if (!file.IsOpen()) { | 187 | if (!file.IsOpen()) { |
| 133 | LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}", | 188 | LOG_INFO(Render_OpenGL, "No transferable shader cache found"); |
| 134 | GetTitleID()); | ||
| 135 | is_usable = true; | 189 | is_usable = true; |
| 136 | return {}; | 190 | return {}; |
| 137 | } | 191 | } |
| 138 | 192 | ||
| 139 | u32 version{}; | 193 | u32 version{}; |
| 140 | if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { | 194 | if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { |
| 141 | LOG_ERROR(Render_OpenGL, | 195 | LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); |
| 142 | "Failed to get transferable cache version for title id={}, skipping", | ||
| 143 | GetTitleID()); | ||
| 144 | return {}; | 196 | return {}; |
| 145 | } | 197 | } |
| 146 | 198 | ||
| @@ -158,105 +210,42 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||
| 158 | } | 210 | } |
| 159 | 211 | ||
| 160 | // Version is valid, load the shaders | 212 | // Version is valid, load the shaders |
| 161 | constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping"; | 213 | std::vector<ShaderDiskCacheEntry> entries; |
| 162 | std::vector<ShaderDiskCacheRaw> raws; | ||
| 163 | std::vector<ShaderDiskCacheUsage> usages; | ||
| 164 | while (file.Tell() < file.GetSize()) { | 214 | while (file.Tell() < file.GetSize()) { |
| 165 | TransferableEntryKind kind{}; | 215 | ShaderDiskCacheEntry& entry = entries.emplace_back(); |
| 166 | if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { | 216 | if (!entry.Load(file)) { |
| 167 | LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping"); | 217 | LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); |
| 168 | return {}; | ||
| 169 | } | ||
| 170 | |||
| 171 | switch (kind) { | ||
| 172 | case TransferableEntryKind::Raw: { | ||
| 173 | ShaderDiskCacheRaw entry; | ||
| 174 | if (!entry.Load(file)) { | ||
| 175 | LOG_ERROR(Render_OpenGL, error_loading); | ||
| 176 | return {}; | ||
| 177 | } | ||
| 178 | transferable.insert({entry.GetUniqueIdentifier(), {}}); | ||
| 179 | raws.push_back(std::move(entry)); | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | case TransferableEntryKind::Usage: { | ||
| 183 | ShaderDiskCacheUsage usage; | ||
| 184 | |||
| 185 | u32 num_keys{}; | ||
| 186 | u32 num_bound_samplers{}; | ||
| 187 | u32 num_bindless_samplers{}; | ||
| 188 | if (file.ReadArray(&usage.unique_identifier, 1) != 1 || | ||
| 189 | file.ReadArray(&usage.variant, 1) != 1 || | ||
| 190 | file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || | ||
| 191 | file.ReadArray(&num_bound_samplers, 1) != 1 || | ||
| 192 | file.ReadArray(&num_bindless_samplers, 1) != 1) { | ||
| 193 | LOG_ERROR(Render_OpenGL, error_loading); | ||
| 194 | return {}; | ||
| 195 | } | ||
| 196 | |||
| 197 | std::vector<ConstBufferKey> keys(num_keys); | ||
| 198 | std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); | ||
| 199 | std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); | ||
| 200 | if (file.ReadArray(keys.data(), keys.size()) != keys.size() || | ||
| 201 | file.ReadArray(bound_samplers.data(), bound_samplers.size()) != | ||
| 202 | bound_samplers.size() || | ||
| 203 | file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) != | ||
| 204 | bindless_samplers.size()) { | ||
| 205 | LOG_ERROR(Render_OpenGL, error_loading); | ||
| 206 | return {}; | ||
| 207 | } | ||
| 208 | for (const auto& key : keys) { | ||
| 209 | usage.keys.insert({{key.cbuf, key.offset}, key.value}); | ||
| 210 | } | ||
| 211 | for (const auto& key : bound_samplers) { | ||
| 212 | usage.bound_samplers.emplace(key.offset, key.sampler); | ||
| 213 | } | ||
| 214 | for (const auto& key : bindless_samplers) { | ||
| 215 | usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | ||
| 216 | } | ||
| 217 | |||
| 218 | usages.push_back(std::move(usage)); | ||
| 219 | break; | ||
| 220 | } | ||
| 221 | default: | ||
| 222 | LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping", | ||
| 223 | static_cast<u32>(kind)); | ||
| 224 | return {}; | 218 | return {}; |
| 225 | } | 219 | } |
| 226 | } | 220 | } |
| 227 | 221 | ||
| 228 | is_usable = true; | 222 | is_usable = true; |
| 229 | return {{std::move(raws), std::move(usages)}}; | 223 | return {std::move(entries)}; |
| 230 | } | 224 | } |
| 231 | 225 | ||
| 232 | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> | 226 | std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() { |
| 233 | ShaderDiskCacheOpenGL::LoadPrecompiled() { | ||
| 234 | if (!is_usable) { | 227 | if (!is_usable) { |
| 235 | return {}; | 228 | return {}; |
| 236 | } | 229 | } |
| 237 | 230 | ||
| 238 | std::string path = GetPrecompiledPath(); | 231 | FileUtil::IOFile file(GetPrecompiledPath(), "rb"); |
| 239 | FileUtil::IOFile file(path, "rb"); | ||
| 240 | if (!file.IsOpen()) { | 232 | if (!file.IsOpen()) { |
| 241 | LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", | 233 | LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); |
| 242 | GetTitleID()); | ||
| 243 | return {}; | 234 | return {}; |
| 244 | } | 235 | } |
| 245 | 236 | ||
| 246 | const auto result = LoadPrecompiledFile(file); | 237 | if (const auto result = LoadPrecompiledFile(file)) { |
| 247 | if (!result) { | 238 | return *result; |
| 248 | LOG_INFO(Render_OpenGL, | ||
| 249 | "Failed to load precompiled cache for game with title id={}, removing", | ||
| 250 | GetTitleID()); | ||
| 251 | file.Close(); | ||
| 252 | InvalidatePrecompiled(); | ||
| 253 | return {}; | ||
| 254 | } | 239 | } |
| 255 | return *result; | 240 | |
| 241 | LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); | ||
| 242 | file.Close(); | ||
| 243 | InvalidatePrecompiled(); | ||
| 244 | return {}; | ||
| 256 | } | 245 | } |
| 257 | 246 | ||
| 258 | std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> | 247 | std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile( |
| 259 | ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | 248 | FileUtil::IOFile& file) { |
| 260 | // Read compressed file from disk and decompress to virtual precompiled cache file | 249 | // Read compressed file from disk and decompress to virtual precompiled cache file |
| 261 | std::vector<u8> compressed(file.GetSize()); | 250 | std::vector<u8> compressed(file.GetSize()); |
| 262 | file.ReadBytes(compressed.data(), compressed.size()); | 251 | file.ReadBytes(compressed.data(), compressed.size()); |
| @@ -275,58 +264,22 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | |||
| 275 | return {}; | 264 | return {}; |
| 276 | } | 265 | } |
| 277 | 266 | ||
| 278 | ShaderDumpsMap dumps; | 267 | std::vector<ShaderDiskCachePrecompiled> entries; |
| 279 | while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { | 268 | while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { |
| 280 | u32 num_keys{}; | 269 | u32 binary_size; |
| 281 | u32 num_bound_samplers{}; | 270 | auto& entry = entries.emplace_back(); |
| 282 | u32 num_bindless_samplers{}; | 271 | if (!LoadObjectFromPrecompiled(entry.unique_identifier) || |
| 283 | ShaderDiskCacheUsage usage; | 272 | !LoadObjectFromPrecompiled(entry.binary_format) || |
| 284 | if (!LoadObjectFromPrecompiled(usage.unique_identifier) || | 273 | !LoadObjectFromPrecompiled(binary_size)) { |
| 285 | !LoadObjectFromPrecompiled(usage.variant) || | ||
| 286 | !LoadObjectFromPrecompiled(usage.bound_buffer) || | ||
| 287 | !LoadObjectFromPrecompiled(num_keys) || | ||
| 288 | !LoadObjectFromPrecompiled(num_bound_samplers) || | ||
| 289 | !LoadObjectFromPrecompiled(num_bindless_samplers)) { | ||
| 290 | return {}; | ||
| 291 | } | ||
| 292 | std::vector<ConstBufferKey> keys(num_keys); | ||
| 293 | std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); | ||
| 294 | std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); | ||
| 295 | if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) || | ||
| 296 | !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) != | ||
| 297 | bound_samplers.size() || | ||
| 298 | !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) != | ||
| 299 | bindless_samplers.size()) { | ||
| 300 | return {}; | ||
| 301 | } | ||
| 302 | for (const auto& key : keys) { | ||
| 303 | usage.keys.insert({{key.cbuf, key.offset}, key.value}); | ||
| 304 | } | ||
| 305 | for (const auto& key : bound_samplers) { | ||
| 306 | usage.bound_samplers.emplace(key.offset, key.sampler); | ||
| 307 | } | ||
| 308 | for (const auto& key : bindless_samplers) { | ||
| 309 | usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | ||
| 310 | } | ||
| 311 | |||
| 312 | ShaderDiskCacheDump dump; | ||
| 313 | if (!LoadObjectFromPrecompiled(dump.binary_format)) { | ||
| 314 | return {}; | ||
| 315 | } | ||
| 316 | |||
| 317 | u32 binary_length{}; | ||
| 318 | if (!LoadObjectFromPrecompiled(binary_length)) { | ||
| 319 | return {}; | 274 | return {}; |
| 320 | } | 275 | } |
| 321 | 276 | ||
| 322 | dump.binary.resize(binary_length); | 277 | entry.binary.resize(binary_size); |
| 323 | if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { | 278 | if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { |
| 324 | return {}; | 279 | return {}; |
| 325 | } | 280 | } |
| 326 | |||
| 327 | dumps.emplace(std::move(usage), dump); | ||
| 328 | } | 281 | } |
| 329 | return dumps; | 282 | return entries; |
| 330 | } | 283 | } |
| 331 | 284 | ||
| 332 | void ShaderDiskCacheOpenGL::InvalidateTransferable() { | 285 | void ShaderDiskCacheOpenGL::InvalidateTransferable() { |
| @@ -346,13 +299,13 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { | |||
| 346 | } | 299 | } |
| 347 | } | 300 | } |
| 348 | 301 | ||
| 349 | void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { | 302 | void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { |
| 350 | if (!is_usable) { | 303 | if (!is_usable) { |
| 351 | return; | 304 | return; |
| 352 | } | 305 | } |
| 353 | 306 | ||
| 354 | const u64 id = entry.GetUniqueIdentifier(); | 307 | const u64 id = entry.unique_identifier; |
| 355 | if (transferable.find(id) != transferable.end()) { | 308 | if (stored_transferable.find(id) != stored_transferable.end()) { |
| 356 | // The shader already exists | 309 | // The shader already exists |
| 357 | return; | 310 | return; |
| 358 | } | 311 | } |
| @@ -361,71 +314,17 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { | |||
| 361 | if (!file.IsOpen()) { | 314 | if (!file.IsOpen()) { |
| 362 | return; | 315 | return; |
| 363 | } | 316 | } |
| 364 | if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { | 317 | if (!entry.Save(file)) { |
| 365 | LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); | 318 | LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); |
| 366 | file.Close(); | 319 | file.Close(); |
| 367 | InvalidateTransferable(); | 320 | InvalidateTransferable(); |
| 368 | return; | 321 | return; |
| 369 | } | 322 | } |
| 370 | transferable.insert({id, {}}); | ||
| 371 | } | ||
| 372 | 323 | ||
| 373 | void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { | 324 | stored_transferable.insert(id); |
| 374 | if (!is_usable) { | ||
| 375 | return; | ||
| 376 | } | ||
| 377 | |||
| 378 | const auto it = transferable.find(usage.unique_identifier); | ||
| 379 | ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously"); | ||
| 380 | |||
| 381 | auto& usages{it->second}; | ||
| 382 | if (usages.find(usage) != usages.end()) { | ||
| 383 | // Skip this variant since the shader is already stored. | ||
| 384 | return; | ||
| 385 | } | ||
| 386 | usages.insert(usage); | ||
| 387 | |||
| 388 | FileUtil::IOFile file = AppendTransferableFile(); | ||
| 389 | if (!file.IsOpen()) | ||
| 390 | return; | ||
| 391 | const auto Close = [&] { | ||
| 392 | LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing"); | ||
| 393 | file.Close(); | ||
| 394 | InvalidateTransferable(); | ||
| 395 | }; | ||
| 396 | |||
| 397 | if (file.WriteObject(TransferableEntryKind::Usage) != 1 || | ||
| 398 | file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || | ||
| 399 | file.WriteObject(usage.bound_buffer) != 1 || | ||
| 400 | file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || | ||
| 401 | file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || | ||
| 402 | file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { | ||
| 403 | Close(); | ||
| 404 | return; | ||
| 405 | } | ||
| 406 | for (const auto& [pair, value] : usage.keys) { | ||
| 407 | const auto [cbuf, offset] = pair; | ||
| 408 | if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) { | ||
| 409 | Close(); | ||
| 410 | return; | ||
| 411 | } | ||
| 412 | } | ||
| 413 | for (const auto& [offset, sampler] : usage.bound_samplers) { | ||
| 414 | if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) { | ||
| 415 | Close(); | ||
| 416 | return; | ||
| 417 | } | ||
| 418 | } | ||
| 419 | for (const auto& [pair, sampler] : usage.bindless_samplers) { | ||
| 420 | const auto [cbuf, offset] = pair; | ||
| 421 | if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { | ||
| 422 | Close(); | ||
| 423 | return; | ||
| 424 | } | ||
| 425 | } | ||
| 426 | } | 325 | } |
| 427 | 326 | ||
| 428 | void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { | 327 | void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { |
| 429 | if (!is_usable) { | 328 | if (!is_usable) { |
| 430 | return; | 329 | return; |
| 431 | } | 330 | } |
| @@ -437,51 +336,19 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p | |||
| 437 | SavePrecompiledHeaderToVirtualPrecompiledCache(); | 336 | SavePrecompiledHeaderToVirtualPrecompiledCache(); |
| 438 | } | 337 | } |
| 439 | 338 | ||
| 440 | GLint binary_length{}; | 339 | GLint binary_length; |
| 441 | glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); | 340 | glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); |
| 442 | 341 | ||
| 443 | GLenum binary_format{}; | 342 | GLenum binary_format; |
| 444 | std::vector<u8> binary(binary_length); | 343 | std::vector<u8> binary(binary_length); |
| 445 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); | 344 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); |
| 446 | 345 | ||
| 447 | const auto Close = [&] { | 346 | if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || |
| 347 | !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) || | ||
| 348 | !SaveArrayToPrecompiled(binary.data(), binary.size())) { | ||
| 448 | LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", | 349 | LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", |
| 449 | usage.unique_identifier); | 350 | unique_identifier); |
| 450 | InvalidatePrecompiled(); | 351 | InvalidatePrecompiled(); |
| 451 | }; | ||
| 452 | |||
| 453 | if (!SaveObjectToPrecompiled(usage.unique_identifier) || | ||
| 454 | !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) || | ||
| 455 | !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || | ||
| 456 | !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || | ||
| 457 | !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { | ||
| 458 | Close(); | ||
| 459 | return; | ||
| 460 | } | ||
| 461 | for (const auto& [pair, value] : usage.keys) { | ||
| 462 | const auto [cbuf, offset] = pair; | ||
| 463 | if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) { | ||
| 464 | Close(); | ||
| 465 | return; | ||
| 466 | } | ||
| 467 | } | ||
| 468 | for (const auto& [offset, sampler] : usage.bound_samplers) { | ||
| 469 | if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) { | ||
| 470 | Close(); | ||
| 471 | return; | ||
| 472 | } | ||
| 473 | } | ||
| 474 | for (const auto& [pair, sampler] : usage.bindless_samplers) { | ||
| 475 | const auto [cbuf, offset] = pair; | ||
| 476 | if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { | ||
| 477 | Close(); | ||
| 478 | return; | ||
| 479 | } | ||
| 480 | } | ||
| 481 | if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || | ||
| 482 | !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || | ||
| 483 | !SaveArrayToPrecompiled(binary.data(), binary.size())) { | ||
| 484 | Close(); | ||
| 485 | } | 352 | } |
| 486 | } | 353 | } |
| 487 | 354 | ||
| @@ -534,7 +401,6 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { | |||
| 534 | if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) { | 401 | if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) { |
| 535 | LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", | 402 | LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", |
| 536 | precompiled_path); | 403 | precompiled_path); |
| 537 | return; | ||
| 538 | } | 404 | } |
| 539 | } | 405 | } |
| 540 | 406 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index ef2371f6d..d5be52e40 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -19,8 +19,7 @@ | |||
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "core/file_sys/vfs_vector.h" | 20 | #include "core/file_sys/vfs_vector.h" |
| 21 | #include "video_core/engines/shader_type.h" | 21 | #include "video_core/engines/shader_type.h" |
| 22 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 22 | #include "video_core/shader/registry.h" |
| 23 | #include "video_core/shader/const_buffer_locker.h" | ||
| 24 | 23 | ||
| 25 | namespace Core { | 24 | namespace Core { |
| 26 | class System; | 25 | class System; |
| @@ -32,139 +31,39 @@ class IOFile; | |||
| 32 | 31 | ||
| 33 | namespace OpenGL { | 32 | namespace OpenGL { |
| 34 | 33 | ||
| 35 | struct ShaderDiskCacheUsage; | ||
| 36 | struct ShaderDiskCacheDump; | ||
| 37 | |||
| 38 | using ProgramCode = std::vector<u64>; | 34 | using ProgramCode = std::vector<u64>; |
| 39 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; | ||
| 40 | |||
| 41 | /// Describes the different variants a program can be compiled with. | ||
| 42 | struct ProgramVariant final { | ||
| 43 | ProgramVariant() = default; | ||
| 44 | |||
| 45 | /// Graphics constructor. | ||
| 46 | explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept | ||
| 47 | : primitive_mode{primitive_mode} {} | ||
| 48 | |||
| 49 | /// Compute constructor. | ||
| 50 | explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size, | ||
| 51 | u32 local_memory_size) noexcept | ||
| 52 | : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)}, | ||
| 53 | shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {} | ||
| 54 | |||
| 55 | // Graphics specific parameters. | ||
| 56 | GLenum primitive_mode{}; | ||
| 57 | |||
| 58 | // Compute specific parameters. | ||
| 59 | u32 block_x{}; | ||
| 60 | u16 block_y{}; | ||
| 61 | u16 block_z{}; | ||
| 62 | u32 shared_memory_size{}; | ||
| 63 | u32 local_memory_size{}; | ||
| 64 | |||
| 65 | bool operator==(const ProgramVariant& rhs) const noexcept { | ||
| 66 | return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size, | ||
| 67 | local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y, | ||
| 68 | rhs.block_z, rhs.shared_memory_size, | ||
| 69 | rhs.local_memory_size); | ||
| 70 | } | ||
| 71 | |||
| 72 | bool operator!=(const ProgramVariant& rhs) const noexcept { | ||
| 73 | return !operator==(rhs); | ||
| 74 | } | ||
| 75 | }; | ||
| 76 | static_assert(std::is_trivially_copyable_v<ProgramVariant>); | ||
| 77 | |||
| 78 | /// Describes how a shader is used. | ||
| 79 | struct ShaderDiskCacheUsage { | ||
| 80 | u64 unique_identifier{}; | ||
| 81 | ProgramVariant variant; | ||
| 82 | u32 bound_buffer{}; | ||
| 83 | VideoCommon::Shader::KeyMap keys; | ||
| 84 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | ||
| 85 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | ||
| 86 | |||
| 87 | bool operator==(const ShaderDiskCacheUsage& rhs) const { | ||
| 88 | return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) == | ||
| 89 | std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers, | ||
| 90 | rhs.bindless_samplers); | ||
| 91 | } | ||
| 92 | |||
| 93 | bool operator!=(const ShaderDiskCacheUsage& rhs) const { | ||
| 94 | return !operator==(rhs); | ||
| 95 | } | ||
| 96 | }; | ||
| 97 | |||
| 98 | } // namespace OpenGL | ||
| 99 | |||
| 100 | namespace std { | ||
| 101 | |||
| 102 | template <> | ||
| 103 | struct hash<OpenGL::ProgramVariant> { | ||
| 104 | std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { | ||
| 105 | return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^ | ||
| 106 | static_cast<std::size_t>(variant.block_x) ^ | ||
| 107 | (static_cast<std::size_t>(variant.block_y) << 32) ^ | ||
| 108 | (static_cast<std::size_t>(variant.block_z) << 48) ^ | ||
| 109 | (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^ | ||
| 110 | (static_cast<std::size_t>(variant.local_memory_size) << 36); | ||
| 111 | } | ||
| 112 | }; | ||
| 113 | |||
| 114 | template <> | ||
| 115 | struct hash<OpenGL::ShaderDiskCacheUsage> { | ||
| 116 | std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { | ||
| 117 | return static_cast<std::size_t>(usage.unique_identifier) ^ | ||
| 118 | std::hash<OpenGL::ProgramVariant>{}(usage.variant); | ||
| 119 | } | ||
| 120 | }; | ||
| 121 | |||
| 122 | } // namespace std | ||
| 123 | |||
| 124 | namespace OpenGL { | ||
| 125 | 35 | ||
| 126 | /// Describes a shader how it's used by the guest GPU | 36 | /// Describes a shader and how it's used by the guest GPU |
| 127 | class ShaderDiskCacheRaw { | 37 | struct ShaderDiskCacheEntry { |
| 128 | public: | 38 | ShaderDiskCacheEntry(); |
| 129 | explicit ShaderDiskCacheRaw(u64 unique_identifier, Tegra::Engines::ShaderType type, | 39 | ~ShaderDiskCacheEntry(); |
| 130 | ProgramCode code, ProgramCode code_b = {}); | ||
| 131 | ShaderDiskCacheRaw(); | ||
| 132 | ~ShaderDiskCacheRaw(); | ||
| 133 | 40 | ||
| 134 | bool Load(FileUtil::IOFile& file); | 41 | bool Load(FileUtil::IOFile& file); |
| 135 | 42 | ||
| 136 | bool Save(FileUtil::IOFile& file) const; | 43 | bool Save(FileUtil::IOFile& file) const; |
| 137 | 44 | ||
| 138 | u64 GetUniqueIdentifier() const { | ||
| 139 | return unique_identifier; | ||
| 140 | } | ||
| 141 | |||
| 142 | bool HasProgramA() const { | 45 | bool HasProgramA() const { |
| 143 | return !code.empty() && !code_b.empty(); | 46 | return !code.empty() && !code_b.empty(); |
| 144 | } | 47 | } |
| 145 | 48 | ||
| 146 | Tegra::Engines::ShaderType GetType() const { | ||
| 147 | return type; | ||
| 148 | } | ||
| 149 | |||
| 150 | const ProgramCode& GetCode() const { | ||
| 151 | return code; | ||
| 152 | } | ||
| 153 | |||
| 154 | const ProgramCode& GetCodeB() const { | ||
| 155 | return code_b; | ||
| 156 | } | ||
| 157 | |||
| 158 | private: | ||
| 159 | u64 unique_identifier{}; | ||
| 160 | Tegra::Engines::ShaderType type{}; | 49 | Tegra::Engines::ShaderType type{}; |
| 161 | ProgramCode code; | 50 | ProgramCode code; |
| 162 | ProgramCode code_b; | 51 | ProgramCode code_b; |
| 52 | |||
| 53 | u64 unique_identifier = 0; | ||
| 54 | std::optional<u32> texture_handler_size; | ||
| 55 | u32 bound_buffer = 0; | ||
| 56 | VideoCommon::Shader::GraphicsInfo graphics_info; | ||
| 57 | VideoCommon::Shader::ComputeInfo compute_info; | ||
| 58 | VideoCommon::Shader::KeyMap keys; | ||
| 59 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | ||
| 60 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | ||
| 163 | }; | 61 | }; |
| 164 | 62 | ||
| 165 | /// Contains an OpenGL dumped binary program | 63 | /// Contains an OpenGL dumped binary program |
| 166 | struct ShaderDiskCacheDump { | 64 | struct ShaderDiskCachePrecompiled { |
| 167 | GLenum binary_format{}; | 65 | u64 unique_identifier = 0; |
| 66 | GLenum binary_format = 0; | ||
| 168 | std::vector<u8> binary; | 67 | std::vector<u8> binary; |
| 169 | }; | 68 | }; |
| 170 | 69 | ||
| @@ -174,11 +73,10 @@ public: | |||
| 174 | ~ShaderDiskCacheOpenGL(); | 73 | ~ShaderDiskCacheOpenGL(); |
| 175 | 74 | ||
| 176 | /// Loads transferable cache. If file has a old version or on failure, it deletes the file. | 75 | /// Loads transferable cache. If file has a old version or on failure, it deletes the file. |
| 177 | std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> | 76 | std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable(); |
| 178 | LoadTransferable(); | ||
| 179 | 77 | ||
| 180 | /// Loads current game's precompiled cache. Invalidates on failure. | 78 | /// Loads current game's precompiled cache. Invalidates on failure. |
| 181 | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled(); | 79 | std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled(); |
| 182 | 80 | ||
| 183 | /// Removes the transferable (and precompiled) cache file. | 81 | /// Removes the transferable (and precompiled) cache file. |
| 184 | void InvalidateTransferable(); | 82 | void InvalidateTransferable(); |
| @@ -187,21 +85,18 @@ public: | |||
| 187 | void InvalidatePrecompiled(); | 85 | void InvalidatePrecompiled(); |
| 188 | 86 | ||
| 189 | /// Saves a raw dump to the transferable file. Checks for collisions. | 87 | /// Saves a raw dump to the transferable file. Checks for collisions. |
| 190 | void SaveRaw(const ShaderDiskCacheRaw& entry); | 88 | void SaveEntry(const ShaderDiskCacheEntry& entry); |
| 191 | |||
| 192 | /// Saves shader usage to the transferable file. Does not check for collisions. | ||
| 193 | void SaveUsage(const ShaderDiskCacheUsage& usage); | ||
| 194 | 89 | ||
| 195 | /// Saves a dump entry to the precompiled file. Does not check for collisions. | 90 | /// Saves a dump entry to the precompiled file. Does not check for collisions. |
| 196 | void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); | 91 | void SavePrecompiled(u64 unique_identifier, GLuint program); |
| 197 | 92 | ||
| 198 | /// Serializes virtual precompiled shader cache file to real file | 93 | /// Serializes virtual precompiled shader cache file to real file |
| 199 | void SaveVirtualPrecompiledFile(); | 94 | void SaveVirtualPrecompiledFile(); |
| 200 | 95 | ||
| 201 | private: | 96 | private: |
| 202 | /// Loads the transferable cache. Returns empty on failure. | 97 | /// Loads the transferable cache. Returns empty on failure. |
| 203 | std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> | 98 | std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile( |
| 204 | LoadPrecompiledFile(FileUtil::IOFile& file); | 99 | FileUtil::IOFile& file); |
| 205 | 100 | ||
| 206 | /// Opens current game's transferable file and write it's header if it doesn't exist | 101 | /// Opens current game's transferable file and write it's header if it doesn't exist |
| 207 | FileUtil::IOFile AppendTransferableFile() const; | 102 | FileUtil::IOFile AppendTransferableFile() const; |
| @@ -270,7 +165,7 @@ private: | |||
| 270 | std::size_t precompiled_cache_virtual_file_offset = 0; | 165 | std::size_t precompiled_cache_virtual_file_offset = 0; |
| 271 | 166 | ||
| 272 | // Stored transferable shaders | 167 | // Stored transferable shaders |
| 273 | std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; | 168 | std::unordered_set<u64> stored_transferable; |
| 274 | 169 | ||
| 275 | // The cache has been loaded at boot | 170 | // The cache has been loaded at boot |
| 276 | bool is_usable{}; | 171 | bool is_usable{}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp deleted file mode 100644 index 34946fb47..000000000 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ /dev/null | |||
| @@ -1,109 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/engines/shader_type.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | ||
| 15 | |||
| 16 | namespace OpenGL::GLShader { | ||
| 17 | |||
| 18 | using Tegra::Engines::Maxwell3D; | ||
| 19 | using Tegra::Engines::ShaderType; | ||
| 20 | using VideoCommon::Shader::CompileDepth; | ||
| 21 | using VideoCommon::Shader::CompilerSettings; | ||
| 22 | using VideoCommon::Shader::ProgramCode; | ||
| 23 | using VideoCommon::Shader::ShaderIR; | ||
| 24 | |||
| 25 | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { | ||
| 26 | std::string out = GetCommonDeclarations(); | ||
| 27 | out += fmt::format(R"( | ||
| 28 | layout (std140, binding = {}) uniform vs_config {{ | ||
| 29 | float y_direction; | ||
| 30 | }}; | ||
| 31 | |||
| 32 | )", | ||
| 33 | EmulationUniformBlockBinding); | ||
| 34 | out += Decompile(device, ir, ShaderType::Vertex, "vertex"); | ||
| 35 | if (ir_b) { | ||
| 36 | out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b"); | ||
| 37 | } | ||
| 38 | |||
| 39 | out += R"( | ||
| 40 | void main() { | ||
| 41 | gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f); | ||
| 42 | execute_vertex(); | ||
| 43 | )"; | ||
| 44 | if (ir_b) { | ||
| 45 | out += " execute_vertex_b();"; | ||
| 46 | } | ||
| 47 | out += "}\n"; | ||
| 48 | return out; | ||
| 49 | } | ||
| 50 | |||
| 51 | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { | ||
| 52 | std::string out = GetCommonDeclarations(); | ||
| 53 | out += fmt::format(R"( | ||
| 54 | layout (std140, binding = {}) uniform gs_config {{ | ||
| 55 | float y_direction; | ||
| 56 | }}; | ||
| 57 | |||
| 58 | )", | ||
| 59 | EmulationUniformBlockBinding); | ||
| 60 | out += Decompile(device, ir, ShaderType::Geometry, "geometry"); | ||
| 61 | |||
| 62 | out += R"( | ||
| 63 | void main() { | ||
| 64 | execute_geometry(); | ||
| 65 | } | ||
| 66 | )"; | ||
| 67 | return out; | ||
| 68 | } | ||
| 69 | |||
| 70 | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { | ||
| 71 | std::string out = GetCommonDeclarations(); | ||
| 72 | out += fmt::format(R"( | ||
| 73 | layout (location = 0) out vec4 FragColor0; | ||
| 74 | layout (location = 1) out vec4 FragColor1; | ||
| 75 | layout (location = 2) out vec4 FragColor2; | ||
| 76 | layout (location = 3) out vec4 FragColor3; | ||
| 77 | layout (location = 4) out vec4 FragColor4; | ||
| 78 | layout (location = 5) out vec4 FragColor5; | ||
| 79 | layout (location = 6) out vec4 FragColor6; | ||
| 80 | layout (location = 7) out vec4 FragColor7; | ||
| 81 | |||
| 82 | layout (std140, binding = {}) uniform fs_config {{ | ||
| 83 | float y_direction; | ||
| 84 | }}; | ||
| 85 | |||
| 86 | )", | ||
| 87 | EmulationUniformBlockBinding); | ||
| 88 | out += Decompile(device, ir, ShaderType::Fragment, "fragment"); | ||
| 89 | |||
| 90 | out += R"( | ||
| 91 | void main() { | ||
| 92 | execute_fragment(); | ||
| 93 | } | ||
| 94 | )"; | ||
| 95 | return out; | ||
| 96 | } | ||
| 97 | |||
| 98 | std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) { | ||
| 99 | std::string out = GetCommonDeclarations(); | ||
| 100 | out += Decompile(device, ir, ShaderType::Compute, "compute"); | ||
| 101 | out += R"( | ||
| 102 | void main() { | ||
| 103 | execute_compute(); | ||
| 104 | } | ||
| 105 | )"; | ||
| 106 | return out; | ||
| 107 | } | ||
| 108 | |||
| 109 | } // namespace OpenGL::GLShader | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h deleted file mode 100644 index cba2be9f9..000000000 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ /dev/null | |||
| @@ -1,34 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 11 | #include "video_core/shader/shader_ir.h" | ||
| 12 | |||
| 13 | namespace OpenGL { | ||
| 14 | class Device; | ||
| 15 | } | ||
| 16 | |||
| 17 | namespace OpenGL::GLShader { | ||
| 18 | |||
| 19 | using VideoCommon::Shader::ProgramCode; | ||
| 20 | using VideoCommon::Shader::ShaderIR; | ||
| 21 | |||
| 22 | /// Generates the GLSL vertex shader program source code for the given VS program | ||
| 23 | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b); | ||
| 24 | |||
| 25 | /// Generates the GLSL geometry shader program source code for the given GS program | ||
| 26 | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir); | ||
| 27 | |||
| 28 | /// Generates the GLSL fragment shader program source code for the given FS program | ||
| 29 | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir); | ||
| 30 | |||
| 31 | /// Generates the GLSL compute shader program source code for the given CS program | ||
| 32 | std::string GenerateComputeShader(const Device& device, const ShaderIR& ir); | ||
| 33 | |||
| 34 | } // namespace OpenGL::GLShader | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 75d3fac04..9c7b0adbd 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -2,45 +2,52 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <glad/glad.h> | ||
| 6 | |||
| 5 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 6 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 7 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 9 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 8 | 10 | ||
| 9 | namespace OpenGL::GLShader { | 11 | namespace OpenGL::GLShader { |
| 10 | 12 | ||
| 11 | using Tegra::Engines::Maxwell3D; | 13 | ProgramManager::ProgramManager() = default; |
| 12 | |||
| 13 | ProgramManager::ProgramManager() { | ||
| 14 | pipeline.Create(); | ||
| 15 | } | ||
| 16 | 14 | ||
| 17 | ProgramManager::~ProgramManager() = default; | 15 | ProgramManager::~ProgramManager() = default; |
| 18 | 16 | ||
| 19 | void ProgramManager::ApplyTo(OpenGLState& state) { | 17 | void ProgramManager::Create() { |
| 20 | UpdatePipeline(); | 18 | graphics_pipeline.Create(); |
| 21 | state.draw.shader_program = 0; | 19 | glBindProgramPipeline(graphics_pipeline.handle); |
| 22 | state.draw.program_pipeline = pipeline.handle; | ||
| 23 | } | 20 | } |
| 24 | 21 | ||
| 25 | void ProgramManager::UpdatePipeline() { | 22 | void ProgramManager::BindGraphicsPipeline() { |
| 23 | if (!is_graphics_bound) { | ||
| 24 | is_graphics_bound = true; | ||
| 25 | glUseProgram(0); | ||
| 26 | } | ||
| 27 | |||
| 26 | // Avoid updating the pipeline when values have no changed | 28 | // Avoid updating the pipeline when values have no changed |
| 27 | if (old_state == current_state) { | 29 | if (old_state == current_state) { |
| 28 | return; | 30 | return; |
| 29 | } | 31 | } |
| 30 | 32 | ||
| 31 | // Workaround for AMD bug | 33 | // Workaround for AMD bug |
| 32 | constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | | 34 | static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | |
| 33 | GL_FRAGMENT_SHADER_BIT}; | 35 | GL_FRAGMENT_SHADER_BIT}; |
| 34 | glUseProgramStages(pipeline.handle, all_used_stages, 0); | 36 | const GLuint handle = graphics_pipeline.handle; |
| 35 | 37 | glUseProgramStages(handle, all_used_stages, 0); | |
| 36 | glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); | 38 | glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); |
| 37 | glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader); | 39 | glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader); |
| 38 | glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader); | 40 | glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader); |
| 39 | 41 | ||
| 40 | old_state = current_state; | 42 | old_state = current_state; |
| 41 | } | 43 | } |
| 42 | 44 | ||
| 43 | void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell) { | 45 | void ProgramManager::BindComputeShader(GLuint program) { |
| 46 | is_graphics_bound = false; | ||
| 47 | glUseProgram(program); | ||
| 48 | } | ||
| 49 | |||
| 50 | void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { | ||
| 44 | const auto& regs = maxwell.regs; | 51 | const auto& regs = maxwell.regs; |
| 45 | 52 | ||
| 46 | // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. | 53 | // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 478c165ce..d2e47f2a9 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -9,7 +9,6 @@ | |||
| 9 | #include <glad/glad.h> | 9 | #include <glad/glad.h> |
| 10 | 10 | ||
| 11 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 11 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 12 | #include "video_core/renderer_opengl/gl_state.h" | ||
| 13 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | 12 | #include "video_core/renderer_opengl/maxwell_to_gl.h" |
| 14 | 13 | ||
| 15 | namespace OpenGL::GLShader { | 14 | namespace OpenGL::GLShader { |
| @@ -32,49 +31,47 @@ public: | |||
| 32 | explicit ProgramManager(); | 31 | explicit ProgramManager(); |
| 33 | ~ProgramManager(); | 32 | ~ProgramManager(); |
| 34 | 33 | ||
| 35 | void ApplyTo(OpenGLState& state); | 34 | void Create(); |
| 36 | 35 | ||
| 37 | void UseProgrammableVertexShader(GLuint program) { | 36 | /// Updates the graphics pipeline and binds it. |
| 37 | void BindGraphicsPipeline(); | ||
| 38 | |||
| 39 | /// Binds a compute shader. | ||
| 40 | void BindComputeShader(GLuint program); | ||
| 41 | |||
| 42 | void UseVertexShader(GLuint program) { | ||
| 38 | current_state.vertex_shader = program; | 43 | current_state.vertex_shader = program; |
| 39 | } | 44 | } |
| 40 | 45 | ||
| 41 | void UseProgrammableGeometryShader(GLuint program) { | 46 | void UseGeometryShader(GLuint program) { |
| 42 | current_state.geometry_shader = program; | 47 | current_state.geometry_shader = program; |
| 43 | } | 48 | } |
| 44 | 49 | ||
| 45 | void UseProgrammableFragmentShader(GLuint program) { | 50 | void UseFragmentShader(GLuint program) { |
| 46 | current_state.fragment_shader = program; | 51 | current_state.fragment_shader = program; |
| 47 | } | 52 | } |
| 48 | 53 | ||
| 49 | void UseTrivialGeometryShader() { | ||
| 50 | current_state.geometry_shader = 0; | ||
| 51 | } | ||
| 52 | |||
| 53 | void UseTrivialFragmentShader() { | ||
| 54 | current_state.fragment_shader = 0; | ||
| 55 | } | ||
| 56 | |||
| 57 | private: | 54 | private: |
| 58 | struct PipelineState { | 55 | struct PipelineState { |
| 59 | bool operator==(const PipelineState& rhs) const { | 56 | bool operator==(const PipelineState& rhs) const noexcept { |
| 60 | return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader && | 57 | return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader && |
| 61 | geometry_shader == rhs.geometry_shader; | 58 | geometry_shader == rhs.geometry_shader; |
| 62 | } | 59 | } |
| 63 | 60 | ||
| 64 | bool operator!=(const PipelineState& rhs) const { | 61 | bool operator!=(const PipelineState& rhs) const noexcept { |
| 65 | return !operator==(rhs); | 62 | return !operator==(rhs); |
| 66 | } | 63 | } |
| 67 | 64 | ||
| 68 | GLuint vertex_shader{}; | 65 | GLuint vertex_shader = 0; |
| 69 | GLuint fragment_shader{}; | 66 | GLuint fragment_shader = 0; |
| 70 | GLuint geometry_shader{}; | 67 | GLuint geometry_shader = 0; |
| 71 | }; | 68 | }; |
| 72 | 69 | ||
| 73 | void UpdatePipeline(); | 70 | OGLPipeline graphics_pipeline; |
| 74 | 71 | OGLPipeline compute_pipeline; | |
| 75 | OGLPipeline pipeline; | ||
| 76 | PipelineState current_state; | 72 | PipelineState current_state; |
| 77 | PipelineState old_state; | 73 | PipelineState old_state; |
| 74 | bool is_graphics_bound = true; | ||
| 78 | }; | 75 | }; |
| 79 | 76 | ||
| 80 | } // namespace OpenGL::GLShader | 77 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp deleted file mode 100644 index ab1f7983c..000000000 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ /dev/null | |||
| @@ -1,554 +0,0 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <iterator> | ||
| 7 | #include <glad/glad.h> | ||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/logging/log.h" | ||
| 10 | #include "common/microprofile.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_state.h" | ||
| 12 | |||
| 13 | MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128)); | ||
| 14 | |||
| 15 | namespace OpenGL { | ||
| 16 | |||
| 17 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 18 | |||
| 19 | OpenGLState OpenGLState::cur_state; | ||
| 20 | |||
| 21 | namespace { | ||
| 22 | |||
| 23 | template <typename T> | ||
| 24 | bool UpdateValue(T& current_value, const T new_value) { | ||
| 25 | const bool changed = current_value != new_value; | ||
| 26 | current_value = new_value; | ||
| 27 | return changed; | ||
| 28 | } | ||
| 29 | |||
| 30 | template <typename T1, typename T2> | ||
| 31 | bool UpdateTie(T1 current_value, const T2 new_value) { | ||
| 32 | const bool changed = current_value != new_value; | ||
| 33 | current_value = new_value; | ||
| 34 | return changed; | ||
| 35 | } | ||
| 36 | |||
| 37 | template <typename T> | ||
| 38 | std::optional<std::pair<GLuint, GLsizei>> UpdateArray(T& current_values, const T& new_values) { | ||
| 39 | std::optional<std::size_t> first; | ||
| 40 | std::size_t last; | ||
| 41 | for (std::size_t i = 0; i < std::size(current_values); ++i) { | ||
| 42 | if (!UpdateValue(current_values[i], new_values[i])) { | ||
| 43 | continue; | ||
| 44 | } | ||
| 45 | if (!first) { | ||
| 46 | first = i; | ||
| 47 | } | ||
| 48 | last = i; | ||
| 49 | } | ||
| 50 | if (!first) { | ||
| 51 | return std::nullopt; | ||
| 52 | } | ||
| 53 | return std::make_pair(static_cast<GLuint>(*first), static_cast<GLsizei>(last - *first + 1)); | ||
| 54 | } | ||
| 55 | |||
| 56 | void Enable(GLenum cap, bool enable) { | ||
| 57 | if (enable) { | ||
| 58 | glEnable(cap); | ||
| 59 | } else { | ||
| 60 | glDisable(cap); | ||
| 61 | } | ||
| 62 | } | ||
| 63 | |||
| 64 | void Enable(GLenum cap, GLuint index, bool enable) { | ||
| 65 | if (enable) { | ||
| 66 | glEnablei(cap, index); | ||
| 67 | } else { | ||
| 68 | glDisablei(cap, index); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | void Enable(GLenum cap, bool& current_value, bool new_value) { | ||
| 73 | if (UpdateValue(current_value, new_value)) { | ||
| 74 | Enable(cap, new_value); | ||
| 75 | } | ||
| 76 | } | ||
| 77 | |||
| 78 | void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) { | ||
| 79 | if (UpdateValue(current_value, new_value)) { | ||
| 80 | Enable(cap, index, new_value); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | } // Anonymous namespace | ||
| 85 | |||
| 86 | OpenGLState::OpenGLState() = default; | ||
| 87 | |||
| 88 | void OpenGLState::SetDefaultViewports() { | ||
| 89 | viewports.fill(Viewport{}); | ||
| 90 | |||
| 91 | depth_clamp.far_plane = false; | ||
| 92 | depth_clamp.near_plane = false; | ||
| 93 | } | ||
| 94 | |||
| 95 | void OpenGLState::ApplyFramebufferState() { | ||
| 96 | if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) { | ||
| 97 | glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); | ||
| 98 | } | ||
| 99 | if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) { | ||
| 100 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer); | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | void OpenGLState::ApplyVertexArrayState() { | ||
| 105 | if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) { | ||
| 106 | glBindVertexArray(draw.vertex_array); | ||
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 | void OpenGLState::ApplyShaderProgram() { | ||
| 111 | if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) { | ||
| 112 | glUseProgram(draw.shader_program); | ||
| 113 | } | ||
| 114 | } | ||
| 115 | |||
| 116 | void OpenGLState::ApplyProgramPipeline() { | ||
| 117 | if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) { | ||
| 118 | glBindProgramPipeline(draw.program_pipeline); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | void OpenGLState::ApplyClipDistances() { | ||
| 123 | for (std::size_t i = 0; i < clip_distance.size(); ++i) { | ||
| 124 | Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i], | ||
| 125 | clip_distance[i]); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | void OpenGLState::ApplyPointSize() { | ||
| 130 | Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control); | ||
| 131 | Enable(GL_POINT_SPRITE, cur_state.point.sprite, point.sprite); | ||
| 132 | if (UpdateValue(cur_state.point.size, point.size)) { | ||
| 133 | glPointSize(point.size); | ||
| 134 | } | ||
| 135 | } | ||
| 136 | |||
| 137 | void OpenGLState::ApplyFragmentColorClamp() { | ||
| 138 | if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) { | ||
| 139 | glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB, | ||
| 140 | fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE); | ||
| 141 | } | ||
| 142 | } | ||
| 143 | |||
| 144 | void OpenGLState::ApplyMultisample() { | ||
| 145 | Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage, | ||
| 146 | multisample_control.alpha_to_coverage); | ||
| 147 | Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one, | ||
| 148 | multisample_control.alpha_to_one); | ||
| 149 | } | ||
| 150 | |||
| 151 | void OpenGLState::ApplyDepthClamp() { | ||
| 152 | if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane && | ||
| 153 | depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { | ||
| 154 | return; | ||
| 155 | } | ||
| 156 | cur_state.depth_clamp = depth_clamp; | ||
| 157 | |||
| 158 | UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane, | ||
| 159 | "Unimplemented Depth Clamp Separation!"); | ||
| 160 | |||
| 161 | Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane); | ||
| 162 | } | ||
| 163 | |||
| 164 | void OpenGLState::ApplySRgb() { | ||
| 165 | if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled) | ||
| 166 | return; | ||
| 167 | cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled; | ||
| 168 | if (framebuffer_srgb.enabled) { | ||
| 169 | glEnable(GL_FRAMEBUFFER_SRGB); | ||
| 170 | } else { | ||
| 171 | glDisable(GL_FRAMEBUFFER_SRGB); | ||
| 172 | } | ||
| 173 | } | ||
| 174 | |||
| 175 | void OpenGLState::ApplyCulling() { | ||
| 176 | Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled); | ||
| 177 | |||
| 178 | if (UpdateValue(cur_state.cull.mode, cull.mode)) { | ||
| 179 | glCullFace(cull.mode); | ||
| 180 | } | ||
| 181 | |||
| 182 | if (UpdateValue(cur_state.cull.front_face, cull.front_face)) { | ||
| 183 | glFrontFace(cull.front_face); | ||
| 184 | } | ||
| 185 | } | ||
| 186 | |||
| 187 | void OpenGLState::ApplyRasterizerDiscard() { | ||
| 188 | Enable(GL_RASTERIZER_DISCARD, cur_state.rasterizer_discard, rasterizer_discard); | ||
| 189 | } | ||
| 190 | |||
| 191 | void OpenGLState::ApplyColorMask() { | ||
| 192 | if (!dirty.color_mask) { | ||
| 193 | return; | ||
| 194 | } | ||
| 195 | dirty.color_mask = false; | ||
| 196 | |||
| 197 | for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) { | ||
| 198 | const auto& updated = color_mask[i]; | ||
| 199 | auto& current = cur_state.color_mask[i]; | ||
| 200 | if (updated.red_enabled != current.red_enabled || | ||
| 201 | updated.green_enabled != current.green_enabled || | ||
| 202 | updated.blue_enabled != current.blue_enabled || | ||
| 203 | updated.alpha_enabled != current.alpha_enabled) { | ||
| 204 | current = updated; | ||
| 205 | glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled, | ||
| 206 | updated.blue_enabled, updated.alpha_enabled); | ||
| 207 | } | ||
| 208 | } | ||
| 209 | } | ||
| 210 | |||
| 211 | void OpenGLState::ApplyDepth() { | ||
| 212 | Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled); | ||
| 213 | |||
| 214 | if (cur_state.depth.test_func != depth.test_func) { | ||
| 215 | cur_state.depth.test_func = depth.test_func; | ||
| 216 | glDepthFunc(depth.test_func); | ||
| 217 | } | ||
| 218 | |||
| 219 | if (cur_state.depth.write_mask != depth.write_mask) { | ||
| 220 | cur_state.depth.write_mask = depth.write_mask; | ||
| 221 | glDepthMask(depth.write_mask); | ||
| 222 | } | ||
| 223 | } | ||
| 224 | |||
| 225 | void OpenGLState::ApplyPrimitiveRestart() { | ||
| 226 | Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled); | ||
| 227 | |||
| 228 | if (cur_state.primitive_restart.index != primitive_restart.index) { | ||
| 229 | cur_state.primitive_restart.index = primitive_restart.index; | ||
| 230 | glPrimitiveRestartIndex(primitive_restart.index); | ||
| 231 | } | ||
| 232 | } | ||
| 233 | |||
| 234 | void OpenGLState::ApplyStencilTest() { | ||
| 235 | if (!dirty.stencil_state) { | ||
| 236 | return; | ||
| 237 | } | ||
| 238 | dirty.stencil_state = false; | ||
| 239 | |||
| 240 | Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled); | ||
| 241 | |||
| 242 | const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) { | ||
| 243 | if (current.test_func != config.test_func || current.test_ref != config.test_ref || | ||
| 244 | current.test_mask != config.test_mask) { | ||
| 245 | current.test_func = config.test_func; | ||
| 246 | current.test_ref = config.test_ref; | ||
| 247 | current.test_mask = config.test_mask; | ||
| 248 | glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); | ||
| 249 | } | ||
| 250 | if (current.action_depth_fail != config.action_depth_fail || | ||
| 251 | current.action_depth_pass != config.action_depth_pass || | ||
| 252 | current.action_stencil_fail != config.action_stencil_fail) { | ||
| 253 | current.action_depth_fail = config.action_depth_fail; | ||
| 254 | current.action_depth_pass = config.action_depth_pass; | ||
| 255 | current.action_stencil_fail = config.action_stencil_fail; | ||
| 256 | glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, | ||
| 257 | config.action_depth_pass); | ||
| 258 | } | ||
| 259 | if (current.write_mask != config.write_mask) { | ||
| 260 | current.write_mask = config.write_mask; | ||
| 261 | glStencilMaskSeparate(face, config.write_mask); | ||
| 262 | } | ||
| 263 | }; | ||
| 264 | ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front); | ||
| 265 | ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back); | ||
| 266 | } | ||
| 267 | |||
| 268 | void OpenGLState::ApplyViewport() { | ||
| 269 | for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) { | ||
| 270 | const auto& updated = viewports[i]; | ||
| 271 | auto& current = cur_state.viewports[i]; | ||
| 272 | |||
| 273 | if (current.x != updated.x || current.y != updated.y || current.width != updated.width || | ||
| 274 | current.height != updated.height) { | ||
| 275 | current.x = updated.x; | ||
| 276 | current.y = updated.y; | ||
| 277 | current.width = updated.width; | ||
| 278 | current.height = updated.height; | ||
| 279 | glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y), | ||
| 280 | static_cast<GLfloat>(updated.width), | ||
| 281 | static_cast<GLfloat>(updated.height)); | ||
| 282 | } | ||
| 283 | if (current.depth_range_near != updated.depth_range_near || | ||
| 284 | current.depth_range_far != updated.depth_range_far) { | ||
| 285 | current.depth_range_near = updated.depth_range_near; | ||
| 286 | current.depth_range_far = updated.depth_range_far; | ||
| 287 | glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); | ||
| 288 | } | ||
| 289 | |||
| 290 | Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled); | ||
| 291 | |||
| 292 | if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y || | ||
| 293 | current.scissor.width != updated.scissor.width || | ||
| 294 | current.scissor.height != updated.scissor.height) { | ||
| 295 | current.scissor.x = updated.scissor.x; | ||
| 296 | current.scissor.y = updated.scissor.y; | ||
| 297 | current.scissor.width = updated.scissor.width; | ||
| 298 | current.scissor.height = updated.scissor.height; | ||
| 299 | glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width, | ||
| 300 | updated.scissor.height); | ||
| 301 | } | ||
| 302 | } | ||
| 303 | } | ||
| 304 | |||
| 305 | void OpenGLState::ApplyGlobalBlending() { | ||
| 306 | const Blend& updated = blend[0]; | ||
| 307 | Blend& current = cur_state.blend[0]; | ||
| 308 | |||
| 309 | Enable(GL_BLEND, current.enabled, updated.enabled); | ||
| 310 | |||
| 311 | if (current.src_rgb_func != updated.src_rgb_func || | ||
| 312 | current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func || | ||
| 313 | current.dst_a_func != updated.dst_a_func) { | ||
| 314 | current.src_rgb_func = updated.src_rgb_func; | ||
| 315 | current.dst_rgb_func = updated.dst_rgb_func; | ||
| 316 | current.src_a_func = updated.src_a_func; | ||
| 317 | current.dst_a_func = updated.dst_a_func; | ||
| 318 | glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, | ||
| 319 | updated.dst_a_func); | ||
| 320 | } | ||
| 321 | |||
| 322 | if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) { | ||
| 323 | current.rgb_equation = updated.rgb_equation; | ||
| 324 | current.a_equation = updated.a_equation; | ||
| 325 | glBlendEquationSeparate(updated.rgb_equation, updated.a_equation); | ||
| 326 | } | ||
| 327 | } | ||
| 328 | |||
| 329 | void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) { | ||
| 330 | const Blend& updated = blend[target]; | ||
| 331 | Blend& current = cur_state.blend[target]; | ||
| 332 | |||
| 333 | if (current.enabled != updated.enabled || force) { | ||
| 334 | current.enabled = updated.enabled; | ||
| 335 | Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled); | ||
| 336 | } | ||
| 337 | |||
| 338 | if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func, | ||
| 339 | current.dst_a_func), | ||
| 340 | std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, | ||
| 341 | updated.dst_a_func))) { | ||
| 342 | glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func, | ||
| 343 | updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); | ||
| 344 | } | ||
| 345 | |||
| 346 | if (UpdateTie(std::tie(current.rgb_equation, current.a_equation), | ||
| 347 | std::tie(updated.rgb_equation, updated.a_equation))) { | ||
| 348 | glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation, | ||
| 349 | updated.a_equation); | ||
| 350 | } | ||
| 351 | } | ||
| 352 | |||
| 353 | void OpenGLState::ApplyBlending() { | ||
| 354 | if (!dirty.blend_state) { | ||
| 355 | return; | ||
| 356 | } | ||
| 357 | dirty.blend_state = false; | ||
| 358 | |||
| 359 | if (independant_blend.enabled) { | ||
| 360 | const bool force = independant_blend.enabled != cur_state.independant_blend.enabled; | ||
| 361 | for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) { | ||
| 362 | ApplyTargetBlending(target, force); | ||
| 363 | } | ||
| 364 | } else { | ||
| 365 | ApplyGlobalBlending(); | ||
| 366 | } | ||
| 367 | cur_state.independant_blend.enabled = independant_blend.enabled; | ||
| 368 | |||
| 369 | if (UpdateTie( | ||
| 370 | std::tie(cur_state.blend_color.red, cur_state.blend_color.green, | ||
| 371 | cur_state.blend_color.blue, cur_state.blend_color.alpha), | ||
| 372 | std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) { | ||
| 373 | glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha); | ||
| 374 | } | ||
| 375 | } | ||
| 376 | |||
| 377 | void OpenGLState::ApplyLogicOp() { | ||
| 378 | Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled); | ||
| 379 | |||
| 380 | if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) { | ||
| 381 | glLogicOp(logic_op.operation); | ||
| 382 | } | ||
| 383 | } | ||
| 384 | |||
| 385 | void OpenGLState::ApplyPolygonOffset() { | ||
| 386 | if (!dirty.polygon_offset) { | ||
| 387 | return; | ||
| 388 | } | ||
| 389 | dirty.polygon_offset = false; | ||
| 390 | |||
| 391 | Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable, | ||
| 392 | polygon_offset.fill_enable); | ||
| 393 | Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable, | ||
| 394 | polygon_offset.line_enable); | ||
| 395 | Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable, | ||
| 396 | polygon_offset.point_enable); | ||
| 397 | |||
| 398 | if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units, | ||
| 399 | cur_state.polygon_offset.clamp), | ||
| 400 | std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) { | ||
| 401 | if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { | ||
| 402 | glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); | ||
| 403 | } else { | ||
| 404 | UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0, | ||
| 405 | "Unimplemented Depth polygon offset clamp."); | ||
| 406 | glPolygonOffset(polygon_offset.factor, polygon_offset.units); | ||
| 407 | } | ||
| 408 | } | ||
| 409 | } | ||
| 410 | |||
| 411 | void OpenGLState::ApplyAlphaTest() { | ||
| 412 | Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled); | ||
| 413 | if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref), | ||
| 414 | std::tie(alpha_test.func, alpha_test.ref))) { | ||
| 415 | glAlphaFunc(alpha_test.func, alpha_test.ref); | ||
| 416 | } | ||
| 417 | } | ||
| 418 | |||
| 419 | void OpenGLState::ApplyClipControl() { | ||
| 420 | if (UpdateTie(std::tie(cur_state.clip_control.origin, cur_state.clip_control.depth_mode), | ||
| 421 | std::tie(clip_control.origin, clip_control.depth_mode))) { | ||
| 422 | glClipControl(clip_control.origin, clip_control.depth_mode); | ||
| 423 | } | ||
| 424 | } | ||
| 425 | |||
| 426 | void OpenGLState::ApplyTextures() { | ||
| 427 | const std::size_t size = std::size(textures); | ||
| 428 | for (std::size_t i = 0; i < size; ++i) { | ||
| 429 | if (UpdateValue(cur_state.textures[i], textures[i])) { | ||
| 430 | // BindTextureUnit doesn't support binding null textures, skip those binds. | ||
| 431 | // TODO(Rodrigo): Stop using null textures | ||
| 432 | if (textures[i] != 0) { | ||
| 433 | glBindTextureUnit(static_cast<GLuint>(i), textures[i]); | ||
| 434 | } | ||
| 435 | } | ||
| 436 | } | ||
| 437 | } | ||
| 438 | |||
| 439 | void OpenGLState::ApplySamplers() { | ||
| 440 | const std::size_t size = std::size(samplers); | ||
| 441 | for (std::size_t i = 0; i < size; ++i) { | ||
| 442 | if (UpdateValue(cur_state.samplers[i], samplers[i])) { | ||
| 443 | glBindSampler(static_cast<GLuint>(i), samplers[i]); | ||
| 444 | } | ||
| 445 | } | ||
| 446 | } | ||
| 447 | |||
| 448 | void OpenGLState::ApplyImages() { | ||
| 449 | if (const auto update = UpdateArray(cur_state.images, images)) { | ||
| 450 | glBindImageTextures(update->first, update->second, images.data() + update->first); | ||
| 451 | } | ||
| 452 | } | ||
| 453 | |||
/// Applies every section of this state object by calling each Apply* member in a fixed order,
/// inside an OpenGL_State microprofile scope.
void OpenGLState::Apply() {
    MICROPROFILE_SCOPE(OpenGL_State);
    // Object bindings.
    ApplyFramebufferState();
    ApplyVertexArrayState();
    ApplyShaderProgram();
    ApplyProgramPipeline();
    // Fixed-function state.
    ApplyClipDistances();
    ApplyPointSize();
    ApplyFragmentColorClamp();
    ApplyMultisample();
    ApplyRasterizerDiscard();
    ApplyColorMask();
    ApplyDepthClamp();
    ApplyViewport();
    ApplyStencilTest();
    ApplySRgb();
    ApplyCulling();
    ApplyDepth();
    ApplyPrimitiveRestart();
    ApplyBlending();
    ApplyLogicOp();
    // Texture, sampler and image bindings.
    ApplyTextures();
    ApplySamplers();
    ApplyImages();
    // Remaining fixed-function state.
    ApplyPolygonOffset();
    ApplyAlphaTest();
    ApplyClipControl();
}
| 482 | |||
| 483 | void OpenGLState::EmulateViewportWithScissor() { | ||
| 484 | auto& current = viewports[0]; | ||
| 485 | if (current.scissor.enabled) { | ||
| 486 | const GLint left = std::max(current.x, current.scissor.x); | ||
| 487 | const GLint right = | ||
| 488 | std::max(current.x + current.width, current.scissor.x + current.scissor.width); | ||
| 489 | const GLint bottom = std::max(current.y, current.scissor.y); | ||
| 490 | const GLint top = | ||
| 491 | std::max(current.y + current.height, current.scissor.y + current.scissor.height); | ||
| 492 | current.scissor.x = std::max(left, 0); | ||
| 493 | current.scissor.y = std::max(bottom, 0); | ||
| 494 | current.scissor.width = std::max(right - left, 0); | ||
| 495 | current.scissor.height = std::max(top - bottom, 0); | ||
| 496 | } else { | ||
| 497 | current.scissor.enabled = true; | ||
| 498 | current.scissor.x = current.x; | ||
| 499 | current.scissor.y = current.y; | ||
| 500 | current.scissor.width = current.width; | ||
| 501 | current.scissor.height = current.height; | ||
| 502 | } | ||
| 503 | } | ||
| 504 | |||
| 505 | OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { | ||
| 506 | for (auto& texture : textures) { | ||
| 507 | if (texture == handle) { | ||
| 508 | texture = 0; | ||
| 509 | } | ||
| 510 | } | ||
| 511 | return *this; | ||
| 512 | } | ||
| 513 | |||
| 514 | OpenGLState& OpenGLState::ResetSampler(GLuint handle) { | ||
| 515 | for (auto& sampler : samplers) { | ||
| 516 | if (sampler == handle) { | ||
| 517 | sampler = 0; | ||
| 518 | } | ||
| 519 | } | ||
| 520 | return *this; | ||
| 521 | } | ||
| 522 | |||
| 523 | OpenGLState& OpenGLState::ResetProgram(GLuint handle) { | ||
| 524 | if (draw.shader_program == handle) { | ||
| 525 | draw.shader_program = 0; | ||
| 526 | } | ||
| 527 | return *this; | ||
| 528 | } | ||
| 529 | |||
| 530 | OpenGLState& OpenGLState::ResetPipeline(GLuint handle) { | ||
| 531 | if (draw.program_pipeline == handle) { | ||
| 532 | draw.program_pipeline = 0; | ||
| 533 | } | ||
| 534 | return *this; | ||
| 535 | } | ||
| 536 | |||
| 537 | OpenGLState& OpenGLState::ResetVertexArray(GLuint handle) { | ||
| 538 | if (draw.vertex_array == handle) { | ||
| 539 | draw.vertex_array = 0; | ||
| 540 | } | ||
| 541 | return *this; | ||
| 542 | } | ||
| 543 | |||
| 544 | OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) { | ||
| 545 | if (draw.read_framebuffer == handle) { | ||
| 546 | draw.read_framebuffer = 0; | ||
| 547 | } | ||
| 548 | if (draw.draw_framebuffer == handle) { | ||
| 549 | draw.draw_framebuffer = 0; | ||
| 550 | } | ||
| 551 | return *this; | ||
| 552 | } | ||
| 553 | |||
| 554 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h deleted file mode 100644 index 4953eeda2..000000000 --- a/src/video_core/renderer_opengl/gl_state.h +++ /dev/null | |||
| @@ -1,247 +0,0 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <type_traits> | ||
| 9 | #include <glad/glad.h> | ||
| 10 | #include "video_core/engines/maxwell_3d.h" | ||
| 11 | |||
| 12 | namespace OpenGL { | ||
| 13 | |||
| 14 | class OpenGLState { | ||
| 15 | public: | ||
| 16 | struct { | ||
| 17 | bool enabled = false; // GL_FRAMEBUFFER_SRGB | ||
| 18 | } framebuffer_srgb; | ||
| 19 | |||
| 20 | struct { | ||
| 21 | bool alpha_to_coverage = false; // GL_ALPHA_TO_COVERAGE | ||
| 22 | bool alpha_to_one = false; // GL_ALPHA_TO_ONE | ||
| 23 | } multisample_control; | ||
| 24 | |||
| 25 | struct { | ||
| 26 | bool enabled = false; // GL_CLAMP_FRAGMENT_COLOR_ARB | ||
| 27 | } fragment_color_clamp; | ||
| 28 | |||
| 29 | struct { | ||
| 30 | bool far_plane = false; | ||
| 31 | bool near_plane = false; | ||
| 32 | } depth_clamp; // GL_DEPTH_CLAMP | ||
| 33 | |||
| 34 | struct { | ||
| 35 | bool enabled = false; // GL_CULL_FACE | ||
| 36 | GLenum mode = GL_BACK; // GL_CULL_FACE_MODE | ||
| 37 | GLenum front_face = GL_CCW; // GL_FRONT_FACE | ||
| 38 | } cull; | ||
| 39 | |||
| 40 | struct { | ||
| 41 | bool test_enabled = false; // GL_DEPTH_TEST | ||
| 42 | GLboolean write_mask = GL_TRUE; // GL_DEPTH_WRITEMASK | ||
| 43 | GLenum test_func = GL_LESS; // GL_DEPTH_FUNC | ||
| 44 | } depth; | ||
| 45 | |||
| 46 | struct { | ||
| 47 | bool enabled = false; | ||
| 48 | GLuint index = 0; | ||
| 49 | } primitive_restart; // GL_PRIMITIVE_RESTART | ||
| 50 | |||
| 51 | bool rasterizer_discard = false; // GL_RASTERIZER_DISCARD | ||
| 52 | |||
| 53 | struct ColorMask { | ||
| 54 | GLboolean red_enabled = GL_TRUE; | ||
| 55 | GLboolean green_enabled = GL_TRUE; | ||
| 56 | GLboolean blue_enabled = GL_TRUE; | ||
| 57 | GLboolean alpha_enabled = GL_TRUE; | ||
| 58 | }; | ||
| 59 | std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> | ||
| 60 | color_mask; // GL_COLOR_WRITEMASK | ||
| 61 | |||
| 62 | struct { | ||
| 63 | bool test_enabled = false; // GL_STENCIL_TEST | ||
| 64 | struct { | ||
| 65 | GLenum test_func = GL_ALWAYS; // GL_STENCIL_FUNC | ||
| 66 | GLint test_ref = 0; // GL_STENCIL_REF | ||
| 67 | GLuint test_mask = 0xFFFFFFFF; // GL_STENCIL_VALUE_MASK | ||
| 68 | GLuint write_mask = 0xFFFFFFFF; // GL_STENCIL_WRITEMASK | ||
| 69 | GLenum action_stencil_fail = GL_KEEP; // GL_STENCIL_FAIL | ||
| 70 | GLenum action_depth_fail = GL_KEEP; // GL_STENCIL_PASS_DEPTH_FAIL | ||
| 71 | GLenum action_depth_pass = GL_KEEP; // GL_STENCIL_PASS_DEPTH_PASS | ||
| 72 | } front, back; | ||
| 73 | } stencil; | ||
| 74 | |||
| 75 | struct Blend { | ||
| 76 | bool enabled = false; // GL_BLEND | ||
| 77 | GLenum rgb_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_RGB | ||
| 78 | GLenum a_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_ALPHA | ||
| 79 | GLenum src_rgb_func = GL_ONE; // GL_BLEND_SRC_RGB | ||
| 80 | GLenum dst_rgb_func = GL_ZERO; // GL_BLEND_DST_RGB | ||
| 81 | GLenum src_a_func = GL_ONE; // GL_BLEND_SRC_ALPHA | ||
| 82 | GLenum dst_a_func = GL_ZERO; // GL_BLEND_DST_ALPHA | ||
| 83 | }; | ||
| 84 | std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend; | ||
| 85 | |||
| 86 | struct { | ||
| 87 | bool enabled = false; | ||
| 88 | } independant_blend; | ||
| 89 | |||
| 90 | struct { | ||
| 91 | GLclampf red = 0.0f; | ||
| 92 | GLclampf green = 0.0f; | ||
| 93 | GLclampf blue = 0.0f; | ||
| 94 | GLclampf alpha = 0.0f; | ||
| 95 | } blend_color; // GL_BLEND_COLOR | ||
| 96 | |||
| 97 | struct { | ||
| 98 | bool enabled = false; // GL_LOGIC_OP_MODE | ||
| 99 | GLenum operation = GL_COPY; | ||
| 100 | } logic_op; | ||
| 101 | |||
| 102 | static constexpr std::size_t NumSamplers = 32 * 5; | ||
| 103 | static constexpr std::size_t NumImages = 8 * 5; | ||
| 104 | std::array<GLuint, NumSamplers> textures = {}; | ||
| 105 | std::array<GLuint, NumSamplers> samplers = {}; | ||
| 106 | std::array<GLuint, NumImages> images = {}; | ||
| 107 | |||
| 108 | struct { | ||
| 109 | GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING | ||
| 110 | GLuint draw_framebuffer = 0; // GL_DRAW_FRAMEBUFFER_BINDING | ||
| 111 | GLuint vertex_array = 0; // GL_VERTEX_ARRAY_BINDING | ||
| 112 | GLuint shader_program = 0; // GL_CURRENT_PROGRAM | ||
| 113 | GLuint program_pipeline = 0; // GL_PROGRAM_PIPELINE_BINDING | ||
| 114 | } draw; | ||
| 115 | |||
| 116 | struct Viewport { | ||
| 117 | GLint x = 0; | ||
| 118 | GLint y = 0; | ||
| 119 | GLint width = 0; | ||
| 120 | GLint height = 0; | ||
| 121 | GLfloat depth_range_near = 0.0f; // GL_DEPTH_RANGE | ||
| 122 | GLfloat depth_range_far = 1.0f; // GL_DEPTH_RANGE | ||
| 123 | struct { | ||
| 124 | bool enabled = false; // GL_SCISSOR_TEST | ||
| 125 | GLint x = 0; | ||
| 126 | GLint y = 0; | ||
| 127 | GLsizei width = 0; | ||
| 128 | GLsizei height = 0; | ||
| 129 | } scissor; | ||
| 130 | }; | ||
| 131 | std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports; | ||
| 132 | |||
| 133 | struct { | ||
| 134 | bool program_control = false; // GL_PROGRAM_POINT_SIZE | ||
| 135 | bool sprite = false; // GL_POINT_SPRITE | ||
| 136 | GLfloat size = 1.0f; // GL_POINT_SIZE | ||
| 137 | } point; | ||
| 138 | |||
| 139 | struct { | ||
| 140 | bool point_enable = false; | ||
| 141 | bool line_enable = false; | ||
| 142 | bool fill_enable = false; | ||
| 143 | GLfloat units = 0.0f; | ||
| 144 | GLfloat factor = 0.0f; | ||
| 145 | GLfloat clamp = 0.0f; | ||
| 146 | } polygon_offset; | ||
| 147 | |||
| 148 | struct { | ||
| 149 | bool enabled = false; // GL_ALPHA_TEST | ||
| 150 | GLenum func = GL_ALWAYS; // GL_ALPHA_TEST_FUNC | ||
| 151 | GLfloat ref = 0.0f; // GL_ALPHA_TEST_REF | ||
| 152 | } alpha_test; | ||
| 153 | |||
| 154 | std::array<bool, 8> clip_distance = {}; // GL_CLIP_DISTANCE | ||
| 155 | |||
| 156 | struct { | ||
| 157 | GLenum origin = GL_LOWER_LEFT; | ||
| 158 | GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE; | ||
| 159 | } clip_control; | ||
| 160 | |||
| 161 | OpenGLState(); | ||
| 162 | |||
| 163 | /// Get the currently active OpenGL state | ||
| 164 | static OpenGLState GetCurState() { | ||
| 165 | return cur_state; | ||
| 166 | } | ||
| 167 | |||
| 168 | void SetDefaultViewports(); | ||
| 169 | /// Apply this state as the current OpenGL state | ||
| 170 | void Apply(); | ||
| 171 | |||
| 172 | void ApplyFramebufferState(); | ||
| 173 | void ApplyVertexArrayState(); | ||
| 174 | void ApplyShaderProgram(); | ||
| 175 | void ApplyProgramPipeline(); | ||
| 176 | void ApplyClipDistances(); | ||
| 177 | void ApplyPointSize(); | ||
| 178 | void ApplyFragmentColorClamp(); | ||
| 179 | void ApplyMultisample(); | ||
| 180 | void ApplySRgb(); | ||
| 181 | void ApplyCulling(); | ||
| 182 | void ApplyRasterizerDiscard(); | ||
| 183 | void ApplyColorMask(); | ||
| 184 | void ApplyDepth(); | ||
| 185 | void ApplyPrimitiveRestart(); | ||
| 186 | void ApplyStencilTest(); | ||
| 187 | void ApplyViewport(); | ||
| 188 | void ApplyTargetBlending(std::size_t target, bool force); | ||
| 189 | void ApplyGlobalBlending(); | ||
| 190 | void ApplyBlending(); | ||
| 191 | void ApplyLogicOp(); | ||
| 192 | void ApplyTextures(); | ||
| 193 | void ApplySamplers(); | ||
| 194 | void ApplyImages(); | ||
| 195 | void ApplyDepthClamp(); | ||
| 196 | void ApplyPolygonOffset(); | ||
| 197 | void ApplyAlphaTest(); | ||
| 198 | void ApplyClipControl(); | ||
| 199 | |||
| 200 | /// Resets any references to the given resource | ||
| 201 | OpenGLState& UnbindTexture(GLuint handle); | ||
| 202 | OpenGLState& ResetSampler(GLuint handle); | ||
| 203 | OpenGLState& ResetProgram(GLuint handle); | ||
| 204 | OpenGLState& ResetPipeline(GLuint handle); | ||
| 205 | OpenGLState& ResetVertexArray(GLuint handle); | ||
| 206 | OpenGLState& ResetFramebuffer(GLuint handle); | ||
| 207 | |||
| 208 | /// Viewport does not affects glClearBuffer so emulate viewport using scissor test | ||
| 209 | void EmulateViewportWithScissor(); | ||
| 210 | |||
| 211 | void MarkDirtyBlendState() { | ||
| 212 | dirty.blend_state = true; | ||
| 213 | } | ||
| 214 | |||
| 215 | void MarkDirtyStencilState() { | ||
| 216 | dirty.stencil_state = true; | ||
| 217 | } | ||
| 218 | |||
| 219 | void MarkDirtyPolygonOffset() { | ||
| 220 | dirty.polygon_offset = true; | ||
| 221 | } | ||
| 222 | |||
| 223 | void MarkDirtyColorMask() { | ||
| 224 | dirty.color_mask = true; | ||
| 225 | } | ||
| 226 | |||
| 227 | void AllDirty() { | ||
| 228 | dirty.blend_state = true; | ||
| 229 | dirty.stencil_state = true; | ||
| 230 | dirty.polygon_offset = true; | ||
| 231 | dirty.color_mask = true; | ||
| 232 | } | ||
| 233 | |||
| 234 | private: | ||
| 235 | static OpenGLState cur_state; | ||
| 236 | |||
| 237 | struct { | ||
| 238 | bool blend_state; | ||
| 239 | bool stencil_state; | ||
| 240 | bool viewport_state; | ||
| 241 | bool polygon_offset; | ||
| 242 | bool color_mask; | ||
| 243 | } dirty{}; | ||
| 244 | }; | ||
| 245 | static_assert(std::is_trivially_copyable_v<OpenGLState>); | ||
| 246 | |||
| 247 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp new file mode 100644 index 000000000..255ac3147 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp | |||
| @@ -0,0 +1,247 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "core/core.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/gpu.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_state_tracker.h" | ||
| 14 | |||
// OFF(field): index of a Maxwell3D register field, as produced by MAXWELL3D_REG_INDEX.
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
// NUM(field): size of a Maxwell3D::Regs member expressed in u32 words.
#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32))
| 17 | |||
| 18 | namespace OpenGL { | ||
| 19 | |||
| 20 | namespace { | ||
| 21 | |||
// Dirty flag identifiers: the OpenGL-specific ones and the ones shared through VideoCommon.
using namespace Dirty;
using namespace VideoCommon::Dirty;
// Short aliases for the Maxwell3D engine, its register file and its dirty-tracking tables.
using Tegra::Engines::Maxwell3D;
using Regs = Maxwell3D::Regs;
using Tables = Maxwell3D::DirtyState::Tables;
using Table = Maxwell3D::DirtyState::Table;
| 28 | |||
| 29 | void SetupDirtyColorMasks(Tables& tables) { | ||
| 30 | tables[0][OFF(color_mask_common)] = ColorMaskCommon; | ||
| 31 | for (std::size_t rt = 0; rt < Regs::NumRenderTargets; ++rt) { | ||
| 32 | const std::size_t offset = OFF(color_mask) + rt * NUM(color_mask[0]); | ||
| 33 | FillBlock(tables[0], offset, NUM(color_mask[0]), ColorMask0 + rt); | ||
| 34 | } | ||
| 35 | |||
| 36 | FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks); | ||
| 37 | } | ||
| 38 | |||
| 39 | void SetupDirtyVertexArrays(Tables& tables) { | ||
| 40 | static constexpr std::size_t num_array = 3; | ||
| 41 | static constexpr std::size_t instance_base_offset = 3; | ||
| 42 | for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { | ||
| 43 | const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]); | ||
| 44 | const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]); | ||
| 45 | |||
| 46 | FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers); | ||
| 47 | FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers); | ||
| 48 | |||
| 49 | const std::size_t instance_array_offset = array_offset + instance_base_offset; | ||
| 50 | tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i); | ||
| 51 | tables[1][instance_array_offset] = VertexInstances; | ||
| 52 | |||
| 53 | const std::size_t instance_offset = OFF(instanced_arrays) + i; | ||
| 54 | tables[0][instance_offset] = static_cast<u8>(VertexInstance0 + i); | ||
| 55 | tables[1][instance_offset] = VertexInstances; | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | void SetupDirtyVertexFormat(Tables& tables) { | ||
| 60 | for (std::size_t i = 0; i < Regs::NumVertexAttributes; ++i) { | ||
| 61 | const std::size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]); | ||
| 62 | FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexFormat0 + i); | ||
| 63 | } | ||
| 64 | |||
| 65 | FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexFormats); | ||
| 66 | } | ||
| 67 | |||
| 68 | void SetupDirtyViewports(Tables& tables) { | ||
| 69 | for (std::size_t i = 0; i < Regs::NumViewports; ++i) { | ||
| 70 | const std::size_t transf_offset = OFF(viewport_transform) + i * NUM(viewport_transform[0]); | ||
| 71 | const std::size_t viewport_offset = OFF(viewports) + i * NUM(viewports[0]); | ||
| 72 | |||
| 73 | FillBlock(tables[0], transf_offset, NUM(viewport_transform[0]), Viewport0 + i); | ||
| 74 | FillBlock(tables[0], viewport_offset, NUM(viewports[0]), Viewport0 + i); | ||
| 75 | } | ||
| 76 | |||
| 77 | FillBlock(tables[1], OFF(viewport_transform), NUM(viewport_transform), Viewports); | ||
| 78 | FillBlock(tables[1], OFF(viewports), NUM(viewports), Viewports); | ||
| 79 | |||
| 80 | tables[0][OFF(viewport_transform_enabled)] = ViewportTransform; | ||
| 81 | tables[1][OFF(viewport_transform_enabled)] = Viewports; | ||
| 82 | } | ||
| 83 | |||
| 84 | void SetupDirtyScissors(Tables& tables) { | ||
| 85 | for (std::size_t i = 0; i < Regs::NumViewports; ++i) { | ||
| 86 | const std::size_t offset = OFF(scissor_test) + i * NUM(scissor_test[0]); | ||
| 87 | FillBlock(tables[0], offset, NUM(scissor_test[0]), Scissor0 + i); | ||
| 88 | } | ||
| 89 | FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors); | ||
| 90 | } | ||
| 91 | |||
| 92 | void SetupDirtyShaders(Tables& tables) { | ||
| 93 | FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram, | ||
| 94 | Shaders); | ||
| 95 | } | ||
| 96 | |||
| 97 | void SetupDirtyPolygonModes(Tables& tables) { | ||
| 98 | tables[0][OFF(polygon_mode_front)] = PolygonModeFront; | ||
| 99 | tables[0][OFF(polygon_mode_back)] = PolygonModeBack; | ||
| 100 | |||
| 101 | tables[1][OFF(polygon_mode_front)] = PolygonModes; | ||
| 102 | tables[1][OFF(polygon_mode_back)] = PolygonModes; | ||
| 103 | tables[0][OFF(fill_rectangle)] = PolygonModes; | ||
| 104 | } | ||
| 105 | |||
| 106 | void SetupDirtyDepthTest(Tables& tables) { | ||
| 107 | auto& table = tables[0]; | ||
| 108 | table[OFF(depth_test_enable)] = DepthTest; | ||
| 109 | table[OFF(depth_write_enabled)] = DepthMask; | ||
| 110 | table[OFF(depth_test_func)] = DepthTest; | ||
| 111 | } | ||
| 112 | |||
| 113 | void SetupDirtyStencilTest(Tables& tables) { | ||
| 114 | static constexpr std::array offsets = { | ||
| 115 | OFF(stencil_enable), OFF(stencil_front_func_func), OFF(stencil_front_func_ref), | ||
| 116 | OFF(stencil_front_func_mask), OFF(stencil_front_op_fail), OFF(stencil_front_op_zfail), | ||
| 117 | OFF(stencil_front_op_zpass), OFF(stencil_front_mask), OFF(stencil_two_side_enable), | ||
| 118 | OFF(stencil_back_func_func), OFF(stencil_back_func_ref), OFF(stencil_back_func_mask), | ||
| 119 | OFF(stencil_back_op_fail), OFF(stencil_back_op_zfail), OFF(stencil_back_op_zpass), | ||
| 120 | OFF(stencil_back_mask)}; | ||
| 121 | for (const auto offset : offsets) { | ||
| 122 | tables[0][offset] = StencilTest; | ||
| 123 | } | ||
| 124 | } | ||
| 125 | |||
| 126 | void SetupDirtyAlphaTest(Tables& tables) { | ||
| 127 | auto& table = tables[0]; | ||
| 128 | table[OFF(alpha_test_ref)] = AlphaTest; | ||
| 129 | table[OFF(alpha_test_func)] = AlphaTest; | ||
| 130 | table[OFF(alpha_test_enabled)] = AlphaTest; | ||
| 131 | } | ||
| 132 | |||
| 133 | void SetupDirtyBlend(Tables& tables) { | ||
| 134 | FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendColor); | ||
| 135 | |||
| 136 | tables[0][OFF(independent_blend_enable)] = BlendIndependentEnabled; | ||
| 137 | |||
| 138 | for (std::size_t i = 0; i < Regs::NumRenderTargets; ++i) { | ||
| 139 | const std::size_t offset = OFF(independent_blend) + i * NUM(independent_blend[0]); | ||
| 140 | FillBlock(tables[0], offset, NUM(independent_blend[0]), BlendState0 + i); | ||
| 141 | |||
| 142 | tables[0][OFF(blend.enable) + i] = static_cast<u8>(BlendState0 + i); | ||
| 143 | } | ||
| 144 | FillBlock(tables[1], OFF(independent_blend), NUM(independent_blend), BlendStates); | ||
| 145 | FillBlock(tables[1], OFF(blend), NUM(blend), BlendStates); | ||
| 146 | } | ||
| 147 | |||
| 148 | void SetupDirtyPrimitiveRestart(Tables& tables) { | ||
| 149 | FillBlock(tables[0], OFF(primitive_restart), NUM(primitive_restart), PrimitiveRestart); | ||
| 150 | } | ||
| 151 | |||
| 152 | void SetupDirtyPolygonOffset(Tables& tables) { | ||
| 153 | auto& table = tables[0]; | ||
| 154 | table[OFF(polygon_offset_fill_enable)] = PolygonOffset; | ||
| 155 | table[OFF(polygon_offset_line_enable)] = PolygonOffset; | ||
| 156 | table[OFF(polygon_offset_point_enable)] = PolygonOffset; | ||
| 157 | table[OFF(polygon_offset_factor)] = PolygonOffset; | ||
| 158 | table[OFF(polygon_offset_units)] = PolygonOffset; | ||
| 159 | table[OFF(polygon_offset_clamp)] = PolygonOffset; | ||
| 160 | } | ||
| 161 | |||
| 162 | void SetupDirtyMultisampleControl(Tables& tables) { | ||
| 163 | FillBlock(tables[0], OFF(multisample_control), NUM(multisample_control), MultisampleControl); | ||
| 164 | } | ||
| 165 | |||
| 166 | void SetupDirtyRasterizeEnable(Tables& tables) { | ||
| 167 | tables[0][OFF(rasterize_enable)] = RasterizeEnable; | ||
| 168 | } | ||
| 169 | |||
| 170 | void SetupDirtyFramebufferSRGB(Tables& tables) { | ||
| 171 | tables[0][OFF(framebuffer_srgb)] = FramebufferSRGB; | ||
| 172 | } | ||
| 173 | |||
| 174 | void SetupDirtyLogicOp(Tables& tables) { | ||
| 175 | FillBlock(tables[0], OFF(logic_op), NUM(logic_op), LogicOp); | ||
| 176 | } | ||
| 177 | |||
| 178 | void SetupDirtyFragmentClampColor(Tables& tables) { | ||
| 179 | tables[0][OFF(frag_color_clamp)] = FragmentClampColor; | ||
| 180 | } | ||
| 181 | |||
| 182 | void SetupDirtyPointSize(Tables& tables) { | ||
| 183 | tables[0][OFF(vp_point_size)] = PointSize; | ||
| 184 | tables[0][OFF(point_size)] = PointSize; | ||
| 185 | tables[0][OFF(point_sprite_enable)] = PointSize; | ||
| 186 | } | ||
| 187 | |||
| 188 | void SetupDirtyClipControl(Tables& tables) { | ||
| 189 | auto& table = tables[0]; | ||
| 190 | table[OFF(screen_y_control)] = ClipControl; | ||
| 191 | table[OFF(depth_mode)] = ClipControl; | ||
| 192 | } | ||
| 193 | |||
| 194 | void SetupDirtyDepthClampEnabled(Tables& tables) { | ||
| 195 | tables[0][OFF(view_volume_clip_control)] = DepthClampEnabled; | ||
| 196 | } | ||
| 197 | |||
| 198 | void SetupDirtyMisc(Tables& tables) { | ||
| 199 | auto& table = tables[0]; | ||
| 200 | |||
| 201 | table[OFF(clip_distance_enabled)] = ClipDistances; | ||
| 202 | |||
| 203 | table[OFF(front_face)] = FrontFace; | ||
| 204 | |||
| 205 | table[OFF(cull_test_enabled)] = CullTest; | ||
| 206 | table[OFF(cull_face)] = CullTest; | ||
| 207 | } | ||
| 208 | |||
| 209 | } // Anonymous namespace | ||
| 210 | |||
| 211 | StateTracker::StateTracker(Core::System& system) : system{system} {} | ||
| 212 | |||
| 213 | void StateTracker::Initialize() { | ||
| 214 | auto& dirty = system.GPU().Maxwell3D().dirty; | ||
| 215 | auto& tables = dirty.tables; | ||
| 216 | SetupDirtyRenderTargets(tables); | ||
| 217 | SetupDirtyColorMasks(tables); | ||
| 218 | SetupDirtyViewports(tables); | ||
| 219 | SetupDirtyScissors(tables); | ||
| 220 | SetupDirtyVertexArrays(tables); | ||
| 221 | SetupDirtyVertexFormat(tables); | ||
| 222 | SetupDirtyShaders(tables); | ||
| 223 | SetupDirtyPolygonModes(tables); | ||
| 224 | SetupDirtyDepthTest(tables); | ||
| 225 | SetupDirtyStencilTest(tables); | ||
| 226 | SetupDirtyAlphaTest(tables); | ||
| 227 | SetupDirtyBlend(tables); | ||
| 228 | SetupDirtyPrimitiveRestart(tables); | ||
| 229 | SetupDirtyPolygonOffset(tables); | ||
| 230 | SetupDirtyMultisampleControl(tables); | ||
| 231 | SetupDirtyRasterizeEnable(tables); | ||
| 232 | SetupDirtyFramebufferSRGB(tables); | ||
| 233 | SetupDirtyLogicOp(tables); | ||
| 234 | SetupDirtyFragmentClampColor(tables); | ||
| 235 | SetupDirtyPointSize(tables); | ||
| 236 | SetupDirtyClipControl(tables); | ||
| 237 | SetupDirtyDepthClampEnabled(tables); | ||
| 238 | SetupDirtyMisc(tables); | ||
| 239 | |||
| 240 | auto& store = dirty.on_write_stores; | ||
| 241 | store[VertexBuffers] = true; | ||
| 242 | for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { | ||
| 243 | store[VertexBuffer0 + i] = true; | ||
| 244 | } | ||
| 245 | } | ||
| 246 | |||
| 247 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h new file mode 100644 index 000000000..b882d75c3 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_state_tracker.h | |||
| @@ -0,0 +1,215 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <limits> | ||
| 8 | |||
| 9 | #include <glad/glad.h> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "core/core.h" | ||
| 13 | #include "video_core/dirty_flags.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | |||
| 16 | namespace Core { | ||
| 17 | class System; | ||
| 18 | } | ||
| 19 | |||
| 20 | namespace OpenGL { | ||
| 21 | |||
// Dirty flag identifiers used by the OpenGL renderer.
// Values continue after VideoCommon::Dirty::LastCommonEntry. Each "N0 ... Nk" pair reserves a
// contiguous range of k + 1 flags, so "N0 + i" addresses the i-th element.
// Enumerator order defines the numeric values; do not reorder.
namespace Dirty {

enum : u8 {
    First = VideoCommon::Dirty::LastCommonEntry,

    // Vertex attribute formats: group flag followed by 32 per-attribute flags.
    VertexFormats,
    VertexFormat0,
    VertexFormat31 = VertexFormat0 + 31,

    // Vertex buffers: group flag followed by 32 per-array flags.
    VertexBuffers,
    VertexBuffer0,
    VertexBuffer31 = VertexBuffer0 + 31,

    // Vertex instancing: group flag followed by 32 per-array flags.
    VertexInstances,
    VertexInstance0,
    VertexInstance31 = VertexInstance0 + 31,

    // Viewports: transform-enable flag, group flag and 16 per-viewport flags.
    ViewportTransform,
    Viewports,
    Viewport0,
    Viewport15 = Viewport0 + 15,

    // Scissors: group flag followed by 16 per-scissor flags.
    Scissors,
    Scissor0,
    Scissor15 = Scissor0 + 15,

    // Colour masks: common flag, group flag and 8 per-render-target flags.
    ColorMaskCommon,
    ColorMasks,
    ColorMask0,
    ColorMask7 = ColorMask0 + 7,

    // Blending: colour, independent-enable, group flag and 8 per-render-target flags.
    BlendColor,
    BlendIndependentEnabled,
    BlendStates,
    BlendState0,
    BlendState7 = BlendState0 + 7,

    Shaders,
    ClipDistances,

    // Polygon modes: group flag followed by the front and back flags.
    PolygonModes,
    PolygonModeFront,
    PolygonModeBack,

    // Single flags without sub-ranges.
    ColorMask,
    FrontFace,
    CullTest,
    DepthMask,
    DepthTest,
    StencilTest,
    AlphaTest,
    PrimitiveRestart,
    PolygonOffset,
    MultisampleControl,
    RasterizeEnable,
    FramebufferSRGB,
    LogicOp,
    FragmentClampColor,
    PointSize,
    ClipControl,
    DepthClampEnabled,

    // One past the last flag.
    Last
};
// Every identifier has to fit in a u8, the type of the enumeration.
static_assert(Last <= std::numeric_limits<u8>::max());

} // namespace Dirty
| 89 | |||
| 90 | class StateTracker { | ||
| 91 | public: | ||
| 92 | explicit StateTracker(Core::System& system); | ||
| 93 | |||
| 94 | void Initialize(); | ||
| 95 | |||
| 96 | void BindIndexBuffer(GLuint new_index_buffer) { | ||
| 97 | if (index_buffer == new_index_buffer) { | ||
| 98 | return; | ||
| 99 | } | ||
| 100 | index_buffer = new_index_buffer; | ||
| 101 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer); | ||
| 102 | } | ||
| 103 | |||
| 104 | void NotifyScreenDrawVertexArray() { | ||
| 105 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 106 | flags[OpenGL::Dirty::VertexFormats] = true; | ||
| 107 | flags[OpenGL::Dirty::VertexFormat0 + 0] = true; | ||
| 108 | flags[OpenGL::Dirty::VertexFormat0 + 1] = true; | ||
| 109 | |||
| 110 | flags[OpenGL::Dirty::VertexBuffers] = true; | ||
| 111 | flags[OpenGL::Dirty::VertexBuffer0] = true; | ||
| 112 | |||
| 113 | flags[OpenGL::Dirty::VertexInstances] = true; | ||
| 114 | flags[OpenGL::Dirty::VertexInstance0 + 0] = true; | ||
| 115 | flags[OpenGL::Dirty::VertexInstance0 + 1] = true; | ||
| 116 | } | ||
| 117 | |||
| 118 | void NotifyPolygonModes() { | ||
| 119 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 120 | flags[OpenGL::Dirty::PolygonModes] = true; | ||
| 121 | flags[OpenGL::Dirty::PolygonModeFront] = true; | ||
| 122 | flags[OpenGL::Dirty::PolygonModeBack] = true; | ||
| 123 | } | ||
| 124 | |||
| 125 | void NotifyViewport0() { | ||
| 126 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 127 | flags[OpenGL::Dirty::Viewports] = true; | ||
| 128 | flags[OpenGL::Dirty::Viewport0] = true; | ||
| 129 | } | ||
| 130 | |||
| 131 | void NotifyScissor0() { | ||
| 132 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 133 | flags[OpenGL::Dirty::Scissors] = true; | ||
| 134 | flags[OpenGL::Dirty::Scissor0] = true; | ||
| 135 | } | ||
| 136 | |||
| 137 | void NotifyColorMask0() { | ||
| 138 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 139 | flags[OpenGL::Dirty::ColorMasks] = true; | ||
| 140 | flags[OpenGL::Dirty::ColorMask0] = true; | ||
| 141 | } | ||
| 142 | |||
| 143 | void NotifyBlend0() { | ||
| 144 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 145 | flags[OpenGL::Dirty::BlendStates] = true; | ||
| 146 | flags[OpenGL::Dirty::BlendState0] = true; | ||
| 147 | } | ||
| 148 | |||
| 149 | void NotifyFramebuffer() { | ||
| 150 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 151 | flags[VideoCommon::Dirty::RenderTargets] = true; | ||
| 152 | } | ||
| 153 | |||
| 154 | void NotifyFrontFace() { | ||
| 155 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 156 | flags[OpenGL::Dirty::FrontFace] = true; | ||
| 157 | } | ||
| 158 | |||
| 159 | void NotifyCullTest() { | ||
| 160 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 161 | flags[OpenGL::Dirty::CullTest] = true; | ||
| 162 | } | ||
| 163 | |||
| 164 | void NotifyDepthMask() { | ||
| 165 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 166 | flags[OpenGL::Dirty::DepthMask] = true; | ||
| 167 | } | ||
| 168 | |||
| 169 | void NotifyDepthTest() { | ||
| 170 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 171 | flags[OpenGL::Dirty::DepthTest] = true; | ||
| 172 | } | ||
| 173 | |||
| 174 | void NotifyStencilTest() { | ||
| 175 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 176 | flags[OpenGL::Dirty::StencilTest] = true; | ||
| 177 | } | ||
| 178 | |||
| 179 | void NotifyPolygonOffset() { | ||
| 180 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 181 | flags[OpenGL::Dirty::PolygonOffset] = true; | ||
| 182 | } | ||
| 183 | |||
| 184 | void NotifyRasterizeEnable() { | ||
| 185 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 186 | flags[OpenGL::Dirty::RasterizeEnable] = true; | ||
| 187 | } | ||
| 188 | |||
| 189 | void NotifyFramebufferSRGB() { | ||
| 190 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 191 | flags[OpenGL::Dirty::FramebufferSRGB] = true; | ||
| 192 | } | ||
| 193 | |||
| 194 | void NotifyLogicOp() { | ||
| 195 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 196 | flags[OpenGL::Dirty::LogicOp] = true; | ||
| 197 | } | ||
| 198 | |||
| 199 | void NotifyClipControl() { | ||
| 200 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 201 | flags[OpenGL::Dirty::ClipControl] = true; | ||
| 202 | } | ||
| 203 | |||
| 204 | void NotifyAlphaTest() { | ||
| 205 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 206 | flags[OpenGL::Dirty::AlphaTest] = true; | ||
| 207 | } | ||
| 208 | |||
| 209 | private: | ||
| 210 | Core::System& system; | ||
| 211 | |||
| 212 | GLuint index_buffer = 0; | ||
| 213 | }; | ||
| 214 | |||
| 215 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 35ba334e4..6ec328c53 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp | |||
| @@ -7,7 +7,6 @@ | |||
| 7 | #include "common/alignment.h" | 7 | #include "common/alignment.h" |
| 8 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 9 | #include "common/microprofile.h" | 9 | #include "common/microprofile.h" |
| 10 | #include "video_core/renderer_opengl/gl_state.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 10 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 12 | 11 | ||
| 13 | MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", | 12 | MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 5c1ae1418..f424e3000 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -10,7 +10,7 @@ | |||
| 10 | #include "core/core.h" | 10 | #include "core/core.h" |
| 11 | #include "video_core/morton.h" | 11 | #include "video_core/morton.h" |
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 13 | #include "video_core/renderer_opengl/gl_state.h" | 13 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 14 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 14 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 15 | #include "video_core/renderer_opengl/utils.h" | 15 | #include "video_core/renderer_opengl/utils.h" |
| 16 | #include "video_core/texture_cache/surface_base.h" | 16 | #include "video_core/texture_cache/surface_base.h" |
| @@ -53,6 +53,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format | |||
| 53 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI | 53 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI |
| 54 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F | 54 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F |
| 55 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U | 55 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U |
| 56 | {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S | ||
| 56 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI | 57 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI |
| 57 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F | 58 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F |
| 58 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI | 59 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI |
| @@ -87,6 +88,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format | |||
| 87 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI | 88 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI |
| 88 | {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F | 89 | {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F |
| 89 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI | 90 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI |
| 91 | {GL_R32I, GL_RED_INTEGER, GL_INT, false}, // R32I | ||
| 90 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 | 92 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 |
| 91 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 | 93 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 |
| 92 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4 | 94 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4 |
| @@ -396,6 +398,7 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p | |||
| 396 | const bool is_proxy) | 398 | const bool is_proxy) |
| 397 | : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} { | 399 | : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} { |
| 398 | target = GetTextureTarget(params.target); | 400 | target = GetTextureTarget(params.target); |
| 401 | format = GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format; | ||
| 399 | if (!is_proxy) { | 402 | if (!is_proxy) { |
| 400 | texture_view = CreateTextureView(); | 403 | texture_view = CreateTextureView(); |
| 401 | } | 404 | } |
| @@ -405,24 +408,36 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p | |||
| 405 | CachedSurfaceView::~CachedSurfaceView() = default; | 408 | CachedSurfaceView::~CachedSurfaceView() = default; |
| 406 | 409 | ||
| 407 | void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { | 410 | void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { |
| 408 | ASSERT(params.num_layers == 1 && params.num_levels == 1); | 411 | ASSERT(params.num_levels == 1); |
| 409 | 412 | ||
| 410 | const auto& owner_params = surface.GetSurfaceParams(); | 413 | const GLuint texture = surface.GetTexture(); |
| 414 | if (params.num_layers > 1) { | ||
| 415 | // Layered framebuffer attachments | ||
| 416 | UNIMPLEMENTED_IF(params.base_layer != 0); | ||
| 411 | 417 | ||
| 412 | switch (owner_params.target) { | 418 | switch (params.target) { |
| 419 | case SurfaceTarget::Texture2DArray: | ||
| 420 | glFramebufferTexture(target, attachment, texture, params.base_level); | ||
| 421 | break; | ||
| 422 | default: | ||
| 423 | UNIMPLEMENTED(); | ||
| 424 | } | ||
| 425 | return; | ||
| 426 | } | ||
| 427 | |||
| 428 | const GLenum view_target = surface.GetTarget(); | ||
| 429 | switch (surface.GetSurfaceParams().target) { | ||
| 413 | case SurfaceTarget::Texture1D: | 430 | case SurfaceTarget::Texture1D: |
| 414 | glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(), | 431 | glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level); |
| 415 | params.base_level); | ||
| 416 | break; | 432 | break; |
| 417 | case SurfaceTarget::Texture2D: | 433 | case SurfaceTarget::Texture2D: |
| 418 | glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(), | 434 | glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level); |
| 419 | params.base_level); | ||
| 420 | break; | 435 | break; |
| 421 | case SurfaceTarget::Texture1DArray: | 436 | case SurfaceTarget::Texture1DArray: |
| 422 | case SurfaceTarget::Texture2DArray: | 437 | case SurfaceTarget::Texture2DArray: |
| 423 | case SurfaceTarget::TextureCubemap: | 438 | case SurfaceTarget::TextureCubemap: |
| 424 | case SurfaceTarget::TextureCubeArray: | 439 | case SurfaceTarget::TextureCubeArray: |
| 425 | glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level, | 440 | glFramebufferTextureLayer(target, attachment, texture, params.base_level, |
| 426 | params.base_layer); | 441 | params.base_layer); |
| 427 | break; | 442 | break; |
| 428 | default: | 443 | default: |
| @@ -454,25 +469,20 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou | |||
| 454 | } | 469 | } |
| 455 | 470 | ||
| 456 | OGLTextureView CachedSurfaceView::CreateTextureView() const { | 471 | OGLTextureView CachedSurfaceView::CreateTextureView() const { |
| 457 | const auto& owner_params = surface.GetSurfaceParams(); | ||
| 458 | OGLTextureView texture_view; | 472 | OGLTextureView texture_view; |
| 459 | texture_view.Create(); | 473 | texture_view.Create(); |
| 460 | 474 | ||
| 461 | const GLuint handle{texture_view.handle}; | 475 | glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level, |
| 462 | const FormatTuple& tuple{GetFormatTuple(owner_params.pixel_format)}; | ||
| 463 | |||
| 464 | glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level, | ||
| 465 | params.num_levels, params.base_layer, params.num_layers); | 476 | params.num_levels, params.base_layer, params.num_layers); |
| 466 | 477 | ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); | |
| 467 | ApplyTextureDefaults(owner_params, handle); | ||
| 468 | 478 | ||
| 469 | return texture_view; | 479 | return texture_view; |
| 470 | } | 480 | } |
| 471 | 481 | ||
| 472 | TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, | 482 | TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, |
| 473 | VideoCore::RasterizerInterface& rasterizer, | 483 | VideoCore::RasterizerInterface& rasterizer, |
| 474 | const Device& device) | 484 | const Device& device, StateTracker& state_tracker) |
| 475 | : TextureCacheBase{system, rasterizer} { | 485 | : TextureCacheBase{system, rasterizer}, state_tracker{state_tracker} { |
| 476 | src_framebuffer.Create(); | 486 | src_framebuffer.Create(); |
| 477 | dst_framebuffer.Create(); | 487 | dst_framebuffer.Create(); |
| 478 | } | 488 | } |
| @@ -506,25 +516,26 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, | |||
| 506 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 516 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 507 | const auto& src_params{src_view->GetSurfaceParams()}; | 517 | const auto& src_params{src_view->GetSurfaceParams()}; |
| 508 | const auto& dst_params{dst_view->GetSurfaceParams()}; | 518 | const auto& dst_params{dst_view->GetSurfaceParams()}; |
| 519 | UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); | ||
| 520 | UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); | ||
| 509 | 521 | ||
| 510 | OpenGLState prev_state{OpenGLState::GetCurState()}; | 522 | state_tracker.NotifyScissor0(); |
| 511 | SCOPE_EXIT({ | 523 | state_tracker.NotifyFramebuffer(); |
| 512 | prev_state.AllDirty(); | 524 | state_tracker.NotifyRasterizeEnable(); |
| 513 | prev_state.Apply(); | 525 | state_tracker.NotifyFramebufferSRGB(); |
| 514 | }); | ||
| 515 | |||
| 516 | OpenGLState state; | ||
| 517 | state.draw.read_framebuffer = src_framebuffer.handle; | ||
| 518 | state.draw.draw_framebuffer = dst_framebuffer.handle; | ||
| 519 | state.framebuffer_srgb.enabled = dst_params.srgb_conversion; | ||
| 520 | state.AllDirty(); | ||
| 521 | state.Apply(); | ||
| 522 | 526 | ||
| 523 | u32 buffers{}; | 527 | if (dst_params.srgb_conversion) { |
| 528 | glEnable(GL_FRAMEBUFFER_SRGB); | ||
| 529 | } else { | ||
| 530 | glDisable(GL_FRAMEBUFFER_SRGB); | ||
| 531 | } | ||
| 532 | glDisable(GL_RASTERIZER_DISCARD); | ||
| 533 | glDisablei(GL_SCISSOR_TEST, 0); | ||
| 524 | 534 | ||
| 525 | UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); | 535 | glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle); |
| 526 | UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); | 536 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle); |
| 527 | 537 | ||
| 538 | GLenum buffers = 0; | ||
| 528 | if (src_params.type == SurfaceType::ColorTexture) { | 539 | if (src_params.type == SurfaceType::ColorTexture) { |
| 529 | src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); | 540 | src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); |
| 530 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | 541 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 8e13ab38b..6658c6ffd 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -27,6 +27,7 @@ using VideoCommon::ViewParams; | |||
| 27 | class CachedSurfaceView; | 27 | class CachedSurfaceView; |
| 28 | class CachedSurface; | 28 | class CachedSurface; |
| 29 | class TextureCacheOpenGL; | 29 | class TextureCacheOpenGL; |
| 30 | class StateTracker; | ||
| 30 | 31 | ||
| 31 | using Surface = std::shared_ptr<CachedSurface>; | 32 | using Surface = std::shared_ptr<CachedSurface>; |
| 32 | using View = std::shared_ptr<CachedSurfaceView>; | 33 | using View = std::shared_ptr<CachedSurfaceView>; |
| @@ -96,6 +97,10 @@ public: | |||
| 96 | return texture_view.handle; | 97 | return texture_view.handle; |
| 97 | } | 98 | } |
| 98 | 99 | ||
| 100 | GLenum GetFormat() const { | ||
| 101 | return format; | ||
| 102 | } | ||
| 103 | |||
| 99 | const SurfaceParams& GetSurfaceParams() const { | 104 | const SurfaceParams& GetSurfaceParams() const { |
| 100 | return surface.GetSurfaceParams(); | 105 | return surface.GetSurfaceParams(); |
| 101 | } | 106 | } |
| @@ -113,6 +118,7 @@ private: | |||
| 113 | 118 | ||
| 114 | CachedSurface& surface; | 119 | CachedSurface& surface; |
| 115 | GLenum target{}; | 120 | GLenum target{}; |
| 121 | GLenum format{}; | ||
| 116 | 122 | ||
| 117 | OGLTextureView texture_view; | 123 | OGLTextureView texture_view; |
| 118 | u32 swizzle{}; | 124 | u32 swizzle{}; |
| @@ -122,7 +128,7 @@ private: | |||
| 122 | class TextureCacheOpenGL final : public TextureCacheBase { | 128 | class TextureCacheOpenGL final : public TextureCacheBase { |
| 123 | public: | 129 | public: |
| 124 | explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 130 | explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 125 | const Device& device); | 131 | const Device& device, StateTracker& state_tracker); |
| 126 | ~TextureCacheOpenGL(); | 132 | ~TextureCacheOpenGL(); |
| 127 | 133 | ||
| 128 | protected: | 134 | protected: |
| @@ -139,6 +145,8 @@ protected: | |||
| 139 | private: | 145 | private: |
| 140 | GLuint FetchPBO(std::size_t buffer_size); | 146 | GLuint FetchPBO(std::size_t buffer_size); |
| 141 | 147 | ||
| 148 | StateTracker& state_tracker; | ||
| 149 | |||
| 142 | OGLFramebuffer src_framebuffer; | 150 | OGLFramebuffer src_framebuffer; |
| 143 | OGLFramebuffer dst_framebuffer; | 151 | OGLFramebuffer dst_framebuffer; |
| 144 | std::unordered_map<u32, OGLBuffer> copy_pbo_cache; | 152 | std::unordered_map<u32, OGLBuffer> copy_pbo_cache; |
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 7ed505628..89f0e04ef 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h | |||
| @@ -92,8 +92,32 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { | |||
| 92 | } | 92 | } |
| 93 | case Maxwell::VertexAttribute::Type::UnsignedScaled: | 93 | case Maxwell::VertexAttribute::Type::UnsignedScaled: |
| 94 | switch (attrib.size) { | 94 | switch (attrib.size) { |
| 95 | case Maxwell::VertexAttribute::Size::Size_8: | ||
| 95 | case Maxwell::VertexAttribute::Size::Size_8_8: | 96 | case Maxwell::VertexAttribute::Size::Size_8_8: |
| 97 | case Maxwell::VertexAttribute::Size::Size_8_8_8: | ||
| 98 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | ||
| 96 | return GL_UNSIGNED_BYTE; | 99 | return GL_UNSIGNED_BYTE; |
| 100 | case Maxwell::VertexAttribute::Size::Size_16: | ||
| 101 | case Maxwell::VertexAttribute::Size::Size_16_16: | ||
| 102 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | ||
| 103 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | ||
| 104 | return GL_UNSIGNED_SHORT; | ||
| 105 | default: | ||
| 106 | LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); | ||
| 107 | return {}; | ||
| 108 | } | ||
| 109 | case Maxwell::VertexAttribute::Type::SignedScaled: | ||
| 110 | switch (attrib.size) { | ||
| 111 | case Maxwell::VertexAttribute::Size::Size_8: | ||
| 112 | case Maxwell::VertexAttribute::Size::Size_8_8: | ||
| 113 | case Maxwell::VertexAttribute::Size::Size_8_8_8: | ||
| 114 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | ||
| 115 | return GL_BYTE; | ||
| 116 | case Maxwell::VertexAttribute::Size::Size_16: | ||
| 117 | case Maxwell::VertexAttribute::Size::Size_16_16: | ||
| 118 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | ||
| 119 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | ||
| 120 | return GL_SHORT; | ||
| 97 | default: | 121 | default: |
| 98 | LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); | 122 | LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); |
| 99 | return {}; | 123 | return {}; |
| @@ -401,24 +425,24 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) { | |||
| 401 | return GL_KEEP; | 425 | return GL_KEEP; |
| 402 | } | 426 | } |
| 403 | 427 | ||
| 404 | inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) { | 428 | inline GLenum FrontFace(Maxwell::FrontFace front_face) { |
| 405 | switch (front_face) { | 429 | switch (front_face) { |
| 406 | case Maxwell::Cull::FrontFace::ClockWise: | 430 | case Maxwell::FrontFace::ClockWise: |
| 407 | return GL_CW; | 431 | return GL_CW; |
| 408 | case Maxwell::Cull::FrontFace::CounterClockWise: | 432 | case Maxwell::FrontFace::CounterClockWise: |
| 409 | return GL_CCW; | 433 | return GL_CCW; |
| 410 | } | 434 | } |
| 411 | LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face)); | 435 | LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face)); |
| 412 | return GL_CCW; | 436 | return GL_CCW; |
| 413 | } | 437 | } |
| 414 | 438 | ||
| 415 | inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) { | 439 | inline GLenum CullFace(Maxwell::CullFace cull_face) { |
| 416 | switch (cull_face) { | 440 | switch (cull_face) { |
| 417 | case Maxwell::Cull::CullFace::Front: | 441 | case Maxwell::CullFace::Front: |
| 418 | return GL_FRONT; | 442 | return GL_FRONT; |
| 419 | case Maxwell::Cull::CullFace::Back: | 443 | case Maxwell::CullFace::Back: |
| 420 | return GL_BACK; | 444 | return GL_BACK; |
| 421 | case Maxwell::Cull::CullFace::FrontAndBack: | 445 | case Maxwell::CullFace::FrontAndBack: |
| 422 | return GL_FRONT_AND_BACK; | 446 | return GL_FRONT_AND_BACK; |
| 423 | } | 447 | } |
| 424 | LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face)); | 448 | LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face)); |
| @@ -464,5 +488,18 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) { | |||
| 464 | return GL_COPY; | 488 | return GL_COPY; |
| 465 | } | 489 | } |
| 466 | 490 | ||
| 491 | inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) { | ||
| 492 | switch (polygon_mode) { | ||
| 493 | case Maxwell::PolygonMode::Point: | ||
| 494 | return GL_POINT; | ||
| 495 | case Maxwell::PolygonMode::Line: | ||
| 496 | return GL_LINE; | ||
| 497 | case Maxwell::PolygonMode::Fill: | ||
| 498 | return GL_FILL; | ||
| 499 | } | ||
| 500 | UNREACHABLE_MSG("Invalid polygon mode={}", static_cast<int>(polygon_mode)); | ||
| 501 | return GL_FILL; | ||
| 502 | } | ||
| 503 | |||
| 467 | } // namespace MaxwellToGL | 504 | } // namespace MaxwellToGL |
| 468 | } // namespace OpenGL | 505 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index bba16afaf..fca5e3ec0 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -5,30 +5,54 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <cstddef> | 6 | #include <cstddef> |
| 7 | #include <cstdlib> | 7 | #include <cstdlib> |
| 8 | #include <cstring> | ||
| 8 | #include <memory> | 9 | #include <memory> |
| 10 | |||
| 9 | #include <glad/glad.h> | 11 | #include <glad/glad.h> |
| 12 | |||
| 10 | #include "common/assert.h" | 13 | #include "common/assert.h" |
| 11 | #include "common/logging/log.h" | 14 | #include "common/logging/log.h" |
| 15 | #include "common/microprofile.h" | ||
| 12 | #include "common/telemetry.h" | 16 | #include "common/telemetry.h" |
| 13 | #include "core/core.h" | 17 | #include "core/core.h" |
| 14 | #include "core/core_timing.h" | 18 | #include "core/core_timing.h" |
| 15 | #include "core/frontend/emu_window.h" | 19 | #include "core/frontend/emu_window.h" |
| 16 | #include "core/frontend/scope_acquire_window_context.h" | ||
| 17 | #include "core/memory.h" | 20 | #include "core/memory.h" |
| 18 | #include "core/perf_stats.h" | 21 | #include "core/perf_stats.h" |
| 19 | #include "core/settings.h" | 22 | #include "core/settings.h" |
| 20 | #include "core/telemetry_session.h" | 23 | #include "core/telemetry_session.h" |
| 21 | #include "video_core/morton.h" | 24 | #include "video_core/morton.h" |
| 22 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 25 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 26 | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||
| 23 | #include "video_core/renderer_opengl/renderer_opengl.h" | 27 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 24 | 28 | ||
| 25 | namespace OpenGL { | 29 | namespace OpenGL { |
| 26 | 30 | ||
| 27 | namespace { | 31 | namespace { |
| 28 | 32 | ||
| 29 | constexpr char vertex_shader[] = R"( | 33 | // If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have |
| 34 | // to wait on available presentation frames. | ||
| 35 | constexpr std::size_t SWAP_CHAIN_SIZE = 3; | ||
| 36 | |||
| 37 | struct Frame { | ||
| 38 | u32 width{}; /// Width of the frame (to detect resize) | ||
| 39 | u32 height{}; /// Height of the frame | ||
| 40 | bool color_reloaded{}; /// Texture attachment was recreated (ie: resized) | ||
| 41 | OpenGL::OGLRenderbuffer color{}; /// Buffer shared between the render/present FBO | ||
| 42 | OpenGL::OGLFramebuffer render{}; /// FBO created on the render thread | ||
| 43 | OpenGL::OGLFramebuffer present{}; /// FBO created on the present thread | ||
| 44 | GLsync render_fence{}; /// Fence created on the render thread | ||
| 45 | GLsync present_fence{}; /// Fence created on the presentation thread | ||
| 46 | bool is_srgb{}; /// Framebuffer is sRGB or RGB | ||
| 47 | }; | ||
| 48 | |||
| 49 | constexpr char VERTEX_SHADER[] = R"( | ||
| 30 | #version 430 core | 50 | #version 430 core |
| 31 | 51 | ||
| 52 | out gl_PerVertex { | ||
| 53 | vec4 gl_Position; | ||
| 54 | }; | ||
| 55 | |||
| 32 | layout (location = 0) in vec2 vert_position; | 56 | layout (location = 0) in vec2 vert_position; |
| 33 | layout (location = 1) in vec2 vert_tex_coord; | 57 | layout (location = 1) in vec2 vert_tex_coord; |
| 34 | layout (location = 0) out vec2 frag_tex_coord; | 58 | layout (location = 0) out vec2 frag_tex_coord; |
| @@ -49,7 +73,7 @@ void main() { | |||
| 49 | } | 73 | } |
| 50 | )"; | 74 | )"; |
| 51 | 75 | ||
| 52 | constexpr char fragment_shader[] = R"( | 76 | constexpr char FRAGMENT_SHADER[] = R"( |
| 53 | #version 430 core | 77 | #version 430 core |
| 54 | 78 | ||
| 55 | layout (location = 0) in vec2 frag_tex_coord; | 79 | layout (location = 0) in vec2 frag_tex_coord; |
| @@ -58,7 +82,7 @@ layout (location = 0) out vec4 color; | |||
| 58 | layout (binding = 0) uniform sampler2D color_texture; | 82 | layout (binding = 0) uniform sampler2D color_texture; |
| 59 | 83 | ||
| 60 | void main() { | 84 | void main() { |
| 61 | color = texture(color_texture, frag_tex_coord); | 85 | color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f); |
| 62 | } | 86 | } |
| 63 | )"; | 87 | )"; |
| 64 | 88 | ||
| @@ -67,13 +91,31 @@ constexpr GLint TexCoordLocation = 1; | |||
| 67 | constexpr GLint ModelViewMatrixLocation = 0; | 91 | constexpr GLint ModelViewMatrixLocation = 0; |
| 68 | 92 | ||
| 69 | struct ScreenRectVertex { | 93 | struct ScreenRectVertex { |
| 70 | constexpr ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v) | 94 | constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v) |
| 71 | : position{{x, y}}, tex_coord{{u, v}} {} | 95 | : position{{static_cast<GLfloat>(x), static_cast<GLfloat>(y)}}, tex_coord{{u, v}} {} |
| 72 | 96 | ||
| 73 | std::array<GLfloat, 2> position; | 97 | std::array<GLfloat, 2> position; |
| 74 | std::array<GLfloat, 2> tex_coord; | 98 | std::array<GLfloat, 2> tex_coord; |
| 75 | }; | 99 | }; |
| 76 | 100 | ||
| 101 | /// Returns true if any debug tool is attached | ||
| 102 | bool HasDebugTool() { | ||
| 103 | const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | ||
| 104 | if (nsight) { | ||
| 105 | return true; | ||
| 106 | } | ||
| 107 | |||
| 108 | GLint num_extensions; | ||
| 109 | glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions); | ||
| 110 | for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) { | ||
| 111 | const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index)); | ||
| 112 | if (!std::strcmp(name, "GL_EXT_debug_tool")) { | ||
| 113 | return true; | ||
| 114 | } | ||
| 115 | } | ||
| 116 | return false; | ||
| 117 | } | ||
| 118 | |||
| 77 | /** | 119 | /** |
| 78 | * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left | 120 | * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left |
| 79 | * corner and (width, height) on the lower-bottom. | 121 | * corner and (width, height) on the lower-bottom. |
| @@ -157,22 +199,229 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit | |||
| 157 | 199 | ||
| 158 | } // Anonymous namespace | 200 | } // Anonymous namespace |
| 159 | 201 | ||
| 202 | /** | ||
| 203 | * For smooth Vsync rendering, we want to always present the latest frame that the core generates, | ||
| 204 | * but also make sure that rendering happens at the pace that the frontend dictates. This is a | ||
| 205 | * helper class that the renderer uses to sync frames between the render thread and the presentation | ||
| 206 | * thread | ||
| 207 | */ | ||
| 208 | class FrameMailbox { | ||
| 209 | public: | ||
| 210 | std::mutex swap_chain_lock; | ||
| 211 | std::condition_variable present_cv; | ||
| 212 | std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{}; | ||
| 213 | std::queue<Frame*> free_queue; | ||
| 214 | std::deque<Frame*> present_queue; | ||
| 215 | Frame* previous_frame{}; | ||
| 216 | |||
| 217 | FrameMailbox() : has_debug_tool{HasDebugTool()} { | ||
| 218 | for (auto& frame : swap_chain) { | ||
| 219 | free_queue.push(&frame); | ||
| 220 | } | ||
| 221 | } | ||
| 222 | |||
| 223 | ~FrameMailbox() { | ||
| 224 | // lock the mutex and clear out the present and free_queues and notify any people who are | ||
| 225 | // blocked to prevent deadlock on shutdown | ||
| 226 | std::scoped_lock lock{swap_chain_lock}; | ||
| 227 | std::queue<Frame*>().swap(free_queue); | ||
| 228 | present_queue.clear(); | ||
| 229 | present_cv.notify_all(); | ||
| 230 | } | ||
| 231 | |||
| 232 | void ReloadPresentFrame(Frame* frame, u32 height, u32 width) { | ||
| 233 | frame->present.Release(); | ||
| 234 | frame->present.Create(); | ||
| 235 | GLint previous_draw_fbo{}; | ||
| 236 | glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo); | ||
| 237 | glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle); | ||
| 238 | glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, | ||
| 239 | frame->color.handle); | ||
| 240 | if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { | ||
| 241 | LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!"); | ||
| 242 | } | ||
| 243 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo); | ||
| 244 | frame->color_reloaded = false; | ||
| 245 | } | ||
| 246 | |||
| 247 | void ReloadRenderFrame(Frame* frame, u32 width, u32 height) { | ||
| 248 | // Recreate the color texture attachment | ||
| 249 | frame->color.Release(); | ||
| 250 | frame->color.Create(); | ||
| 251 | const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8; | ||
| 252 | glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height); | ||
| 253 | |||
| 254 | // Recreate the FBO for the render target | ||
| 255 | frame->render.Release(); | ||
| 256 | frame->render.Create(); | ||
| 257 | glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle); | ||
| 258 | glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, | ||
| 259 | frame->color.handle); | ||
| 260 | if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { | ||
| 261 | LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); | ||
| 262 | } | ||
| 263 | |||
| 264 | frame->width = width; | ||
| 265 | frame->height = height; | ||
| 266 | frame->color_reloaded = true; | ||
| 267 | } | ||
| 268 | |||
| 269 | Frame* GetRenderFrame() { | ||
| 270 | std::unique_lock lock{swap_chain_lock}; | ||
| 271 | |||
| 272 | // If theres no free frames, we will reuse the oldest render frame | ||
| 273 | if (free_queue.empty()) { | ||
| 274 | auto frame = present_queue.back(); | ||
| 275 | present_queue.pop_back(); | ||
| 276 | return frame; | ||
| 277 | } | ||
| 278 | |||
| 279 | Frame* frame = free_queue.front(); | ||
| 280 | free_queue.pop(); | ||
| 281 | return frame; | ||
| 282 | } | ||
| 283 | |||
| 284 | void ReleaseRenderFrame(Frame* frame) { | ||
| 285 | std::unique_lock lock{swap_chain_lock}; | ||
| 286 | present_queue.push_front(frame); | ||
| 287 | present_cv.notify_one(); | ||
| 288 | |||
| 289 | DebugNotifyNextFrame(); | ||
| 290 | } | ||
| 291 | |||
| 292 | Frame* TryGetPresentFrame(int timeout_ms) { | ||
| 293 | DebugWaitForNextFrame(); | ||
| 294 | |||
| 295 | std::unique_lock lock{swap_chain_lock}; | ||
| 296 | // wait for new entries in the present_queue | ||
| 297 | present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), | ||
| 298 | [&] { return !present_queue.empty(); }); | ||
| 299 | if (present_queue.empty()) { | ||
| 300 | // timed out waiting for a frame to draw so return the previous frame | ||
| 301 | return previous_frame; | ||
| 302 | } | ||
| 303 | |||
| 304 | // free the previous frame and add it back to the free queue | ||
| 305 | if (previous_frame) { | ||
| 306 | free_queue.push(previous_frame); | ||
| 307 | } | ||
| 308 | |||
| 309 | // the newest entries are pushed to the front of the queue | ||
| 310 | Frame* frame = present_queue.front(); | ||
| 311 | present_queue.pop_front(); | ||
| 312 | // remove all old entries from the present queue and move them back to the free_queue | ||
| 313 | for (auto f : present_queue) { | ||
| 314 | free_queue.push(f); | ||
| 315 | } | ||
| 316 | present_queue.clear(); | ||
| 317 | previous_frame = frame; | ||
| 318 | return frame; | ||
| 319 | } | ||
| 320 | |||
| 321 | private: | ||
| 322 | std::mutex debug_synch_mutex; | ||
| 323 | std::condition_variable debug_synch_condition; | ||
| 324 | std::atomic_int frame_for_debug{}; | ||
| 325 | const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step | ||
| 326 | |||
| 327 | /// Signal that a new frame is available (called from GPU thread) | ||
| 328 | void DebugNotifyNextFrame() { | ||
| 329 | if (!has_debug_tool) { | ||
| 330 | return; | ||
| 331 | } | ||
| 332 | frame_for_debug++; | ||
| 333 | std::lock_guard lock{debug_synch_mutex}; | ||
| 334 | debug_synch_condition.notify_one(); | ||
| 335 | } | ||
| 336 | |||
| 337 | /// Wait for a new frame to be available (called from presentation thread) | ||
| 338 | void DebugWaitForNextFrame() { | ||
| 339 | if (!has_debug_tool) { | ||
| 340 | return; | ||
| 341 | } | ||
| 342 | const int last_frame = frame_for_debug; | ||
| 343 | std::unique_lock lock{debug_synch_mutex}; | ||
| 344 | debug_synch_condition.wait(lock, | ||
| 345 | [this, last_frame] { return frame_for_debug > last_frame; }); | ||
| 346 | } | ||
| 347 | }; | ||
| 348 | |||
| 160 | RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) | 349 | RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) |
| 161 | : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {} | 350 | : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, |
| 351 | frame_mailbox{std::make_unique<FrameMailbox>()} {} | ||
| 162 | 352 | ||
| 163 | RendererOpenGL::~RendererOpenGL() = default; | 353 | RendererOpenGL::~RendererOpenGL() = default; |
| 164 | 354 | ||
| 355 | MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 128, 64)); | ||
| 356 | MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128)); | ||
| 357 | |||
| 165 | void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | 358 | void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| 166 | // Maintain the rasterizer's state as a priority | 359 | render_window.PollEvents(); |
| 167 | OpenGLState prev_state = OpenGLState::GetCurState(); | 360 | |
| 168 | state.AllDirty(); | 361 | if (!framebuffer) { |
| 169 | state.Apply(); | 362 | return; |
| 363 | } | ||
| 364 | |||
| 365 | PrepareRendertarget(framebuffer); | ||
| 366 | RenderScreenshot(); | ||
| 367 | |||
| 368 | Frame* frame; | ||
| 369 | { | ||
| 370 | MICROPROFILE_SCOPE(OpenGL_WaitPresent); | ||
| 371 | |||
| 372 | frame = frame_mailbox->GetRenderFrame(); | ||
| 373 | |||
| 374 | // Clean up sync objects before drawing | ||
| 375 | |||
| 376 | // INTEL driver workaround. We can't delete the previous render sync object until we are | ||
| 377 | // sure that the presentation is done | ||
| 378 | if (frame->present_fence) { | ||
| 379 | glClientWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED); | ||
| 380 | } | ||
| 381 | |||
| 382 | // delete the draw fence if the frame wasn't presented | ||
| 383 | if (frame->render_fence) { | ||
| 384 | glDeleteSync(frame->render_fence); | ||
| 385 | frame->render_fence = 0; | ||
| 386 | } | ||
| 387 | |||
| 388 | // wait for the presentation to be done | ||
| 389 | if (frame->present_fence) { | ||
| 390 | glWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED); | ||
| 391 | glDeleteSync(frame->present_fence); | ||
| 392 | frame->present_fence = 0; | ||
| 393 | } | ||
| 394 | } | ||
| 170 | 395 | ||
| 396 | { | ||
| 397 | MICROPROFILE_SCOPE(OpenGL_RenderFrame); | ||
| 398 | const auto& layout = render_window.GetFramebufferLayout(); | ||
| 399 | |||
| 400 | // Recreate the frame if the size of the window has changed | ||
| 401 | if (layout.width != frame->width || layout.height != frame->height || | ||
| 402 | screen_info.display_srgb != frame->is_srgb) { | ||
| 403 | LOG_DEBUG(Render_OpenGL, "Reloading render frame"); | ||
| 404 | frame->is_srgb = screen_info.display_srgb; | ||
| 405 | frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height); | ||
| 406 | } | ||
| 407 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, frame->render.handle); | ||
| 408 | DrawScreen(layout); | ||
| 409 | // Create a fence for the frontend to wait on and swap this frame to OffTex | ||
| 410 | frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); | ||
| 411 | glFlush(); | ||
| 412 | frame_mailbox->ReleaseRenderFrame(frame); | ||
| 413 | m_current_frame++; | ||
| 414 | rasterizer->TickFrame(); | ||
| 415 | } | ||
| 416 | } | ||
| 417 | |||
| 418 | void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) { | ||
| 171 | if (framebuffer) { | 419 | if (framebuffer) { |
| 172 | // If framebuffer is provided, reload it from memory to a texture | 420 | // If framebuffer is provided, reload it from memory to a texture |
| 173 | if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) || | 421 | if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) || |
| 174 | screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) || | 422 | screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) || |
| 175 | screen_info.texture.pixel_format != framebuffer->pixel_format) { | 423 | screen_info.texture.pixel_format != framebuffer->pixel_format || |
| 424 | gl_framebuffer_data.empty()) { | ||
| 176 | // Reallocate texture if the framebuffer size has changed. | 425 | // Reallocate texture if the framebuffer size has changed. |
| 177 | // This is expected to not happen very often and hence should not be a | 426 | // This is expected to not happen very often and hence should not be a |
| 178 | // performance problem. | 427 | // performance problem. |
| @@ -181,22 +430,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 181 | 430 | ||
| 182 | // Load the framebuffer from memory, draw it to the screen, and swap buffers | 431 | // Load the framebuffer from memory, draw it to the screen, and swap buffers |
| 183 | LoadFBToScreenInfo(*framebuffer); | 432 | LoadFBToScreenInfo(*framebuffer); |
| 184 | |||
| 185 | if (renderer_settings.screenshot_requested) | ||
| 186 | CaptureScreenshot(); | ||
| 187 | |||
| 188 | DrawScreen(render_window.GetFramebufferLayout()); | ||
| 189 | |||
| 190 | rasterizer->TickFrame(); | ||
| 191 | |||
| 192 | render_window.SwapBuffers(); | ||
| 193 | } | 433 | } |
| 194 | |||
| 195 | render_window.PollEvents(); | ||
| 196 | |||
| 197 | // Restore the rasterizer state | ||
| 198 | prev_state.AllDirty(); | ||
| 199 | prev_state.Apply(); | ||
| 200 | } | 434 | } |
| 201 | 435 | ||
| 202 | void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { | 436 | void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { |
| @@ -249,31 +483,24 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 249 | glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, | 483 | glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, |
| 250 | 0.0f); | 484 | 0.0f); |
| 251 | 485 | ||
| 252 | // Link shaders and get variable locations | 486 | // Create shader programs |
| 253 | shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); | 487 | OGLShader vertex_shader; |
| 254 | state.draw.shader_program = shader.handle; | 488 | vertex_shader.Create(VERTEX_SHADER, GL_VERTEX_SHADER); |
| 255 | state.AllDirty(); | 489 | |
| 256 | state.Apply(); | 490 | OGLShader fragment_shader; |
| 491 | fragment_shader.Create(FRAGMENT_SHADER, GL_FRAGMENT_SHADER); | ||
| 492 | |||
| 493 | vertex_program.Create(true, false, vertex_shader.handle); | ||
| 494 | fragment_program.Create(true, false, fragment_shader.handle); | ||
| 495 | |||
| 496 | // Create program pipeline | ||
| 497 | program_manager.Create(); | ||
| 257 | 498 | ||
| 258 | // Generate VBO handle for drawing | 499 | // Generate VBO handle for drawing |
| 259 | vertex_buffer.Create(); | 500 | vertex_buffer.Create(); |
| 260 | 501 | ||
| 261 | // Generate VAO | ||
| 262 | vertex_array.Create(); | ||
| 263 | state.draw.vertex_array = vertex_array.handle; | ||
| 264 | |||
| 265 | // Attach vertex data to VAO | 502 | // Attach vertex data to VAO |
| 266 | glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); | 503 | glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); |
| 267 | glVertexArrayAttribFormat(vertex_array.handle, PositionLocation, 2, GL_FLOAT, GL_FALSE, | ||
| 268 | offsetof(ScreenRectVertex, position)); | ||
| 269 | glVertexArrayAttribFormat(vertex_array.handle, TexCoordLocation, 2, GL_FLOAT, GL_FALSE, | ||
| 270 | offsetof(ScreenRectVertex, tex_coord)); | ||
| 271 | glVertexArrayAttribBinding(vertex_array.handle, PositionLocation, 0); | ||
| 272 | glVertexArrayAttribBinding(vertex_array.handle, TexCoordLocation, 0); | ||
| 273 | glEnableVertexArrayAttrib(vertex_array.handle, PositionLocation); | ||
| 274 | glEnableVertexArrayAttrib(vertex_array.handle, TexCoordLocation); | ||
| 275 | glVertexArrayVertexBuffer(vertex_array.handle, 0, vertex_buffer.handle, 0, | ||
| 276 | sizeof(ScreenRectVertex)); | ||
| 277 | 504 | ||
| 278 | // Allocate textures for the screen | 505 | // Allocate textures for the screen |
| 279 | screen_info.texture.resource.Create(GL_TEXTURE_2D); | 506 | screen_info.texture.resource.Create(GL_TEXTURE_2D); |
| @@ -306,7 +533,8 @@ void RendererOpenGL::CreateRasterizer() { | |||
| 306 | if (rasterizer) { | 533 | if (rasterizer) { |
| 307 | return; | 534 | return; |
| 308 | } | 535 | } |
| 309 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); | 536 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info, |
| 537 | program_manager, state_tracker); | ||
| 310 | } | 538 | } |
| 311 | 539 | ||
| 312 | void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, | 540 | void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, |
| @@ -345,8 +573,19 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, | |||
| 345 | glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height); | 573 | glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height); |
| 346 | } | 574 | } |
| 347 | 575 | ||
| 348 | void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, | 576 | void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { |
| 349 | float h) { | 577 | if (renderer_settings.set_background_color) { |
| 578 | // Update background color before drawing | ||
| 579 | glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, | ||
| 580 | 0.0f); | ||
| 581 | } | ||
| 582 | |||
| 583 | // Set projection matrix | ||
| 584 | const std::array ortho_matrix = | ||
| 585 | MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); | ||
| 586 | glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, | ||
| 587 | std::data(ortho_matrix)); | ||
| 588 | |||
| 350 | const auto& texcoords = screen_info.display_texcoords; | 589 | const auto& texcoords = screen_info.display_texcoords; |
| 351 | auto left = texcoords.left; | 590 | auto left = texcoords.left; |
| 352 | auto right = texcoords.right; | 591 | auto right = texcoords.right; |
| @@ -378,60 +617,129 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, | |||
| 378 | static_cast<f32>(screen_info.texture.height); | 617 | static_cast<f32>(screen_info.texture.height); |
| 379 | } | 618 | } |
| 380 | 619 | ||
| 620 | const auto& screen = layout.screen; | ||
| 381 | const std::array vertices = { | 621 | const std::array vertices = { |
| 382 | ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v), | 622 | ScreenRectVertex(screen.left, screen.top, texcoords.top * scale_u, left * scale_v), |
| 383 | ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v), | 623 | ScreenRectVertex(screen.right, screen.top, texcoords.bottom * scale_u, left * scale_v), |
| 384 | ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v), | 624 | ScreenRectVertex(screen.left, screen.bottom, texcoords.top * scale_u, right * scale_v), |
| 385 | ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v), | 625 | ScreenRectVertex(screen.right, screen.bottom, texcoords.bottom * scale_u, right * scale_v), |
| 386 | }; | 626 | }; |
| 387 | |||
| 388 | state.textures[0] = screen_info.display_texture; | ||
| 389 | state.framebuffer_srgb.enabled = screen_info.display_srgb; | ||
| 390 | state.AllDirty(); | ||
| 391 | state.Apply(); | ||
| 392 | glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); | 627 | glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); |
| 393 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | ||
| 394 | // Restore default state | ||
| 395 | state.framebuffer_srgb.enabled = false; | ||
| 396 | state.textures[0] = 0; | ||
| 397 | state.AllDirty(); | ||
| 398 | state.Apply(); | ||
| 399 | } | ||
| 400 | 628 | ||
| 401 | void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | 629 | // TODO: Signal state tracker about these changes |
| 402 | if (renderer_settings.set_background_color) { | 630 | state_tracker.NotifyScreenDrawVertexArray(); |
| 403 | // Update background color before drawing | 631 | state_tracker.NotifyPolygonModes(); |
| 404 | glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, | 632 | state_tracker.NotifyViewport0(); |
| 405 | 0.0f); | 633 | state_tracker.NotifyScissor0(); |
| 634 | state_tracker.NotifyColorMask0(); | ||
| 635 | state_tracker.NotifyBlend0(); | ||
| 636 | state_tracker.NotifyFramebuffer(); | ||
| 637 | state_tracker.NotifyFrontFace(); | ||
| 638 | state_tracker.NotifyCullTest(); | ||
| 639 | state_tracker.NotifyDepthTest(); | ||
| 640 | state_tracker.NotifyStencilTest(); | ||
| 641 | state_tracker.NotifyPolygonOffset(); | ||
| 642 | state_tracker.NotifyRasterizeEnable(); | ||
| 643 | state_tracker.NotifyFramebufferSRGB(); | ||
| 644 | state_tracker.NotifyLogicOp(); | ||
| 645 | state_tracker.NotifyClipControl(); | ||
| 646 | state_tracker.NotifyAlphaTest(); | ||
| 647 | |||
| 648 | program_manager.UseVertexShader(vertex_program.handle); | ||
| 649 | program_manager.UseGeometryShader(0); | ||
| 650 | program_manager.UseFragmentShader(fragment_program.handle); | ||
| 651 | program_manager.BindGraphicsPipeline(); | ||
| 652 | |||
| 653 | glEnable(GL_CULL_FACE); | ||
| 654 | if (screen_info.display_srgb) { | ||
| 655 | glEnable(GL_FRAMEBUFFER_SRGB); | ||
| 656 | } else { | ||
| 657 | glDisable(GL_FRAMEBUFFER_SRGB); | ||
| 406 | } | 658 | } |
| 659 | glDisable(GL_COLOR_LOGIC_OP); | ||
| 660 | glDisable(GL_DEPTH_TEST); | ||
| 661 | glDisable(GL_STENCIL_TEST); | ||
| 662 | glDisable(GL_POLYGON_OFFSET_FILL); | ||
| 663 | glDisable(GL_RASTERIZER_DISCARD); | ||
| 664 | glDisable(GL_ALPHA_TEST); | ||
| 665 | glDisablei(GL_BLEND, 0); | ||
| 666 | glDisablei(GL_SCISSOR_TEST, 0); | ||
| 667 | glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); | ||
| 668 | glCullFace(GL_BACK); | ||
| 669 | glFrontFace(GL_CW); | ||
| 670 | glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); | ||
| 671 | glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); | ||
| 672 | glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), | ||
| 673 | static_cast<GLfloat>(layout.height)); | ||
| 674 | glDepthRangeIndexed(0, 0.0, 0.0); | ||
| 675 | |||
| 676 | glEnableVertexAttribArray(PositionLocation); | ||
| 677 | glEnableVertexAttribArray(TexCoordLocation); | ||
| 678 | glVertexAttribDivisor(PositionLocation, 0); | ||
| 679 | glVertexAttribDivisor(TexCoordLocation, 0); | ||
| 680 | glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE, | ||
| 681 | offsetof(ScreenRectVertex, position)); | ||
| 682 | glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE, | ||
| 683 | offsetof(ScreenRectVertex, tex_coord)); | ||
| 684 | glVertexAttribBinding(PositionLocation, 0); | ||
| 685 | glVertexAttribBinding(TexCoordLocation, 0); | ||
| 686 | glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); | ||
| 687 | |||
| 688 | glBindTextureUnit(0, screen_info.display_texture); | ||
| 689 | glBindSampler(0, 0); | ||
| 407 | 690 | ||
| 408 | const auto& screen = layout.screen; | ||
| 409 | |||
| 410 | glViewport(0, 0, layout.width, layout.height); | ||
| 411 | glClear(GL_COLOR_BUFFER_BIT); | 691 | glClear(GL_COLOR_BUFFER_BIT); |
| 692 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | ||
| 693 | } | ||
| 412 | 694 | ||
| 413 | // Set projection matrix | 695 | void RendererOpenGL::TryPresent(int timeout_ms) { |
| 414 | const std::array ortho_matrix = | 696 | const auto& layout = render_window.GetFramebufferLayout(); |
| 415 | MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); | 697 | auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms); |
| 416 | glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); | 698 | if (!frame) { |
| 699 | LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present"); | ||
| 700 | return; | ||
| 701 | } | ||
| 417 | 702 | ||
| 418 | DrawScreenTriangles(screen_info, static_cast<float>(screen.left), | 703 | // Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a |
| 419 | static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()), | 704 | // readback since we won't be doing any blending |
| 420 | static_cast<float>(screen.GetHeight())); | 705 | glClear(GL_COLOR_BUFFER_BIT); |
| 421 | 706 | ||
| 422 | m_current_frame++; | 707 | // Recreate the presentation FBO if the color attachment was changed |
| 708 | if (frame->color_reloaded) { | ||
| 709 | LOG_DEBUG(Render_OpenGL, "Reloading present frame"); | ||
| 710 | frame_mailbox->ReloadPresentFrame(frame, layout.width, layout.height); | ||
| 711 | } | ||
| 712 | glWaitSync(frame->render_fence, 0, GL_TIMEOUT_IGNORED); | ||
| 713 | // INTEL workaround. | ||
| 714 | // Normally we could just delete the draw fence here, but due to driver bugs, we can just delete | ||
| 715 | // it on the emulation thread without too much penalty | ||
| 716 | // glDeleteSync(frame.render_sync); | ||
| 717 | // frame.render_sync = 0; | ||
| 718 | |||
| 719 | glBindFramebuffer(GL_READ_FRAMEBUFFER, frame->present.handle); | ||
| 720 | glBlitFramebuffer(0, 0, frame->width, frame->height, 0, 0, layout.width, layout.height, | ||
| 721 | GL_COLOR_BUFFER_BIT, GL_LINEAR); | ||
| 722 | |||
| 723 | // Insert fence for the main thread to block on | ||
| 724 | frame->present_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); | ||
| 725 | glFlush(); | ||
| 726 | |||
| 727 | glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); | ||
| 423 | } | 728 | } |
| 424 | 729 | ||
| 425 | void RendererOpenGL::UpdateFramerate() {} | 730 | void RendererOpenGL::RenderScreenshot() { |
| 731 | if (!renderer_settings.screenshot_requested) { | ||
| 732 | return; | ||
| 733 | } | ||
| 734 | |||
| 735 | GLint old_read_fb; | ||
| 736 | GLint old_draw_fb; | ||
| 737 | glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); | ||
| 738 | glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); | ||
| 426 | 739 | ||
| 427 | void RendererOpenGL::CaptureScreenshot() { | ||
| 428 | // Draw the current frame to the screenshot framebuffer | 740 | // Draw the current frame to the screenshot framebuffer |
| 429 | screenshot_framebuffer.Create(); | 741 | screenshot_framebuffer.Create(); |
| 430 | GLuint old_read_fb = state.draw.read_framebuffer; | 742 | glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle); |
| 431 | GLuint old_draw_fb = state.draw.draw_framebuffer; | ||
| 432 | state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; | ||
| 433 | state.AllDirty(); | ||
| 434 | state.Apply(); | ||
| 435 | 743 | ||
| 436 | Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; | 744 | Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; |
| 437 | 745 | ||
| @@ -448,19 +756,16 @@ void RendererOpenGL::CaptureScreenshot() { | |||
| 448 | renderer_settings.screenshot_bits); | 756 | renderer_settings.screenshot_bits); |
| 449 | 757 | ||
| 450 | screenshot_framebuffer.Release(); | 758 | screenshot_framebuffer.Release(); |
| 451 | state.draw.read_framebuffer = old_read_fb; | ||
| 452 | state.draw.draw_framebuffer = old_draw_fb; | ||
| 453 | state.AllDirty(); | ||
| 454 | state.Apply(); | ||
| 455 | glDeleteRenderbuffers(1, &renderbuffer); | 759 | glDeleteRenderbuffers(1, &renderbuffer); |
| 456 | 760 | ||
| 761 | glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); | ||
| 762 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); | ||
| 763 | |||
| 457 | renderer_settings.screenshot_complete_callback(); | 764 | renderer_settings.screenshot_complete_callback(); |
| 458 | renderer_settings.screenshot_requested = false; | 765 | renderer_settings.screenshot_requested = false; |
| 459 | } | 766 | } |
| 460 | 767 | ||
| 461 | bool RendererOpenGL::Init() { | 768 | bool RendererOpenGL::Init() { |
| 462 | Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window}; | ||
| 463 | |||
| 464 | if (GLAD_GL_KHR_debug) { | 769 | if (GLAD_GL_KHR_debug) { |
| 465 | glEnable(GL_DEBUG_OUTPUT); | 770 | glEnable(GL_DEBUG_OUTPUT); |
| 466 | glDebugMessageCallback(DebugHandler, nullptr); | 771 | glDebugMessageCallback(DebugHandler, nullptr); |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index b56328a7f..33073ce5b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -10,7 +10,8 @@ | |||
| 10 | #include "common/math_util.h" | 10 | #include "common/math_util.h" |
| 11 | #include "video_core/renderer_base.h" | 11 | #include "video_core/renderer_base.h" |
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 13 | #include "video_core/renderer_opengl/gl_state.h" | 13 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 14 | #include "video_core/renderer_opengl/gl_state_tracker.h" | ||
| 14 | 15 | ||
| 15 | namespace Core { | 16 | namespace Core { |
| 16 | class System; | 17 | class System; |
| @@ -44,19 +45,23 @@ struct ScreenInfo { | |||
| 44 | TextureInfo texture; | 45 | TextureInfo texture; |
| 45 | }; | 46 | }; |
| 46 | 47 | ||
| 48 | struct PresentationTexture { | ||
| 49 | u32 width = 0; | ||
| 50 | u32 height = 0; | ||
| 51 | OGLTexture texture; | ||
| 52 | }; | ||
| 53 | |||
| 54 | class FrameMailbox; | ||
| 55 | |||
| 47 | class RendererOpenGL final : public VideoCore::RendererBase { | 56 | class RendererOpenGL final : public VideoCore::RendererBase { |
| 48 | public: | 57 | public: |
| 49 | explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); | 58 | explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); |
| 50 | ~RendererOpenGL() override; | 59 | ~RendererOpenGL() override; |
| 51 | 60 | ||
| 52 | /// Swap buffers (render frame) | ||
| 53 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; | ||
| 54 | |||
| 55 | /// Initialize the renderer | ||
| 56 | bool Init() override; | 61 | bool Init() override; |
| 57 | |||
| 58 | /// Shutdown the renderer | ||
| 59 | void ShutDown() override; | 62 | void ShutDown() override; |
| 63 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; | ||
| 64 | void TryPresent(int timeout_ms) override; | ||
| 60 | 65 | ||
| 61 | private: | 66 | private: |
| 62 | /// Initializes the OpenGL state and creates persistent objects. | 67 | /// Initializes the OpenGL state and creates persistent objects. |
| @@ -72,12 +77,7 @@ private: | |||
| 72 | /// Draws the emulated screens to the emulator window. | 77 | /// Draws the emulated screens to the emulator window. |
| 73 | void DrawScreen(const Layout::FramebufferLayout& layout); | 78 | void DrawScreen(const Layout::FramebufferLayout& layout); |
| 74 | 79 | ||
| 75 | void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h); | 80 | void RenderScreenshot(); |
| 76 | |||
| 77 | /// Updates the framerate. | ||
| 78 | void UpdateFramerate(); | ||
| 79 | |||
| 80 | void CaptureScreenshot(); | ||
| 81 | 81 | ||
| 82 | /// Loads framebuffer from emulated memory into the active OpenGL texture. | 82 | /// Loads framebuffer from emulated memory into the active OpenGL texture. |
| 83 | void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); | 83 | void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); |
| @@ -87,26 +87,34 @@ private: | |||
| 87 | void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, | 87 | void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, |
| 88 | const TextureInfo& texture); | 88 | const TextureInfo& texture); |
| 89 | 89 | ||
| 90 | void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer); | ||
| 91 | |||
| 90 | Core::Frontend::EmuWindow& emu_window; | 92 | Core::Frontend::EmuWindow& emu_window; |
| 91 | Core::System& system; | 93 | Core::System& system; |
| 92 | 94 | ||
| 93 | OpenGLState state; | 95 | StateTracker state_tracker{system}; |
| 94 | 96 | ||
| 95 | // OpenGL object IDs | 97 | // OpenGL object IDs |
| 96 | OGLVertexArray vertex_array; | ||
| 97 | OGLBuffer vertex_buffer; | 98 | OGLBuffer vertex_buffer; |
| 98 | OGLProgram shader; | 99 | OGLProgram vertex_program; |
| 100 | OGLProgram fragment_program; | ||
| 99 | OGLFramebuffer screenshot_framebuffer; | 101 | OGLFramebuffer screenshot_framebuffer; |
| 100 | 102 | ||
| 101 | /// Display information for Switch screen | 103 | /// Display information for Switch screen |
| 102 | ScreenInfo screen_info; | 104 | ScreenInfo screen_info; |
| 103 | 105 | ||
| 106 | /// Global dummy shader pipeline | ||
| 107 | GLShader::ProgramManager program_manager; | ||
| 108 | |||
| 104 | /// OpenGL framebuffer data | 109 | /// OpenGL framebuffer data |
| 105 | std::vector<u8> gl_framebuffer_data; | 110 | std::vector<u8> gl_framebuffer_data; |
| 106 | 111 | ||
| 107 | /// Used for transforming the framebuffer orientation | 112 | /// Used for transforming the framebuffer orientation |
| 108 | Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; | 113 | Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; |
| 109 | Common::Rectangle<int> framebuffer_crop_rect; | 114 | Common::Rectangle<int> framebuffer_crop_rect; |
| 115 | |||
| 116 | /// Frame presentation mailbox | ||
| 117 | std::unique_ptr<FrameMailbox> frame_mailbox; | ||
| 110 | }; | 118 | }; |
| 111 | 119 | ||
| 112 | } // namespace OpenGL | 120 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index ac99e6385..b751086fa 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <glad/glad.h> | 9 | #include <glad/glad.h> |
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/renderer_opengl/gl_state_tracker.h" | ||
| 12 | #include "video_core/renderer_opengl/utils.h" | 13 | #include "video_core/renderer_opengl/utils.h" |
| 13 | 14 | ||
| 14 | namespace OpenGL { | 15 | namespace OpenGL { |
| @@ -20,12 +21,12 @@ struct VertexArrayPushBuffer::Entry { | |||
| 20 | GLsizei stride{}; | 21 | GLsizei stride{}; |
| 21 | }; | 22 | }; |
| 22 | 23 | ||
| 23 | VertexArrayPushBuffer::VertexArrayPushBuffer() = default; | 24 | VertexArrayPushBuffer::VertexArrayPushBuffer(StateTracker& state_tracker) |
| 25 | : state_tracker{state_tracker} {} | ||
| 24 | 26 | ||
| 25 | VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; | 27 | VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; |
| 26 | 28 | ||
| 27 | void VertexArrayPushBuffer::Setup(GLuint vao_) { | 29 | void VertexArrayPushBuffer::Setup() { |
| 28 | vao = vao_; | ||
| 29 | index_buffer = nullptr; | 30 | index_buffer = nullptr; |
| 30 | vertex_buffers.clear(); | 31 | vertex_buffers.clear(); |
| 31 | } | 32 | } |
| @@ -41,13 +42,11 @@ void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* | |||
| 41 | 42 | ||
| 42 | void VertexArrayPushBuffer::Bind() { | 43 | void VertexArrayPushBuffer::Bind() { |
| 43 | if (index_buffer) { | 44 | if (index_buffer) { |
| 44 | glVertexArrayElementBuffer(vao, *index_buffer); | 45 | state_tracker.BindIndexBuffer(*index_buffer); |
| 45 | } | 46 | } |
| 46 | 47 | ||
| 47 | // TODO(Rodrigo): Find a way to ARB_multi_bind this | ||
| 48 | for (const auto& entry : vertex_buffers) { | 48 | for (const auto& entry : vertex_buffers) { |
| 49 | glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset, | 49 | glBindVertexBuffer(entry.binding_index, *entry.buffer, entry.offset, entry.stride); |
| 50 | entry.stride); | ||
| 51 | } | 50 | } |
| 52 | } | 51 | } |
| 53 | 52 | ||
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 3ad7c02d4..47ee3177b 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h | |||
| @@ -11,12 +11,14 @@ | |||
| 11 | 11 | ||
| 12 | namespace OpenGL { | 12 | namespace OpenGL { |
| 13 | 13 | ||
| 14 | class StateTracker; | ||
| 15 | |||
| 14 | class VertexArrayPushBuffer final { | 16 | class VertexArrayPushBuffer final { |
| 15 | public: | 17 | public: |
| 16 | explicit VertexArrayPushBuffer(); | 18 | explicit VertexArrayPushBuffer(StateTracker& state_tracker); |
| 17 | ~VertexArrayPushBuffer(); | 19 | ~VertexArrayPushBuffer(); |
| 18 | 20 | ||
| 19 | void Setup(GLuint vao_); | 21 | void Setup(); |
| 20 | 22 | ||
| 21 | void SetIndexBuffer(const GLuint* buffer); | 23 | void SetIndexBuffer(const GLuint* buffer); |
| 22 | 24 | ||
| @@ -28,7 +30,8 @@ public: | |||
| 28 | private: | 30 | private: |
| 29 | struct Entry; | 31 | struct Entry; |
| 30 | 32 | ||
| 31 | GLuint vao{}; | 33 | StateTracker& state_tracker; |
| 34 | |||
| 32 | const GLuint* index_buffer{}; | 35 | const GLuint* index_buffer{}; |
| 33 | std::vector<Entry> vertex_buffers; | 36 | std::vector<Entry> vertex_buffers; |
| 34 | }; | 37 | }; |
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 4e3ff231e..2bb376555 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | |||
| @@ -112,19 +112,18 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) | |||
| 112 | const auto& clip = regs.view_volume_clip_control; | 112 | const auto& clip = regs.view_volume_clip_control; |
| 113 | const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1; | 113 | const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1; |
| 114 | 114 | ||
| 115 | Maxwell::Cull::FrontFace front_face = regs.cull.front_face; | 115 | Maxwell::FrontFace front_face = regs.front_face; |
| 116 | if (regs.screen_y_control.triangle_rast_flip != 0 && | 116 | if (regs.screen_y_control.triangle_rast_flip != 0 && |
| 117 | regs.viewport_transform[0].scale_y > 0.0f) { | 117 | regs.viewport_transform[0].scale_y > 0.0f) { |
| 118 | if (front_face == Maxwell::Cull::FrontFace::CounterClockWise) | 118 | if (front_face == Maxwell::FrontFace::CounterClockWise) |
| 119 | front_face = Maxwell::Cull::FrontFace::ClockWise; | 119 | front_face = Maxwell::FrontFace::ClockWise; |
| 120 | else if (front_face == Maxwell::Cull::FrontFace::ClockWise) | 120 | else if (front_face == Maxwell::FrontFace::ClockWise) |
| 121 | front_face = Maxwell::Cull::FrontFace::CounterClockWise; | 121 | front_face = Maxwell::FrontFace::CounterClockWise; |
| 122 | } | 122 | } |
| 123 | 123 | ||
| 124 | const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; | 124 | const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; |
| 125 | return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled, | 125 | return FixedPipelineState::Rasterizer(regs.cull_test_enabled, depth_bias_enabled, |
| 126 | depth_clamp_enabled, gl_ndc, regs.cull.cull_face, | 126 | depth_clamp_enabled, gl_ndc, regs.cull_face, front_face); |
| 127 | front_face); | ||
| 128 | } | 127 | } |
| 129 | 128 | ||
| 130 | } // Anonymous namespace | 129 | } // Anonymous namespace |
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 87056ef37..4c8ba7f90 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h | |||
| @@ -171,8 +171,8 @@ struct FixedPipelineState { | |||
| 171 | 171 | ||
| 172 | struct Rasterizer { | 172 | struct Rasterizer { |
| 173 | constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable, | 173 | constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable, |
| 174 | bool ndc_minus_one_to_one, Maxwell::Cull::CullFace cull_face, | 174 | bool ndc_minus_one_to_one, Maxwell::CullFace cull_face, |
| 175 | Maxwell::Cull::FrontFace front_face) | 175 | Maxwell::FrontFace front_face) |
| 176 | : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable}, | 176 | : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable}, |
| 177 | depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one}, | 177 | depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one}, |
| 178 | cull_face{cull_face}, front_face{front_face} {} | 178 | cull_face{cull_face}, front_face{front_face} {} |
| @@ -182,8 +182,8 @@ struct FixedPipelineState { | |||
| 182 | bool depth_bias_enable; | 182 | bool depth_bias_enable; |
| 183 | bool depth_clamp_enable; | 183 | bool depth_clamp_enable; |
| 184 | bool ndc_minus_one_to_one; | 184 | bool ndc_minus_one_to_one; |
| 185 | Maxwell::Cull::CullFace cull_face; | 185 | Maxwell::CullFace cull_face; |
| 186 | Maxwell::Cull::FrontFace front_face; | 186 | Maxwell::FrontFace front_face; |
| 187 | 187 | ||
| 188 | std::size_t Hash() const noexcept; | 188 | std::size_t Hash() const noexcept; |
| 189 | 189 | ||
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 5403c3ab7..f93447610 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -120,11 +120,12 @@ struct FormatTuple { | |||
| 120 | {vk::Format::eA8B8G8R8UintPack32, Attachable | Storage}, // ABGR8UI | 120 | {vk::Format::eA8B8G8R8UintPack32, Attachable | Storage}, // ABGR8UI |
| 121 | {vk::Format::eB5G6R5UnormPack16, {}}, // B5G6R5U | 121 | {vk::Format::eB5G6R5UnormPack16, {}}, // B5G6R5U |
| 122 | {vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U | 122 | {vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U |
| 123 | {vk::Format::eA1R5G5B5UnormPack16, Attachable | Storage}, // A1B5G5R5U (flipped with swizzle) | 123 | {vk::Format::eA1R5G5B5UnormPack16, Attachable}, // A1B5G5R5U (flipped with swizzle) |
| 124 | {vk::Format::eR8Unorm, Attachable | Storage}, // R8U | 124 | {vk::Format::eR8Unorm, Attachable | Storage}, // R8U |
| 125 | {vk::Format::eR8Uint, Attachable | Storage}, // R8UI | 125 | {vk::Format::eR8Uint, Attachable | Storage}, // R8UI |
| 126 | {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F | 126 | {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F |
| 127 | {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U | 127 | {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U |
| 128 | {vk::Format::eR16G16B16A16Snorm, Attachable | Storage}, // RGBA16S | ||
| 128 | {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI | 129 | {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI |
| 129 | {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F | 130 | {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F |
| 130 | {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI | 131 | {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI |
| @@ -159,6 +160,7 @@ struct FormatTuple { | |||
| 159 | {vk::Format::eR32G32Uint, Attachable | Storage}, // RG32UI | 160 | {vk::Format::eR32G32Uint, Attachable | Storage}, // RG32UI |
| 160 | {vk::Format::eUndefined, {}}, // RGBX16F | 161 | {vk::Format::eUndefined, {}}, // RGBX16F |
| 161 | {vk::Format::eR32Uint, Attachable | Storage}, // R32UI | 162 | {vk::Format::eR32Uint, Attachable | Storage}, // R32UI |
| 163 | {vk::Format::eR32Sint, Attachable | Storage}, // R32I | ||
| 162 | {vk::Format::eAstc8x8UnormBlock, {}}, // ASTC_2D_8X8 | 164 | {vk::Format::eAstc8x8UnormBlock, {}}, // ASTC_2D_8X8 |
| 163 | {vk::Format::eUndefined, {}}, // ASTC_2D_8X5 | 165 | {vk::Format::eUndefined, {}}, // ASTC_2D_8X5 |
| 164 | {vk::Format::eUndefined, {}}, // ASTC_2D_5X4 | 166 | {vk::Format::eUndefined, {}}, // ASTC_2D_5X4 |
| @@ -255,6 +257,8 @@ vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { | |||
| 255 | return vk::ShaderStageFlagBits::eGeometry; | 257 | return vk::ShaderStageFlagBits::eGeometry; |
| 256 | case Tegra::Engines::ShaderType::Fragment: | 258 | case Tegra::Engines::ShaderType::Fragment: |
| 257 | return vk::ShaderStageFlagBits::eFragment; | 259 | return vk::ShaderStageFlagBits::eFragment; |
| 260 | case Tegra::Engines::ShaderType::Compute: | ||
| 261 | return vk::ShaderStageFlagBits::eCompute; | ||
| 258 | } | 262 | } |
| 259 | UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); | 263 | UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); |
| 260 | return {}; | 264 | return {}; |
| @@ -330,6 +334,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr | |||
| 330 | return vk::Format::eR16G16B16Unorm; | 334 | return vk::Format::eR16G16B16Unorm; |
| 331 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | 335 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: |
| 332 | return vk::Format::eR16G16B16A16Unorm; | 336 | return vk::Format::eR16G16B16A16Unorm; |
| 337 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: | ||
| 338 | return vk::Format::eA2B10G10R10UnormPack32; | ||
| 333 | default: | 339 | default: |
| 334 | break; | 340 | break; |
| 335 | } | 341 | } |
| @@ -363,6 +369,10 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr | |||
| 363 | return vk::Format::eR8G8B8A8Uint; | 369 | return vk::Format::eR8G8B8A8Uint; |
| 364 | case Maxwell::VertexAttribute::Size::Size_32: | 370 | case Maxwell::VertexAttribute::Size::Size_32: |
| 365 | return vk::Format::eR32Uint; | 371 | return vk::Format::eR32Uint; |
| 372 | case Maxwell::VertexAttribute::Size::Size_32_32: | ||
| 373 | return vk::Format::eR32G32Uint; | ||
| 374 | case Maxwell::VertexAttribute::Size::Size_32_32_32: | ||
| 375 | return vk::Format::eR32G32B32Uint; | ||
| 366 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: | 376 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: |
| 367 | return vk::Format::eR32G32B32A32Uint; | 377 | return vk::Format::eR32G32B32A32Uint; |
| 368 | default: | 378 | default: |
| @@ -370,8 +380,22 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr | |||
| 370 | } | 380 | } |
| 371 | case Maxwell::VertexAttribute::Type::UnsignedScaled: | 381 | case Maxwell::VertexAttribute::Type::UnsignedScaled: |
| 372 | switch (size) { | 382 | switch (size) { |
| 383 | case Maxwell::VertexAttribute::Size::Size_8: | ||
| 384 | return vk::Format::eR8Uscaled; | ||
| 373 | case Maxwell::VertexAttribute::Size::Size_8_8: | 385 | case Maxwell::VertexAttribute::Size::Size_8_8: |
| 374 | return vk::Format::eR8G8Uscaled; | 386 | return vk::Format::eR8G8Uscaled; |
| 387 | case Maxwell::VertexAttribute::Size::Size_8_8_8: | ||
| 388 | return vk::Format::eR8G8B8Uscaled; | ||
| 389 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | ||
| 390 | return vk::Format::eR8G8B8A8Uscaled; | ||
| 391 | case Maxwell::VertexAttribute::Size::Size_16: | ||
| 392 | return vk::Format::eR16Uscaled; | ||
| 393 | case Maxwell::VertexAttribute::Size::Size_16_16: | ||
| 394 | return vk::Format::eR16G16Uscaled; | ||
| 395 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | ||
| 396 | return vk::Format::eR16G16B16Uscaled; | ||
| 397 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | ||
| 398 | return vk::Format::eR16G16B16A16Uscaled; | ||
| 375 | default: | 399 | default: |
| 376 | break; | 400 | break; |
| 377 | } | 401 | } |
| @@ -571,24 +595,24 @@ vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) { | |||
| 571 | return {}; | 595 | return {}; |
| 572 | } | 596 | } |
| 573 | 597 | ||
| 574 | vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) { | 598 | vk::FrontFace FrontFace(Maxwell::FrontFace front_face) { |
| 575 | switch (front_face) { | 599 | switch (front_face) { |
| 576 | case Maxwell::Cull::FrontFace::ClockWise: | 600 | case Maxwell::FrontFace::ClockWise: |
| 577 | return vk::FrontFace::eClockwise; | 601 | return vk::FrontFace::eClockwise; |
| 578 | case Maxwell::Cull::FrontFace::CounterClockWise: | 602 | case Maxwell::FrontFace::CounterClockWise: |
| 579 | return vk::FrontFace::eCounterClockwise; | 603 | return vk::FrontFace::eCounterClockwise; |
| 580 | } | 604 | } |
| 581 | UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face)); | 605 | UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face)); |
| 582 | return {}; | 606 | return {}; |
| 583 | } | 607 | } |
| 584 | 608 | ||
| 585 | vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) { | 609 | vk::CullModeFlags CullFace(Maxwell::CullFace cull_face) { |
| 586 | switch (cull_face) { | 610 | switch (cull_face) { |
| 587 | case Maxwell::Cull::CullFace::Front: | 611 | case Maxwell::CullFace::Front: |
| 588 | return vk::CullModeFlagBits::eFront; | 612 | return vk::CullModeFlagBits::eFront; |
| 589 | case Maxwell::Cull::CullFace::Back: | 613 | case Maxwell::CullFace::Back: |
| 590 | return vk::CullModeFlagBits::eBack; | 614 | return vk::CullModeFlagBits::eBack; |
| 591 | case Maxwell::Cull::CullFace::FrontAndBack: | 615 | case Maxwell::CullFace::FrontAndBack: |
| 592 | return vk::CullModeFlagBits::eFrontAndBack; | 616 | return vk::CullModeFlagBits::eFrontAndBack; |
| 593 | } | 617 | } |
| 594 | UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); | 618 | UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 7e9678b7b..24f6ab544 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h | |||
| @@ -54,9 +54,9 @@ vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation); | |||
| 54 | 54 | ||
| 55 | vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor); | 55 | vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor); |
| 56 | 56 | ||
| 57 | vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face); | 57 | vk::FrontFace FrontFace(Maxwell::FrontFace front_face); |
| 58 | 58 | ||
| 59 | vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face); | 59 | vk::CullModeFlags CullFace(Maxwell::CullFace cull_face); |
| 60 | 60 | ||
| 61 | vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); | 61 | vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); |
| 62 | 62 | ||
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index d5032b432..42bb01418 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 27 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 28 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 28 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 29 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 29 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 30 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | ||
| 30 | #include "video_core/renderer_vulkan/vk_swapchain.h" | 31 | #include "video_core/renderer_vulkan/vk_swapchain.h" |
| 31 | 32 | ||
| 32 | namespace Vulkan { | 33 | namespace Vulkan { |
| @@ -106,8 +107,14 @@ RendererVulkan::~RendererVulkan() { | |||
| 106 | } | 107 | } |
| 107 | 108 | ||
| 108 | void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | 109 | void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| 110 | render_window.PollEvents(); | ||
| 111 | |||
| 112 | if (!framebuffer) { | ||
| 113 | return; | ||
| 114 | } | ||
| 115 | |||
| 109 | const auto& layout = render_window.GetFramebufferLayout(); | 116 | const auto& layout = render_window.GetFramebufferLayout(); |
| 110 | if (framebuffer && layout.width > 0 && layout.height > 0 && render_window.IsShown()) { | 117 | if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { |
| 111 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; | 118 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; |
| 112 | const bool use_accelerated = | 119 | const bool use_accelerated = |
| 113 | rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); | 120 | rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); |
| @@ -128,13 +135,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 128 | blit_screen->Recreate(); | 135 | blit_screen->Recreate(); |
| 129 | } | 136 | } |
| 130 | 137 | ||
| 131 | render_window.SwapBuffers(); | ||
| 132 | rasterizer->TickFrame(); | 138 | rasterizer->TickFrame(); |
| 133 | } | 139 | } |
| 134 | 140 | ||
| 135 | render_window.PollEvents(); | 141 | render_window.PollEvents(); |
| 136 | } | 142 | } |
| 137 | 143 | ||
| 144 | void RendererVulkan::TryPresent(int /*timeout_ms*/) { | ||
| 145 | // TODO (bunnei): ImplementMe | ||
| 146 | } | ||
| 147 | |||
| 138 | bool RendererVulkan::Init() { | 148 | bool RendererVulkan::Init() { |
| 139 | PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; | 149 | PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; |
| 140 | render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface); | 150 | render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface); |
| @@ -168,10 +178,13 @@ bool RendererVulkan::Init() { | |||
| 168 | swapchain = std::make_unique<VKSwapchain>(surface, *device); | 178 | swapchain = std::make_unique<VKSwapchain>(surface, *device); |
| 169 | swapchain->Create(framebuffer.width, framebuffer.height, false); | 179 | swapchain->Create(framebuffer.width, framebuffer.height, false); |
| 170 | 180 | ||
| 171 | scheduler = std::make_unique<VKScheduler>(*device, *resource_manager); | 181 | state_tracker = std::make_unique<StateTracker>(system); |
| 182 | |||
| 183 | scheduler = std::make_unique<VKScheduler>(*device, *resource_manager, *state_tracker); | ||
| 172 | 184 | ||
| 173 | rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device, | 185 | rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device, |
| 174 | *resource_manager, *memory_manager, *scheduler); | 186 | *resource_manager, *memory_manager, |
| 187 | *state_tracker, *scheduler); | ||
| 175 | 188 | ||
| 176 | blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device, | 189 | blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device, |
| 177 | *resource_manager, *memory_manager, *swapchain, | 190 | *resource_manager, *memory_manager, *swapchain, |
| @@ -262,4 +275,4 @@ void RendererVulkan::Report() const { | |||
| 262 | telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); | 275 | telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); |
| 263 | } | 276 | } |
| 264 | 277 | ||
| 265 | } // namespace Vulkan \ No newline at end of file | 278 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index a472c5dc9..3da08d2e4 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h | |||
| @@ -4,8 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | ||
| 7 | #include <optional> | 8 | #include <optional> |
| 8 | #include <vector> | 9 | #include <vector> |
| 10 | |||
| 9 | #include "video_core/renderer_base.h" | 11 | #include "video_core/renderer_base.h" |
| 10 | #include "video_core/renderer_vulkan/declarations.h" | 12 | #include "video_core/renderer_vulkan/declarations.h" |
| 11 | 13 | ||
| @@ -15,6 +17,7 @@ class System; | |||
| 15 | 17 | ||
| 16 | namespace Vulkan { | 18 | namespace Vulkan { |
| 17 | 19 | ||
| 20 | class StateTracker; | ||
| 18 | class VKBlitScreen; | 21 | class VKBlitScreen; |
| 19 | class VKDevice; | 22 | class VKDevice; |
| 20 | class VKFence; | 23 | class VKFence; |
| @@ -36,14 +39,10 @@ public: | |||
| 36 | explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system); | 39 | explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system); |
| 37 | ~RendererVulkan() override; | 40 | ~RendererVulkan() override; |
| 38 | 41 | ||
| 39 | /// Swap buffers (render frame) | ||
| 40 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; | ||
| 41 | |||
| 42 | /// Initialize the renderer | ||
| 43 | bool Init() override; | 42 | bool Init() override; |
| 44 | |||
| 45 | /// Shutdown the renderer | ||
| 46 | void ShutDown() override; | 43 | void ShutDown() override; |
| 44 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; | ||
| 45 | void TryPresent(int timeout_ms) override; | ||
| 47 | 46 | ||
| 48 | private: | 47 | private: |
| 49 | std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback( | 48 | std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback( |
| @@ -65,6 +64,7 @@ private: | |||
| 65 | std::unique_ptr<VKSwapchain> swapchain; | 64 | std::unique_ptr<VKSwapchain> swapchain; |
| 66 | std::unique_ptr<VKMemoryManager> memory_manager; | 65 | std::unique_ptr<VKMemoryManager> memory_manager; |
| 67 | std::unique_ptr<VKResourceManager> resource_manager; | 66 | std::unique_ptr<VKResourceManager> resource_manager; |
| 67 | std::unique_ptr<StateTracker> state_tracker; | ||
| 68 | std::unique_ptr<VKScheduler> scheduler; | 68 | std::unique_ptr<VKScheduler> scheduler; |
| 69 | std::unique_ptr<VKBlitScreen> blit_screen; | 69 | std::unique_ptr<VKBlitScreen> blit_screen; |
| 70 | }; | 70 | }; |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 9d5b8de7a..60f57d83e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | |||
| @@ -73,7 +73,7 @@ UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate | |||
| 73 | std::vector<vk::DescriptorUpdateTemplateEntry> template_entries; | 73 | std::vector<vk::DescriptorUpdateTemplateEntry> template_entries; |
| 74 | u32 binding = 0; | 74 | u32 binding = 0; |
| 75 | u32 offset = 0; | 75 | u32 offset = 0; |
| 76 | FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries); | 76 | FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); |
| 77 | if (template_entries.empty()) { | 77 | if (template_entries.empty()) { |
| 78 | // If the shader doesn't use descriptor sets, skip template creation. | 78 | // If the shader doesn't use descriptor sets, skip template creation. |
| 79 | return UniqueDescriptorUpdateTemplate{}; | 79 | return UniqueDescriptorUpdateTemplate{}; |
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index d1da4f9d3..28d2fbc4f 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan | |||
| 107 | features.occlusionQueryPrecise = true; | 107 | features.occlusionQueryPrecise = true; |
| 108 | features.fragmentStoresAndAtomics = true; | 108 | features.fragmentStoresAndAtomics = true; |
| 109 | features.shaderImageGatherExtended = true; | 109 | features.shaderImageGatherExtended = true; |
| 110 | features.shaderStorageImageReadWithoutFormat = | 110 | features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported; |
| 111 | is_shader_storage_img_read_without_format_supported; | ||
| 112 | features.shaderStorageImageWriteWithoutFormat = true; | 111 | features.shaderStorageImageWriteWithoutFormat = true; |
| 113 | features.textureCompressionASTC_LDR = is_optimal_astc_supported; | 112 | features.textureCompressionASTC_LDR = is_optimal_astc_supported; |
| 114 | 113 | ||
| @@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan | |||
| 148 | LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); | 147 | LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); |
| 149 | } | 148 | } |
| 150 | 149 | ||
| 150 | vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; | ||
| 151 | if (ext_transform_feedback) { | ||
| 152 | transform_feedback.transformFeedback = true; | ||
| 153 | transform_feedback.geometryStreams = true; | ||
| 154 | SetNext(next, transform_feedback); | ||
| 155 | } else { | ||
| 156 | LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks"); | ||
| 157 | } | ||
| 158 | |||
| 151 | if (!ext_depth_range_unrestricted) { | 159 | if (!ext_depth_range_unrestricted) { |
| 152 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); | 160 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); |
| 153 | } | 161 | } |
| @@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 385 | } | 393 | } |
| 386 | }; | 394 | }; |
| 387 | 395 | ||
| 388 | extensions.reserve(14); | 396 | extensions.reserve(15); |
| 389 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); | 397 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); |
| 390 | extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); | 398 | extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); |
| 391 | extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); | 399 | extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); |
| @@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 397 | 405 | ||
| 398 | [[maybe_unused]] const bool nsight = | 406 | [[maybe_unused]] const bool nsight = |
| 399 | std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | 407 | std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); |
| 400 | bool khr_shader_float16_int8{}; | 408 | bool has_khr_shader_float16_int8{}; |
| 401 | bool ext_subgroup_size_control{}; | 409 | bool has_ext_subgroup_size_control{}; |
| 410 | bool has_ext_transform_feedback{}; | ||
| 402 | for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { | 411 | for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { |
| 403 | Test(extension, khr_uniform_buffer_standard_layout, | 412 | Test(extension, khr_uniform_buffer_standard_layout, |
| 404 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); | 413 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); |
| 405 | Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); | 414 | Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, |
| 415 | false); | ||
| 406 | Test(extension, ext_depth_range_unrestricted, | 416 | Test(extension, ext_depth_range_unrestricted, |
| 407 | VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); | 417 | VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); |
| 408 | Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); | 418 | Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); |
| 409 | Test(extension, ext_shader_viewport_index_layer, | 419 | Test(extension, ext_shader_viewport_index_layer, |
| 410 | VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); | 420 | VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); |
| 411 | Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, | 421 | Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, |
| 422 | false); | ||
| 423 | Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, | ||
| 412 | false); | 424 | false); |
| 413 | if (Settings::values.renderer_debug) { | 425 | if (Settings::values.renderer_debug) { |
| 414 | Test(extension, nv_device_diagnostic_checkpoints, | 426 | Test(extension, nv_device_diagnostic_checkpoints, |
| @@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 416 | } | 428 | } |
| 417 | } | 429 | } |
| 418 | 430 | ||
| 419 | if (khr_shader_float16_int8) { | 431 | if (has_khr_shader_float16_int8) { |
| 420 | is_float16_supported = | 432 | is_float16_supported = |
| 421 | GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; | 433 | GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; |
| 422 | extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); | 434 | extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); |
| 423 | } | 435 | } |
| 424 | 436 | ||
| 425 | if (ext_subgroup_size_control) { | 437 | if (has_ext_subgroup_size_control) { |
| 426 | const auto features = | 438 | const auto features = |
| 427 | GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); | 439 | GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); |
| 428 | const auto properties = | 440 | const auto properties = |
| @@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 439 | is_warp_potentially_bigger = true; | 451 | is_warp_potentially_bigger = true; |
| 440 | } | 452 | } |
| 441 | 453 | ||
| 454 | if (has_ext_transform_feedback) { | ||
| 455 | const auto features = | ||
| 456 | GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi); | ||
| 457 | const auto properties = | ||
| 458 | GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi); | ||
| 459 | |||
| 460 | if (features.transformFeedback && features.geometryStreams && | ||
| 461 | properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers && | ||
| 462 | properties.transformFeedbackQueries && properties.transformFeedbackDraw) { | ||
| 463 | extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); | ||
| 464 | ext_transform_feedback = true; | ||
| 465 | } | ||
| 466 | } | ||
| 467 | |||
| 442 | return extensions; | 468 | return extensions; |
| 443 | } | 469 | } |
| 444 | 470 | ||
| @@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK | |||
| 467 | 493 | ||
| 468 | void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { | 494 | void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { |
| 469 | const auto supported_features{physical.getFeatures(dldi)}; | 495 | const auto supported_features{physical.getFeatures(dldi)}; |
| 470 | is_shader_storage_img_read_without_format_supported = | 496 | is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; |
| 471 | supported_features.shaderStorageImageReadWithoutFormat; | ||
| 472 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); | 497 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); |
| 473 | } | 498 | } |
| 474 | 499 | ||
| @@ -510,6 +535,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti | |||
| 510 | vk::Format::eR32G32Sfloat, | 535 | vk::Format::eR32G32Sfloat, |
| 511 | vk::Format::eR32G32Uint, | 536 | vk::Format::eR32G32Uint, |
| 512 | vk::Format::eR16G16B16A16Uint, | 537 | vk::Format::eR16G16B16A16Uint, |
| 538 | vk::Format::eR16G16B16A16Snorm, | ||
| 513 | vk::Format::eR16G16B16A16Unorm, | 539 | vk::Format::eR16G16B16A16Unorm, |
| 514 | vk::Format::eR16G16Unorm, | 540 | vk::Format::eR16G16Unorm, |
| 515 | vk::Format::eR16G16Snorm, | 541 | vk::Format::eR16G16Snorm, |
| @@ -523,6 +549,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti | |||
| 523 | vk::Format::eB10G11R11UfloatPack32, | 549 | vk::Format::eB10G11R11UfloatPack32, |
| 524 | vk::Format::eR32Sfloat, | 550 | vk::Format::eR32Sfloat, |
| 525 | vk::Format::eR32Uint, | 551 | vk::Format::eR32Uint, |
| 552 | vk::Format::eR32Sint, | ||
| 526 | vk::Format::eR16Sfloat, | 553 | vk::Format::eR16Sfloat, |
| 527 | vk::Format::eR16G16B16A16Sfloat, | 554 | vk::Format::eR16G16B16A16Sfloat, |
| 528 | vk::Format::eB8G8R8A8Unorm, | 555 | vk::Format::eB8G8R8A8Unorm, |
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 2c27ad730..6e656517f 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h | |||
| @@ -122,11 +122,6 @@ public: | |||
| 122 | return properties.limits.maxPushConstantsSize; | 122 | return properties.limits.maxPushConstantsSize; |
| 123 | } | 123 | } |
| 124 | 124 | ||
| 125 | /// Returns true if Shader storage Image Read Without Format supported. | ||
| 126 | bool IsShaderStorageImageReadWithoutFormatSupported() const { | ||
| 127 | return is_shader_storage_img_read_without_format_supported; | ||
| 128 | } | ||
| 129 | |||
| 130 | /// Returns true if ASTC is natively supported. | 125 | /// Returns true if ASTC is natively supported. |
| 131 | bool IsOptimalAstcSupported() const { | 126 | bool IsOptimalAstcSupported() const { |
| 132 | return is_optimal_astc_supported; | 127 | return is_optimal_astc_supported; |
| @@ -147,6 +142,11 @@ public: | |||
| 147 | return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; | 142 | return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; |
| 148 | } | 143 | } |
| 149 | 144 | ||
| 145 | /// Returns true if formatless image load is supported. | ||
| 146 | bool IsFormatlessImageLoadSupported() const { | ||
| 147 | return is_formatless_image_load_supported; | ||
| 148 | } | ||
| 149 | |||
| 150 | /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. | 150 | /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. |
| 151 | bool IsKhrUniformBufferStandardLayoutSupported() const { | 151 | bool IsKhrUniformBufferStandardLayoutSupported() const { |
| 152 | return khr_uniform_buffer_standard_layout; | 152 | return khr_uniform_buffer_standard_layout; |
| @@ -167,6 +167,11 @@ public: | |||
| 167 | return ext_shader_viewport_index_layer; | 167 | return ext_shader_viewport_index_layer; |
| 168 | } | 168 | } |
| 169 | 169 | ||
| 170 | /// Returns true if the device supports VK_EXT_transform_feedback. | ||
| 171 | bool IsExtTransformFeedbackSupported() const { | ||
| 172 | return ext_transform_feedback; | ||
| 173 | } | ||
| 174 | |||
| 170 | /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. | 175 | /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. |
| 171 | bool IsNvDeviceDiagnosticCheckpoints() const { | 176 | bool IsNvDeviceDiagnosticCheckpoints() const { |
| 172 | return nv_device_diagnostic_checkpoints; | 177 | return nv_device_diagnostic_checkpoints; |
| @@ -214,26 +219,26 @@ private: | |||
| 214 | static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( | 219 | static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( |
| 215 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); | 220 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); |
| 216 | 221 | ||
| 217 | const vk::PhysicalDevice physical; ///< Physical device. | 222 | const vk::PhysicalDevice physical; ///< Physical device. |
| 218 | vk::DispatchLoaderDynamic dld; ///< Device function pointers. | 223 | vk::DispatchLoaderDynamic dld; ///< Device function pointers. |
| 219 | vk::PhysicalDeviceProperties properties; ///< Device properties. | 224 | vk::PhysicalDeviceProperties properties; ///< Device properties. |
| 220 | UniqueDevice logical; ///< Logical device. | 225 | UniqueDevice logical; ///< Logical device. |
| 221 | vk::Queue graphics_queue; ///< Main graphics queue. | 226 | vk::Queue graphics_queue; ///< Main graphics queue. |
| 222 | vk::Queue present_queue; ///< Main present queue. | 227 | vk::Queue present_queue; ///< Main present queue. |
| 223 | u32 graphics_family{}; ///< Main graphics queue family index. | 228 | u32 graphics_family{}; ///< Main graphics queue family index. |
| 224 | u32 present_family{}; ///< Main present queue family index. | 229 | u32 present_family{}; ///< Main present queue family index. |
| 225 | vk::DriverIdKHR driver_id{}; ///< Driver ID. | 230 | vk::DriverIdKHR driver_id{}; ///< Driver ID. |
| 226 | vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. | 231 | vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. |
| 227 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. | 232 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. |
| 228 | bool is_float16_supported{}; ///< Support for float16 arithmetics. | 233 | bool is_float16_supported{}; ///< Support for float16 arithmetics. |
| 229 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. | 234 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. |
| 235 | bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. | ||
| 230 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. | 236 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. |
| 231 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. | 237 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. |
| 232 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. | 238 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. |
| 233 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. | 239 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. |
| 240 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. | ||
| 234 | bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. | 241 | bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. |
| 235 | bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage | ||
| 236 | ///< image read without format | ||
| 237 | 242 | ||
| 238 | // Telemetry parameters | 243 | // Telemetry parameters |
| 239 | std::string vendor_name; ///< Device's driver name. | 244 | std::string vendor_name; ///< Device's driver name. |
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index b155dfb49..6a02403c1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -97,8 +97,7 @@ UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplat | |||
| 97 | u32 offset = 0; | 97 | u32 offset = 0; |
| 98 | for (const auto& stage : program) { | 98 | for (const auto& stage : program) { |
| 99 | if (stage) { | 99 | if (stage) { |
| 100 | FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset, | 100 | FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); |
| 101 | template_entries); | ||
| 102 | } | 101 | } |
| 103 | } | 102 | } |
| 104 | if (template_entries.empty()) { | 103 | if (template_entries.empty()) { |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7ddf7d3ee..557b9d662 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -36,6 +36,13 @@ using Tegra::Engines::ShaderType; | |||
| 36 | 36 | ||
| 37 | namespace { | 37 | namespace { |
| 38 | 38 | ||
| 39 | // C++20's using enum | ||
| 40 | constexpr auto eUniformBuffer = vk::DescriptorType::eUniformBuffer; | ||
| 41 | constexpr auto eStorageBuffer = vk::DescriptorType::eStorageBuffer; | ||
| 42 | constexpr auto eUniformTexelBuffer = vk::DescriptorType::eUniformTexelBuffer; | ||
| 43 | constexpr auto eCombinedImageSampler = vk::DescriptorType::eCombinedImageSampler; | ||
| 44 | constexpr auto eStorageImage = vk::DescriptorType::eStorageImage; | ||
| 45 | |||
| 39 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ | 46 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ |
| 40 | VideoCommon::Shader::CompileDepth::FullDecompile}; | 47 | VideoCommon::Shader::CompileDepth::FullDecompile}; |
| 41 | 48 | ||
| @@ -119,23 +126,32 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { | |||
| 119 | } | 126 | } |
| 120 | } | 127 | } |
| 121 | 128 | ||
| 129 | template <vk::DescriptorType descriptor_type, class Container> | ||
| 130 | void AddBindings(std::vector<vk::DescriptorSetLayoutBinding>& bindings, u32& binding, | ||
| 131 | vk::ShaderStageFlags stage_flags, const Container& container) { | ||
| 132 | const u32 num_entries = static_cast<u32>(std::size(container)); | ||
| 133 | for (std::size_t i = 0; i < num_entries; ++i) { | ||
| 134 | u32 count = 1; | ||
| 135 | if constexpr (descriptor_type == eCombinedImageSampler) { | ||
| 136 | // Combined image samplers can be arrayed. | ||
| 137 | count = container[i].Size(); | ||
| 138 | } | ||
| 139 | bindings.emplace_back(binding++, descriptor_type, count, stage_flags, nullptr); | ||
| 140 | } | ||
| 141 | } | ||
| 142 | |||
| 122 | u32 FillDescriptorLayout(const ShaderEntries& entries, | 143 | u32 FillDescriptorLayout(const ShaderEntries& entries, |
| 123 | std::vector<vk::DescriptorSetLayoutBinding>& bindings, | 144 | std::vector<vk::DescriptorSetLayoutBinding>& bindings, |
| 124 | Maxwell::ShaderProgram program_type, u32 base_binding) { | 145 | Maxwell::ShaderProgram program_type, u32 base_binding) { |
| 125 | const ShaderType stage = GetStageFromProgram(program_type); | 146 | const ShaderType stage = GetStageFromProgram(program_type); |
| 126 | const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage); | 147 | const vk::ShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); |
| 127 | 148 | ||
| 128 | u32 binding = base_binding; | 149 | u32 binding = base_binding; |
| 129 | const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) { | 150 | AddBindings<eUniformBuffer>(bindings, binding, flags, entries.const_buffers); |
| 130 | for (std::size_t i = 0; i < num_entries; ++i) { | 151 | AddBindings<eStorageBuffer>(bindings, binding, flags, entries.global_buffers); |
| 131 | bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr); | 152 | AddBindings<eUniformTexelBuffer>(bindings, binding, flags, entries.texel_buffers); |
| 132 | } | 153 | AddBindings<eCombinedImageSampler>(bindings, binding, flags, entries.samplers); |
| 133 | }; | 154 | AddBindings<eStorageImage>(bindings, binding, flags, entries.images); |
| 134 | AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); | ||
| 135 | AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); | ||
| 136 | AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); | ||
| 137 | AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); | ||
| 138 | AddBindings(vk::DescriptorType::eStorageImage, entries.images.size()); | ||
| 139 | return binding; | 155 | return binding; |
| 140 | } | 156 | } |
| 141 | 157 | ||
| @@ -145,8 +161,8 @@ CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stag | |||
| 145 | GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, | 161 | GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, |
| 146 | ProgramCode program_code, u32 main_offset) | 162 | ProgramCode program_code, u32 main_offset) |
| 147 | : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, | 163 | : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, |
| 148 | program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)}, | 164 | program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, |
| 149 | shader_ir{this->program_code, main_offset, compiler_settings, locker}, | 165 | shader_ir{this->program_code, main_offset, compiler_settings, registry}, |
| 150 | entries{GenerateShaderEntries(shader_ir)} {} | 166 | entries{GenerateShaderEntries(shader_ir)} {} |
| 151 | 167 | ||
| 152 | CachedShader::~CachedShader() = default; | 168 | CachedShader::~CachedShader() = default; |
| @@ -163,24 +179,19 @@ Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine( | |||
| 163 | VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, | 179 | VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, |
| 164 | const VKDevice& device, VKScheduler& scheduler, | 180 | const VKDevice& device, VKScheduler& scheduler, |
| 165 | VKDescriptorPool& descriptor_pool, | 181 | VKDescriptorPool& descriptor_pool, |
| 166 | VKUpdateDescriptorQueue& update_descriptor_queue) | 182 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 183 | VKRenderPassCache& renderpass_cache) | ||
| 167 | : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, | 184 | : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, |
| 168 | descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, | 185 | descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, |
| 169 | renderpass_cache(device) {} | 186 | renderpass_cache{renderpass_cache} {} |
| 170 | 187 | ||
| 171 | VKPipelineCache::~VKPipelineCache() = default; | 188 | VKPipelineCache::~VKPipelineCache() = default; |
| 172 | 189 | ||
| 173 | std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | 190 | std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { |
| 174 | const auto& gpu = system.GPU().Maxwell3D(); | 191 | const auto& gpu = system.GPU().Maxwell3D(); |
| 175 | auto& dirty = system.GPU().Maxwell3D().dirty.shaders; | ||
| 176 | if (!dirty) { | ||
| 177 | return last_shaders; | ||
| 178 | } | ||
| 179 | dirty = false; | ||
| 180 | 192 | ||
| 181 | std::array<Shader, Maxwell::MaxShaderProgram> shaders; | 193 | std::array<Shader, Maxwell::MaxShaderProgram> shaders; |
| 182 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 194 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 183 | const auto& shader_config = gpu.regs.shader_config[index]; | ||
| 184 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | 195 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; |
| 185 | 196 | ||
| 186 | // Skip stages that are not enabled | 197 | // Skip stages that are not enabled |
| @@ -262,9 +273,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 262 | specialization.workgroup_size = key.workgroup_size; | 273 | specialization.workgroup_size = key.workgroup_size; |
| 263 | specialization.shared_memory_size = key.shared_memory_size; | 274 | specialization.shared_memory_size = key.shared_memory_size; |
| 264 | 275 | ||
| 265 | const SPIRVShader spirv_shader{ | 276 | const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, |
| 266 | Decompile(device, shader->GetIR(), ShaderType::Compute, specialization), | 277 | shader->GetRegistry(), specialization), |
| 267 | shader->GetEntries()}; | 278 | shader->GetEntries()}; |
| 268 | entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, | 279 | entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, |
| 269 | update_descriptor_queue, spirv_shader); | 280 | update_descriptor_queue, spirv_shader); |
| 270 | return *entry; | 281 | return *entry; |
| @@ -313,8 +324,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 313 | const auto& gpu = system.GPU().Maxwell3D(); | 324 | const auto& gpu = system.GPU().Maxwell3D(); |
| 314 | 325 | ||
| 315 | Specialization specialization; | 326 | Specialization specialization; |
| 316 | specialization.primitive_topology = fixed_state.input_assembly.topology; | 327 | if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) { |
| 317 | if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) { | ||
| 318 | ASSERT(fixed_state.input_assembly.point_size != 0.0f); | 328 | ASSERT(fixed_state.input_assembly.point_size != 0.0f); |
| 319 | specialization.point_size = fixed_state.input_assembly.point_size; | 329 | specialization.point_size = fixed_state.input_assembly.point_size; |
| 320 | } | 330 | } |
| @@ -322,9 +332,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 322 | specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; | 332 | specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; |
| 323 | } | 333 | } |
| 324 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; | 334 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; |
| 325 | specialization.tessellation.primitive = fixed_state.tessellation.primitive; | ||
| 326 | specialization.tessellation.spacing = fixed_state.tessellation.spacing; | ||
| 327 | specialization.tessellation.clockwise = fixed_state.tessellation.clockwise; | ||
| 328 | 335 | ||
| 329 | SPIRVProgram program; | 336 | SPIRVProgram program; |
| 330 | std::vector<vk::DescriptorSetLayoutBinding> bindings; | 337 | std::vector<vk::DescriptorSetLayoutBinding> bindings; |
| @@ -345,8 +352,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 345 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 | 352 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 |
| 346 | const auto program_type = GetShaderType(program_enum); | 353 | const auto program_type = GetShaderType(program_enum); |
| 347 | const auto& entries = shader->GetEntries(); | 354 | const auto& entries = shader->GetEntries(); |
| 348 | program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization), | 355 | program[stage] = { |
| 349 | entries}; | 356 | Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), |
| 357 | entries}; | ||
| 350 | 358 | ||
| 351 | if (program_enum == Maxwell::ShaderProgram::VertexA) { | 359 | if (program_enum == Maxwell::ShaderProgram::VertexA) { |
| 352 | // VertexB was combined with VertexA, so we skip the VertexB iteration | 360 | // VertexB was combined with VertexA, so we skip the VertexB iteration |
| @@ -361,32 +369,45 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 361 | return {std::move(program), std::move(bindings)}; | 369 | return {std::move(program), std::move(bindings)}; |
| 362 | } | 370 | } |
| 363 | 371 | ||
| 364 | void FillDescriptorUpdateTemplateEntries( | 372 | template <vk::DescriptorType descriptor_type, class Container> |
| 365 | const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, | 373 | void AddEntry(std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries, u32& binding, |
| 366 | std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) { | 374 | u32& offset, const Container& container) { |
| 367 | static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); | 375 | static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); |
| 368 | const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) { | 376 | const u32 count = static_cast<u32>(std::size(container)); |
| 369 | const u32 count = static_cast<u32>(count_); | 377 | |
| 370 | if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer && | 378 | if constexpr (descriptor_type == eCombinedImageSampler) { |
| 371 | device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { | 379 | for (u32 i = 0; i < count; ++i) { |
| 372 | // Nvidia has a bug where updating multiple uniform texels at once causes the driver to | 380 | const u32 num_samplers = container[i].Size(); |
| 373 | // crash. | 381 | template_entries.emplace_back(binding, 0, num_samplers, descriptor_type, offset, |
| 374 | for (u32 i = 0; i < count; ++i) { | 382 | entry_size); |
| 375 | template_entries.emplace_back(binding + i, 0, 1, descriptor_type, | 383 | ++binding; |
| 376 | offset + i * entry_size, entry_size); | 384 | offset += num_samplers * entry_size; |
| 377 | } | ||
| 378 | } else if (count != 0) { | ||
| 379 | template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size); | ||
| 380 | } | 385 | } |
| 381 | offset += count * entry_size; | 386 | return; |
| 382 | binding += count; | 387 | } |
| 383 | }; | ||
| 384 | 388 | ||
| 385 | AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); | 389 | if constexpr (descriptor_type == eUniformTexelBuffer) { |
| 386 | AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); | 390 | // Nvidia has a bug where updating multiple uniform texels at once causes the driver to |
| 387 | AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); | 391 | // crash. |
| 388 | AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); | 392 | for (u32 i = 0; i < count; ++i) { |
| 389 | AddEntry(vk::DescriptorType::eStorageImage, entries.images.size()); | 393 | template_entries.emplace_back(binding + i, 0, 1, descriptor_type, |
| 394 | offset + i * entry_size, entry_size); | ||
| 395 | } | ||
| 396 | } else if (count > 0) { | ||
| 397 | template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size); | ||
| 398 | } | ||
| 399 | offset += count * entry_size; | ||
| 400 | binding += count; | ||
| 401 | } | ||
| 402 | |||
| | | 403 | void FillDescriptorUpdateTemplateEntries( | ||
| | | 404 | const ShaderEntries& entries, u32& binding, u32& offset, | ||
| | | 405 | std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) { | ||
| | | 406 | AddEntry<eUniformBuffer>(template_entries, binding, offset, entries.const_buffers); | ||
| | | 407 | AddEntry<eStorageBuffer>(template_entries, binding, offset, entries.global_buffers); | ||
| | | 408 | AddEntry<eUniformTexelBuffer>(template_entries, binding, offset, entries.texel_buffers); | ||
| | | 409 | AddEntry<eCombinedImageSampler>(template_entries, binding, offset, entries.samplers); | ||
| | | 410 | AddEntry<eStorageImage>(template_entries, binding, offset, entries.images); | ||
| 390 | } | 411 | } |
| 391 | 412 | ||
| 392 | } // namespace Vulkan | 413 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 8678fc9c3..c4c112290 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -25,7 +25,7 @@ | |||
| 25 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | 25 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" |
| 26 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 26 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 27 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 27 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 28 | #include "video_core/shader/const_buffer_locker.h" | 28 | #include "video_core/shader/registry.h" |
| 29 | #include "video_core/shader/shader_ir.h" | 29 | #include "video_core/shader/shader_ir.h" |
| 30 | #include "video_core/surface.h" | 30 | #include "video_core/surface.h" |
| 31 | 31 | ||
| @@ -132,6 +132,10 @@ public: | |||
| 132 | return shader_ir; | 132 | return shader_ir; |
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | const VideoCommon::Shader::Registry& GetRegistry() const { | ||
| 136 | return registry; | ||
| 137 | } | ||
| 138 | |||
| 135 | const VideoCommon::Shader::ShaderIR& GetIR() const { | 139 | const VideoCommon::Shader::ShaderIR& GetIR() const { |
| 136 | return shader_ir; | 140 | return shader_ir; |
| 137 | } | 141 | } |
| @@ -147,7 +151,7 @@ private: | |||
| 147 | GPUVAddr gpu_addr{}; | 151 | GPUVAddr gpu_addr{}; |
| 148 | VAddr cpu_addr{}; | 152 | VAddr cpu_addr{}; |
| 149 | ProgramCode program_code; | 153 | ProgramCode program_code; |
| 150 | VideoCommon::Shader::ConstBufferLocker locker; | 154 | VideoCommon::Shader::Registry registry; |
| 151 | VideoCommon::Shader::ShaderIR shader_ir; | 155 | VideoCommon::Shader::ShaderIR shader_ir; |
| 152 | ShaderEntries entries; | 156 | ShaderEntries entries; |
| 153 | }; | 157 | }; |
| @@ -157,7 +161,8 @@ public: | |||
| 157 | explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, | 161 | explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, |
| 158 | const VKDevice& device, VKScheduler& scheduler, | 162 | const VKDevice& device, VKScheduler& scheduler, |
| 159 | VKDescriptorPool& descriptor_pool, | 163 | VKDescriptorPool& descriptor_pool, |
| 160 | VKUpdateDescriptorQueue& update_descriptor_queue); | 164 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 165 | VKRenderPassCache& renderpass_cache); | ||
| 161 | ~VKPipelineCache(); | 166 | ~VKPipelineCache(); |
| 162 | 167 | ||
| 163 | std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); | 168 | std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); |
| @@ -180,8 +185,7 @@ private: | |||
| 180 | VKScheduler& scheduler; | 185 | VKScheduler& scheduler; |
| 181 | VKDescriptorPool& descriptor_pool; | 186 | VKDescriptorPool& descriptor_pool; |
| 182 | VKUpdateDescriptorQueue& update_descriptor_queue; | 187 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 183 | 188 | VKRenderPassCache& renderpass_cache; | |
| 184 | VKRenderPassCache renderpass_cache; | ||
| 185 | 189 | ||
| 186 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 190 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; |
| 187 | 191 | ||
| @@ -194,7 +198,7 @@ private: | |||
| 194 | }; | 198 | }; |
| 195 | 199 | ||
| 196 | void FillDescriptorUpdateTemplateEntries( | 200 | void FillDescriptorUpdateTemplateEntries( |
| 197 | const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, | 201 | const ShaderEntries& entries, u32& binding, u32& offset, |
| 198 | std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries); | 202 | std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries); |
| 199 | 203 | ||
| 200 | } // namespace Vulkan | 204 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 31c078f6a..58c69b786 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" | 36 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" |
| 37 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 37 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 38 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 38 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 39 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | ||
| 39 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 40 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 40 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 41 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 41 | 42 | ||
| @@ -105,17 +106,20 @@ void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlag | |||
| 105 | 106 | ||
| 106 | template <typename Engine, typename Entry> | 107 | template <typename Engine, typename Entry> |
| 107 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | 108 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, |
| 108 | std::size_t stage) { | 109 | std::size_t stage, std::size_t index = 0) { |
| 109 | const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); | 110 | const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); |
| 110 | if (entry.IsBindless()) { | 111 | if (entry.IsBindless()) { |
| 111 | const Tegra::Texture::TextureHandle tex_handle = | 112 | const Tegra::Texture::TextureHandle tex_handle = |
| 112 | engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset()); | 113 | engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset()); |
| 113 | return engine.GetTextureInfo(tex_handle); | 114 | return engine.GetTextureInfo(tex_handle); |
| 114 | } | 115 | } |
| 116 | const auto& gpu_profile = engine.AccessGuestDriverProfile(); | ||
| 117 | const u32 entry_offset = static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); | ||
| 118 | const u32 offset = entry.GetOffset() + entry_offset; | ||
| 115 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { | 119 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { |
| 116 | return engine.GetStageTexture(stage_type, entry.GetOffset()); | 120 | return engine.GetStageTexture(stage_type, offset); |
| 117 | } else { | 121 | } else { |
| 118 | return engine.GetTexture(entry.GetOffset()); | 122 | return engine.GetTexture(offset); |
| 119 | } | 123 | } |
| 120 | } | 124 | } |
| 121 | 125 | ||
| @@ -277,17 +281,19 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf, | |||
| 277 | RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer, | 281 | RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer, |
| 278 | VKScreenInfo& screen_info, const VKDevice& device, | 282 | VKScreenInfo& screen_info, const VKDevice& device, |
| 279 | VKResourceManager& resource_manager, | 283 | VKResourceManager& resource_manager, |
| 280 | VKMemoryManager& memory_manager, VKScheduler& scheduler) | 284 | VKMemoryManager& memory_manager, StateTracker& state_tracker, |
| 285 | VKScheduler& scheduler) | ||
| 281 | : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer}, | 286 | : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer}, |
| 282 | screen_info{screen_info}, device{device}, resource_manager{resource_manager}, | 287 | screen_info{screen_info}, device{device}, resource_manager{resource_manager}, |
| 283 | memory_manager{memory_manager}, scheduler{scheduler}, | 288 | memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler}, |
| 284 | staging_pool(device, memory_manager, scheduler), descriptor_pool(device), | 289 | staging_pool(device, memory_manager, scheduler), descriptor_pool(device), |
| 285 | update_descriptor_queue(device, scheduler), | 290 | update_descriptor_queue(device, scheduler), renderpass_cache(device), |
| 286 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | 291 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |
| 287 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | 292 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |
| 288 | texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, | 293 | texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, |
| 289 | staging_pool), | 294 | staging_pool), |
| 290 | pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), | 295 | pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, |
| 296 | renderpass_cache), | ||
| 291 | buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), | 297 | buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), |
| 292 | sampler_cache(device), query_cache(system, *this, device, scheduler) { | 298 | sampler_cache(device), query_cache(system, *this, device, scheduler) { |
| 293 | scheduler.SetQueryCache(query_cache); | 299 | scheduler.SetQueryCache(query_cache); |
| @@ -342,6 +348,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 342 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); | 348 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); |
| 343 | } | 349 | } |
| 344 | 350 | ||
| 351 | BeginTransformFeedback(); | ||
| 352 | |||
| 345 | const auto pipeline_layout = pipeline.GetLayout(); | 353 | const auto pipeline_layout = pipeline.GetLayout(); |
| 346 | const auto descriptor_set = pipeline.CommitDescriptorSet(); | 354 | const auto descriptor_set = pipeline.CommitDescriptorSet(); |
| 347 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { | 355 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { |
| @@ -351,18 +359,23 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 351 | } | 359 | } |
| 352 | draw_params.Draw(cmdbuf, dld); | 360 | draw_params.Draw(cmdbuf, dld); |
| 353 | }); | 361 | }); |
| 362 | |||
| 363 | EndTransformFeedback(); | ||
| 354 | } | 364 | } |
| 355 | 365 | ||
| 356 | void RasterizerVulkan::Clear() { | 366 | void RasterizerVulkan::Clear() { |
| 357 | MICROPROFILE_SCOPE(Vulkan_Clearing); | 367 | MICROPROFILE_SCOPE(Vulkan_Clearing); |
| 358 | 368 | ||
| 359 | query_cache.UpdateCounters(); | ||
| 360 | |||
| 361 | const auto& gpu = system.GPU().Maxwell3D(); | 369 | const auto& gpu = system.GPU().Maxwell3D(); |
| 362 | if (!system.GPU().Maxwell3D().ShouldExecute()) { | 370 | if (!system.GPU().Maxwell3D().ShouldExecute()) { |
| 363 | return; | 371 | return; |
| 364 | } | 372 | } |
| 365 | 373 | ||
| 374 | sampled_views.clear(); | ||
| 375 | image_views.clear(); | ||
| 376 | |||
| 377 | query_cache.UpdateCounters(); | ||
| 378 | |||
| 366 | const auto& regs = gpu.regs; | 379 | const auto& regs = gpu.regs; |
| 367 | const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || | 380 | const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || |
| 368 | regs.clear_buffers.A; | 381 | regs.clear_buffers.A; |
| @@ -371,52 +384,54 @@ void RasterizerVulkan::Clear() { | |||
| 371 | if (!use_color && !use_depth && !use_stencil) { | 384 | if (!use_color && !use_depth && !use_stencil) { |
| 372 | return; | 385 | return; |
| 373 | } | 386 | } |
| 374 | // Clearing images requires to be out of a renderpass | ||
| 375 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 376 | 387 | ||
| 377 | // TODO(Rodrigo): Implement clears rendering a quad or using beginning a renderpass. | 388 | [[maybe_unused]] const auto texceptions = UpdateAttachments(); |
| 389 | DEBUG_ASSERT(texceptions.none()); | ||
| 390 | SetupImageTransitions(0, color_attachments, zeta_attachment); | ||
| 378 | 391 | ||
| 379 | if (use_color) { | 392 | const vk::RenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); |
| 380 | View color_view; | 393 | const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); |
| 381 | { | 394 | scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); |
| 382 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | ||
| 383 | color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false); | ||
| 384 | } | ||
| 385 | 395 | ||
| 386 | color_view->Transition(vk::ImageLayout::eTransferDstOptimal, | 396 | const auto& scissor = regs.scissor_test[0]; |
| 387 | vk::PipelineStageFlagBits::eTransfer, | 397 | const vk::Offset2D scissor_offset(scissor.min_x, scissor.min_y); |
| 388 | vk::AccessFlagBits::eTransferWrite); | 398 | vk::Extent2D scissor_extent{scissor.max_x - scissor.min_x, scissor.max_y - scissor.min_y}; |
| 399 | scissor_extent.width = std::min(scissor_extent.width, render_area.width); | ||
| 400 | scissor_extent.height = std::min(scissor_extent.height, render_area.height); | ||
| 389 | 401 | ||
| 402 | const u32 layer = regs.clear_buffers.layer; | ||
| 403 | const vk::ClearRect clear_rect({scissor_offset, scissor_extent}, layer, 1); | ||
| 404 | |||
| 405 | if (use_color) { | ||
| 390 | const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], | 406 | const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], |
| 391 | regs.clear_color[2], regs.clear_color[3]}; | 407 | regs.clear_color[2], regs.clear_color[3]}; |
| 392 | const vk::ClearColorValue clear(clear_color); | 408 | const vk::ClearValue clear_value{clear_color}; |
| 393 | scheduler.Record([image = color_view->GetImage(), | 409 | const u32 color_attachment = regs.clear_buffers.RT; |
| 394 | subresource = color_view->GetImageSubresourceRange(), | 410 | scheduler.Record([color_attachment, clear_value, clear_rect](auto cmdbuf, auto& dld) { |
| 395 | clear](auto cmdbuf, auto& dld) { | 411 | const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, color_attachment, |
| 396 | cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource, | 412 | clear_value); |
| 397 | dld); | 413 | cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); |
| 398 | }); | 414 | }); |
| 399 | } | 415 | } |
| 400 | if (use_depth || use_stencil) { | ||
| 401 | View zeta_surface; | ||
| 402 | { | ||
| 403 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | ||
| 404 | zeta_surface = texture_cache.GetDepthBufferSurface(false); | ||
| 405 | } | ||
| 406 | 416 | ||
| 407 | zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal, | 417 | if (!use_depth && !use_stencil) { |
| 408 | vk::PipelineStageFlagBits::eTransfer, | 418 | return; |
| 409 | vk::AccessFlagBits::eTransferWrite); | 419 | } |
| 410 | 420 | vk::ImageAspectFlags aspect_flags; | |
| 411 | const vk::ClearDepthStencilValue clear(regs.clear_depth, | 421 | if (use_depth) { |
| 412 | static_cast<u32>(regs.clear_stencil)); | 422 | aspect_flags |= vk::ImageAspectFlagBits::eDepth; |
| 413 | scheduler.Record([image = zeta_surface->GetImage(), | 423 | } |
| 414 | subresource = zeta_surface->GetImageSubresourceRange(), | 424 | if (use_stencil) { |
| 415 | clear](auto cmdbuf, auto& dld) { | 425 | aspect_flags |= vk::ImageAspectFlagBits::eStencil; |
| 416 | cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear, | ||
| 417 | subresource, dld); | ||
| 418 | }); | ||
| 419 | } | 426 | } |
| 427 | |||
| 428 | scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, | ||
| 429 | clear_rect, aspect_flags](auto cmdbuf, auto& dld) { | ||
| 430 | const vk::ClearDepthStencilValue clear_zeta(clear_depth, clear_stencil); | ||
| 431 | const vk::ClearValue clear_value{clear_zeta}; | ||
| 432 | const vk::ClearAttachment attachment(aspect_flags, 0, clear_value); | ||
| 433 | cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); | ||
| 434 | }); | ||
| 420 | } | 435 | } |
| 421 | 436 | ||
| 422 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | 437 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { |
| @@ -533,8 +548,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 533 | 548 | ||
| 534 | // Verify that the cached surface is the same size and format as the requested framebuffer | 549 | // Verify that the cached surface is the same size and format as the requested framebuffer |
| 535 | const auto& params{surface->GetSurfaceParams()}; | 550 | const auto& params{surface->GetSurfaceParams()}; |
| 536 | const auto& pixel_format{ | ||
| 537 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; | ||
| 538 | ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); | 551 | ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); |
| 539 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); | 552 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); |
| 540 | 553 | ||
| @@ -545,6 +558,10 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 545 | return true; | 558 | return true; |
| 546 | } | 559 | } |
| 547 | 560 | ||
| 561 | void RasterizerVulkan::SetupDirtyFlags() { | ||
| 562 | state_tracker.Initialize(); | ||
| 563 | } | ||
| 564 | |||
| 548 | void RasterizerVulkan::FlushWork() { | 565 | void RasterizerVulkan::FlushWork() { |
| 549 | static constexpr u32 DRAWS_TO_DISPATCH = 4096; | 566 | static constexpr u32 DRAWS_TO_DISPATCH = 4096; |
| 550 | 567 | ||
| @@ -568,9 +585,9 @@ void RasterizerVulkan::FlushWork() { | |||
| 568 | 585 | ||
| 569 | RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { | 586 | RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { |
| 570 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | 587 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); |
| 571 | auto& dirty = system.GPU().Maxwell3D().dirty; | 588 | auto& dirty = system.GPU().Maxwell3D().dirty.flags; |
| 572 | const bool update_rendertargets = dirty.render_settings; | 589 | const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; |
| 573 | dirty.render_settings = false; | 590 | dirty[VideoCommon::Dirty::RenderTargets] = false; |
| 574 | 591 | ||
| 575 | texture_cache.GuardRenderTargets(true); | 592 | texture_cache.GuardRenderTargets(true); |
| 576 | 593 | ||
| @@ -611,33 +628,34 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen | |||
| 611 | std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers( | 628 | std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers( |
| 612 | vk::RenderPass renderpass) { | 629 | vk::RenderPass renderpass) { |
| 613 | FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(), | 630 | FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(), |
| 614 | std::numeric_limits<u32>::max()}; | 631 | std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; |
| 615 | 632 | ||
| 616 | const auto MarkAsModifiedAndPush = [&](const View& view) { | 633 | const auto try_push = [&](const View& view) { |
| 617 | if (view == nullptr) { | 634 | if (!view) { |
| 618 | return false; | 635 | return false; |
| 619 | } | 636 | } |
| 620 | key.views.push_back(view->GetHandle()); | 637 | key.views.push_back(view->GetHandle()); |
| 621 | key.width = std::min(key.width, view->GetWidth()); | 638 | key.width = std::min(key.width, view->GetWidth()); |
| 622 | key.height = std::min(key.height, view->GetHeight()); | 639 | key.height = std::min(key.height, view->GetHeight()); |
| 640 | key.layers = std::min(key.layers, view->GetNumLayers()); | ||
| 623 | return true; | 641 | return true; |
| 624 | }; | 642 | }; |
| 625 | 643 | ||
| 626 | for (std::size_t index = 0; index < std::size(color_attachments); ++index) { | 644 | for (std::size_t index = 0; index < std::size(color_attachments); ++index) { |
| 627 | if (MarkAsModifiedAndPush(color_attachments[index])) { | 645 | if (try_push(color_attachments[index])) { |
| 628 | texture_cache.MarkColorBufferInUse(index); | 646 | texture_cache.MarkColorBufferInUse(index); |
| 629 | } | 647 | } |
| 630 | } | 648 | } |
| 631 | if (MarkAsModifiedAndPush(zeta_attachment)) { | 649 | if (try_push(zeta_attachment)) { |
| 632 | texture_cache.MarkDepthBufferInUse(); | 650 | texture_cache.MarkDepthBufferInUse(); |
| 633 | } | 651 | } |
| 634 | 652 | ||
| 635 | const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); | 653 | const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); |
| 636 | auto& framebuffer = fbentry->second; | 654 | auto& framebuffer = fbentry->second; |
| 637 | if (is_cache_miss) { | 655 | if (is_cache_miss) { |
| 638 | const vk::FramebufferCreateInfo framebuffer_ci({}, key.renderpass, | 656 | const vk::FramebufferCreateInfo framebuffer_ci( |
| 639 | static_cast<u32>(key.views.size()), | 657 | {}, key.renderpass, static_cast<u32>(key.views.size()), key.views.data(), key.width, |
| 640 | key.views.data(), key.width, key.height, 1); | 658 | key.height, key.layers); |
| 641 | const auto dev = device.GetLogical(); | 659 | const auto dev = device.GetLogical(); |
| 642 | const auto& dld = device.GetDispatchLoader(); | 660 | const auto& dld = device.GetDispatchLoader(); |
| 643 | framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); | 661 | framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); |
| @@ -719,13 +737,51 @@ void RasterizerVulkan::SetupImageTransitions( | |||
| 719 | } | 737 | } |
| 720 | 738 | ||
| 721 | void RasterizerVulkan::UpdateDynamicStates() { | 739 | void RasterizerVulkan::UpdateDynamicStates() { |
| 722 | auto& gpu = system.GPU().Maxwell3D(); | 740 | auto& regs = system.GPU().Maxwell3D().regs; |
| 723 | UpdateViewportsState(gpu); | 741 | UpdateViewportsState(regs); |
| 724 | UpdateScissorsState(gpu); | 742 | UpdateScissorsState(regs); |
| 725 | UpdateDepthBias(gpu); | 743 | UpdateDepthBias(regs); |
| 726 | UpdateBlendConstants(gpu); | 744 | UpdateBlendConstants(regs); |
| 727 | UpdateDepthBounds(gpu); | 745 | UpdateDepthBounds(regs); |
| 728 | UpdateStencilFaces(gpu); | 746 | UpdateStencilFaces(regs); |
| 747 | } | ||
| 748 | |||
| 749 | void RasterizerVulkan::BeginTransformFeedback() { | ||
| 750 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 751 | if (regs.tfb_enabled == 0) { | ||
| 752 | return; | ||
| 753 | } | ||
| 754 | |||
| 755 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | ||
| 756 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | ||
| 757 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | ||
| 758 | |||
| 759 | UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable); | ||
| 760 | UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable); | ||
| 761 | UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable); | ||
| 762 | |||
| 763 | const auto& binding = regs.tfb_bindings[0]; | ||
| 764 | UNIMPLEMENTED_IF(binding.buffer_enable == 0); | ||
| 765 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); | ||
| 766 | |||
| 767 | const GPUVAddr gpu_addr = binding.Address(); | ||
| 768 | const std::size_t size = binding.buffer_size; | ||
| 769 | const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | ||
| 770 | |||
| 771 | scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) { | ||
| 772 | cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld); | ||
| 773 | cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld); | ||
| 774 | }); | ||
| 775 | } | ||
| 776 | |||
| 777 | void RasterizerVulkan::EndTransformFeedback() { | ||
| 778 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 779 | if (regs.tfb_enabled == 0) { | ||
| 780 | return; | ||
| 781 | } | ||
| 782 | |||
| 783 | scheduler.Record( | ||
| 784 | [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); }); | ||
| 729 | } | 785 | } |
| 730 | 786 | ||
| 731 | void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | 787 | void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, |
| @@ -835,14 +891,16 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std:: | |||
| 835 | MICROPROFILE_SCOPE(Vulkan_Textures); | 891 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 836 | const auto& gpu = system.GPU().Maxwell3D(); | 892 | const auto& gpu = system.GPU().Maxwell3D(); |
| 837 | for (const auto& entry : entries.samplers) { | 893 | for (const auto& entry : entries.samplers) { |
| 838 | const auto texture = GetTextureInfo(gpu, entry, stage); | 894 | for (std::size_t i = 0; i < entry.Size(); ++i) { |
| 839 | SetupTexture(texture, entry); | 895 | const auto texture = GetTextureInfo(gpu, entry, stage, i); |
| 896 | SetupTexture(texture, entry); | ||
| 897 | } | ||
| 840 | } | 898 | } |
| 841 | } | 899 | } |
| 842 | 900 | ||
| 843 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { | 901 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { |
| 844 | MICROPROFILE_SCOPE(Vulkan_Images); | 902 | MICROPROFILE_SCOPE(Vulkan_Images); |
| 845 | const auto& gpu = system.GPU().KeplerCompute(); | 903 | const auto& gpu = system.GPU().Maxwell3D(); |
| 846 | for (const auto& entry : entries.images) { | 904 | for (const auto& entry : entries.images) { |
| 847 | const auto tic = GetTextureInfo(gpu, entry, stage).tic; | 905 | const auto tic = GetTextureInfo(gpu, entry, stage).tic; |
| 848 | SetupImage(tic, entry); | 906 | SetupImage(tic, entry); |
| @@ -885,8 +943,10 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | |||
| 885 | MICROPROFILE_SCOPE(Vulkan_Textures); | 943 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 886 | const auto& gpu = system.GPU().KeplerCompute(); | 944 | const auto& gpu = system.GPU().KeplerCompute(); |
| 887 | for (const auto& entry : entries.samplers) { | 945 | for (const auto& entry : entries.samplers) { |
| 888 | const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex); | 946 | for (std::size_t i = 0; i < entry.Size(); ++i) { |
| 889 | SetupTexture(texture, entry); | 947 | const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i); |
| 948 | SetupTexture(texture, entry); | ||
| 949 | } | ||
| 890 | } | 950 | } |
| 891 | } | 951 | } |
| 892 | 952 | ||
| @@ -901,6 +961,13 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | |||
| 901 | 961 | ||
| 902 | void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, | 962 | void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, |
| 903 | const Tegra::Engines::ConstBufferInfo& buffer) { | 963 | const Tegra::Engines::ConstBufferInfo& buffer) { |
| 964 | if (!buffer.enabled) { | ||
| 965 | // Set values to zero to unbind buffers | ||
| 966 | update_descriptor_queue.AddBuffer(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, | ||
| 967 | sizeof(float)); | ||
| 968 | return; | ||
| 969 | } | ||
| 970 | |||
| 904 | // Align the size to avoid bad std140 interactions | 971 | // Align the size to avoid bad std140 interactions |
| 905 | const std::size_t size = | 972 | const std::size_t size = |
| 906 | Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); | 973 | Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); |
| @@ -971,12 +1038,10 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima | |||
| 971 | image_views.push_back(ImageView{std::move(view), image_layout}); | 1038 | image_views.push_back(ImageView{std::move(view), image_layout}); |
| 972 | } | 1039 | } |
| 973 | 1040 | ||
| 974 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) { | 1041 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 975 | if (!gpu.dirty.viewport_transform && scheduler.TouchViewports()) { | 1042 | if (!state_tracker.TouchViewports()) { |
| 976 | return; | 1043 | return; |
| 977 | } | 1044 | } |
| 978 | gpu.dirty.viewport_transform = false; | ||
| 979 | const auto& regs = gpu.regs; | ||
| 980 | const std::array viewports{ | 1045 | const std::array viewports{ |
| 981 | GetViewportState(device, regs, 0), GetViewportState(device, regs, 1), | 1046 | GetViewportState(device, regs, 0), GetViewportState(device, regs, 1), |
| 982 | GetViewportState(device, regs, 2), GetViewportState(device, regs, 3), | 1047 | GetViewportState(device, regs, 2), GetViewportState(device, regs, 3), |
| @@ -991,12 +1056,10 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) { | |||
| 991 | }); | 1056 | }); |
| 992 | } | 1057 | } |
| 993 | 1058 | ||
| 994 | void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) { | 1059 | void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 995 | if (!gpu.dirty.scissor_test && scheduler.TouchScissors()) { | 1060 | if (!state_tracker.TouchScissors()) { |
| 996 | return; | 1061 | return; |
| 997 | } | 1062 | } |
| 998 | gpu.dirty.scissor_test = false; | ||
| 999 | const auto& regs = gpu.regs; | ||
| 1000 | const std::array scissors = { | 1063 | const std::array scissors = { |
| 1001 | GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), | 1064 | GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), |
| 1002 | GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), | 1065 | GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), |
| @@ -1009,46 +1072,39 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) { | |||
| 1009 | }); | 1072 | }); |
| 1010 | } | 1073 | } |
| 1011 | 1074 | ||
| 1012 | void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu) { | 1075 | void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 1013 | if (!gpu.dirty.polygon_offset && scheduler.TouchDepthBias()) { | 1076 | if (!state_tracker.TouchDepthBias()) { |
| 1014 | return; | 1077 | return; |
| 1015 | } | 1078 | } |
| 1016 | gpu.dirty.polygon_offset = false; | ||
| 1017 | const auto& regs = gpu.regs; | ||
| 1018 | scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp, | 1079 | scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp, |
| 1019 | factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) { | 1080 | factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) { |
| 1020 | cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld); | 1081 | cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld); |
| 1021 | }); | 1082 | }); |
| 1022 | } | 1083 | } |
| 1023 | 1084 | ||
| 1024 | void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu) { | 1085 | void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 1025 | if (!gpu.dirty.blend_state && scheduler.TouchBlendConstants()) { | 1086 | if (!state_tracker.TouchBlendConstants()) { |
| 1026 | return; | 1087 | return; |
| 1027 | } | 1088 | } |
| 1028 | gpu.dirty.blend_state = false; | 1089 | const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, |
| 1029 | const std::array blend_color = {gpu.regs.blend_color.r, gpu.regs.blend_color.g, | 1090 | regs.blend_color.a}; |
| 1030 | gpu.regs.blend_color.b, gpu.regs.blend_color.a}; | ||
| 1031 | scheduler.Record([blend_color](auto cmdbuf, auto& dld) { | 1091 | scheduler.Record([blend_color](auto cmdbuf, auto& dld) { |
| 1032 | cmdbuf.setBlendConstants(blend_color.data(), dld); | 1092 | cmdbuf.setBlendConstants(blend_color.data(), dld); |
| 1033 | }); | 1093 | }); |
| 1034 | } | 1094 | } |
| 1035 | 1095 | ||
| 1036 | void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu) { | 1096 | void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 1037 | if (!gpu.dirty.depth_bounds_values && scheduler.TouchDepthBounds()) { | 1097 | if (!state_tracker.TouchDepthBounds()) { |
| 1038 | return; | 1098 | return; |
| 1039 | } | 1099 | } |
| 1040 | gpu.dirty.depth_bounds_values = false; | ||
| 1041 | const auto& regs = gpu.regs; | ||
| 1042 | scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( | 1100 | scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( |
| 1043 | auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); }); | 1101 | auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); }); |
| 1044 | } | 1102 | } |
| 1045 | 1103 | ||
| 1046 | void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu) { | 1104 | void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 1047 | if (!gpu.dirty.stencil_test && scheduler.TouchStencilValues()) { | 1105 | if (!state_tracker.TouchStencilProperties()) { |
| 1048 | return; | 1106 | return; |
| 1049 | } | 1107 | } |
| 1050 | gpu.dirty.stencil_test = false; | ||
| 1051 | const auto& regs = gpu.regs; | ||
| 1052 | if (regs.stencil_two_side_enable) { | 1108 | if (regs.stencil_two_side_enable) { |
| 1053 | // Separate values per face | 1109 | // Separate values per face |
| 1054 | scheduler.Record( | 1110 | scheduler.Record( |
| @@ -1099,7 +1155,7 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { | |||
| 1099 | // This implementation assumes that all attributes are used in the shader. | 1155 | // This implementation assumes that all attributes are used in the shader. |
| 1100 | const GPUVAddr start{regs.vertex_array[index].StartAddress()}; | 1156 | const GPUVAddr start{regs.vertex_array[index].StartAddress()}; |
| 1101 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | 1157 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; |
| 1102 | DEBUG_ASSERT(end > start); | 1158 | DEBUG_ASSERT(end >= start); |
| 1103 | 1159 | ||
| 1104 | size += (end - start + 1) * regs.vertex_array[index].enable; | 1160 | size += (end - start + 1) * regs.vertex_array[index].enable; |
| 1105 | } | 1161 | } |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 138903d60..3185868e9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -56,6 +56,7 @@ struct FramebufferCacheKey { | |||
| 56 | vk::RenderPass renderpass{}; | 56 | vk::RenderPass renderpass{}; |
| 57 | u32 width = 0; | 57 | u32 width = 0; |
| 58 | u32 height = 0; | 58 | u32 height = 0; |
| 59 | u32 layers = 0; | ||
| 59 | ImageViewsPack views; | 60 | ImageViewsPack views; |
| 60 | 61 | ||
| 61 | std::size_t Hash() const noexcept { | 62 | std::size_t Hash() const noexcept { |
| @@ -66,12 +67,17 @@ struct FramebufferCacheKey { | |||
| 66 | } | 67 | } |
| 67 | boost::hash_combine(hash, width); | 68 | boost::hash_combine(hash, width); |
| 68 | boost::hash_combine(hash, height); | 69 | boost::hash_combine(hash, height); |
| 70 | boost::hash_combine(hash, layers); | ||
| 69 | return hash; | 71 | return hash; |
| 70 | } | 72 | } |
| 71 | 73 | ||
| 72 | bool operator==(const FramebufferCacheKey& rhs) const noexcept { | 74 | bool operator==(const FramebufferCacheKey& rhs) const noexcept { |
| 73 | return std::tie(renderpass, views, width, height) == | 75 | return std::tie(renderpass, views, width, height, layers) == |
| 74 | std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height); | 76 | std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers); |
| 77 | } | ||
| 78 | |||
| 79 | bool operator!=(const FramebufferCacheKey& rhs) const noexcept { | ||
| 80 | return !operator==(rhs); | ||
| 75 | } | 81 | } |
| 76 | }; | 82 | }; |
| 77 | 83 | ||
| @@ -90,6 +96,7 @@ struct hash<Vulkan::FramebufferCacheKey> { | |||
| 90 | 96 | ||
| 91 | namespace Vulkan { | 97 | namespace Vulkan { |
| 92 | 98 | ||
| 99 | class StateTracker; | ||
| 93 | class BufferBindings; | 100 | class BufferBindings; |
| 94 | 101 | ||
| 95 | struct ImageView { | 102 | struct ImageView { |
| @@ -102,7 +109,7 @@ public: | |||
| 102 | explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, | 109 | explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, |
| 103 | VKScreenInfo& screen_info, const VKDevice& device, | 110 | VKScreenInfo& screen_info, const VKDevice& device, |
| 104 | VKResourceManager& resource_manager, VKMemoryManager& memory_manager, | 111 | VKResourceManager& resource_manager, VKMemoryManager& memory_manager, |
| 105 | VKScheduler& scheduler); | 112 | StateTracker& state_tracker, VKScheduler& scheduler); |
| 106 | ~RasterizerVulkan() override; | 113 | ~RasterizerVulkan() override; |
| 107 | 114 | ||
| 108 | void Draw(bool is_indexed, bool is_instanced) override; | 115 | void Draw(bool is_indexed, bool is_instanced) override; |
| @@ -121,6 +128,7 @@ public: | |||
| 121 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 128 | const Tegra::Engines::Fermi2D::Config& copy_config) override; |
| 122 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 129 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 123 | u32 pixel_stride) override; | 130 | u32 pixel_stride) override; |
| 131 | void SetupDirtyFlags() override; | ||
| 124 | 132 | ||
| 125 | /// Maximum supported size that a constbuffer can have in bytes. | 133 | /// Maximum supported size that a constbuffer can have in bytes. |
| 126 | static constexpr std::size_t MaxConstbufferSize = 0x10000; | 134 | static constexpr std::size_t MaxConstbufferSize = 0x10000; |
| @@ -161,6 +169,10 @@ private: | |||
| 161 | 169 | ||
| 162 | void UpdateDynamicStates(); | 170 | void UpdateDynamicStates(); |
| 163 | 171 | ||
| 172 | void BeginTransformFeedback(); | ||
| 173 | |||
| 174 | void EndTransformFeedback(); | ||
| 175 | |||
| 164 | bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); | 176 | bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); |
| 165 | 177 | ||
| 166 | void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | 178 | void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, |
| @@ -209,12 +221,12 @@ private: | |||
| 209 | 221 | ||
| 210 | void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); | 222 | void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); |
| 211 | 223 | ||
| 212 | void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu); | 224 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 213 | void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu); | 225 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 214 | void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu); | 226 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); |
| 215 | void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu); | 227 | void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs); |
| 216 | void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu); | 228 | void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); |
| 217 | void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu); | 229 | void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); |
| 218 | 230 | ||
| 219 | std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; | 231 | std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; |
| 220 | 232 | ||
| @@ -235,11 +247,13 @@ private: | |||
| 235 | const VKDevice& device; | 247 | const VKDevice& device; |
| 236 | VKResourceManager& resource_manager; | 248 | VKResourceManager& resource_manager; |
| 237 | VKMemoryManager& memory_manager; | 249 | VKMemoryManager& memory_manager; |
| 250 | StateTracker& state_tracker; | ||
| 238 | VKScheduler& scheduler; | 251 | VKScheduler& scheduler; |
| 239 | 252 | ||
| 240 | VKStagingBufferPool staging_pool; | 253 | VKStagingBufferPool staging_pool; |
| 241 | VKDescriptorPool descriptor_pool; | 254 | VKDescriptorPool descriptor_pool; |
| 242 | VKUpdateDescriptorQueue update_descriptor_queue; | 255 | VKUpdateDescriptorQueue update_descriptor_queue; |
| 256 | VKRenderPassCache renderpass_cache; | ||
| 243 | QuadArrayPass quad_array_pass; | 257 | QuadArrayPass quad_array_pass; |
| 244 | Uint8Pass uint8_pass; | 258 | Uint8Pass uint8_pass; |
| 245 | 259 | ||
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 92bd6c344..b61d4fe63 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -2,6 +2,12 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <memory> | ||
| 6 | #include <mutex> | ||
| 7 | #include <optional> | ||
| 8 | #include <thread> | ||
| 9 | #include <utility> | ||
| 10 | |||
| 5 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 6 | #include "common/microprofile.h" | 12 | #include "common/microprofile.h" |
| 7 | #include "video_core/renderer_vulkan/declarations.h" | 13 | #include "video_core/renderer_vulkan/declarations.h" |
| @@ -9,6 +15,7 @@ | |||
| 9 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 15 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 10 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 16 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 11 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 18 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | ||
| 12 | 19 | ||
| 13 | namespace Vulkan { | 20 | namespace Vulkan { |
| 14 | 21 | ||
| @@ -29,9 +36,10 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf, | |||
| 29 | last = nullptr; | 36 | last = nullptr; |
| 30 | } | 37 | } |
| 31 | 38 | ||
| 32 | VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager) | 39 | VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager, |
| 33 | : device{device}, resource_manager{resource_manager}, next_fence{ | 40 | StateTracker& state_tracker) |
| 34 | &resource_manager.CommitFence()} { | 41 | : device{device}, resource_manager{resource_manager}, state_tracker{state_tracker}, |
| 42 | next_fence{&resource_manager.CommitFence()} { | ||
| 35 | AcquireNewChunk(); | 43 | AcquireNewChunk(); |
| 36 | AllocateNewContext(); | 44 | AllocateNewContext(); |
| 37 | worker_thread = std::thread(&VKScheduler::WorkerThread, this); | 45 | worker_thread = std::thread(&VKScheduler::WorkerThread, this); |
| @@ -157,12 +165,7 @@ void VKScheduler::AllocateNewContext() { | |||
| 157 | 165 | ||
| 158 | void VKScheduler::InvalidateState() { | 166 | void VKScheduler::InvalidateState() { |
| 159 | state.graphics_pipeline = nullptr; | 167 | state.graphics_pipeline = nullptr; |
| 160 | state.viewports = false; | 168 | state_tracker.InvalidateCommandBufferState(); |
| 161 | state.scissors = false; | ||
| 162 | state.depth_bias = false; | ||
| 163 | state.blend_constants = false; | ||
| 164 | state.depth_bounds = false; | ||
| 165 | state.stencil_values = false; | ||
| 166 | } | 169 | } |
| 167 | 170 | ||
| 168 | void VKScheduler::EndPendingOperations() { | 171 | void VKScheduler::EndPendingOperations() { |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 62fd7858b..c7cc291c3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | 17 | ||
| 18 | namespace Vulkan { | 18 | namespace Vulkan { |
| 19 | 19 | ||
| 20 | class StateTracker; | ||
| 20 | class VKDevice; | 21 | class VKDevice; |
| 21 | class VKFence; | 22 | class VKFence; |
| 22 | class VKQueryCache; | 23 | class VKQueryCache; |
| @@ -43,7 +44,8 @@ private: | |||
| 43 | /// OpenGL-like operations on Vulkan command buffers. | 44 | /// OpenGL-like operations on Vulkan command buffers. |
| 44 | class VKScheduler { | 45 | class VKScheduler { |
| 45 | public: | 46 | public: |
| 46 | explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); | 47 | explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager, |
| 48 | StateTracker& state_tracker); | ||
| 47 | ~VKScheduler(); | 49 | ~VKScheduler(); |
| 48 | 50 | ||
| 49 | /// Sends the current execution context to the GPU. | 51 | /// Sends the current execution context to the GPU. |
| @@ -74,36 +76,6 @@ public: | |||
| 74 | query_cache = &query_cache_; | 76 | query_cache = &query_cache_; |
| 75 | } | 77 | } |
| 76 | 78 | ||
| 77 | /// Returns true when viewports have been set in the current command buffer. | ||
| 78 | bool TouchViewports() { | ||
| 79 | return std::exchange(state.viewports, true); | ||
| 80 | } | ||
| 81 | |||
| 82 | /// Returns true when scissors have been set in the current command buffer. | ||
| 83 | bool TouchScissors() { | ||
| 84 | return std::exchange(state.scissors, true); | ||
| 85 | } | ||
| 86 | |||
| 87 | /// Returns true when depth bias have been set in the current command buffer. | ||
| 88 | bool TouchDepthBias() { | ||
| 89 | return std::exchange(state.depth_bias, true); | ||
| 90 | } | ||
| 91 | |||
| 92 | /// Returns true when blend constants have been set in the current command buffer. | ||
| 93 | bool TouchBlendConstants() { | ||
| 94 | return std::exchange(state.blend_constants, true); | ||
| 95 | } | ||
| 96 | |||
| 97 | /// Returns true when depth bounds have been set in the current command buffer. | ||
| 98 | bool TouchDepthBounds() { | ||
| 99 | return std::exchange(state.depth_bounds, true); | ||
| 100 | } | ||
| 101 | |||
| 102 | /// Returns true when stencil values have been set in the current command buffer. | ||
| 103 | bool TouchStencilValues() { | ||
| 104 | return std::exchange(state.stencil_values, true); | ||
| 105 | } | ||
| 106 | |||
| 107 | /// Send work to a separate thread. | 79 | /// Send work to a separate thread. |
| 108 | template <typename T> | 80 | template <typename T> |
| 109 | void Record(T&& command) { | 81 | void Record(T&& command) { |
| @@ -217,6 +189,8 @@ private: | |||
| 217 | 189 | ||
| 218 | const VKDevice& device; | 190 | const VKDevice& device; |
| 219 | VKResourceManager& resource_manager; | 191 | VKResourceManager& resource_manager; |
| 192 | StateTracker& state_tracker; | ||
| 193 | |||
| 220 | VKQueryCache* query_cache = nullptr; | 194 | VKQueryCache* query_cache = nullptr; |
| 221 | 195 | ||
| 222 | vk::CommandBuffer current_cmdbuf; | 196 | vk::CommandBuffer current_cmdbuf; |
| @@ -226,12 +200,6 @@ private: | |||
| 226 | struct State { | 200 | struct State { |
| 227 | std::optional<vk::RenderPassBeginInfo> renderpass; | 201 | std::optional<vk::RenderPassBeginInfo> renderpass; |
| 228 | vk::Pipeline graphics_pipeline; | 202 | vk::Pipeline graphics_pipeline; |
| 229 | bool viewports = false; | ||
| 230 | bool scissors = false; | ||
| 231 | bool depth_bias = false; | ||
| 232 | bool blend_constants = false; | ||
| 233 | bool depth_bounds = false; | ||
| 234 | bool stencil_values = false; | ||
| 235 | } state; | 203 | } state; |
| 236 | 204 | ||
| 237 | std::unique_ptr<CommandChunk> chunk; | 205 | std::unique_ptr<CommandChunk> chunk; |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 2da622d15..51ecb5567 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -5,7 +5,9 @@ | |||
| 5 | #include <functional> | 5 | #include <functional> |
| 6 | #include <limits> | 6 | #include <limits> |
| 7 | #include <map> | 7 | #include <map> |
| 8 | #include <optional> | ||
| 8 | #include <type_traits> | 9 | #include <type_traits> |
| 10 | #include <unordered_map> | ||
| 9 | #include <utility> | 11 | #include <utility> |
| 10 | 12 | ||
| 11 | #include <fmt/format.h> | 13 | #include <fmt/format.h> |
| @@ -24,6 +26,7 @@ | |||
| 24 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 26 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 25 | #include "video_core/shader/node.h" | 27 | #include "video_core/shader/node.h" |
| 26 | #include "video_core/shader/shader_ir.h" | 28 | #include "video_core/shader/shader_ir.h" |
| 29 | #include "video_core/shader/transform_feedback.h" | ||
| 27 | 30 | ||
| 28 | namespace Vulkan { | 31 | namespace Vulkan { |
| 29 | 32 | ||
| @@ -69,8 +72,9 @@ struct TexelBuffer { | |||
| 69 | 72 | ||
| 70 | struct SampledImage { | 73 | struct SampledImage { |
| 71 | Id image_type{}; | 74 | Id image_type{}; |
| 72 | Id sampled_image_type{}; | 75 | Id sampler_type{}; |
| 73 | Id sampler{}; | 76 | Id sampler_pointer_type{}; |
| 77 | Id variable{}; | ||
| 74 | }; | 78 | }; |
| 75 | 79 | ||
| 76 | struct StorageImage { | 80 | struct StorageImage { |
| @@ -92,6 +96,12 @@ struct VertexIndices { | |||
| 92 | std::optional<u32> clip_distances; | 96 | std::optional<u32> clip_distances; |
| 93 | }; | 97 | }; |
| 94 | 98 | ||
| 99 | struct GenericVaryingDescription { | ||
| 100 | Id id = nullptr; | ||
| 101 | u32 first_element = 0; | ||
| 102 | bool is_scalar = false; | ||
| 103 | }; | ||
| 104 | |||
| 95 | spv::Dim GetSamplerDim(const Sampler& sampler) { | 105 | spv::Dim GetSamplerDim(const Sampler& sampler) { |
| 96 | ASSERT(!sampler.IsBuffer()); | 106 | ASSERT(!sampler.IsBuffer()); |
| 97 | switch (sampler.GetType()) { | 107 | switch (sampler.GetType()) { |
| @@ -265,9 +275,13 @@ bool IsPrecise(Operation operand) { | |||
| 265 | class SPIRVDecompiler final : public Sirit::Module { | 275 | class SPIRVDecompiler final : public Sirit::Module { |
| 266 | public: | 276 | public: |
| 267 | explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, | 277 | explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, |
| 268 | const Specialization& specialization) | 278 | const Registry& registry, const Specialization& specialization) |
| 269 | : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, | 279 | : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, |
| 270 | specialization{specialization} { | 280 | registry{registry}, specialization{specialization} { |
| 281 | if (stage != ShaderType::Compute) { | ||
| 282 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | ||
| 283 | } | ||
| 284 | |||
| 271 | AddCapability(spv::Capability::Shader); | 285 | AddCapability(spv::Capability::Shader); |
| 272 | AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); | 286 | AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); |
| 273 | AddCapability(spv::Capability::ImageQuery); | 287 | AddCapability(spv::Capability::ImageQuery); |
| @@ -285,6 +299,15 @@ public: | |||
| 285 | AddExtension("SPV_KHR_variable_pointers"); | 299 | AddExtension("SPV_KHR_variable_pointers"); |
| 286 | AddExtension("SPV_KHR_shader_draw_parameters"); | 300 | AddExtension("SPV_KHR_shader_draw_parameters"); |
| 287 | 301 | ||
| 302 | if (!transform_feedback.empty()) { | ||
| 303 | if (device.IsExtTransformFeedbackSupported()) { | ||
| 304 | AddCapability(spv::Capability::TransformFeedback); | ||
| 305 | } else { | ||
| 306 | LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not " | ||
| 307 | "supported on this device"); | ||
| 308 | } | ||
| 309 | } | ||
| 310 | |||
| 288 | if (ir.UsesLayer() || ir.UsesViewportIndex()) { | 311 | if (ir.UsesLayer() || ir.UsesViewportIndex()) { |
| 289 | if (ir.UsesViewportIndex()) { | 312 | if (ir.UsesViewportIndex()) { |
| 290 | AddCapability(spv::Capability::MultiViewport); | 313 | AddCapability(spv::Capability::MultiViewport); |
| @@ -295,7 +318,7 @@ public: | |||
| 295 | } | 318 | } |
| 296 | } | 319 | } |
| 297 | 320 | ||
| 298 | if (device.IsShaderStorageImageReadWithoutFormatSupported()) { | 321 | if (device.IsFormatlessImageLoadSupported()) { |
| 299 | AddCapability(spv::Capability::StorageImageReadWithoutFormat); | 322 | AddCapability(spv::Capability::StorageImageReadWithoutFormat); |
| 300 | } | 323 | } |
| 301 | 324 | ||
| @@ -317,25 +340,29 @@ public: | |||
| 317 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, | 340 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, |
| 318 | header.common2.threads_per_input_primitive); | 341 | header.common2.threads_per_input_primitive); |
| 319 | break; | 342 | break; |
| 320 | case ShaderType::TesselationEval: | 343 | case ShaderType::TesselationEval: { |
| 344 | const auto& info = registry.GetGraphicsInfo(); | ||
| 321 | AddCapability(spv::Capability::Tessellation); | 345 | AddCapability(spv::Capability::Tessellation); |
| 322 | AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); | 346 | AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); |
| 323 | AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive)); | 347 | AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive)); |
| 324 | AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing)); | 348 | AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing)); |
| 325 | AddExecutionMode(main, specialization.tessellation.clockwise | 349 | AddExecutionMode(main, info.tessellation_clockwise |
| 326 | ? spv::ExecutionMode::VertexOrderCw | 350 | ? spv::ExecutionMode::VertexOrderCw |
| 327 | : spv::ExecutionMode::VertexOrderCcw); | 351 | : spv::ExecutionMode::VertexOrderCcw); |
| 328 | break; | 352 | break; |
| 329 | case ShaderType::Geometry: | 353 | } |
| 354 | case ShaderType::Geometry: { | ||
| 355 | const auto& info = registry.GetGraphicsInfo(); | ||
| 330 | AddCapability(spv::Capability::Geometry); | 356 | AddCapability(spv::Capability::Geometry); |
| 331 | AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); | 357 | AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); |
| 332 | AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology)); | 358 | AddExecutionMode(main, GetExecutionMode(info.primitive_topology)); |
| 333 | AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); | 359 | AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); |
| 334 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, | 360 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, |
| 335 | header.common4.max_output_vertices); | 361 | header.common4.max_output_vertices); |
| 336 | // TODO(Rodrigo): Where can we get this info from? | 362 | // TODO(Rodrigo): Where can we get this info from? |
| 337 | AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); | 363 | AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); |
| 338 | break; | 364 | break; |
| 365 | } | ||
| 339 | case ShaderType::Fragment: | 366 | case ShaderType::Fragment: |
| 340 | AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); | 367 | AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); |
| 341 | AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); | 368 | AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); |
| @@ -544,7 +571,8 @@ private: | |||
| 544 | if (stage != ShaderType::Geometry) { | 571 | if (stage != ShaderType::Geometry) { |
| 545 | return; | 572 | return; |
| 546 | } | 573 | } |
| 547 | const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology); | 574 | const auto& info = registry.GetGraphicsInfo(); |
| 575 | const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology); | ||
| 548 | DeclareInputVertexArray(num_input); | 576 | DeclareInputVertexArray(num_input); |
| 549 | DeclareOutputVertex(); | 577 | DeclareOutputVertex(); |
| 550 | } | 578 | } |
| @@ -741,12 +769,34 @@ private: | |||
| 741 | } | 769 | } |
| 742 | 770 | ||
| 743 | void DeclareOutputAttributes() { | 771 | void DeclareOutputAttributes() { |
| 772 | if (stage == ShaderType::Compute || stage == ShaderType::Fragment) { | ||
| 773 | return; | ||
| 774 | } | ||
| 775 | |||
| 776 | UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex); | ||
| 744 | for (const auto index : ir.GetOutputAttributes()) { | 777 | for (const auto index : ir.GetOutputAttributes()) { |
| 745 | if (!IsGenericAttribute(index)) { | 778 | if (!IsGenericAttribute(index)) { |
| 746 | continue; | 779 | continue; |
| 747 | } | 780 | } |
| 748 | const u32 location = GetGenericAttributeLocation(index); | 781 | DeclareOutputAttribute(index); |
| 749 | Id type = t_float4; | 782 | } |
| 783 | } | ||
| 784 | |||
| 785 | void DeclareOutputAttribute(Attribute::Index index) { | ||
| 786 | static constexpr std::string_view swizzle = "xyzw"; | ||
| 787 | |||
| 788 | const u32 location = GetGenericAttributeLocation(index); | ||
| 789 | u8 element = 0; | ||
| 790 | while (element < 4) { | ||
| 791 | const std::size_t remainder = 4 - element; | ||
| 792 | |||
| 793 | std::size_t num_components = remainder; | ||
| 794 | const std::optional tfb = GetTransformFeedbackInfo(index, element); | ||
| 795 | if (tfb) { | ||
| 796 | num_components = tfb->components; | ||
| 797 | } | ||
| 798 | |||
| 799 | Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1); | ||
| 750 | Id varying_default = v_varying_default; | 800 | Id varying_default = v_varying_default; |
| 751 | if (IsOutputAttributeArray()) { | 801 | if (IsOutputAttributeArray()) { |
| 752 | const u32 num = GetNumOutputVertices(); | 802 | const u32 num = GetNumOutputVertices(); |
| @@ -759,15 +809,47 @@ private: | |||
| 759 | } | 809 | } |
| 760 | type = TypePointer(spv::StorageClass::Output, type); | 810 | type = TypePointer(spv::StorageClass::Output, type); |
| 761 | 811 | ||
| 812 | std::string name = fmt::format("out_attr{}", location); | ||
| 813 | if (num_components < 4 || element > 0) { | ||
| 814 | name = fmt::format("{}_{}", name, swizzle.substr(element, num_components)); | ||
| 815 | } | ||
| 816 | |||
| 762 | const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); | 817 | const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); |
| 763 | Name(AddGlobalVariable(id), fmt::format("out_attr{}", location)); | 818 | Name(AddGlobalVariable(id), name); |
| 764 | output_attributes.emplace(index, id); | 819 | |
| 820 | GenericVaryingDescription description; | ||
| 821 | description.id = id; | ||
| 822 | description.first_element = element; | ||
| 823 | description.is_scalar = num_components == 1; | ||
| 824 | for (u32 i = 0; i < num_components; ++i) { | ||
| 825 | const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i); | ||
| 826 | output_attributes.emplace(offset, description); | ||
| 827 | } | ||
| 765 | interfaces.push_back(id); | 828 | interfaces.push_back(id); |
| 766 | 829 | ||
| 767 | Decorate(id, spv::Decoration::Location, location); | 830 | Decorate(id, spv::Decoration::Location, location); |
| 831 | if (element > 0) { | ||
| 832 | Decorate(id, spv::Decoration::Component, static_cast<u32>(element)); | ||
| 833 | } | ||
| 834 | if (tfb && device.IsExtTransformFeedbackSupported()) { | ||
| 835 | Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer)); | ||
| 836 | Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride)); | ||
| 837 | Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset)); | ||
| 838 | } | ||
| 839 | |||
| 840 | element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); | ||
| 768 | } | 841 | } |
| 769 | } | 842 | } |
| 770 | 843 | ||
| 844 | std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) { | ||
| 845 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 846 | const auto it = transform_feedback.find(location); | ||
| 847 | if (it == transform_feedback.end()) { | ||
| 848 | return {}; | ||
| 849 | } | ||
| 850 | return it->second; | ||
| 851 | } | ||
| 852 | |||
| 771 | u32 DeclareConstantBuffers(u32 binding) { | 853 | u32 DeclareConstantBuffers(u32 binding) { |
| 772 | for (const auto& [index, size] : ir.GetConstantBuffers()) { | 854 | for (const auto& [index, size] : ir.GetConstantBuffers()) { |
| 773 | const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo | 855 | const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo |
| @@ -833,16 +915,20 @@ private: | |||
| 833 | constexpr int sampled = 1; | 915 | constexpr int sampled = 1; |
| 834 | constexpr auto format = spv::ImageFormat::Unknown; | 916 | constexpr auto format = spv::ImageFormat::Unknown; |
| 835 | const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); | 917 | const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); |
| 836 | const Id sampled_image_type = TypeSampledImage(image_type); | 918 | const Id sampler_type = TypeSampledImage(image_type); |
| 837 | const Id pointer_type = | 919 | const Id sampler_pointer_type = |
| 838 | TypePointer(spv::StorageClass::UniformConstant, sampled_image_type); | 920 | TypePointer(spv::StorageClass::UniformConstant, sampler_type); |
| 921 | const Id type = sampler.IsIndexed() | ||
| 922 | ? TypeArray(sampler_type, Constant(t_uint, sampler.Size())) | ||
| 923 | : sampler_type; | ||
| 924 | const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type); | ||
| 839 | const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); | 925 | const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); |
| 840 | AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.GetIndex()))); | 926 | AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.GetIndex()))); |
| 841 | Decorate(id, spv::Decoration::Binding, binding++); | 927 | Decorate(id, spv::Decoration::Binding, binding++); |
| 842 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | 928 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); |
| 843 | 929 | ||
| 844 | sampled_images.emplace(sampler.GetIndex(), | 930 | sampled_images.emplace(sampler.GetIndex(), SampledImage{image_type, sampler_type, |
| 845 | SampledImage{image_type, sampled_image_type, id}); | 931 | sampler_pointer_type, id}); |
| 846 | } | 932 | } |
| 847 | return binding; | 933 | return binding; |
| 848 | } | 934 | } |
| @@ -893,7 +979,7 @@ private: | |||
| 893 | u32 GetNumInputVertices() const { | 979 | u32 GetNumInputVertices() const { |
| 894 | switch (stage) { | 980 | switch (stage) { |
| 895 | case ShaderType::Geometry: | 981 | case ShaderType::Geometry: |
| 896 | return GetNumPrimitiveTopologyVertices(specialization.primitive_topology); | 982 | return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology); |
| 897 | case ShaderType::TesselationControl: | 983 | case ShaderType::TesselationControl: |
| 898 | case ShaderType::TesselationEval: | 984 | case ShaderType::TesselationEval: |
| 899 | return NumInputPatches; | 985 | return NumInputPatches; |
| @@ -1341,8 +1427,14 @@ private: | |||
| 1341 | } | 1427 | } |
| 1342 | default: | 1428 | default: |
| 1343 | if (IsGenericAttribute(attribute)) { | 1429 | if (IsGenericAttribute(attribute)) { |
| 1344 | const Id composite = output_attributes.at(attribute); | 1430 | const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element); |
| 1345 | return {ArrayPass(t_out_float, composite, {element}), Type::Float}; | 1431 | const GenericVaryingDescription description = output_attributes.at(offset); |
| 1432 | const Id composite = description.id; | ||
| 1433 | std::vector<u32> indices; | ||
| 1434 | if (!description.is_scalar) { | ||
| 1435 | indices.push_back(element - description.first_element); | ||
| 1436 | } | ||
| 1437 | return {ArrayPass(t_out_float, composite, indices), Type::Float}; | ||
| 1346 | } | 1438 | } |
| 1347 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", | 1439 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", |
| 1348 | static_cast<u32>(attribute)); | 1440 | static_cast<u32>(attribute)); |
| @@ -1525,7 +1617,12 @@ private: | |||
| 1525 | ASSERT(!meta.sampler.IsBuffer()); | 1617 | ASSERT(!meta.sampler.IsBuffer()); |
| 1526 | 1618 | ||
| 1527 | const auto& entry = sampled_images.at(meta.sampler.GetIndex()); | 1619 | const auto& entry = sampled_images.at(meta.sampler.GetIndex()); |
| 1528 | return OpLoad(entry.sampled_image_type, entry.sampler); | 1620 | Id sampler = entry.variable; |
| 1621 | if (meta.sampler.IsIndexed()) { | ||
| 1622 | const Id index = AsInt(Visit(meta.index)); | ||
| 1623 | sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index); | ||
| 1624 | } | ||
| 1625 | return OpLoad(entry.sampler_type, sampler); | ||
| 1529 | } | 1626 | } |
| 1530 | 1627 | ||
| 1531 | Id GetTextureImage(Operation operation) { | 1628 | Id GetTextureImage(Operation operation) { |
| @@ -1783,7 +1880,7 @@ private: | |||
| 1783 | } | 1880 | } |
| 1784 | 1881 | ||
| 1785 | Expression ImageLoad(Operation operation) { | 1882 | Expression ImageLoad(Operation operation) { |
| 1786 | if (!device.IsShaderStorageImageReadWithoutFormatSupported()) { | 1883 | if (!device.IsFormatlessImageLoadSupported()) { |
| 1787 | return {v_float_zero, Type::Float}; | 1884 | return {v_float_zero, Type::Float}; |
| 1788 | } | 1885 | } |
| 1789 | 1886 | ||
| @@ -2211,16 +2308,14 @@ private: | |||
| 2211 | switch (specialization.attribute_types.at(location)) { | 2308 | switch (specialization.attribute_types.at(location)) { |
| 2212 | case Maxwell::VertexAttribute::Type::SignedNorm: | 2309 | case Maxwell::VertexAttribute::Type::SignedNorm: |
| 2213 | case Maxwell::VertexAttribute::Type::UnsignedNorm: | 2310 | case Maxwell::VertexAttribute::Type::UnsignedNorm: |
| 2311 | case Maxwell::VertexAttribute::Type::UnsignedScaled: | ||
| 2312 | case Maxwell::VertexAttribute::Type::SignedScaled: | ||
| 2214 | case Maxwell::VertexAttribute::Type::Float: | 2313 | case Maxwell::VertexAttribute::Type::Float: |
| 2215 | return {Type::Float, t_in_float, t_in_float4}; | 2314 | return {Type::Float, t_in_float, t_in_float4}; |
| 2216 | case Maxwell::VertexAttribute::Type::SignedInt: | 2315 | case Maxwell::VertexAttribute::Type::SignedInt: |
| 2217 | return {Type::Int, t_in_int, t_in_int4}; | 2316 | return {Type::Int, t_in_int, t_in_int4}; |
| 2218 | case Maxwell::VertexAttribute::Type::UnsignedInt: | 2317 | case Maxwell::VertexAttribute::Type::UnsignedInt: |
| 2219 | return {Type::Uint, t_in_uint, t_in_uint4}; | 2318 | return {Type::Uint, t_in_uint, t_in_uint4}; |
| 2220 | case Maxwell::VertexAttribute::Type::UnsignedScaled: | ||
| 2221 | case Maxwell::VertexAttribute::Type::SignedScaled: | ||
| 2222 | UNIMPLEMENTED(); | ||
| 2223 | return {Type::Float, t_in_float, t_in_float4}; | ||
| 2224 | default: | 2319 | default: |
| 2225 | UNREACHABLE(); | 2320 | UNREACHABLE(); |
| 2226 | return {Type::Float, t_in_float, t_in_float4}; | 2321 | return {Type::Float, t_in_float, t_in_float4}; |
| @@ -2250,11 +2345,11 @@ private: | |||
| 2250 | std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { | 2345 | std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { |
| 2251 | switch (type) { | 2346 | switch (type) { |
| 2252 | case Type::Float: | 2347 | case Type::Float: |
| 2253 | return {nullptr, t_float2, t_float3, t_float4}; | 2348 | return {t_float, t_float2, t_float3, t_float4}; |
| 2254 | case Type::Int: | 2349 | case Type::Int: |
| 2255 | return {nullptr, t_int2, t_int3, t_int4}; | 2350 | return {t_int, t_int2, t_int3, t_int4}; |
| 2256 | case Type::Uint: | 2351 | case Type::Uint: |
| 2257 | return {nullptr, t_uint2, t_uint3, t_uint4}; | 2352 | return {t_uint, t_uint2, t_uint3, t_uint4}; |
| 2258 | default: | 2353 | default: |
| 2259 | UNIMPLEMENTED(); | 2354 | UNIMPLEMENTED(); |
| 2260 | return {}; | 2355 | return {}; |
| @@ -2487,7 +2582,9 @@ private: | |||
| 2487 | const ShaderIR& ir; | 2582 | const ShaderIR& ir; |
| 2488 | const ShaderType stage; | 2583 | const ShaderType stage; |
| 2489 | const Tegra::Shader::Header header; | 2584 | const Tegra::Shader::Header header; |
| 2585 | const Registry& registry; | ||
| 2490 | const Specialization& specialization; | 2586 | const Specialization& specialization; |
| 2587 | std::unordered_map<u8, VaryingTFB> transform_feedback; | ||
| 2491 | 2588 | ||
| 2492 | const Id t_void = Name(TypeVoid(), "void"); | 2589 | const Id t_void = Name(TypeVoid(), "void"); |
| 2493 | 2590 | ||
| @@ -2576,7 +2673,7 @@ private: | |||
| 2576 | Id shared_memory{}; | 2673 | Id shared_memory{}; |
| 2577 | std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; | 2674 | std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; |
| 2578 | std::map<Attribute::Index, Id> input_attributes; | 2675 | std::map<Attribute::Index, Id> input_attributes; |
| 2579 | std::map<Attribute::Index, Id> output_attributes; | 2676 | std::unordered_map<u8, GenericVaryingDescription> output_attributes; |
| 2580 | std::map<u32, Id> constant_buffers; | 2677 | std::map<u32, Id> constant_buffers; |
| 2581 | std::map<GlobalMemoryBase, Id> global_buffers; | 2678 | std::map<GlobalMemoryBase, Id> global_buffers; |
| 2582 | std::map<u32, TexelBuffer> texel_buffers; | 2679 | std::map<u32, TexelBuffer> texel_buffers; |
| @@ -2862,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 2862 | } | 2959 | } |
| 2863 | 2960 | ||
| 2864 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, | 2961 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 2865 | ShaderType stage, const Specialization& specialization) { | 2962 | ShaderType stage, const VideoCommon::Shader::Registry& registry, |
| 2866 | return SPIRVDecompiler(device, ir, stage, specialization).Assemble(); | 2963 | const Specialization& specialization) { |
| 2964 | return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); | ||
| 2867 | } | 2965 | } |
| 2868 | 2966 | ||
| 2869 | } // namespace Vulkan | 2967 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index f5dc14d9e..ffea4709e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "video_core/engines/maxwell_3d.h" | 16 | #include "video_core/engines/maxwell_3d.h" |
| 17 | #include "video_core/engines/shader_type.h" | 17 | #include "video_core/engines/shader_type.h" |
| 18 | #include "video_core/shader/registry.h" | ||
| 18 | #include "video_core/shader/shader_ir.h" | 19 | #include "video_core/shader/shader_ir.h" |
| 19 | 20 | ||
| 20 | namespace Vulkan { | 21 | namespace Vulkan { |
| @@ -91,17 +92,9 @@ struct Specialization final { | |||
| 91 | u32 shared_memory_size{}; | 92 | u32 shared_memory_size{}; |
| 92 | 93 | ||
| 93 | // Graphics specific | 94 | // Graphics specific |
| 94 | Maxwell::PrimitiveTopology primitive_topology{}; | ||
| 95 | std::optional<float> point_size{}; | 95 | std::optional<float> point_size{}; |
| 96 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; | 96 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; |
| 97 | bool ndc_minus_one_to_one{}; | 97 | bool ndc_minus_one_to_one{}; |
| 98 | |||
| 99 | // Tessellation specific | ||
| 100 | struct { | ||
| 101 | Maxwell::TessellationPrimitive primitive{}; | ||
| 102 | Maxwell::TessellationSpacing spacing{}; | ||
| 103 | bool clockwise{}; | ||
| 104 | } tessellation; | ||
| 105 | }; | 98 | }; |
| 106 | // Old gcc versions don't consider this trivially copyable. | 99 | // Old gcc versions don't consider this trivially copyable. |
| 107 | // static_assert(std::is_trivially_copyable_v<Specialization>); | 100 | // static_assert(std::is_trivially_copyable_v<Specialization>); |
| @@ -114,6 +107,8 @@ struct SPIRVShader { | |||
| 114 | ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); | 107 | ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); |
| 115 | 108 | ||
| 116 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, | 109 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 117 | Tegra::Engines::ShaderType stage, const Specialization& specialization); | 110 | Tegra::Engines::ShaderType stage, |
| 111 | const VideoCommon::Shader::Registry& registry, | ||
| 112 | const Specialization& specialization); | ||
| 118 | 113 | ||
| 119 | } // namespace Vulkan | 114 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 171d78afc..374959f82 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | |||
| @@ -73,7 +73,8 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_ | |||
| 73 | VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) { | 73 | VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) { |
| 74 | const auto usage = | 74 | const auto usage = |
| 75 | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | | 75 | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | |
| 76 | vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eIndexBuffer; | 76 | vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer | |
| 77 | vk::BufferUsageFlagBits::eIndexBuffer; | ||
| 77 | const u32 log2 = Common::Log2Ceil64(size); | 78 | const u32 log2 = Common::Log2Ceil64(size); |
| 78 | const vk::BufferCreateInfo buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0, | 79 | const vk::BufferCreateInfo buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0, |
| 79 | nullptr); | 80 | nullptr); |
| @@ -99,7 +100,6 @@ void VKStagingBufferPool::ReleaseCache(bool host_visible) { | |||
| 99 | } | 100 | } |
| 100 | 101 | ||
| 101 | u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) { | 102 | u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) { |
| 102 | static constexpr u64 epochs_to_destroy = 180; | ||
| 103 | static constexpr std::size_t deletions_per_tick = 16; | 103 | static constexpr std::size_t deletions_per_tick = 16; |
| 104 | 104 | ||
| 105 | auto& staging = cache[log2]; | 105 | auto& staging = cache[log2]; |
| @@ -107,6 +107,7 @@ u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t lo | |||
| 107 | const std::size_t old_size = entries.size(); | 107 | const std::size_t old_size = entries.size(); |
| 108 | 108 | ||
| 109 | const auto is_deleteable = [this](const auto& entry) { | 109 | const auto is_deleteable = [this](const auto& entry) { |
| 110 | static constexpr u64 epochs_to_destroy = 180; | ||
| 110 | return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed(); | 111 | return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed(); |
| 111 | }; | 112 | }; |
| 112 | const std::size_t begin_offset = staging.delete_index; | 113 | const std::size_t begin_offset = staging.delete_index; |
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp new file mode 100644 index 000000000..94a89e388 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp | |||
| @@ -0,0 +1,99 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstddef> | ||
| 7 | #include <iterator> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "core/core.h" | ||
| 11 | #include "video_core/dirty_flags.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/gpu.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | ||
| 15 | |||
| 16 | #define OFF(field_name) MAXWELL3D_REG_INDEX(field_name) | ||
| 17 | #define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32)) | ||
| 18 | |||
| 19 | namespace Vulkan { | ||
| 20 | |||
| 21 | namespace { | ||
| 22 | |||
| 23 | using namespace Dirty; | ||
| 24 | using namespace VideoCommon::Dirty; | ||
| 25 | using Tegra::Engines::Maxwell3D; | ||
| 26 | using Regs = Maxwell3D::Regs; | ||
| 27 | using Tables = Maxwell3D::DirtyState::Tables; | ||
| 28 | using Table = Maxwell3D::DirtyState::Table; | ||
| 29 | using Flags = Maxwell3D::DirtyState::Flags; | ||
| 30 | |||
| 31 | Flags MakeInvalidationFlags() { | ||
| 32 | Flags flags{}; | ||
| 33 | flags[Viewports] = true; | ||
| 34 | flags[Scissors] = true; | ||
| 35 | flags[DepthBias] = true; | ||
| 36 | flags[BlendConstants] = true; | ||
| 37 | flags[DepthBounds] = true; | ||
| 38 | flags[StencilProperties] = true; | ||
| 39 | return flags; | ||
| 40 | } | ||
| 41 | |||
| 42 | void SetupDirtyViewports(Tables& tables) { | ||
| 43 | FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports); | ||
| 44 | FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports); | ||
| 45 | tables[0][OFF(viewport_transform_enabled)] = Viewports; | ||
| 46 | } | ||
| 47 | |||
| 48 | void SetupDirtyScissors(Tables& tables) { | ||
| 49 | FillBlock(tables[0], OFF(scissor_test), NUM(scissor_test), Scissors); | ||
| 50 | } | ||
| 51 | |||
| 52 | void SetupDirtyDepthBias(Tables& tables) { | ||
| 53 | auto& table = tables[0]; | ||
| 54 | table[OFF(polygon_offset_units)] = DepthBias; | ||
| 55 | table[OFF(polygon_offset_clamp)] = DepthBias; | ||
| 56 | table[OFF(polygon_offset_factor)] = DepthBias; | ||
| 57 | } | ||
| 58 | |||
| 59 | void SetupDirtyBlendConstants(Tables& tables) { | ||
| 60 | FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendConstants); | ||
| 61 | } | ||
| 62 | |||
| 63 | void SetupDirtyDepthBounds(Tables& tables) { | ||
| 64 | FillBlock(tables[0], OFF(depth_bounds), NUM(depth_bounds), DepthBounds); | ||
| 65 | } | ||
| 66 | |||
| 67 | void SetupDirtyStencilProperties(Tables& tables) { | ||
| 68 | auto& table = tables[0]; | ||
| 69 | table[OFF(stencil_two_side_enable)] = StencilProperties; | ||
| 70 | table[OFF(stencil_front_func_ref)] = StencilProperties; | ||
| 71 | table[OFF(stencil_front_mask)] = StencilProperties; | ||
| 72 | table[OFF(stencil_front_func_mask)] = StencilProperties; | ||
| 73 | table[OFF(stencil_back_func_ref)] = StencilProperties; | ||
| 74 | table[OFF(stencil_back_mask)] = StencilProperties; | ||
| 75 | table[OFF(stencil_back_func_mask)] = StencilProperties; | ||
| 76 | } | ||
| 77 | |||
| 78 | } // Anonymous namespace | ||
| 79 | |||
| 80 | StateTracker::StateTracker(Core::System& system) | ||
| 81 | : system{system}, invalidation_flags{MakeInvalidationFlags()} {} | ||
| 82 | |||
| 83 | void StateTracker::Initialize() { | ||
| 84 | auto& dirty = system.GPU().Maxwell3D().dirty; | ||
| 85 | auto& tables = dirty.tables; | ||
| 86 | SetupDirtyRenderTargets(tables); | ||
| 87 | SetupDirtyViewports(tables); | ||
| 88 | SetupDirtyScissors(tables); | ||
| 89 | SetupDirtyDepthBias(tables); | ||
| 90 | SetupDirtyBlendConstants(tables); | ||
| 91 | SetupDirtyDepthBounds(tables); | ||
| 92 | SetupDirtyStencilProperties(tables); | ||
| 93 | } | ||
| 94 | |||
| 95 | void StateTracker::InvalidateCommandBufferState() { | ||
| 96 | system.GPU().Maxwell3D().dirty.flags |= invalidation_flags; | ||
| 97 | } | ||
| 98 | |||
| 99 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h new file mode 100644 index 000000000..03bc415b2 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstddef> | ||
| 8 | #include <limits> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "core/core.h" | ||
| 12 | #include "video_core/dirty_flags.h" | ||
| 13 | #include "video_core/engines/maxwell_3d.h" | ||
| 14 | |||
| 15 | namespace Vulkan { | ||
| 16 | |||
| 17 | namespace Dirty { | ||
| 18 | |||
| 19 | enum : u8 { | ||
| 20 | First = VideoCommon::Dirty::LastCommonEntry, | ||
| 21 | |||
| 22 | Viewports, | ||
| 23 | Scissors, | ||
| 24 | DepthBias, | ||
| 25 | BlendConstants, | ||
| 26 | DepthBounds, | ||
| 27 | StencilProperties, | ||
| 28 | |||
| 29 | Last | ||
| 30 | }; | ||
| 31 | static_assert(Last <= std::numeric_limits<u8>::max()); | ||
| 32 | |||
| 33 | } // namespace Dirty | ||
| 34 | |||
| 35 | class StateTracker { | ||
| 36 | public: | ||
| 37 | explicit StateTracker(Core::System& system); | ||
| 38 | |||
| 39 | void Initialize(); | ||
| 40 | |||
| 41 | void InvalidateCommandBufferState(); | ||
| 42 | |||
| 43 | bool TouchViewports() { | ||
| 44 | return Exchange(Dirty::Viewports, false); | ||
| 45 | } | ||
| 46 | |||
| 47 | bool TouchScissors() { | ||
| 48 | return Exchange(Dirty::Scissors, false); | ||
| 49 | } | ||
| 50 | |||
| 51 | bool TouchDepthBias() { | ||
| 52 | return Exchange(Dirty::DepthBias, false); | ||
| 53 | } | ||
| 54 | |||
| 55 | bool TouchBlendConstants() { | ||
| 56 | return Exchange(Dirty::BlendConstants, false); | ||
| 57 | } | ||
| 58 | |||
| 59 | bool TouchDepthBounds() { | ||
| 60 | return Exchange(Dirty::DepthBounds, false); | ||
| 61 | } | ||
| 62 | |||
| 63 | bool TouchStencilProperties() { | ||
| 64 | return Exchange(Dirty::StencilProperties, false); | ||
| 65 | } | ||
| 66 | |||
| 67 | private: | ||
| 68 | bool Exchange(std::size_t id, bool new_value) const noexcept { | ||
| 69 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 70 | const bool is_dirty = flags[id]; | ||
| 71 | flags[id] = new_value; | ||
| 72 | return is_dirty; | ||
| 73 | } | ||
| 74 | |||
| 75 | Core::System& system; | ||
| 76 | Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags; | ||
| 77 | }; | ||
| 78 | |||
| 79 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index f47b691a8..9e73fa9cd 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp | |||
| @@ -141,11 +141,6 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities | |||
| 141 | 141 | ||
| 142 | const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)}; | 142 | const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)}; |
| 143 | const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)}; | 143 | const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)}; |
| 144 | extent = ChooseSwapExtent(capabilities, width, height); | ||
| 145 | |||
| 146 | current_width = extent.width; | ||
| 147 | current_height = extent.height; | ||
| 148 | current_srgb = srgb; | ||
| 149 | 144 | ||
| 150 | u32 requested_image_count{capabilities.minImageCount + 1}; | 145 | u32 requested_image_count{capabilities.minImageCount + 1}; |
| 151 | if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { | 146 | if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { |
| @@ -153,10 +148,9 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities | |||
| 153 | } | 148 | } |
| 154 | 149 | ||
| 155 | vk::SwapchainCreateInfoKHR swapchain_ci( | 150 | vk::SwapchainCreateInfoKHR swapchain_ci( |
| 156 | {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace, | 151 | {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace, {}, 1, |
| 157 | extent, 1, vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {}, | 152 | vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {}, capabilities.currentTransform, |
| 158 | capabilities.currentTransform, vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false, | 153 | vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false, {}); |
| 159 | {}); | ||
| 160 | 154 | ||
| 161 | const u32 graphics_family{device.GetGraphicsFamily()}; | 155 | const u32 graphics_family{device.GetGraphicsFamily()}; |
| 162 | const u32 present_family{device.GetPresentFamily()}; | 156 | const u32 present_family{device.GetPresentFamily()}; |
| @@ -169,9 +163,18 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities | |||
| 169 | swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive; | 163 | swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive; |
| 170 | } | 164 | } |
| 171 | 165 | ||
| 166 | // Request the size again to reduce the possibility of a TOCTOU race condition. | ||
| 167 | const auto updated_capabilities = physical_device.getSurfaceCapabilitiesKHR(surface, dld); | ||
| 168 | swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); | ||
| 169 | // Don't add code within this and the swapchain creation. | ||
| 172 | const auto dev{device.GetLogical()}; | 170 | const auto dev{device.GetLogical()}; |
| 173 | swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld); | 171 | swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld); |
| 174 | 172 | ||
| 173 | extent = swapchain_ci.imageExtent; | ||
| 174 | current_width = extent.width; | ||
| 175 | current_height = extent.height; | ||
| 176 | current_srgb = srgb; | ||
| 177 | |||
| 175 | images = dev.getSwapchainImagesKHR(*swapchain, dld); | 178 | images = dev.getSwapchainImagesKHR(*swapchain, dld); |
| 176 | image_count = static_cast<u32>(images.size()); | 179 | image_count = static_cast<u32>(images.size()); |
| 177 | image_format = surface_format.format; | 180 | image_format = surface_format.format; |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 51b0d38a6..26175921b 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include "video_core/renderer_vulkan/vk_device.h" | 22 | #include "video_core/renderer_vulkan/vk_device.h" |
| 23 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 23 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 24 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 24 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 25 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 25 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 26 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 26 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 27 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 27 | #include "video_core/surface.h" | 28 | #include "video_core/surface.h" |
| @@ -51,6 +52,9 @@ vk::ImageType SurfaceTargetToImage(SurfaceTarget target) { | |||
| 51 | return vk::ImageType::e2D; | 52 | return vk::ImageType::e2D; |
| 52 | case SurfaceTarget::Texture3D: | 53 | case SurfaceTarget::Texture3D: |
| 53 | return vk::ImageType::e3D; | 54 | return vk::ImageType::e3D; |
| 55 | case SurfaceTarget::TextureBuffer: | ||
| 56 | UNREACHABLE(); | ||
| 57 | return {}; | ||
| 54 | } | 58 | } |
| 55 | UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target)); | 59 | UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target)); |
| 56 | return {}; | 60 | return {}; |
| @@ -272,7 +276,6 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { | |||
| 272 | 276 | ||
| 273 | for (u32 level = 0; level < params.num_levels; ++level) { | 277 | for (u32 level = 0; level < params.num_levels; ++level) { |
| 274 | vk::BufferImageCopy copy = GetBufferImageCopy(level); | 278 | vk::BufferImageCopy copy = GetBufferImageCopy(level); |
| 275 | const auto& dld = device.GetDispatchLoader(); | ||
| 276 | if (image->GetAspectMask() == | 279 | if (image->GetAspectMask() == |
| 277 | (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { | 280 | (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { |
| 278 | vk::BufferImageCopy depth = copy; | 281 | vk::BufferImageCopy depth = copy; |
| @@ -421,7 +424,6 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, | |||
| 421 | dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer, | 424 | dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer, |
| 422 | vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal); | 425 | vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal); |
| 423 | 426 | ||
| 424 | const auto& dld{device.GetDispatchLoader()}; | ||
| 425 | const vk::ImageSubresourceLayers src_subresource( | 427 | const vk::ImageSubresourceLayers src_subresource( |
| 426 | src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers); | 428 | src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers); |
| 427 | const vk::ImageSubresourceLayers dst_subresource( | 429 | const vk::ImageSubresourceLayers dst_subresource( |
| @@ -457,7 +459,6 @@ void VKTextureCache::ImageBlit(View& src_view, View& dst_view, | |||
| 457 | dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right}); | 459 | dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right}); |
| 458 | const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; | 460 | const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; |
| 459 | 461 | ||
| 460 | const auto& dld{device.GetDispatchLoader()}; | ||
| 461 | scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, | 462 | scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, |
| 462 | is_linear](auto cmdbuf, auto& dld) { | 463 | is_linear](auto cmdbuf, auto& dld) { |
| 463 | cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, | 464 | cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index d3edbe80c..22e3d34de 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -151,6 +151,10 @@ public: | |||
| 151 | return params.GetMipHeight(base_level); | 151 | return params.GetMipHeight(base_level); |
| 152 | } | 152 | } |
| 153 | 153 | ||
| 154 | u32 GetNumLayers() const { | ||
| 155 | return num_layers; | ||
| 156 | } | ||
| 157 | |||
| 154 | bool IsBufferView() const { | 158 | bool IsBufferView() const { |
| 155 | return buffer_view; | 159 | return buffer_view; |
| 156 | } | 160 | } |
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp deleted file mode 100644 index 0638be8cb..000000000 --- a/src/video_core/shader/const_buffer_locker.cpp +++ /dev/null | |||
| @@ -1,126 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <tuple> | ||
| 7 | |||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/engines/shader_type.h" | ||
| 11 | #include "video_core/shader/const_buffer_locker.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Shader { | ||
| 14 | |||
| 15 | using Tegra::Engines::SamplerDescriptor; | ||
| 16 | |||
| 17 | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) | ||
| 18 | : stage{shader_stage} {} | ||
| 19 | |||
| 20 | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, | ||
| 21 | Tegra::Engines::ConstBufferEngineInterface& engine) | ||
| 22 | : stage{shader_stage}, engine{&engine} {} | ||
| 23 | |||
| 24 | ConstBufferLocker::~ConstBufferLocker() = default; | ||
| 25 | |||
| 26 | std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { | ||
| 27 | const std::pair<u32, u32> key = {buffer, offset}; | ||
| 28 | const auto iter = keys.find(key); | ||
| 29 | if (iter != keys.end()) { | ||
| 30 | return iter->second; | ||
| 31 | } | ||
| 32 | if (!engine) { | ||
| 33 | return std::nullopt; | ||
| 34 | } | ||
| 35 | const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); | ||
| 36 | keys.emplace(key, value); | ||
| 37 | return value; | ||
| 38 | } | ||
| 39 | |||
| 40 | std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) { | ||
| 41 | const u32 key = offset; | ||
| 42 | const auto iter = bound_samplers.find(key); | ||
| 43 | if (iter != bound_samplers.end()) { | ||
| 44 | return iter->second; | ||
| 45 | } | ||
| 46 | if (!engine) { | ||
| 47 | return std::nullopt; | ||
| 48 | } | ||
| 49 | const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); | ||
| 50 | bound_samplers.emplace(key, value); | ||
| 51 | return value; | ||
| 52 | } | ||
| 53 | |||
| 54 | std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler( | ||
| 55 | u32 buffer, u32 offset) { | ||
| 56 | const std::pair key = {buffer, offset}; | ||
| 57 | const auto iter = bindless_samplers.find(key); | ||
| 58 | if (iter != bindless_samplers.end()) { | ||
| 59 | return iter->second; | ||
| 60 | } | ||
| 61 | if (!engine) { | ||
| 62 | return std::nullopt; | ||
| 63 | } | ||
| 64 | const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); | ||
| 65 | bindless_samplers.emplace(key, value); | ||
| 66 | return value; | ||
| 67 | } | ||
| 68 | |||
| 69 | std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() { | ||
| 70 | if (bound_buffer_saved) { | ||
| 71 | return bound_buffer; | ||
| 72 | } | ||
| 73 | if (!engine) { | ||
| 74 | return std::nullopt; | ||
| 75 | } | ||
| 76 | bound_buffer_saved = true; | ||
| 77 | bound_buffer = engine->GetBoundBuffer(); | ||
| 78 | return bound_buffer; | ||
| 79 | } | ||
| 80 | |||
| 81 | void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { | ||
| 82 | keys.insert_or_assign({buffer, offset}, value); | ||
| 83 | } | ||
| 84 | |||
| 85 | void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { | ||
| 86 | bound_samplers.insert_or_assign(offset, sampler); | ||
| 87 | } | ||
| 88 | |||
| 89 | void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { | ||
| 90 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); | ||
| 91 | } | ||
| 92 | |||
| 93 | void ConstBufferLocker::SetBoundBuffer(u32 buffer) { | ||
| 94 | bound_buffer_saved = true; | ||
| 95 | bound_buffer = buffer; | ||
| 96 | } | ||
| 97 | |||
| 98 | bool ConstBufferLocker::IsConsistent() const { | ||
| 99 | if (!engine) { | ||
| 100 | return false; | ||
| 101 | } | ||
| 102 | return std::all_of(keys.begin(), keys.end(), | ||
| 103 | [this](const auto& pair) { | ||
| 104 | const auto [cbuf, offset] = pair.first; | ||
| 105 | const auto value = pair.second; | ||
| 106 | return value == engine->AccessConstBuffer32(stage, cbuf, offset); | ||
| 107 | }) && | ||
| 108 | std::all_of(bound_samplers.begin(), bound_samplers.end(), | ||
| 109 | [this](const auto& sampler) { | ||
| 110 | const auto [key, value] = sampler; | ||
| 111 | return value == engine->AccessBoundSampler(stage, key); | ||
| 112 | }) && | ||
| 113 | std::all_of(bindless_samplers.begin(), bindless_samplers.end(), | ||
| 114 | [this](const auto& sampler) { | ||
| 115 | const auto [cbuf, offset] = sampler.first; | ||
| 116 | const auto value = sampler.second; | ||
| 117 | return value == engine->AccessBindlessSampler(stage, cbuf, offset); | ||
| 118 | }); | ||
| 119 | } | ||
| 120 | |||
| 121 | bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const { | ||
| 122 | return std::tie(keys, bound_samplers, bindless_samplers) == | ||
| 123 | std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); | ||
| 124 | } | ||
| 125 | |||
| 126 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h deleted file mode 100644 index d3ea11087..000000000 --- a/src/video_core/shader/const_buffer_locker.h +++ /dev/null | |||
| @@ -1,103 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "common/hash.h" | ||
| 11 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 12 | #include "video_core/engines/shader_type.h" | ||
| 13 | #include "video_core/guest_driver.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; | ||
| 18 | using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; | ||
| 19 | using BindlessSamplerMap = | ||
| 20 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | ||
| 21 | |||
| 22 | /** | ||
| 23 | * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader | ||
| 24 | * compiler. with it, the shader can obtain required data from GPU state and store it for disk | ||
| 25 | * shader compilation. | ||
| 26 | */ | ||
| 27 | class ConstBufferLocker { | ||
| 28 | public: | ||
| 29 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); | ||
| 30 | |||
| 31 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, | ||
| 32 | Tegra::Engines::ConstBufferEngineInterface& engine); | ||
| 33 | |||
| 34 | ~ConstBufferLocker(); | ||
| 35 | |||
| 36 | /// Retrieves a key from the locker, if it's registered, it will give the registered value, if | ||
| 37 | /// not it will obtain it from maxwell3d and register it. | ||
| 38 | std::optional<u32> ObtainKey(u32 buffer, u32 offset); | ||
| 39 | |||
| 40 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); | ||
| 41 | |||
| 42 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | ||
| 43 | |||
| 44 | std::optional<u32> ObtainBoundBuffer(); | ||
| 45 | |||
| 46 | /// Inserts a key. | ||
| 47 | void InsertKey(u32 buffer, u32 offset, u32 value); | ||
| 48 | |||
| 49 | /// Inserts a bound sampler key. | ||
| 50 | void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 51 | |||
| 52 | /// Inserts a bindless sampler key. | ||
| 53 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 54 | |||
| 55 | /// Set the bound buffer for this locker. | ||
| 56 | void SetBoundBuffer(u32 buffer); | ||
| 57 | |||
| 58 | /// Checks keys and samplers against engine's current const buffers. Returns true if they are | ||
| 59 | /// the same value, false otherwise; | ||
| 60 | bool IsConsistent() const; | ||
| 61 | |||
| 62 | /// Returns true if the keys are equal to the other ones in the locker. | ||
| 63 | bool HasEqualKeys(const ConstBufferLocker& rhs) const; | ||
| 64 | |||
| 65 | /// Gives an getter to the const buffer keys in the database. | ||
| 66 | const KeyMap& GetKeys() const { | ||
| 67 | return keys; | ||
| 68 | } | ||
| 69 | |||
| 70 | /// Gets samplers database. | ||
| 71 | const BoundSamplerMap& GetBoundSamplers() const { | ||
| 72 | return bound_samplers; | ||
| 73 | } | ||
| 74 | |||
| 75 | /// Gets bindless samplers database. | ||
| 76 | const BindlessSamplerMap& GetBindlessSamplers() const { | ||
| 77 | return bindless_samplers; | ||
| 78 | } | ||
| 79 | |||
| 80 | /// Gets bound buffer used on this shader | ||
| 81 | u32 GetBoundBuffer() const { | ||
| 82 | return bound_buffer; | ||
| 83 | } | ||
| 84 | |||
| 85 | /// Obtains access to the guest driver's profile. | ||
| 86 | VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { | ||
| 87 | if (engine) { | ||
| 88 | return &engine->AccessGuestDriverProfile(); | ||
| 89 | } | ||
| 90 | return nullptr; | ||
| 91 | } | ||
| 92 | |||
| 93 | private: | ||
| 94 | const Tegra::Engines::ShaderType stage; | ||
| 95 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | ||
| 96 | KeyMap keys; | ||
| 97 | BoundSamplerMap bound_samplers; | ||
| 98 | BindlessSamplerMap bindless_samplers; | ||
| 99 | bool bound_buffer_saved{}; | ||
| 100 | u32 bound_buffer{}; | ||
| 101 | }; | ||
| 102 | |||
| 103 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 0229733b6..2e2711350 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "video_core/shader/ast.h" | 14 | #include "video_core/shader/ast.h" |
| 15 | #include "video_core/shader/control_flow.h" | 15 | #include "video_core/shader/control_flow.h" |
| 16 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | 17 | #include "video_core/shader/shader_ir.h" |
| 17 | 18 | ||
| 18 | namespace VideoCommon::Shader { | 19 | namespace VideoCommon::Shader { |
| @@ -64,11 +65,11 @@ struct BlockInfo { | |||
| 64 | }; | 65 | }; |
| 65 | 66 | ||
| 66 | struct CFGRebuildState { | 67 | struct CFGRebuildState { |
| 67 | explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) | 68 | explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry) |
| 68 | : program_code{program_code}, locker{locker}, start{start} {} | 69 | : program_code{program_code}, registry{registry}, start{start} {} |
| 69 | 70 | ||
| 70 | const ProgramCode& program_code; | 71 | const ProgramCode& program_code; |
| 71 | ConstBufferLocker& locker; | 72 | Registry& registry; |
| 72 | u32 start{}; | 73 | u32 start{}; |
| 73 | std::vector<BlockInfo> block_info; | 74 | std::vector<BlockInfo> block_info; |
| 74 | std::list<u32> inspect_queries; | 75 | std::list<u32> inspect_queries; |
| @@ -438,7 +439,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) | |||
| 438 | const s32 pc_target = offset + result.relative_position; | 439 | const s32 pc_target = offset + result.relative_position; |
| 439 | std::vector<CaseBranch> branches; | 440 | std::vector<CaseBranch> branches; |
| 440 | for (u32 i = 0; i < result.entries; i++) { | 441 | for (u32 i = 0; i < result.entries; i++) { |
| 441 | auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4); | 442 | auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); |
| 442 | if (!key) { | 443 | if (!key) { |
| 443 | return {ParseResult::AbnormalFlow, parse_info}; | 444 | return {ParseResult::AbnormalFlow, parse_info}; |
| 444 | } | 445 | } |
| @@ -656,14 +657,14 @@ void DecompileShader(CFGRebuildState& state) { | |||
| 656 | 657 | ||
| 657 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | 658 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, |
| 658 | const CompilerSettings& settings, | 659 | const CompilerSettings& settings, |
| 659 | ConstBufferLocker& locker) { | 660 | Registry& registry) { |
| 660 | auto result_out = std::make_unique<ShaderCharacteristics>(); | 661 | auto result_out = std::make_unique<ShaderCharacteristics>(); |
| 661 | if (settings.depth == CompileDepth::BruteForce) { | 662 | if (settings.depth == CompileDepth::BruteForce) { |
| 662 | result_out->settings.depth = CompileDepth::BruteForce; | 663 | result_out->settings.depth = CompileDepth::BruteForce; |
| 663 | return result_out; | 664 | return result_out; |
| 664 | } | 665 | } |
| 665 | 666 | ||
| 666 | CFGRebuildState state{program_code, start_address, locker}; | 667 | CFGRebuildState state{program_code, start_address, registry}; |
| 667 | // Inspect Code and generate blocks | 668 | // Inspect Code and generate blocks |
| 668 | state.labels.clear(); | 669 | state.labels.clear(); |
| 669 | state.labels.emplace(start_address); | 670 | state.labels.emplace(start_address); |
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 5304998b9..62a3510d8 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/shader/ast.h" | 13 | #include "video_core/shader/ast.h" |
| 14 | #include "video_core/shader/compiler_settings.h" | 14 | #include "video_core/shader/compiler_settings.h" |
| 15 | #include "video_core/shader/registry.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 16 | 17 | ||
| 17 | namespace VideoCommon::Shader { | 18 | namespace VideoCommon::Shader { |
| @@ -111,6 +112,6 @@ struct ShaderCharacteristics { | |||
| 111 | 112 | ||
| 112 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | 113 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, |
| 113 | const CompilerSettings& settings, | 114 | const CompilerSettings& settings, |
| 114 | ConstBufferLocker& locker); | 115 | Registry& registry); |
| 115 | 116 | ||
| 116 | } // namespace VideoCommon::Shader | 117 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 6b697ed5d..87ac9ac6c 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||
| 34 | return (absolute_offset % SchedPeriod) == 0; | 34 | return (absolute_offset % SchedPeriod) == 0; |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, | 37 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, |
| 38 | const std::list<Sampler>& used_samplers) { | 38 | const std::list<Sampler>& used_samplers) { |
| 39 | if (gpu_driver == nullptr) { | 39 | if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { |
| 40 | LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); | ||
| 41 | return; | ||
| 42 | } | ||
| 43 | if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { | ||
| 44 | return; | 40 | return; |
| 45 | } | 41 | } |
| 46 | u32 count{}; | 42 | u32 count{}; |
| @@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, | |||
| 53 | bound_offsets.emplace_back(sampler.GetOffset()); | 49 | bound_offsets.emplace_back(sampler.GetOffset()); |
| 54 | } | 50 | } |
| 55 | if (count > 1) { | 51 | if (count > 1) { |
| 56 | gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); | 52 | gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); |
| 57 | } | 53 | } |
| 58 | } | 54 | } |
| 59 | 55 | ||
| 60 | std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, | 56 | std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, |
| 61 | VideoCore::GuestDriverProfile* gpu_driver, | 57 | VideoCore::GuestDriverProfile& gpu_driver, |
| 62 | const std::list<Sampler>& used_samplers) { | 58 | const std::list<Sampler>& used_samplers) { |
| 63 | if (gpu_driver == nullptr) { | ||
| 64 | LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); | ||
| 65 | return std::nullopt; | ||
| 66 | } | ||
| 67 | const u32 base_offset = sampler_to_deduce.GetOffset(); | 59 | const u32 base_offset = sampler_to_deduce.GetOffset(); |
| 68 | u32 max_offset{std::numeric_limits<u32>::max()}; | 60 | u32 max_offset{std::numeric_limits<u32>::max()}; |
| 69 | for (const auto& sampler : used_samplers) { | 61 | for (const auto& sampler : used_samplers) { |
| @@ -77,7 +69,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, | |||
| 77 | if (max_offset == std::numeric_limits<u32>::max()) { | 69 | if (max_offset == std::numeric_limits<u32>::max()) { |
| 78 | return std::nullopt; | 70 | return std::nullopt; |
| 79 | } | 71 | } |
| 80 | return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); | 72 | return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); |
| 81 | } | 73 | } |
| 82 | 74 | ||
| 83 | } // Anonymous namespace | 75 | } // Anonymous namespace |
| @@ -149,7 +141,7 @@ void ShaderIR::Decode() { | |||
| 149 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | 141 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
| 150 | 142 | ||
| 151 | decompiled = false; | 143 | decompiled = false; |
| 152 | auto info = ScanFlow(program_code, main_offset, settings, locker); | 144 | auto info = ScanFlow(program_code, main_offset, settings, registry); |
| 153 | auto& shader_info = *info; | 145 | auto& shader_info = *info; |
| 154 | coverage_begin = shader_info.start; | 146 | coverage_begin = shader_info.start; |
| 155 | coverage_end = shader_info.end; | 147 | coverage_end = shader_info.end; |
| @@ -364,7 +356,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 364 | 356 | ||
| 365 | void ShaderIR::PostDecode() { | 357 | void ShaderIR::PostDecode() { |
| 366 | // Deduce texture handler size if needed | 358 | // Deduce texture handler size if needed |
| 367 | auto gpu_driver = locker.AccessGuestDriverProfile(); | 359 | auto gpu_driver = registry.AccessGuestDriverProfile(); |
| 368 | DeduceTextureHandlerSize(gpu_driver, used_samplers); | 360 | DeduceTextureHandlerSize(gpu_driver, used_samplers); |
| 369 | // Deduce Indexed Samplers | 361 | // Deduce Indexed Samplers |
| 370 | if (!uses_indexed_samplers) { | 362 | if (!uses_indexed_samplers) { |
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 90240c765..478394682 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp | |||
| @@ -53,29 +53,24 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { | |||
| 53 | 53 | ||
| 54 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); | 54 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); |
| 55 | 55 | ||
| 56 | // TODO(Rodrigo): Should precise be used when there's a postfactor? | 56 | static constexpr std::array FmulPostFactor = { |
| 57 | Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); | 57 | 1.000f, // None |
| 58 | 0.500f, // Divide 2 | ||
| 59 | 0.250f, // Divide 4 | ||
| 60 | 0.125f, // Divide 8 | ||
| 61 | 8.000f, // Mul 8 | ||
| 62 | 4.000f, // Mul 4 | ||
| 63 | 2.000f, // Mul 2 | ||
| 64 | }; | ||
| 58 | 65 | ||
| 59 | if (instr.fmul.postfactor != 0) { | 66 | if (instr.fmul.postfactor != 0) { |
| 60 | auto postfactor = static_cast<s32>(instr.fmul.postfactor); | 67 | op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, |
| 61 | 68 | Immediate(FmulPostFactor[instr.fmul.postfactor])); | |
| 62 | // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below | ||
| 63 | // logic. | ||
| 64 | if (postfactor >= 4) { | ||
| 65 | postfactor = 7 - postfactor; | ||
| 66 | } else { | ||
| 67 | postfactor = 0 - postfactor; | ||
| 68 | } | ||
| 69 | |||
| 70 | if (postfactor > 0) { | ||
| 71 | value = Operation(OperationCode::FMul, NO_PRECISE, value, | ||
| 72 | Immediate(static_cast<f32>(1 << postfactor))); | ||
| 73 | } else { | ||
| 74 | value = Operation(OperationCode::FDiv, NO_PRECISE, value, | ||
| 75 | Immediate(static_cast<f32>(1 << -postfactor))); | ||
| 76 | } | ||
| 77 | } | 69 | } |
| 78 | 70 | ||
| 71 | // TODO(Rodrigo): Should precise be used when there's a postfactor? | ||
| 72 | Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); | ||
| 73 | |||
| 79 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | 74 | value = GetSaturatedFloat(value, instr.alu.saturate_d); |
| 80 | 75 | ||
| 81 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | 76 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 21366869d..2fe787d6f 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp | |||
| @@ -293,44 +293,66 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { | |||
| 293 | 293 | ||
| 294 | void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, | 294 | void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, |
| 295 | Node imm_lut, bool sets_cc) { | 295 | Node imm_lut, bool sets_cc) { |
| 296 | constexpr u32 lop_iterations = 32; | 296 | const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) { |
| 297 | const Node one = Immediate(1); | 297 | Node value = Immediate(0); |
| 298 | const Node two = Immediate(2); | 298 | const ImmediateNode imm = std::get<ImmediateNode>(*ttbl); |
| 299 | 299 | if (imm.GetValue() & 0x01) { | |
| 300 | Node value; | 300 | const Node a = Operation(OperationCode::IBitwiseNot, na); |
| 301 | for (u32 i = 0; i < lop_iterations; ++i) { | 301 | const Node b = Operation(OperationCode::IBitwiseNot, nb); |
| 302 | const Node shift_amount = Immediate(i); | 302 | const Node c = Operation(OperationCode::IBitwiseNot, nc); |
| 303 | 303 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); | |
| 304 | const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount); | 304 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); |
| 305 | const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one); | 305 | value = Operation(OperationCode::IBitwiseOr, value, r); |
| 306 | |||
| 307 | const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount); | ||
| 308 | const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one); | ||
| 309 | const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one); | ||
| 310 | |||
| 311 | const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount); | ||
| 312 | const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one); | ||
| 313 | const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two); | ||
| 314 | |||
| 315 | const Node pack_01 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_0, pack_1); | ||
| 316 | const Node pack_012 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_01, pack_2); | ||
| 317 | |||
| 318 | const Node shifted_bit = | ||
| 319 | Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012); | ||
| 320 | const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one); | ||
| 321 | |||
| 322 | const Node right = | ||
| 323 | Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount); | ||
| 324 | |||
| 325 | if (i > 0) { | ||
| 326 | value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right); | ||
| 327 | } else { | ||
| 328 | value = right; | ||
| 329 | } | 306 | } |
| 330 | } | 307 | if (imm.GetValue() & 0x02) { |
| 308 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 309 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 310 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); | ||
| 311 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 312 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 313 | } | ||
| 314 | if (imm.GetValue() & 0x04) { | ||
| 315 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 316 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 317 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); | ||
| 318 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 319 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 320 | } | ||
| 321 | if (imm.GetValue() & 0x08) { | ||
| 322 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 323 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); | ||
| 324 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 325 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 326 | } | ||
| 327 | if (imm.GetValue() & 0x10) { | ||
| 328 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 329 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 330 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); | ||
| 331 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 332 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 333 | } | ||
| 334 | if (imm.GetValue() & 0x20) { | ||
| 335 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 336 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); | ||
| 337 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 338 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 339 | } | ||
| 340 | if (imm.GetValue() & 0x40) { | ||
| 341 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 342 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); | ||
| 343 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 344 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 345 | } | ||
| 346 | if (imm.GetValue() & 0x80) { | ||
| 347 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); | ||
| 348 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 349 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 350 | } | ||
| 351 | return value; | ||
| 352 | }(op_a, op_b, op_c, imm_lut); | ||
| 331 | 353 | ||
| 332 | SetInternalFlagsFromInteger(bb, value, sets_cc); | 354 | SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc); |
| 333 | SetRegister(bb, dest, value); | 355 | SetRegister(bb, dest, lop3_fast); |
| 334 | } | 356 | } |
| 335 | 357 | ||
| 336 | } // namespace VideoCommon::Shader | 358 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp index e02bcd097..8e3b46e8e 100644 --- a/src/video_core/shader/decode/bfe.cpp +++ b/src/video_core/shader/decode/bfe.cpp | |||
| @@ -17,33 +17,60 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { | |||
| 17 | const Instruction instr = {program_code[pc]}; | 17 | const Instruction instr = {program_code[pc]}; |
| 18 | const auto opcode = OpCode::Decode(instr); | 18 | const auto opcode = OpCode::Decode(instr); |
| 19 | 19 | ||
| 20 | UNIMPLEMENTED_IF(instr.bfe.negate_b); | ||
| 21 | |||
| 22 | Node op_a = GetRegister(instr.gpr8); | 20 | Node op_a = GetRegister(instr.gpr8); |
| 23 | op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false); | 21 | Node op_b = [&] { |
| 24 | 22 | switch (opcode->get().GetId()) { | |
| 25 | switch (opcode->get().GetId()) { | 23 | case OpCode::Id::BFE_R: |
| 26 | case OpCode::Id::BFE_IMM: { | 24 | return GetRegister(instr.gpr20); |
| 27 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 25 | case OpCode::Id::BFE_C: |
| 28 | "Condition codes generation in BFE is not implemented"); | 26 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |
| 27 | case OpCode::Id::BFE_IMM: | ||
| 28 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 29 | default: | ||
| 30 | UNREACHABLE(); | ||
| 31 | return Immediate(0); | ||
| 32 | } | ||
| 33 | }(); | ||
| 29 | 34 | ||
| 30 | const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue())); | 35 | UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented"); |
| 31 | const Node outer_shift_imm = | ||
| 32 | Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position)); | ||
| 33 | 36 | ||
| 34 | const Node inner_shift = | 37 | const bool is_signed = instr.bfe.is_signed; |
| 35 | Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm); | ||
| 36 | const Node outer_shift = | ||
| 37 | Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm); | ||
| 38 | 38 | ||
| 39 | SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc); | 39 | // using reverse parallel method in |
| 40 | SetRegister(bb, instr.gpr0, outer_shift); | 40 | // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel |
| 41 | break; | 41 | // note for later if possible to implement faster method. |
| 42 | } | 42 | if (instr.bfe.brev) { |
| 43 | default: | 43 | const auto swap = [&](u32 s, u32 mask) { |
| 44 | UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName()); | 44 | Node v1 = |
| 45 | SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s)); | ||
| 46 | if (mask != 0) { | ||
| 47 | v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1), | ||
| 48 | Immediate(mask)); | ||
| 49 | } | ||
| 50 | Node v2 = op_a; | ||
| 51 | if (mask != 0) { | ||
| 52 | v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2), | ||
| 53 | Immediate(mask)); | ||
| 54 | } | ||
| 55 | v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2), | ||
| 56 | Immediate(s)); | ||
| 57 | return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1), | ||
| 58 | std::move(v2)); | ||
| 59 | }; | ||
| 60 | op_a = swap(1, 0x55555555U); | ||
| 61 | op_a = swap(2, 0x33333333U); | ||
| 62 | op_a = swap(4, 0x0F0F0F0FU); | ||
| 63 | op_a = swap(8, 0x00FF00FFU); | ||
| 64 | op_a = swap(16, 0); | ||
| 45 | } | 65 | } |
| 46 | 66 | ||
| 67 | const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, | ||
| 68 | Immediate(0), Immediate(8)); | ||
| 69 | const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, | ||
| 70 | Immediate(8), Immediate(8)); | ||
| 71 | auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits); | ||
| 72 | SetRegister(bb, instr.gpr0, std::move(result)); | ||
| 73 | |||
| 47 | return pc; | 74 | return pc; |
| 48 | } | 75 | } |
| 49 | 76 | ||
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index bee7d8cad..48350e042 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "common/logging/log.h" | 12 | #include "common/logging/log.h" |
| 13 | #include "video_core/engines/shader_bytecode.h" | 13 | #include "video_core/engines/shader_bytecode.h" |
| 14 | #include "video_core/shader/node_helper.h" | 14 | #include "video_core/shader/node_helper.h" |
| 15 | #include "video_core/shader/registry.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 16 | 17 | ||
| 17 | namespace VideoCommon::Shader { | 18 | namespace VideoCommon::Shader { |
| @@ -359,8 +360,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample | |||
| 359 | if (sampler_info) { | 360 | if (sampler_info) { |
| 360 | return *sampler_info; | 361 | return *sampler_info; |
| 361 | } | 362 | } |
| 362 | const auto sampler = | 363 | const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset) |
| 363 | buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset); | 364 | : registry.ObtainBoundSampler(offset); |
| 364 | if (!sampler) { | 365 | if (!sampler) { |
| 365 | LOG_WARNING(HW_GPU, "Unknown sampler info"); | 366 | LOG_WARNING(HW_GPU, "Unknown sampler info"); |
| 366 | return SamplerInfo{TextureType::Texture2D, false, false, false}; | 367 | return SamplerInfo{TextureType::Texture2D, false, false, false}; |
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 206961909..6191ffba1 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp | |||
| @@ -12,6 +12,7 @@ namespace VideoCommon::Shader { | |||
| 12 | 12 | ||
| 13 | using Tegra::Shader::Instruction; | 13 | using Tegra::Shader::Instruction; |
| 14 | using Tegra::Shader::OpCode; | 14 | using Tegra::Shader::OpCode; |
| 15 | using Tegra::Shader::PredCondition; | ||
| 15 | 16 | ||
| 16 | u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | 17 | u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { |
| 17 | const Instruction instr = {program_code[pc]}; | 18 | const Instruction instr = {program_code[pc]}; |
| @@ -30,7 +31,7 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | |||
| 30 | const bool is_signed_b = instr.xmad.sign_b == 1; | 31 | const bool is_signed_b = instr.xmad.sign_b == 1; |
| 31 | const bool is_signed_c = is_signed_a; | 32 | const bool is_signed_c = is_signed_a; |
| 32 | 33 | ||
| 33 | auto [is_merge, is_psl, is_high_b, mode, op_b, | 34 | auto [is_merge, is_psl, is_high_b, mode, op_b_binding, |
| 34 | op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> { | 35 | op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> { |
| 35 | switch (opcode->get().GetId()) { | 36 | switch (opcode->get().GetId()) { |
| 36 | case OpCode::Id::XMAD_CR: | 37 | case OpCode::Id::XMAD_CR: |
| @@ -63,15 +64,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | |||
| 63 | } | 64 | } |
| 64 | }(); | 65 | }(); |
| 65 | 66 | ||
| 66 | op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16); | 67 | op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a), |
| 68 | instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16)); | ||
| 67 | 69 | ||
| 68 | const Node original_b = op_b; | 70 | const Node original_b = op_b_binding; |
| 69 | op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16); | 71 | const Node op_b = |
| 72 | SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding), | ||
| 73 | is_high_b ? Immediate(16) : Immediate(0), Immediate(16)); | ||
| 70 | 74 | ||
| 71 | // TODO(Rodrigo): Use an appropiate sign for this operation | 75 | // we already check sign_a and sign_b is difference or not before so just use one in here. |
| 72 | Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b); | 76 | Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b); |
| 73 | if (is_psl) { | 77 | if (is_psl) { |
| 74 | product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); | 78 | product = |
| 79 | SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16)); | ||
| 75 | } | 80 | } |
| 76 | SetTemporary(bb, 0, product); | 81 | SetTemporary(bb, 0, product); |
| 77 | product = GetTemporary(0); | 82 | product = GetTemporary(0); |
| @@ -88,12 +93,40 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | |||
| 88 | return BitfieldExtract(original_c, 16, 16); | 93 | return BitfieldExtract(original_c, 16, 16); |
| 89 | case Tegra::Shader::XmadMode::CBcc: { | 94 | case Tegra::Shader::XmadMode::CBcc: { |
| 90 | const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, | 95 | const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, |
| 91 | NO_PRECISE, original_b, Immediate(16)); | 96 | original_b, Immediate(16)); |
| 92 | return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, original_c, | 97 | return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b); |
| 93 | shifted_b); | 98 | } |
| 99 | case Tegra::Shader::XmadMode::CSfu: { | ||
| 100 | const Node comp_a = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_a, | ||
| 101 | op_a, Immediate(0)); | ||
| 102 | const Node comp_b = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_b, | ||
| 103 | op_b, Immediate(0)); | ||
| 104 | const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); | ||
| 105 | |||
| 106 | const Node comp_minus_a = GetPredicateComparisonInteger( | ||
| 107 | PredCondition::NotEqual, is_signed_a, | ||
| 108 | SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, | ||
| 109 | Immediate(0x80000000)), | ||
| 110 | Immediate(0)); | ||
| 111 | const Node comp_minus_b = GetPredicateComparisonInteger( | ||
| 112 | PredCondition::NotEqual, is_signed_b, | ||
| 113 | SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, | ||
| 114 | Immediate(0x80000000)), | ||
| 115 | Immediate(0)); | ||
| 116 | |||
| 117 | Node new_c = Operation( | ||
| 118 | OperationCode::Select, comp_minus_a, | ||
| 119 | SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)), | ||
| 120 | original_c); | ||
| 121 | new_c = Operation( | ||
| 122 | OperationCode::Select, comp_minus_b, | ||
| 123 | SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)), | ||
| 124 | std::move(new_c)); | ||
| 125 | |||
| 126 | return Operation(OperationCode::Select, comp, original_c, std::move(new_c)); | ||
| 94 | } | 127 | } |
| 95 | default: | 128 | default: |
| 96 | UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value())); | 129 | UNREACHABLE(); |
| 97 | return Immediate(0); | 130 | return Immediate(0); |
| 98 | } | 131 | } |
| 99 | }(); | 132 | }(); |
| @@ -102,18 +135,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | |||
| 102 | op_c = GetTemporary(1); | 135 | op_c = GetTemporary(1); |
| 103 | 136 | ||
| 104 | // TODO(Rodrigo): Use an appropiate sign for this operation | 137 | // TODO(Rodrigo): Use an appropiate sign for this operation |
| 105 | Node sum = Operation(OperationCode::IAdd, product, op_c); | 138 | Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c)); |
| 106 | SetTemporary(bb, 2, sum); | 139 | SetTemporary(bb, 2, sum); |
| 107 | sum = GetTemporary(2); | 140 | sum = GetTemporary(2); |
| 108 | if (is_merge) { | 141 | if (is_merge) { |
| 109 | const Node a = BitfieldExtract(sum, 0, 16); | 142 | const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum), |
| 110 | const Node b = | 143 | Immediate(0), Immediate(16)); |
| 111 | Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16)); | 144 | const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b, |
| 112 | sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b); | 145 | Immediate(16)); |
| 146 | sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b); | ||
| 113 | } | 147 | } |
| 114 | 148 | ||
| 115 | SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); | 149 | SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); |
| 116 | SetRegister(bb, instr.gpr0, sum); | 150 | SetRegister(bb, instr.gpr0, std::move(sum)); |
| 117 | 151 | ||
| 118 | return pc; | 152 | return pc; |
| 119 | } | 153 | } |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index a0a7b9111..a1828546e 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -299,7 +299,7 @@ private: | |||
| 299 | u32 index{}; ///< Emulated index given for the this sampler. | 299 | u32 index{}; ///< Emulated index given for the this sampler. |
| 300 | u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. | 300 | u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. |
| 301 | u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). | 301 | u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). |
| 302 | u32 size{}; ///< Size of the sampler if indexed. | 302 | u32 size{1}; ///< Size of the sampler. |
| 303 | 303 | ||
| 304 | Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) | 304 | Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) |
| 305 | bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. | 305 | bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. |
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp index b3dcd291c..76c56abb5 100644 --- a/src/video_core/shader/node_helper.cpp +++ b/src/video_core/shader/node_helper.cpp | |||
| @@ -68,6 +68,8 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) | |||
| 68 | return OperationCode::UBitwiseXor; | 68 | return OperationCode::UBitwiseXor; |
| 69 | case OperationCode::IBitwiseNot: | 69 | case OperationCode::IBitwiseNot: |
| 70 | return OperationCode::UBitwiseNot; | 70 | return OperationCode::UBitwiseNot; |
| 71 | case OperationCode::IBitfieldExtract: | ||
| 72 | return OperationCode::UBitfieldExtract; | ||
| 71 | case OperationCode::IBitfieldInsert: | 73 | case OperationCode::IBitfieldInsert: |
| 72 | return OperationCode::UBitfieldInsert; | 74 | return OperationCode::UBitfieldInsert; |
| 73 | case OperationCode::IBitCount: | 75 | case OperationCode::IBitCount: |
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp new file mode 100644 index 000000000..af70b3f35 --- /dev/null +++ b/src/video_core/shader/registry.cpp | |||
| @@ -0,0 +1,161 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <tuple> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/engines/kepler_compute.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/engines/shader_type.h" | ||
| 13 | #include "video_core/shader/registry.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | using Tegra::Engines::ConstBufferEngineInterface; | ||
| 18 | using Tegra::Engines::SamplerDescriptor; | ||
| 19 | using Tegra::Engines::ShaderType; | ||
| 20 | |||
| 21 | namespace { | ||
| 22 | |||
| 23 | GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||
| 24 | if (shader_stage == ShaderType::Compute) { | ||
| 25 | return {}; | ||
| 26 | } | ||
| 27 | auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine); | ||
| 28 | |||
| 29 | GraphicsInfo info; | ||
| 30 | info.tfb_layouts = graphics.regs.tfb_layouts; | ||
| 31 | info.tfb_varying_locs = graphics.regs.tfb_varying_locs; | ||
| 32 | info.primitive_topology = graphics.regs.draw.topology; | ||
| 33 | info.tessellation_primitive = graphics.regs.tess_mode.prim; | ||
| 34 | info.tessellation_spacing = graphics.regs.tess_mode.spacing; | ||
| 35 | info.tfb_enabled = graphics.regs.tfb_enabled; | ||
| 36 | info.tessellation_clockwise = graphics.regs.tess_mode.cw; | ||
| 37 | return info; | ||
| 38 | } | ||
| 39 | |||
| 40 | ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||
| 41 | if (shader_stage != ShaderType::Compute) { | ||
| 42 | return {}; | ||
| 43 | } | ||
| 44 | auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine); | ||
| 45 | const auto& launch = compute.launch_description; | ||
| 46 | |||
| 47 | ComputeInfo info; | ||
| 48 | info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}; | ||
| 49 | info.local_memory_size_in_words = launch.local_pos_alloc; | ||
| 50 | info.shared_memory_size_in_words = launch.shared_alloc; | ||
| 51 | return info; | ||
| 52 | } | ||
| 53 | |||
| 54 | } // Anonymous namespace | ||
| 55 | |||
| 56 | Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info) | ||
| 57 | : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, | ||
| 58 | bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} /* Built purely from stored info: `engine` is not initialized here and keeps its nullptr default. */ | ||
| 59 | |||
| 60 | Registry::Registry(Tegra::Engines::ShaderType shader_stage, | ||
| 61 | Tegra::Engines::ConstBufferEngineInterface& engine) | ||
| 62 | : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()}, /* In these initializers `engine` names the constructor parameter, which shadows the member. */ | ||
| 63 | graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo( | ||
| 64 | shader_stage, engine)} {} /* Bound buffer and graphics/compute info are captured from the engine once, at construction. */ | ||
| 65 | |||
| 66 | Registry::~Registry() = default; | ||
| 67 | |||
| 68 | std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) { /* Returns the 32-bit value registered for (buffer, offset), reading it from the engine on a miss. */ | ||
| 69 | const std::pair<u32, u32> key = {buffer, offset}; | ||
| 70 | const auto iter = keys.find(key); | ||
| 71 | if (iter != keys.end()) { /* An already registered value wins over the engine's current contents. */ | ||
| 72 | return iter->second; | ||
| 73 | } | ||
| 74 | if (!engine) { /* Without an engine an unregistered key cannot be resolved. */ | ||
| 75 | return std::nullopt; | ||
| 76 | } | ||
| 77 | const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); | ||
| 78 | keys.emplace(key, value); /* Register the value so later lookups return the same result. */ | ||
| 79 | return value; | ||
| 80 | } | ||
| 81 | |||
| 82 | std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) { /* Returns the sampler registered for `offset`, reading it from the engine on a miss. */ | ||
| 83 | const u32 key = offset; | ||
| 84 | const auto iter = bound_samplers.find(key); | ||
| 85 | if (iter != bound_samplers.end()) { /* An already registered sampler wins over the engine's current state. */ | ||
| 86 | return iter->second; | ||
| 87 | } | ||
| 88 | if (!engine) { /* Without an engine an unregistered sampler cannot be resolved. */ | ||
| 89 | return std::nullopt; | ||
| 90 | } | ||
| 91 | const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); | ||
| 92 | bound_samplers.emplace(key, value); /* Register the sampler so later lookups return the same result. */ | ||
| 93 | return value; | ||
| 94 | } | ||
| 95 | |||
| 96 | std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, | ||
| 97 | u32 offset) { /* Returns the sampler registered for (buffer, offset), reading it from the engine on a miss. */ | ||
| 98 | const std::pair key = {buffer, offset}; | ||
| 99 | const auto iter = bindless_samplers.find(key); | ||
| 100 | if (iter != bindless_samplers.end()) { /* An already registered sampler wins over the engine's current state. */ | ||
| 101 | return iter->second; | ||
| 102 | } | ||
| 103 | if (!engine) { /* Without an engine an unregistered sampler cannot be resolved. */ | ||
| 104 | return std::nullopt; | ||
| 105 | } | ||
| 106 | const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); | ||
| 107 | bindless_samplers.emplace(key, value); /* Register the sampler so later lookups return the same result. */ | ||
| 108 | return value; | ||
| 109 | } | ||
| 110 | |||
| 111 | void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { /* Registers `value` for (buffer, offset) without consulting the engine. */ | ||
| 112 | keys.insert_or_assign({buffer, offset}, value); /* insert_or_assign replaces a value already stored under this key. */ | ||
| 113 | } | ||
| 114 | |||
| 115 | void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { /* Registers `sampler` for `offset` without consulting the engine. */ | ||
| 116 | bound_samplers.insert_or_assign(offset, sampler); /* insert_or_assign replaces a sampler already stored under this key. */ | ||
| 117 | } | ||
| 118 | |||
| 119 | void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { /* Registers `sampler` for (buffer, offset) without consulting the engine. */ | ||
| 120 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); /* insert_or_assign replaces a sampler already stored under this key. */ | ||
| 121 | } | ||
| 122 | |||
| 123 | bool Registry::IsConsistent() const { /* Re-reads every registered key and sampler from the engine and compares it with the stored value. */ | ||
| 124 | if (!engine) { /* Nothing to compare against, so the registry is reported as consistent. */ | ||
| 125 | return true; | ||
| 126 | } | ||
| 127 | return std::all_of(keys.begin(), keys.end(), /* The three checks are chained with &&, so evaluation stops at the first mismatch. */ | ||
| 128 | [this](const auto& pair) { | ||
| 129 | const auto [cbuf, offset] = pair.first; | ||
| 130 | const auto value = pair.second; | ||
| 131 | return value == engine->AccessConstBuffer32(stage, cbuf, offset); | ||
| 132 | }) && | ||
| 133 | std::all_of(bound_samplers.begin(), bound_samplers.end(), | ||
| 134 | [this](const auto& sampler) { | ||
| 135 | const auto [key, value] = sampler; /* key is the sampler offset used when it was registered. */ | ||
| 136 | return value == engine->AccessBoundSampler(stage, key); | ||
| 137 | }) && | ||
| 138 | std::all_of(bindless_samplers.begin(), bindless_samplers.end(), | ||
| 139 | [this](const auto& sampler) { | ||
| 140 | const auto [cbuf, offset] = sampler.first; | ||
| 141 | const auto value = sampler.second; | ||
| 142 | return value == engine->AccessBindlessSampler(stage, cbuf, offset); | ||
| 143 | }); | ||
| 144 | } | ||
| 145 | |||
| 146 | bool Registry::HasEqualKeys(const Registry& rhs) const { /* Compares only the three maps; stage, bound buffer and graphics/compute info are not part of the comparison. */ | ||
| 147 | return std::tie(keys, bound_samplers, bindless_samplers) == | ||
| 148 | std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); | ||
| 149 | } | ||
| 150 | |||
| 151 | const GraphicsInfo& Registry::GetGraphicsInfo() const { /* Returns the stored graphics info; asserts that this registry is not for a compute stage. */ | ||
| 152 | ASSERT(stage != Tegra::Engines::ShaderType::Compute); | ||
| 153 | return graphics_info; | ||
| 154 | } | ||
| 155 | |||
| 156 | const ComputeInfo& Registry::GetComputeInfo() const { /* Returns the stored compute info; asserts that this registry is for a compute stage. */ | ||
| 157 | ASSERT(stage == Tegra::Engines::ShaderType::Compute); | ||
| 158 | return compute_info; | ||
| 159 | } | ||
| 160 | |||
| 161 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h new file mode 100644 index 000000000..0c80d35fd --- /dev/null +++ b/src/video_core/shader/registry.h | |||
| @@ -0,0 +1,137 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | #include <type_traits> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <utility> | ||
| 12 | |||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "common/hash.h" | ||
| 15 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 16 | #include "video_core/engines/maxwell_3d.h" | ||
| 17 | #include "video_core/engines/shader_type.h" | ||
| 18 | #include "video_core/guest_driver.h" | ||
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; | ||
| 23 | using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; | ||
| 24 | using BindlessSamplerMap = | ||
| 25 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | ||
| 26 | |||
| 27 | struct GraphicsInfo { /* Graphics pipeline state kept alongside a non-compute shader; every field has a zero/false default. */ | ||
| 28 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 29 | |||
| 30 | std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers> | ||
| 31 | tfb_layouts{}; | ||
| 32 | std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; /* One row of up to 128 varying locations per transform feedback buffer. */ | ||
| 33 | Maxwell::PrimitiveTopology primitive_topology{}; | ||
| 34 | Maxwell::TessellationPrimitive tessellation_primitive{}; | ||
| 35 | Maxwell::TessellationSpacing tessellation_spacing{}; | ||
| 36 | bool tfb_enabled = false; | ||
| 37 | bool tessellation_clockwise = false; | ||
| 38 | }; | ||
| 39 | static_assert(std::is_trivially_copyable_v<GraphicsInfo> && /* NOTE(review): presumably required so the struct can be stored and reloaded as raw bytes - confirm against the serialization code. */ | ||
| 40 | std::is_standard_layout_v<GraphicsInfo>); | ||
| 41 | |||
| 42 | struct ComputeInfo { /* Compute launch parameters kept alongside a compute shader; every field defaults to zero. */ | ||
| 43 | std::array<u32, 3> workgroup_size{}; /* Filled from the launch block dimensions in x, y, z order. */ | ||
| 44 | u32 shared_memory_size_in_words = 0; | ||
| 45 | u32 local_memory_size_in_words = 0; | ||
| 46 | }; | ||
| 47 | static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>); /* NOTE(review): presumably required for raw-byte storage - confirm against the serialization code. */ | ||
| 48 | |||
| 49 | struct SerializedRegistryInfo { /* Values a Registry is constructed from when no engine is attached. */ | ||
| 50 | VideoCore::GuestDriverProfile guest_driver_profile; | ||
| 51 | u32 bound_buffer = 0; | ||
| 52 | GraphicsInfo graphics; | ||
| 53 | ComputeInfo compute; | ||
| 54 | }; | ||
| 55 | |||
| 56 | /** | ||
| 57 | * The Registry is a class used to interface the 3D and compute engines with the shader compiler. | ||
| 58 | * With it, the shader can obtain required data from GPU state and store it for disk shader | ||
| 59 | * compilation. | ||
| 60 | */ | ||
| 61 | class Registry { | ||
| 62 | public: | ||
| 63 | explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); /* Builds a registry from stored info; no engine is attached. */ | ||
| 64 | |||
| 65 | explicit Registry(Tegra::Engines::ShaderType shader_stage, | ||
| 66 | Tegra::Engines::ConstBufferEngineInterface& engine); /* Builds a registry backed by a live engine. */ | ||
| 67 | |||
| 68 | ~Registry(); | ||
| 69 | |||
| 70 | /// Retrieves a key from the registry. A registered key yields its stored value; otherwise it is | ||
| 71 | /// read from the engine and registered, or std::nullopt is returned when no engine is attached. | ||
| 72 | std::optional<u32> ObtainKey(u32 buffer, u32 offset); | ||
| 73 | |||
| 74 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); /* Same lookup-then-fetch scheme as ObtainKey, keyed by offset. */ | ||
| 75 | |||
| 76 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); /* Same lookup-then-fetch scheme as ObtainKey, keyed by (buffer, offset). */ | ||
| 77 | |||
| 78 | /// Inserts a key. | ||
| 79 | void InsertKey(u32 buffer, u32 offset, u32 value); | ||
| 80 | |||
| 81 | /// Inserts a bound sampler key. | ||
| 82 | void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 83 | |||
| 84 | /// Inserts a bindless sampler key. | ||
| 85 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 86 | |||
| 87 | /// Checks keys and samplers against engine's current const buffers. | ||
| 88 | /// Returns true if they all match (or if no engine is attached), false otherwise. | ||
| 89 | bool IsConsistent() const; | ||
| 90 | |||
| 91 | /// Returns true if this registry's keys and samplers are equal to the ones in rhs. | ||
| 92 | bool HasEqualKeys(const Registry& rhs) const; | ||
| 93 | |||
| 94 | /// Returns graphics information from this shader | ||
| 95 | const GraphicsInfo& GetGraphicsInfo() const; | ||
| 96 | |||
| 97 | /// Returns compute information from this shader | ||
| 98 | const ComputeInfo& GetComputeInfo() const; | ||
| 99 | |||
| 100 | /// Gives a getter to the const buffer keys in the database. | ||
| 101 | const KeyMap& GetKeys() const { | ||
| 102 | return keys; | ||
| 103 | } | ||
| 104 | |||
| 105 | /// Gets samplers database. | ||
| 106 | const BoundSamplerMap& GetBoundSamplers() const { | ||
| 107 | return bound_samplers; | ||
| 108 | } | ||
| 109 | |||
| 110 | /// Gets bindless samplers database. | ||
| 111 | const BindlessSamplerMap& GetBindlessSamplers() const { | ||
| 112 | return bindless_samplers; | ||
| 113 | } | ||
| 114 | |||
| 115 | /// Gets bound buffer used on this shader | ||
| 116 | u32 GetBoundBuffer() const { | ||
| 117 | return bound_buffer; | ||
| 118 | } | ||
| 119 | |||
| 120 | /// Obtains access to the guest driver's profile. | ||
| 121 | VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { | ||
| 122 | return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; /* The engine's profile when an engine is attached, the stored copy otherwise. */ | ||
| 123 | } | ||
| 124 | |||
| 125 | private: | ||
| 126 | const Tegra::Engines::ShaderType stage; | ||
| 127 | VideoCore::GuestDriverProfile stored_guest_driver_profile; | ||
| 128 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; /* Stays nullptr when the registry is built from SerializedRegistryInfo. */ | ||
| 129 | KeyMap keys; | ||
| 130 | BoundSamplerMap bound_samplers; | ||
| 131 | BindlessSamplerMap bindless_samplers; | ||
| 132 | u32 bound_buffer; | ||
| 133 | GraphicsInfo graphics_info; | ||
| 134 | ComputeInfo compute_info; | ||
| 135 | }; | ||
| 136 | |||
| 137 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 3a5d280a9..baf7188d2 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/logging/log.h" | 11 | #include "common/logging/log.h" |
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/shader/node_helper.h" | 13 | #include "video_core/shader/node_helper.h" |
| 14 | #include "video_core/shader/registry.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | 15 | #include "video_core/shader/shader_ir.h" |
| 15 | 16 | ||
| 16 | namespace VideoCommon::Shader { | 17 | namespace VideoCommon::Shader { |
| @@ -24,8 +25,8 @@ using Tegra::Shader::PredOperation; | |||
| 24 | using Tegra::Shader::Register; | 25 | using Tegra::Shader::Register; |
| 25 | 26 | ||
| 26 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, | 27 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, |
| 27 | ConstBufferLocker& locker) | 28 | Registry& registry) |
| 28 | : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { | 29 | : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} { |
| 29 | Decode(); | 30 | Decode(); |
| 30 | PostDecode(); | 31 | PostDecode(); |
| 31 | } | 32 | } |
| @@ -95,6 +96,7 @@ Node ShaderIR::GetPredicate(bool immediate) { | |||
| 95 | } | 96 | } |
| 96 | 97 | ||
| 97 | Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { | 98 | Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { |
| 99 | MarkAttributeUsage(index, element); | ||
| 98 | used_input_attributes.emplace(index); | 100 | used_input_attributes.emplace(index); |
| 99 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); | 101 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); |
| 100 | } | 102 | } |
| @@ -105,42 +107,8 @@ Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_addres | |||
| 105 | } | 107 | } |
| 106 | 108 | ||
| 107 | Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { | 109 | Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { |
| 108 | if (index == Attribute::Index::LayerViewportPointSize) { | 110 | MarkAttributeUsage(index, element); |
| 109 | switch (element) { | ||
| 110 | case 0: | ||
| 111 | UNIMPLEMENTED(); | ||
| 112 | break; | ||
| 113 | case 1: | ||
| 114 | uses_layer = true; | ||
| 115 | break; | ||
| 116 | case 2: | ||
| 117 | uses_viewport_index = true; | ||
| 118 | break; | ||
| 119 | case 3: | ||
| 120 | uses_point_size = true; | ||
| 121 | break; | ||
| 122 | } | ||
| 123 | } | ||
| 124 | if (index == Attribute::Index::TessCoordInstanceIDVertexID) { | ||
| 125 | switch (element) { | ||
| 126 | case 2: | ||
| 127 | uses_instance_id = true; | ||
| 128 | break; | ||
| 129 | case 3: | ||
| 130 | uses_vertex_id = true; | ||
| 131 | break; | ||
| 132 | default: | ||
| 133 | break; | ||
| 134 | } | ||
| 135 | } | ||
| 136 | if (index == Attribute::Index::ClipDistances0123 || | ||
| 137 | index == Attribute::Index::ClipDistances4567) { | ||
| 138 | const auto clip_index = | ||
| 139 | static_cast<u32>((index == Attribute::Index::ClipDistances4567 ? 1 : 0) + element); | ||
| 140 | used_clip_distances.at(clip_index) = true; | ||
| 141 | } | ||
| 142 | used_output_attributes.insert(index); | 111 | used_output_attributes.insert(index); |
| 143 | |||
| 144 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); | 112 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); |
| 145 | } | 113 | } |
| 146 | 114 | ||
| @@ -451,6 +419,54 @@ Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { | |||
| 451 | Immediate(bits)); | 419 | Immediate(bits)); |
| 452 | } | 420 | } |
| 453 | 421 | ||
| 422 | void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) { /* Sets the uses_* flags and clip-distance bookkeeping for the attribute component being accessed. */ | ||
| 423 | switch (index) { | ||
| 424 | case Attribute::Index::LayerViewportPointSize: | ||
| 425 | switch (element) { | ||
| 426 | case 0: | ||
| 427 | UNIMPLEMENTED(); | ||
| 428 | break; | ||
| 429 | case 1: | ||
| 430 | uses_layer = true; | ||
| 431 | break; | ||
| 432 | case 2: | ||
| 433 | uses_viewport_index = true; | ||
| 434 | break; | ||
| 435 | case 3: | ||
| 436 | uses_point_size = true; | ||
| 437 | break; | ||
| 438 | } | ||
| 439 | break; | ||
| 440 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 441 | switch (element) { /* Elements other than 2 and 3 set no flag here. */ | ||
| 442 | case 2: | ||
| 443 | uses_instance_id = true; | ||
| 444 | break; | ||
| 445 | case 3: | ||
| 446 | uses_vertex_id = true; | ||
| 447 | break; | ||
| 448 | } | ||
| 449 | break; | ||
| 450 | case Attribute::Index::ClipDistances0123: | ||
| 451 | case Attribute::Index::ClipDistances4567: { | ||
| 452 | const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element; /* The second attribute is offset by 4, so the two attributes map to separate entries. */ | ||
| 453 | used_clip_distances.at(clip_index) = true; /* at() is the bounds-checked accessor. */ | ||
| 454 | break; | ||
| 455 | } | ||
| 456 | case Attribute::Index::FrontColor: | ||
| 457 | case Attribute::Index::FrontSecondaryColor: | ||
| 458 | case Attribute::Index::BackColor: | ||
| 459 | case Attribute::Index::BackSecondaryColor: | ||
| 460 | uses_legacy_varyings = true; | ||
| 461 | break; | ||
| 462 | default: | ||
| 463 | if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) { /* Texture coordinate attributes also count as legacy varyings. */ | ||
| 464 | uses_legacy_varyings = true; | ||
| 465 | } | ||
| 466 | break; | ||
| 467 | } | ||
| 468 | } | ||
| 469 | |||
| 454 | std::size_t ShaderIR::DeclareAmend(Node new_amend) { | 470 | std::size_t ShaderIR::DeclareAmend(Node new_amend) { |
| 455 | const std::size_t id = amend_code.size(); | 471 | const std::size_t id = amend_code.size(); |
| 456 | amend_code.push_back(new_amend); | 472 | amend_code.push_back(new_amend); |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index b0851c3be..80fc9b82c 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -18,8 +18,8 @@ | |||
| 18 | #include "video_core/engines/shader_header.h" | 18 | #include "video_core/engines/shader_header.h" |
| 19 | #include "video_core/shader/ast.h" | 19 | #include "video_core/shader/ast.h" |
| 20 | #include "video_core/shader/compiler_settings.h" | 20 | #include "video_core/shader/compiler_settings.h" |
| 21 | #include "video_core/shader/const_buffer_locker.h" | ||
| 22 | #include "video_core/shader/node.h" | 21 | #include "video_core/shader/node.h" |
| 22 | #include "video_core/shader/registry.h" | ||
| 23 | 23 | ||
| 24 | namespace VideoCommon::Shader { | 24 | namespace VideoCommon::Shader { |
| 25 | 25 | ||
| @@ -69,7 +69,7 @@ struct GlobalMemoryUsage { | |||
| 69 | class ShaderIR final { | 69 | class ShaderIR final { |
| 70 | public: | 70 | public: |
| 71 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, | 71 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, |
| 72 | ConstBufferLocker& locker); | 72 | Registry& registry); |
| 73 | ~ShaderIR(); | 73 | ~ShaderIR(); |
| 74 | 74 | ||
| 75 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { | 75 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { |
| @@ -137,6 +137,10 @@ public: | |||
| 137 | return uses_vertex_id; | 137 | return uses_vertex_id; |
| 138 | } | 138 | } |
| 139 | 139 | ||
| 140 | bool UsesLegacyVaryings() const { | ||
| 141 | return uses_legacy_varyings; | ||
| 142 | } | ||
| 143 | |||
| 140 | bool UsesWarps() const { | 144 | bool UsesWarps() const { |
| 141 | return uses_warps; | 145 | return uses_warps; |
| 142 | } | 146 | } |
| @@ -343,6 +347,9 @@ private: | |||
| 343 | /// Inserts a sequence of bits from a node | 347 | /// Inserts a sequence of bits from a node |
| 344 | Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); | 348 | Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); |
| 345 | 349 | ||
| 350 | /// Marks the usage of an input or output attribute. | ||
| 351 | void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); | ||
| 352 | |||
| 346 | void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | 353 | void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, |
| 347 | const Node4& components); | 354 | const Node4& components); |
| 348 | 355 | ||
| @@ -414,7 +421,7 @@ private: | |||
| 414 | const ProgramCode& program_code; | 421 | const ProgramCode& program_code; |
| 415 | const u32 main_offset; | 422 | const u32 main_offset; |
| 416 | const CompilerSettings settings; | 423 | const CompilerSettings settings; |
| 417 | ConstBufferLocker& locker; | 424 | Registry& registry; |
| 418 | 425 | ||
| 419 | bool decompiled{}; | 426 | bool decompiled{}; |
| 420 | bool disable_flow_stack{}; | 427 | bool disable_flow_stack{}; |
| @@ -443,6 +450,7 @@ private: | |||
| 443 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes | 450 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes |
| 444 | bool uses_instance_id{}; | 451 | bool uses_instance_id{}; |
| 445 | bool uses_vertex_id{}; | 452 | bool uses_vertex_id{}; |
| 453 | bool uses_legacy_varyings{}; | ||
| 446 | bool uses_warps{}; | 454 | bool uses_warps{}; |
| 447 | bool uses_indexed_samplers{}; | 455 | bool uses_indexed_samplers{}; |
| 448 | 456 | ||
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index face8c943..10739b37d 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -81,26 +81,20 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons | |||
| 81 | MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); | 81 | MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); |
| 82 | return {tracked, track}; | 82 | return {tracked, track}; |
| 83 | } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { | 83 | } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { |
| 84 | auto bound_buffer = locker.ObtainBoundBuffer(); | 84 | const u32 bound_buffer = registry.GetBoundBuffer(); |
| 85 | if (!bound_buffer) { | 85 | if (bound_buffer != cbuf->GetIndex()) { |
| 86 | return {}; | 86 | return {}; |
| 87 | } | 87 | } |
| 88 | if (*bound_buffer != cbuf->GetIndex()) { | 88 | const auto pair = DecoupleIndirectRead(*operation); |
| 89 | return {}; | ||
| 90 | } | ||
| 91 | auto pair = DecoupleIndirectRead(*operation); | ||
| 92 | if (!pair) { | 89 | if (!pair) { |
| 93 | return {}; | 90 | return {}; |
| 94 | } | 91 | } |
| 95 | auto [gpr, base_offset] = *pair; | 92 | auto [gpr, base_offset] = *pair; |
| 96 | const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); | 93 | const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); |
| 97 | auto gpu_driver = locker.AccessGuestDriverProfile(); | 94 | const auto& gpu_driver = registry.AccessGuestDriverProfile(); |
| 98 | if (gpu_driver == nullptr) { | ||
| 99 | return {}; | ||
| 100 | } | ||
| 101 | const u32 bindless_cv = NewCustomVariable(); | 95 | const u32 bindless_cv = NewCustomVariable(); |
| 102 | const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, | 96 | const Node op = |
| 103 | Immediate(gpu_driver->GetTextureHandlerSize())); | 97 | Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); |
| 104 | 98 | ||
| 105 | const Node cv_node = GetCustomVariable(bindless_cv); | 99 | const Node cv_node = GetCustomVariable(bindless_cv); |
| 106 | Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); | 100 | Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); |
| @@ -157,13 +151,21 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co | |||
| 157 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | 151 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { |
| 158 | return {}; | 152 | return {}; |
| 159 | } | 153 | } |
| 160 | // Reduce the cursor in one to avoid infinite loops when the instruction sets the same | 154 | s64 current_cursor = cursor; |
| 161 | // register that it uses as operand | 155 | while (current_cursor > 0) { |
| 162 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | 156 | // Reduce the cursor in one to avoid infinite loops when the instruction sets the same |
| 163 | if (!source) { | 157 | // register that it uses as operand |
| 164 | return {}; | 158 | const auto [source, new_cursor] = TrackRegister(gpr, code, current_cursor - 1); |
| 159 | current_cursor = new_cursor; | ||
| 160 | if (!source) { | ||
| 161 | continue; | ||
| 162 | } | ||
| 163 | const auto [base_address, index, offset] = TrackCbuf(source, code, current_cursor); | ||
| 164 | if (base_address != nullptr) { | ||
| 165 | return {base_address, index, offset}; | ||
| 166 | } | ||
| 165 | } | 167 | } |
| 166 | return TrackCbuf(source, code, new_cursor); | 168 | return {}; |
| 167 | } | 169 | } |
| 168 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { | 170 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { |
| 169 | for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { | 171 | for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { |
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp new file mode 100644 index 000000000..22a933761 --- /dev/null +++ b/src/video_core/shader/transform_feedback.cpp | |||
| @@ -0,0 +1,115 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/shader/registry.h" | ||
| 13 | #include "video_core/shader/transform_feedback.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | namespace { | ||
| 18 | |||
| 19 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 20 | |||
| 21 | // TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20 | ||
| 22 | |||
| 23 | /// Attribute locations (same units as the tfb_varying_locs entries) where a 4-component vector begins | ||
| 24 | constexpr std::array VECTORS = { | ||
| 25 | 28, // gl_Position | ||
| 26 | 32, // Generic 0 | ||
| 27 | 36, // Generic 1 | ||
| 28 | 40, // Generic 2 | ||
| 29 | 44, // Generic 3 | ||
| 30 | 48, // Generic 4 | ||
| 31 | 52, // Generic 5 | ||
| 32 | 56, // Generic 6 | ||
| 33 | 60, // Generic 7 | ||
| 34 | 64, // Generic 8 | ||
| 35 | 68, // Generic 9 | ||
| 36 | 72, // Generic 10 | ||
| 37 | 76, // Generic 11 | ||
| 38 | 80, // Generic 12 | ||
| 39 | 84, // Generic 13 | ||
| 40 | 88, // Generic 14 | ||
| 41 | 92, // Generic 15 | ||
| 42 | 96, // Generic 16 | ||
| 43 | 100, // Generic 17 | ||
| 44 | 104, // Generic 18 | ||
| 45 | 108, // Generic 19 | ||
| 46 | 112, // Generic 20 | ||
| 47 | 116, // Generic 21 | ||
| 48 | 120, // Generic 22 | ||
| 49 | 124, // Generic 23 | ||
| 50 | 128, // Generic 24 | ||
| 51 | 132, // Generic 25 | ||
| 52 | 136, // Generic 26 | ||
| 53 | 140, // Generic 27 | ||
| 54 | 144, // Generic 28 | ||
| 55 | 148, // Generic 29 | ||
| 56 | 152, // Generic 30 | ||
| 57 | 156, // Generic 31 | ||
| 58 | 160, // gl_FrontColor | ||
| 59 | 164, // gl_FrontSecondaryColor | ||
| 60 | 168, // gl_BackColor (was a duplicate of gl_FrontColor's 160) | ||
| 61 | 172, // gl_BackSecondaryColor (was a duplicate of gl_FrontSecondaryColor's 164) | ||
| 62 | 192, // gl_TexCoord[0] | ||
| 63 | 196, // gl_TexCoord[1] | ||
| 64 | 200, // gl_TexCoord[2] | ||
| 65 | 204, // gl_TexCoord[3] | ||
| 66 | 208, // gl_TexCoord[4] | ||
| 67 | 212, // gl_TexCoord[5] | ||
| 68 | 216, // gl_TexCoord[6] | ||
| 69 | 220, // gl_TexCoord[7] | ||
| 70 | }; | ||
| 71 | } // namespace | ||
| 72 | |||
| 73 | std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) { /* Builds a map from varying location to its transform feedback placement (buffer, stride, offset, component count). */ | ||
| 74 | |||
| 75 | std::unordered_map<u8, VaryingTFB> tfb; /* Keyed by location only, so a location used by two buffers hits the "Varying already stored" check below. */ | ||
| 76 | |||
| 77 | for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { | ||
| 78 | const auto& locations = info.tfb_varying_locs[buffer]; | ||
| 79 | const auto& layout = info.tfb_layouts[buffer]; | ||
| 80 | const std::size_t varying_count = layout.varying_count; | ||
| 81 | |||
| 82 | std::size_t highest = 0; /* Largest end position in bytes of any varying seen in this buffer. */ | ||
| 83 | |||
| 84 | for (std::size_t offset = 0; offset < varying_count; ++offset) { | ||
| 85 | const std::size_t base_offset = offset; /* Remembered because `offset` may advance while components are merged below. */ | ||
| 86 | const u8 location = locations[offset]; | ||
| 87 | |||
| 88 | VaryingTFB varying; | ||
| 89 | varying.buffer = layout.stream; | ||
| 90 | varying.stride = layout.stride; | ||
| 91 | varying.offset = offset * sizeof(u32); | ||
| 92 | varying.components = 1; | ||
| 93 | |||
| 94 | if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { /* location / 4 * 4 rounds down to the start of the containing group of four. */ | ||
| 95 | UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); | ||
| 96 | |||
| 97 | const u8 base_index = location / 4; | ||
| 98 | while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { /* Fold following entries of the same group of four into this varying. */ | ||
| 99 | ++offset; | ||
| 100 | ++varying.components; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; /* emplace keeps the existing entry when the key is already present. */ | ||
| 105 | UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); | ||
| 106 | |||
| 107 | highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); | ||
| 108 | } | ||
| 109 | |||
| 110 | UNIMPLEMENTED_IF(highest != layout.stride); /* Only layouts whose varyings end exactly at the stride are handled. */ | ||
| 111 | } | ||
| 112 | return tfb; | ||
| 113 | } | ||
| 114 | |||
| 115 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h new file mode 100644 index 000000000..77d05f64c --- /dev/null +++ b/src/video_core/shader/transform_feedback.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/shader/registry.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | struct VaryingTFB { /* Transform feedback placement of one varying, as produced by BuildTransformFeedback. */ | ||
| 15 | std::size_t buffer; /* Filled from the layout's `stream` field. */ | ||
| 16 | std::size_t stride; /* Filled from the layout's `stride` field. */ | ||
| 17 | std::size_t offset; /* Index of the varying in its buffer multiplied by sizeof(u32). */ | ||
| 18 | std::size_t components; /* Number of consecutive components merged into this varying (at least 1). */ | ||
| 19 | }; | ||
| 20 | |||
| 21 | std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info); | ||
| 22 | |||
| 23 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 1655ccf16..cc7181229 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -111,6 +111,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) | |||
| 111 | return PixelFormat::RGBA16F; | 111 | return PixelFormat::RGBA16F; |
| 112 | case Tegra::RenderTargetFormat::RGBA16_UNORM: | 112 | case Tegra::RenderTargetFormat::RGBA16_UNORM: |
| 113 | return PixelFormat::RGBA16U; | 113 | return PixelFormat::RGBA16U; |
| 114 | case Tegra::RenderTargetFormat::RGBA16_SNORM: | ||
| 115 | return PixelFormat::RGBA16S; | ||
| 114 | case Tegra::RenderTargetFormat::RGBA16_UINT: | 116 | case Tegra::RenderTargetFormat::RGBA16_UINT: |
| 115 | return PixelFormat::RGBA16UI; | 117 | return PixelFormat::RGBA16UI; |
| 116 | case Tegra::RenderTargetFormat::RGBA32_FLOAT: | 118 | case Tegra::RenderTargetFormat::RGBA32_FLOAT: |
| @@ -155,6 +157,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) | |||
| 155 | return PixelFormat::R16I; | 157 | return PixelFormat::R16I; |
| 156 | case Tegra::RenderTargetFormat::R32_FLOAT: | 158 | case Tegra::RenderTargetFormat::R32_FLOAT: |
| 157 | return PixelFormat::R32F; | 159 | return PixelFormat::R32F; |
| 160 | case Tegra::RenderTargetFormat::R32_SINT: | ||
| 161 | return PixelFormat::R32I; | ||
| 158 | case Tegra::RenderTargetFormat::R32_UINT: | 162 | case Tegra::RenderTargetFormat::R32_UINT: |
| 159 | return PixelFormat::R32UI; | 163 | return PixelFormat::R32UI; |
| 160 | case Tegra::RenderTargetFormat::RG32_UINT: | 164 | case Tegra::RenderTargetFormat::RG32_UINT: |
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 0d17a93ed..ae8817465 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -25,81 +25,83 @@ enum class PixelFormat { | |||
| 25 | R8UI = 7, | 25 | R8UI = 7, |
| 26 | RGBA16F = 8, | 26 | RGBA16F = 8, |
| 27 | RGBA16U = 9, | 27 | RGBA16U = 9, |
| 28 | RGBA16UI = 10, | 28 | RGBA16S = 10, |
| 29 | R11FG11FB10F = 11, | 29 | RGBA16UI = 11, |
| 30 | RGBA32UI = 12, | 30 | R11FG11FB10F = 12, |
| 31 | DXT1 = 13, | 31 | RGBA32UI = 13, |
| 32 | DXT23 = 14, | 32 | DXT1 = 14, |
| 33 | DXT45 = 15, | 33 | DXT23 = 15, |
| 34 | DXN1 = 16, // This is also known as BC4 | 34 | DXT45 = 16, |
| 35 | DXN2UNORM = 17, | 35 | DXN1 = 17, // This is also known as BC4 |
| 36 | DXN2SNORM = 18, | 36 | DXN2UNORM = 18, |
| 37 | BC7U = 19, | 37 | DXN2SNORM = 19, |
| 38 | BC6H_UF16 = 20, | 38 | BC7U = 20, |
| 39 | BC6H_SF16 = 21, | 39 | BC6H_UF16 = 21, |
| 40 | ASTC_2D_4X4 = 22, | 40 | BC6H_SF16 = 22, |
| 41 | BGRA8 = 23, | 41 | ASTC_2D_4X4 = 23, |
| 42 | RGBA32F = 24, | 42 | BGRA8 = 24, |
| 43 | RG32F = 25, | 43 | RGBA32F = 25, |
| 44 | R32F = 26, | 44 | RG32F = 26, |
| 45 | R16F = 27, | 45 | R32F = 27, |
| 46 | R16U = 28, | 46 | R16F = 28, |
| 47 | R16S = 29, | 47 | R16U = 29, |
| 48 | R16UI = 30, | 48 | R16S = 30, |
| 49 | R16I = 31, | 49 | R16UI = 31, |
| 50 | RG16 = 32, | 50 | R16I = 32, |
| 51 | RG16F = 33, | 51 | RG16 = 33, |
| 52 | RG16UI = 34, | 52 | RG16F = 34, |
| 53 | RG16I = 35, | 53 | RG16UI = 35, |
| 54 | RG16S = 36, | 54 | RG16I = 36, |
| 55 | RGB32F = 37, | 55 | RG16S = 37, |
| 56 | RGBA8_SRGB = 38, | 56 | RGB32F = 38, |
| 57 | RG8U = 39, | 57 | RGBA8_SRGB = 39, |
| 58 | RG8S = 40, | 58 | RG8U = 40, |
| 59 | RG32UI = 41, | 59 | RG8S = 41, |
| 60 | RGBX16F = 42, | 60 | RG32UI = 42, |
| 61 | R32UI = 43, | 61 | RGBX16F = 43, |
| 62 | ASTC_2D_8X8 = 44, | 62 | R32UI = 44, |
| 63 | ASTC_2D_8X5 = 45, | 63 | R32I = 45, |
| 64 | ASTC_2D_5X4 = 46, | 64 | ASTC_2D_8X8 = 46, |
| 65 | BGRA8_SRGB = 47, | 65 | ASTC_2D_8X5 = 47, |
| 66 | DXT1_SRGB = 48, | 66 | ASTC_2D_5X4 = 48, |
| 67 | DXT23_SRGB = 49, | 67 | BGRA8_SRGB = 49, |
| 68 | DXT45_SRGB = 50, | 68 | DXT1_SRGB = 50, |
| 69 | BC7U_SRGB = 51, | 69 | DXT23_SRGB = 51, |
| 70 | R4G4B4A4U = 52, | 70 | DXT45_SRGB = 52, |
| 71 | ASTC_2D_4X4_SRGB = 53, | 71 | BC7U_SRGB = 53, |
| 72 | ASTC_2D_8X8_SRGB = 54, | 72 | R4G4B4A4U = 54, |
| 73 | ASTC_2D_8X5_SRGB = 55, | 73 | ASTC_2D_4X4_SRGB = 55, |
| 74 | ASTC_2D_5X4_SRGB = 56, | 74 | ASTC_2D_8X8_SRGB = 56, |
| 75 | ASTC_2D_5X5 = 57, | 75 | ASTC_2D_8X5_SRGB = 57, |
| 76 | ASTC_2D_5X5_SRGB = 58, | 76 | ASTC_2D_5X4_SRGB = 58, |
| 77 | ASTC_2D_10X8 = 59, | 77 | ASTC_2D_5X5 = 59, |
| 78 | ASTC_2D_10X8_SRGB = 60, | 78 | ASTC_2D_5X5_SRGB = 60, |
| 79 | ASTC_2D_6X6 = 61, | 79 | ASTC_2D_10X8 = 61, |
| 80 | ASTC_2D_6X6_SRGB = 62, | 80 | ASTC_2D_10X8_SRGB = 62, |
| 81 | ASTC_2D_10X10 = 63, | 81 | ASTC_2D_6X6 = 63, |
| 82 | ASTC_2D_10X10_SRGB = 64, | 82 | ASTC_2D_6X6_SRGB = 64, |
| 83 | ASTC_2D_12X12 = 65, | 83 | ASTC_2D_10X10 = 65, |
| 84 | ASTC_2D_12X12_SRGB = 66, | 84 | ASTC_2D_10X10_SRGB = 66, |
| 85 | ASTC_2D_8X6 = 67, | 85 | ASTC_2D_12X12 = 67, |
| 86 | ASTC_2D_8X6_SRGB = 68, | 86 | ASTC_2D_12X12_SRGB = 68, |
| 87 | ASTC_2D_6X5 = 69, | 87 | ASTC_2D_8X6 = 69, |
| 88 | ASTC_2D_6X5_SRGB = 70, | 88 | ASTC_2D_8X6_SRGB = 70, |
| 89 | E5B9G9R9F = 71, | 89 | ASTC_2D_6X5 = 71, |
| 90 | ASTC_2D_6X5_SRGB = 72, | ||
| 91 | E5B9G9R9F = 73, | ||
| 90 | 92 | ||
| 91 | MaxColorFormat, | 93 | MaxColorFormat, |
| 92 | 94 | ||
| 93 | // Depth formats | 95 | // Depth formats |
| 94 | Z32F = 72, | 96 | Z32F = 74, |
| 95 | Z16 = 73, | 97 | Z16 = 75, |
| 96 | 98 | ||
| 97 | MaxDepthFormat, | 99 | MaxDepthFormat, |
| 98 | 100 | ||
| 99 | // DepthStencil formats | 101 | // DepthStencil formats |
| 100 | Z24S8 = 74, | 102 | Z24S8 = 76, |
| 101 | S8Z24 = 75, | 103 | S8Z24 = 77, |
| 102 | Z32FS8 = 76, | 104 | Z32FS8 = 78, |
| 103 | 105 | ||
| 104 | MaxDepthStencilFormat, | 106 | MaxDepthStencilFormat, |
| 105 | 107 | ||
| @@ -137,6 +139,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ | |||
| 137 | 0, // R8UI | 139 | 0, // R8UI |
| 138 | 0, // RGBA16F | 140 | 0, // RGBA16F |
| 139 | 0, // RGBA16U | 141 | 0, // RGBA16U |
| 142 | 0, // RGBA16S | ||
| 140 | 0, // RGBA16UI | 143 | 0, // RGBA16UI |
| 141 | 0, // R11FG11FB10F | 144 | 0, // R11FG11FB10F |
| 142 | 0, // RGBA32UI | 145 | 0, // RGBA32UI |
| @@ -171,6 +174,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ | |||
| 171 | 0, // RG32UI | 174 | 0, // RG32UI |
| 172 | 0, // RGBX16F | 175 | 0, // RGBX16F |
| 173 | 0, // R32UI | 176 | 0, // R32UI |
| 177 | 0, // R32I | ||
| 174 | 2, // ASTC_2D_8X8 | 178 | 2, // ASTC_2D_8X8 |
| 175 | 2, // ASTC_2D_8X5 | 179 | 2, // ASTC_2D_8X5 |
| 176 | 2, // ASTC_2D_5X4 | 180 | 2, // ASTC_2D_5X4 |
| @@ -233,6 +237,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ | |||
| 233 | 1, // R8UI | 237 | 1, // R8UI |
| 234 | 1, // RGBA16F | 238 | 1, // RGBA16F |
| 235 | 1, // RGBA16U | 239 | 1, // RGBA16U |
| 240 | 1, // RGBA16S | ||
| 236 | 1, // RGBA16UI | 241 | 1, // RGBA16UI |
| 237 | 1, // R11FG11FB10F | 242 | 1, // R11FG11FB10F |
| 238 | 1, // RGBA32UI | 243 | 1, // RGBA32UI |
| @@ -267,6 +272,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ | |||
| 267 | 1, // RG32UI | 272 | 1, // RG32UI |
| 268 | 1, // RGBX16F | 273 | 1, // RGBX16F |
| 269 | 1, // R32UI | 274 | 1, // R32UI |
| 275 | 1, // R32I | ||
| 270 | 8, // ASTC_2D_8X8 | 276 | 8, // ASTC_2D_8X8 |
| 271 | 8, // ASTC_2D_8X5 | 277 | 8, // ASTC_2D_8X5 |
| 272 | 5, // ASTC_2D_5X4 | 278 | 5, // ASTC_2D_5X4 |
| @@ -321,6 +327,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ | |||
| 321 | 1, // R8UI | 327 | 1, // R8UI |
| 322 | 1, // RGBA16F | 328 | 1, // RGBA16F |
| 323 | 1, // RGBA16U | 329 | 1, // RGBA16U |
| 330 | 1, // RGBA16S | ||
| 324 | 1, // RGBA16UI | 331 | 1, // RGBA16UI |
| 325 | 1, // R11FG11FB10F | 332 | 1, // R11FG11FB10F |
| 326 | 1, // RGBA32UI | 333 | 1, // RGBA32UI |
| @@ -355,6 +362,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ | |||
| 355 | 1, // RG32UI | 362 | 1, // RG32UI |
| 356 | 1, // RGBX16F | 363 | 1, // RGBX16F |
| 357 | 1, // R32UI | 364 | 1, // R32UI |
| 365 | 1, // R32I | ||
| 358 | 8, // ASTC_2D_8X8 | 366 | 8, // ASTC_2D_8X8 |
| 359 | 5, // ASTC_2D_8X5 | 367 | 5, // ASTC_2D_8X5 |
| 360 | 4, // ASTC_2D_5X4 | 368 | 4, // ASTC_2D_5X4 |
| @@ -409,6 +417,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ | |||
| 409 | 8, // R8UI | 417 | 8, // R8UI |
| 410 | 64, // RGBA16F | 418 | 64, // RGBA16F |
| 411 | 64, // RGBA16U | 419 | 64, // RGBA16U |
| 420 | 64, // RGBA16S | ||
| 412 | 64, // RGBA16UI | 421 | 64, // RGBA16UI |
| 413 | 32, // R11FG11FB10F | 422 | 32, // R11FG11FB10F |
| 414 | 128, // RGBA32UI | 423 | 128, // RGBA32UI |
| @@ -443,6 +452,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ | |||
| 443 | 64, // RG32UI | 452 | 64, // RG32UI |
| 444 | 64, // RGBX16F | 453 | 64, // RGBX16F |
| 445 | 32, // R32UI | 454 | 32, // R32UI |
| 455 | 32, // R32I | ||
| 446 | 128, // ASTC_2D_8X8 | 456 | 128, // ASTC_2D_8X8 |
| 447 | 128, // ASTC_2D_8X5 | 457 | 128, // ASTC_2D_8X5 |
| 448 | 128, // ASTC_2D_5X4 | 458 | 128, // ASTC_2D_5X4 |
| @@ -512,6 +522,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table | |||
| 512 | SurfaceCompression::None, // R8UI | 522 | SurfaceCompression::None, // R8UI |
| 513 | SurfaceCompression::None, // RGBA16F | 523 | SurfaceCompression::None, // RGBA16F |
| 514 | SurfaceCompression::None, // RGBA16U | 524 | SurfaceCompression::None, // RGBA16U |
| 525 | SurfaceCompression::None, // RGBA16S | ||
| 515 | SurfaceCompression::None, // RGBA16UI | 526 | SurfaceCompression::None, // RGBA16UI |
| 516 | SurfaceCompression::None, // R11FG11FB10F | 527 | SurfaceCompression::None, // R11FG11FB10F |
| 517 | SurfaceCompression::None, // RGBA32UI | 528 | SurfaceCompression::None, // RGBA32UI |
| @@ -546,6 +557,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table | |||
| 546 | SurfaceCompression::None, // RG32UI | 557 | SurfaceCompression::None, // RG32UI |
| 547 | SurfaceCompression::None, // RGBX16F | 558 | SurfaceCompression::None, // RGBX16F |
| 548 | SurfaceCompression::None, // R32UI | 559 | SurfaceCompression::None, // R32UI |
| 560 | SurfaceCompression::None, // R32I | ||
| 549 | SurfaceCompression::Converted, // ASTC_2D_8X8 | 561 | SurfaceCompression::Converted, // ASTC_2D_8X8 |
| 550 | SurfaceCompression::Converted, // ASTC_2D_8X5 | 562 | SurfaceCompression::Converted, // ASTC_2D_8X5 |
| 551 | SurfaceCompression::Converted, // ASTC_2D_5X4 | 563 | SurfaceCompression::Converted, // ASTC_2D_5X4 |
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 81fb9f633..e151c26c4 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp | |||
| @@ -41,7 +41,7 @@ struct Table { | |||
| 41 | ComponentType alpha_component; | 41 | ComponentType alpha_component; |
| 42 | bool is_srgb; | 42 | bool is_srgb; |
| 43 | }; | 43 | }; |
| 44 | constexpr std::array<Table, 74> DefinitionTable = {{ | 44 | constexpr std::array<Table, 76> DefinitionTable = {{ |
| 45 | {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, | 45 | {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, |
| 46 | {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, | 46 | {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, |
| 47 | {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, | 47 | {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, |
| @@ -61,6 +61,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{ | |||
| 61 | {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U}, | 61 | {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U}, |
| 62 | {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S}, | 62 | {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S}, |
| 63 | 63 | ||
| 64 | {TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S}, | ||
| 64 | {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U}, | 65 | {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U}, |
| 65 | {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F}, | 66 | {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F}, |
| 66 | {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI}, | 67 | {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI}, |
| @@ -89,6 +90,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{ | |||
| 89 | 90 | ||
| 90 | {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F}, | 91 | {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F}, |
| 91 | {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI}, | 92 | {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI}, |
| 93 | {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32I}, | ||
| 92 | 94 | ||
| 93 | {TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F}, | 95 | {TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F}, |
| 94 | 96 | ||
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 38b3a4ba8..9931c5ef7 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -84,19 +84,16 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta | |||
| 84 | if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) { | 84 | if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) { |
| 85 | switch (params.pixel_format) { | 85 | switch (params.pixel_format) { |
| 86 | case PixelFormat::R16U: | 86 | case PixelFormat::R16U: |
| 87 | case PixelFormat::R16F: { | 87 | case PixelFormat::R16F: |
| 88 | params.pixel_format = PixelFormat::Z16; | 88 | params.pixel_format = PixelFormat::Z16; |
| 89 | break; | 89 | break; |
| 90 | } | 90 | case PixelFormat::R32F: |
| 91 | case PixelFormat::R32F: { | ||
| 92 | params.pixel_format = PixelFormat::Z32F; | 91 | params.pixel_format = PixelFormat::Z32F; |
| 93 | break; | 92 | break; |
| 94 | } | 93 | default: |
| 95 | default: { | ||
| 96 | UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", | 94 | UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", |
| 97 | static_cast<u32>(params.pixel_format)); | 95 | static_cast<u32>(params.pixel_format)); |
| 98 | } | 96 | } |
| 99 | } | ||
| 100 | params.type = GetFormatType(params.pixel_format); | 97 | params.type = GetFormatType(params.pixel_format); |
| 101 | } | 98 | } |
| 102 | params.type = GetFormatType(params.pixel_format); | 99 | params.type = GetFormatType(params.pixel_format); |
| @@ -116,8 +113,10 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta | |||
| 116 | params.height = tic.Height(); | 113 | params.height = tic.Height(); |
| 117 | params.depth = tic.Depth(); | 114 | params.depth = tic.Depth(); |
| 118 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); | 115 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); |
| 119 | if (params.target == SurfaceTarget::TextureCubemap || | 116 | if (params.target == SurfaceTarget::Texture2D && params.depth > 1) { |
| 120 | params.target == SurfaceTarget::TextureCubeArray) { | 117 | params.depth = 1; |
| 118 | } else if (params.target == SurfaceTarget::TextureCubemap || | ||
| 119 | params.target == SurfaceTarget::TextureCubeArray) { | ||
| 121 | params.depth *= 6; | 120 | params.depth *= 6; |
| 122 | } | 121 | } |
| 123 | params.num_levels = tic.max_mip_level + 1; | 122 | params.num_levels = tic.max_mip_level + 1; |
| @@ -168,27 +167,29 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl | |||
| 168 | return params; | 167 | return params; |
| 169 | } | 168 | } |
| 170 | 169 | ||
| 171 | SurfaceParams SurfaceParams::CreateForDepthBuffer( | 170 | SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) { |
| 172 | Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, | 171 | const auto& regs = system.GPU().Maxwell3D().regs; |
| 173 | u32 block_width, u32 block_height, u32 block_depth, | 172 | regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.type; |
| 174 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { | ||
| 175 | SurfaceParams params; | 173 | SurfaceParams params; |
| 176 | params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | 174 | params.is_tiled = regs.zeta.memory_layout.type == |
| 175 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | ||
| 177 | params.srgb_conversion = false; | 176 | params.srgb_conversion = false; |
| 178 | params.block_width = std::min(block_width, 5U); | 177 | params.block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U); |
| 179 | params.block_height = std::min(block_height, 5U); | 178 | params.block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U); |
| 180 | params.block_depth = std::min(block_depth, 5U); | 179 | params.block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U); |
| 181 | params.tile_width_spacing = 1; | 180 | params.tile_width_spacing = 1; |
| 182 | params.pixel_format = PixelFormatFromDepthFormat(format); | 181 | params.pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); |
| 183 | params.type = GetFormatType(params.pixel_format); | 182 | params.type = GetFormatType(params.pixel_format); |
| 184 | params.width = zeta_width; | 183 | params.width = regs.zeta_width; |
| 185 | params.height = zeta_height; | 184 | params.height = regs.zeta_height; |
| 186 | params.target = SurfaceTarget::Texture2D; | ||
| 187 | params.depth = 1; | ||
| 188 | params.pitch = 0; | 185 | params.pitch = 0; |
| 189 | params.num_levels = 1; | 186 | params.num_levels = 1; |
| 190 | params.emulated_levels = 1; | 187 | params.emulated_levels = 1; |
| 191 | params.is_layered = false; | 188 | |
| 189 | const bool is_layered = regs.zeta_layers > 1 && params.block_depth == 0; | ||
| 190 | params.is_layered = is_layered; | ||
| 191 | params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; | ||
| 192 | params.depth = is_layered ? regs.zeta_layers.Value() : 1U; | ||
| 192 | return params; | 193 | return params; |
| 193 | } | 194 | } |
| 194 | 195 | ||
| @@ -214,11 +215,13 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz | |||
| 214 | params.width = params.pitch / bpp; | 215 | params.width = params.pitch / bpp; |
| 215 | } | 216 | } |
| 216 | params.height = config.height; | 217 | params.height = config.height; |
| 217 | params.depth = 1; | ||
| 218 | params.target = SurfaceTarget::Texture2D; | ||
| 219 | params.num_levels = 1; | 218 | params.num_levels = 1; |
| 220 | params.emulated_levels = 1; | 219 | params.emulated_levels = 1; |
| 221 | params.is_layered = false; | 220 | |
| 221 | const bool is_layered = config.layers > 1 && params.block_depth == 0; | ||
| 222 | params.is_layered = is_layered; | ||
| 223 | params.depth = is_layered ? config.layers.Value() : 1; | ||
| 224 | params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; | ||
| 222 | return params; | 225 | return params; |
| 223 | } | 226 | } |
| 224 | 227 | ||
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 9256fd6d9..995cc3818 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h | |||
| @@ -35,10 +35,7 @@ public: | |||
| 35 | const VideoCommon::Shader::Image& entry); | 35 | const VideoCommon::Shader::Image& entry); |
| 36 | 36 | ||
| 37 | /// Creates SurfaceCachedParams for a depth buffer configuration. | 37 | /// Creates SurfaceCachedParams for a depth buffer configuration. |
| 38 | static SurfaceParams CreateForDepthBuffer( | 38 | static SurfaceParams CreateForDepthBuffer(Core::System& system); |
| 39 | Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, | ||
| 40 | u32 block_width, u32 block_height, u32 block_depth, | ||
| 41 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); | ||
| 42 | 39 | ||
| 43 | /// Creates SurfaceCachedParams from a framebuffer configuration. | 40 | /// Creates SurfaceCachedParams from a framebuffer configuration. |
| 44 | static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); | 41 | static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 0d105d386..6cdbe63d0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include "core/core.h" | 22 | #include "core/core.h" |
| 23 | #include "core/memory.h" | 23 | #include "core/memory.h" |
| 24 | #include "core/settings.h" | 24 | #include "core/settings.h" |
| 25 | #include "video_core/dirty_flags.h" | ||
| 25 | #include "video_core/engines/fermi_2d.h" | 26 | #include "video_core/engines/fermi_2d.h" |
| 26 | #include "video_core/engines/maxwell_3d.h" | 27 | #include "video_core/engines/maxwell_3d.h" |
| 27 | #include "video_core/gpu.h" | 28 | #include "video_core/gpu.h" |
| @@ -103,6 +104,11 @@ public: | |||
| 103 | if (!cache_addr) { | 104 | if (!cache_addr) { |
| 104 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | 105 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); |
| 105 | } | 106 | } |
| 107 | |||
| 108 | if (!IsTypeCompatible(tic.texture_type, entry)) { | ||
| 109 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||
| 110 | } | ||
| 111 | |||
| 106 | const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; | 112 | const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; |
| 107 | const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); | 113 | const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); |
| 108 | if (guard_samplers) { | 114 | if (guard_samplers) { |
| @@ -142,11 +148,10 @@ public: | |||
| 142 | TView GetDepthBufferSurface(bool preserve_contents) { | 148 | TView GetDepthBufferSurface(bool preserve_contents) { |
| 143 | std::lock_guard lock{mutex}; | 149 | std::lock_guard lock{mutex}; |
| 144 | auto& maxwell3d = system.GPU().Maxwell3D(); | 150 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 145 | 151 | if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { | |
| 146 | if (!maxwell3d.dirty.depth_buffer) { | ||
| 147 | return depth_buffer.view; | 152 | return depth_buffer.view; |
| 148 | } | 153 | } |
| 149 | maxwell3d.dirty.depth_buffer = false; | 154 | maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false; |
| 150 | 155 | ||
| 151 | const auto& regs{maxwell3d.regs}; | 156 | const auto& regs{maxwell3d.regs}; |
| 152 | const auto gpu_addr{regs.zeta.Address()}; | 157 | const auto gpu_addr{regs.zeta.Address()}; |
| @@ -160,10 +165,7 @@ public: | |||
| 160 | SetEmptyDepthBuffer(); | 165 | SetEmptyDepthBuffer(); |
| 161 | return {}; | 166 | return {}; |
| 162 | } | 167 | } |
| 163 | const auto depth_params{SurfaceParams::CreateForDepthBuffer( | 168 | const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; |
| 164 | system, regs.zeta_width, regs.zeta_height, regs.zeta.format, | ||
| 165 | regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, | ||
| 166 | regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; | ||
| 167 | auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); | 169 | auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); |
| 168 | if (depth_buffer.target) | 170 | if (depth_buffer.target) |
| 169 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); | 171 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); |
| @@ -178,10 +180,10 @@ public: | |||
| 178 | std::lock_guard lock{mutex}; | 180 | std::lock_guard lock{mutex}; |
| 179 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | 181 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); |
| 180 | auto& maxwell3d = system.GPU().Maxwell3D(); | 182 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 181 | if (!maxwell3d.dirty.render_target[index]) { | 183 | if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) { |
| 182 | return render_targets[index].view; | 184 | return render_targets[index].view; |
| 183 | } | 185 | } |
| 184 | maxwell3d.dirty.render_target[index] = false; | 186 | maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false; |
| 185 | 187 | ||
| 186 | const auto& regs{maxwell3d.regs}; | 188 | const auto& regs{maxwell3d.regs}; |
| 187 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || | 189 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || |
| @@ -323,14 +325,14 @@ protected: | |||
| 323 | virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; | 325 | virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; |
| 324 | 326 | ||
| 325 | void ManageRenderTargetUnregister(TSurface& surface) { | 327 | void ManageRenderTargetUnregister(TSurface& surface) { |
| 326 | auto& maxwell3d = system.GPU().Maxwell3D(); | 328 | auto& dirty = system.GPU().Maxwell3D().dirty; |
| 327 | const u32 index = surface->GetRenderTarget(); | 329 | const u32 index = surface->GetRenderTarget(); |
| 328 | if (index == DEPTH_RT) { | 330 | if (index == DEPTH_RT) { |
| 329 | maxwell3d.dirty.depth_buffer = true; | 331 | dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; |
| 330 | } else { | 332 | } else { |
| 331 | maxwell3d.dirty.render_target[index] = true; | 333 | dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true; |
| 332 | } | 334 | } |
| 333 | maxwell3d.dirty.render_settings = true; | 335 | dirty.flags[VideoCommon::Dirty::RenderTargets] = true; |
| 334 | } | 336 | } |
| 335 | 337 | ||
| 336 | void Register(TSurface surface) { | 338 | void Register(TSurface surface) { |
| @@ -917,13 +919,15 @@ private: | |||
| 917 | params.width = 1; | 919 | params.width = 1; |
| 918 | params.height = 1; | 920 | params.height = 1; |
| 919 | params.depth = 1; | 921 | params.depth = 1; |
| 922 | if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) { | ||
| 923 | params.depth = 6; | ||
| 924 | } | ||
| 920 | params.pitch = 4; | 925 | params.pitch = 4; |
| 921 | params.num_levels = 1; | 926 | params.num_levels = 1; |
| 922 | params.emulated_levels = 1; | 927 | params.emulated_levels = 1; |
| 923 | params.pixel_format = VideoCore::Surface::PixelFormat::RGBA16F; | 928 | params.pixel_format = VideoCore::Surface::PixelFormat::R8U; |
| 924 | params.type = VideoCore::Surface::SurfaceType::ColorTexture; | 929 | params.type = VideoCore::Surface::SurfaceType::ColorTexture; |
| 925 | auto surface = CreateSurface(0ULL, params); | 930 | auto surface = CreateSurface(0ULL, params); |
| 926 | invalid_memory.clear(); | ||
| 927 | invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); | 931 | invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); |
| 928 | surface->UploadTexture(invalid_memory); | 932 | surface->UploadTexture(invalid_memory); |
| 929 | surface->MarkAsModified(false, Tick()); | 933 | surface->MarkAsModified(false, Tick()); |
| @@ -1085,6 +1089,36 @@ private: | |||
| 1085 | return siblings_table[static_cast<std::size_t>(format)]; | 1089 | return siblings_table[static_cast<std::size_t>(format)]; |
| 1086 | } | 1090 | } |
| 1087 | 1091 | ||
| 1092 | /// Returns true the shader sampler entry is compatible with the TIC texture type. | ||
| 1093 | static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type, | ||
| 1094 | const VideoCommon::Shader::Sampler& entry) { | ||
| 1095 | const auto shader_type = entry.GetType(); | ||
| 1096 | switch (tic_type) { | ||
| 1097 | case Tegra::Texture::TextureType::Texture1D: | ||
| 1098 | case Tegra::Texture::TextureType::Texture1DArray: | ||
| 1099 | return shader_type == Tegra::Shader::TextureType::Texture1D; | ||
| 1100 | case Tegra::Texture::TextureType::Texture1DBuffer: | ||
| 1101 | // TODO(Rodrigo): Assume as valid for now | ||
| 1102 | return true; | ||
| 1103 | case Tegra::Texture::TextureType::Texture2D: | ||
| 1104 | case Tegra::Texture::TextureType::Texture2DNoMipmap: | ||
| 1105 | return shader_type == Tegra::Shader::TextureType::Texture2D; | ||
| 1106 | case Tegra::Texture::TextureType::Texture2DArray: | ||
| 1107 | return shader_type == Tegra::Shader::TextureType::Texture2D || | ||
| 1108 | shader_type == Tegra::Shader::TextureType::TextureCube; | ||
| 1109 | case Tegra::Texture::TextureType::Texture3D: | ||
| 1110 | return shader_type == Tegra::Shader::TextureType::Texture3D; | ||
| 1111 | case Tegra::Texture::TextureType::TextureCubeArray: | ||
| 1112 | case Tegra::Texture::TextureType::TextureCubemap: | ||
| 1113 | if (shader_type == Tegra::Shader::TextureType::TextureCube) { | ||
| 1114 | return true; | ||
| 1115 | } | ||
| 1116 | return shader_type == Tegra::Shader::TextureType::Texture2D && entry.IsArray(); | ||
| 1117 | } | ||
| 1118 | UNREACHABLE(); | ||
| 1119 | return true; | ||
| 1120 | } | ||
| 1121 | |||
| 1088 | struct FramebufferTargetInfo { | 1122 | struct FramebufferTargetInfo { |
| 1089 | TSurface target; | 1123 | TSurface target; |
| 1090 | TView view; | 1124 | TView view; |
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 33bd31865..062b4f252 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -17,26 +17,37 @@ | |||
| 17 | 17 | ||
| 18 | #include <algorithm> | 18 | #include <algorithm> |
| 19 | #include <cassert> | 19 | #include <cassert> |
| 20 | #include <cstdint> | ||
| 21 | #include <cstring> | 20 | #include <cstring> |
| 22 | #include <vector> | 21 | #include <vector> |
| 23 | 22 | ||
| 23 | #include "common/common_types.h" | ||
| 24 | |||
| 24 | #include "video_core/textures/astc.h" | 25 | #include "video_core/textures/astc.h" |
| 25 | 26 | ||
| 27 | namespace { | ||
| 28 | |||
| 29 | /// Count the number of bits set in a number. | ||
| 30 | constexpr u32 Popcnt(u32 n) { | ||
| 31 | u32 c = 0; | ||
| 32 | for (; n; c++) { | ||
| 33 | n &= n - 1; | ||
| 34 | } | ||
| 35 | return c; | ||
| 36 | } | ||
| 37 | |||
| 38 | } // Anonymous namespace | ||
| 39 | |||
| 26 | class InputBitStream { | 40 | class InputBitStream { |
| 27 | public: | 41 | public: |
| 28 | explicit InputBitStream(const unsigned char* ptr, int start_offset = 0) | 42 | explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) |
| 29 | : m_CurByte(ptr), m_NextBit(start_offset % 8) {} | 43 | : m_CurByte(ptr), m_NextBit(start_offset % 8) {} |
| 30 | 44 | ||
| 31 | ~InputBitStream() = default; | 45 | std::size_t GetBitsRead() const { |
| 32 | |||
| 33 | int GetBitsRead() const { | ||
| 34 | return m_BitsRead; | 46 | return m_BitsRead; |
| 35 | } | 47 | } |
| 36 | 48 | ||
| 37 | int ReadBit() { | 49 | u32 ReadBit() { |
| 38 | 50 | u32 bit = *m_CurByte >> m_NextBit++; | |
| 39 | int bit = *m_CurByte >> m_NextBit++; | ||
| 40 | while (m_NextBit >= 8) { | 51 | while (m_NextBit >= 8) { |
| 41 | m_NextBit -= 8; | 52 | m_NextBit -= 8; |
| 42 | m_CurByte++; | 53 | m_CurByte++; |
| @@ -46,57 +57,66 @@ public: | |||
| 46 | return bit & 1; | 57 | return bit & 1; |
| 47 | } | 58 | } |
| 48 | 59 | ||
| 49 | unsigned int ReadBits(unsigned int nBits) { | 60 | u32 ReadBits(std::size_t nBits) { |
| 50 | unsigned int ret = 0; | 61 | u32 ret = 0; |
| 51 | for (unsigned int i = 0; i < nBits; i++) { | 62 | for (std::size_t i = 0; i < nBits; ++i) { |
| 63 | ret |= (ReadBit() & 1) << i; | ||
| 64 | } | ||
| 65 | return ret; | ||
| 66 | } | ||
| 67 | |||
| 68 | template <std::size_t nBits> | ||
| 69 | u32 ReadBits() { | ||
| 70 | u32 ret = 0; | ||
| 71 | for (std::size_t i = 0; i < nBits; ++i) { | ||
| 52 | ret |= (ReadBit() & 1) << i; | 72 | ret |= (ReadBit() & 1) << i; |
| 53 | } | 73 | } |
| 54 | return ret; | 74 | return ret; |
| 55 | } | 75 | } |
| 56 | 76 | ||
| 57 | private: | 77 | private: |
| 58 | const unsigned char* m_CurByte; | 78 | const u8* m_CurByte; |
| 59 | int m_NextBit = 0; | 79 | std::size_t m_NextBit = 0; |
| 60 | int m_BitsRead = 0; | 80 | std::size_t m_BitsRead = 0; |
| 61 | }; | 81 | }; |
| 62 | 82 | ||
| 63 | class OutputBitStream { | 83 | class OutputBitStream { |
| 64 | public: | 84 | public: |
| 65 | explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) | 85 | explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0) |
| 66 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} | 86 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} |
| 67 | 87 | ||
| 68 | ~OutputBitStream() = default; | 88 | ~OutputBitStream() = default; |
| 69 | 89 | ||
| 70 | int GetBitsWritten() const { | 90 | s32 GetBitsWritten() const { |
| 71 | return m_BitsWritten; | 91 | return m_BitsWritten; |
| 72 | } | 92 | } |
| 73 | 93 | ||
| 74 | void WriteBitsR(unsigned int val, unsigned int nBits) { | 94 | void WriteBitsR(u32 val, u32 nBits) { |
| 75 | for (unsigned int i = 0; i < nBits; i++) { | 95 | for (u32 i = 0; i < nBits; i++) { |
| 76 | WriteBit((val >> (nBits - i - 1)) & 1); | 96 | WriteBit((val >> (nBits - i - 1)) & 1); |
| 77 | } | 97 | } |
| 78 | } | 98 | } |
| 79 | 99 | ||
| 80 | void WriteBits(unsigned int val, unsigned int nBits) { | 100 | void WriteBits(u32 val, u32 nBits) { |
| 81 | for (unsigned int i = 0; i < nBits; i++) { | 101 | for (u32 i = 0; i < nBits; i++) { |
| 82 | WriteBit((val >> i) & 1); | 102 | WriteBit((val >> i) & 1); |
| 83 | } | 103 | } |
| 84 | } | 104 | } |
| 85 | 105 | ||
| 86 | private: | 106 | private: |
| 87 | void WriteBit(int b) { | 107 | void WriteBit(s32 b) { |
| 88 | 108 | ||
| 89 | if (done) | 109 | if (done) |
| 90 | return; | 110 | return; |
| 91 | 111 | ||
| 92 | const unsigned int mask = 1 << m_NextBit++; | 112 | const u32 mask = 1 << m_NextBit++; |
| 93 | 113 | ||
| 94 | // clear the bit | 114 | // clear the bit |
| 95 | *m_CurByte &= static_cast<unsigned char>(~mask); | 115 | *m_CurByte &= static_cast<u8>(~mask); |
| 96 | 116 | ||
| 97 | // Write the bit, if necessary | 117 | // Write the bit, if necessary |
| 98 | if (b) | 118 | if (b) |
| 99 | *m_CurByte |= static_cast<unsigned char>(mask); | 119 | *m_CurByte |= static_cast<u8>(mask); |
| 100 | 120 | ||
| 101 | // Next byte? | 121 | // Next byte? |
| 102 | if (m_NextBit >= 8) { | 122 | if (m_NextBit >= 8) { |
| @@ -107,10 +127,10 @@ private: | |||
| 107 | done = done || ++m_BitsWritten >= m_NumBits; | 127 | done = done || ++m_BitsWritten >= m_NumBits; |
| 108 | } | 128 | } |
| 109 | 129 | ||
| 110 | int m_BitsWritten = 0; | 130 | s32 m_BitsWritten = 0; |
| 111 | const int m_NumBits; | 131 | const s32 m_NumBits; |
| 112 | unsigned char* m_CurByte; | 132 | u8* m_CurByte; |
| 113 | int m_NextBit = 0; | 133 | s32 m_NextBit = 0; |
| 114 | 134 | ||
| 115 | bool done = false; | 135 | bool done = false; |
| 116 | }; | 136 | }; |
| @@ -123,20 +143,20 @@ public: | |||
| 123 | Bits(const Bits&) = delete; | 143 | Bits(const Bits&) = delete; |
| 124 | Bits& operator=(const Bits&) = delete; | 144 | Bits& operator=(const Bits&) = delete; |
| 125 | 145 | ||
| 126 | uint8_t operator[](uint32_t bitPos) const { | 146 | u8 operator[](u32 bitPos) const { |
| 127 | return static_cast<uint8_t>((m_Bits >> bitPos) & 1); | 147 | return static_cast<u8>((m_Bits >> bitPos) & 1); |
| 128 | } | 148 | } |
| 129 | 149 | ||
| 130 | IntType operator()(uint32_t start, uint32_t end) const { | 150 | IntType operator()(u32 start, u32 end) const { |
| 131 | if (start == end) { | 151 | if (start == end) { |
| 132 | return (*this)[start]; | 152 | return (*this)[start]; |
| 133 | } else if (start > end) { | 153 | } else if (start > end) { |
| 134 | uint32_t t = start; | 154 | u32 t = start; |
| 135 | start = end; | 155 | start = end; |
| 136 | end = t; | 156 | end = t; |
| 137 | } | 157 | } |
| 138 | 158 | ||
| 139 | uint64_t mask = (1 << (end - start + 1)) - 1; | 159 | u64 mask = (1 << (end - start + 1)) - 1; |
| 140 | return (m_Bits >> start) & static_cast<IntType>(mask); | 160 | return (m_Bits >> start) & static_cast<IntType>(mask); |
| 141 | } | 161 | } |
| 142 | 162 | ||
| @@ -144,273 +164,236 @@ private: | |||
| 144 | const IntType& m_Bits; | 164 | const IntType& m_Bits; |
| 145 | }; | 165 | }; |
| 146 | 166 | ||
| 147 | enum EIntegerEncoding { eIntegerEncoding_JustBits, eIntegerEncoding_Quint, eIntegerEncoding_Trit }; | 167 | enum class IntegerEncoding { JustBits, Qus32, Trit }; |
| 148 | |||
| 149 | class IntegerEncodedValue { | ||
| 150 | private: | ||
| 151 | const EIntegerEncoding m_Encoding; | ||
| 152 | const uint32_t m_NumBits; | ||
| 153 | uint32_t m_BitValue; | ||
| 154 | union { | ||
| 155 | uint32_t m_QuintValue; | ||
| 156 | uint32_t m_TritValue; | ||
| 157 | }; | ||
| 158 | 168 | ||
| 159 | public: | 169 | struct IntegerEncodedValue { |
| 160 | // Jank, but we're not doing any heavy lifting in this class, so it's | 170 | constexpr IntegerEncodedValue() = default; |
| 161 | // probably OK. It allows us to use these in std::vectors... | ||
| 162 | IntegerEncodedValue& operator=(const IntegerEncodedValue& other) { | ||
| 163 | new (this) IntegerEncodedValue(other); | ||
| 164 | return *this; | ||
| 165 | } | ||
| 166 | 171 | ||
| 167 | IntegerEncodedValue(EIntegerEncoding encoding, uint32_t numBits) | 172 | constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_) |
| 168 | : m_Encoding(encoding), m_NumBits(numBits) {} | 173 | : encoding{encoding_}, num_bits{num_bits_} {} |
| 169 | 174 | ||
| 170 | EIntegerEncoding GetEncoding() const { | 175 | constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const { |
| 171 | return m_Encoding; | 176 | return encoding == other.encoding && num_bits == other.num_bits; |
| 172 | } | ||
| 173 | uint32_t BaseBitLength() const { | ||
| 174 | return m_NumBits; | ||
| 175 | } | ||
| 176 | |||
| 177 | uint32_t GetBitValue() const { | ||
| 178 | return m_BitValue; | ||
| 179 | } | ||
| 180 | void SetBitValue(uint32_t val) { | ||
| 181 | m_BitValue = val; | ||
| 182 | } | ||
| 183 | |||
| 184 | uint32_t GetTritValue() const { | ||
| 185 | return m_TritValue; | ||
| 186 | } | ||
| 187 | void SetTritValue(uint32_t val) { | ||
| 188 | m_TritValue = val; | ||
| 189 | } | ||
| 190 | |||
| 191 | uint32_t GetQuintValue() const { | ||
| 192 | return m_QuintValue; | ||
| 193 | } | ||
| 194 | void SetQuintValue(uint32_t val) { | ||
| 195 | m_QuintValue = val; | ||
| 196 | } | ||
| 197 | |||
| 198 | bool MatchesEncoding(const IntegerEncodedValue& other) const { | ||
| 199 | return m_Encoding == other.m_Encoding && m_NumBits == other.m_NumBits; | ||
| 200 | } | 177 | } |
| 201 | 178 | ||
| 202 | // Returns the number of bits required to encode nVals values. | 179 | // Returns the number of bits required to encode nVals values. |
| 203 | uint32_t GetBitLength(uint32_t nVals) const { | 180 | u32 GetBitLength(u32 nVals) const { |
| 204 | uint32_t totalBits = m_NumBits * nVals; | 181 | u32 totalBits = num_bits * nVals; |
| 205 | if (m_Encoding == eIntegerEncoding_Trit) { | 182 | if (encoding == IntegerEncoding::Trit) { |
| 206 | totalBits += (nVals * 8 + 4) / 5; | 183 | totalBits += (nVals * 8 + 4) / 5; |
| 207 | } else if (m_Encoding == eIntegerEncoding_Quint) { | 184 | } else if (encoding == IntegerEncoding::Qus32) { |
| 208 | totalBits += (nVals * 7 + 2) / 3; | 185 | totalBits += (nVals * 7 + 2) / 3; |
| 209 | } | 186 | } |
| 210 | return totalBits; | 187 | return totalBits; |
| 211 | } | 188 | } |
| 212 | 189 | ||
| 213 | // Count the number of bits set in a number. | 190 | IntegerEncoding encoding{}; |
| 214 | static inline uint32_t Popcnt(uint32_t n) { | 191 | u32 num_bits = 0; |
| 215 | uint32_t c; | 192 | u32 bit_value = 0; |
| 216 | for (c = 0; n; c++) { | 193 | union { |
| 217 | n &= n - 1; | 194 | u32 qus32_value = 0; |
| 195 | u32 trit_value; | ||
| 196 | }; | ||
| 197 | }; | ||
| 198 | |||
| 199 | static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, | ||
| 200 | u32 nBitsPerValue) { | ||
| 201 | // Implement the algorithm in section C.2.12 | ||
| 202 | u32 m[5]; | ||
| 203 | u32 t[5]; | ||
| 204 | u32 T; | ||
| 205 | |||
| 206 | // Read the trit encoded block according to | ||
| 207 | // table C.2.14 | ||
| 208 | m[0] = bits.ReadBits(nBitsPerValue); | ||
| 209 | T = bits.ReadBits<2>(); | ||
| 210 | m[1] = bits.ReadBits(nBitsPerValue); | ||
| 211 | T |= bits.ReadBits<2>() << 2; | ||
| 212 | m[2] = bits.ReadBits(nBitsPerValue); | ||
| 213 | T |= bits.ReadBit() << 4; | ||
| 214 | m[3] = bits.ReadBits(nBitsPerValue); | ||
| 215 | T |= bits.ReadBits<2>() << 5; | ||
| 216 | m[4] = bits.ReadBits(nBitsPerValue); | ||
| 217 | T |= bits.ReadBit() << 7; | ||
| 218 | |||
| 219 | u32 C = 0; | ||
| 220 | |||
| 221 | Bits<u32> Tb(T); | ||
| 222 | if (Tb(2, 4) == 7) { | ||
| 223 | C = (Tb(5, 7) << 2) | Tb(0, 1); | ||
| 224 | t[4] = t[3] = 2; | ||
| 225 | } else { | ||
| 226 | C = Tb(0, 4); | ||
| 227 | if (Tb(5, 6) == 3) { | ||
| 228 | t[4] = 2; | ||
| 229 | t[3] = Tb[7]; | ||
| 230 | } else { | ||
| 231 | t[4] = Tb[7]; | ||
| 232 | t[3] = Tb(5, 6); | ||
| 218 | } | 233 | } |
| 219 | return c; | ||
| 220 | } | 234 | } |
| 221 | 235 | ||
| 222 | // Returns a new instance of this struct that corresponds to the | 236 | Bits<u32> Cb(C); |
| 223 | // can take no more than maxval values | 237 | if (Cb(0, 1) == 3) { |
| 224 | static IntegerEncodedValue CreateEncoding(uint32_t maxVal) { | 238 | t[2] = 2; |
| 225 | while (maxVal > 0) { | 239 | t[1] = Cb[4]; |
| 226 | uint32_t check = maxVal + 1; | 240 | t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]); |
| 227 | 241 | } else if (Cb(2, 3) == 3) { | |
| 228 | // Is maxVal a power of two? | 242 | t[2] = 2; |
| 229 | if (!(check & (check - 1))) { | 243 | t[1] = 2; |
| 230 | return IntegerEncodedValue(eIntegerEncoding_JustBits, Popcnt(maxVal)); | 244 | t[0] = Cb(0, 1); |
| 231 | } | 245 | } else { |
| 232 | 246 | t[2] = Cb[4]; | |
| 233 | // Is maxVal of the type 3*2^n - 1? | 247 | t[1] = Cb(2, 3); |
| 234 | if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { | 248 | t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]); |
| 235 | return IntegerEncodedValue(eIntegerEncoding_Trit, Popcnt(check / 3 - 1)); | 249 | } |
| 236 | } | ||
| 237 | 250 | ||
| 238 | // Is maxVal of the type 5*2^n - 1? | 251 | for (std::size_t i = 0; i < 5; ++i) { |
| 239 | if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { | 252 | IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Trit, nBitsPerValue); |
| 240 | return IntegerEncodedValue(eIntegerEncoding_Quint, Popcnt(check / 5 - 1)); | 253 | val.bit_value = m[i]; |
| 241 | } | 254 | val.trit_value = t[i]; |
| 255 | } | ||
| 256 | } | ||
| 242 | 257 | ||
| 243 | // Apparently it can't be represented with a bounded integer sequence... | 258 | static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, |
| 244 | // just iterate. | 259 | u32 nBitsPerValue) { |
| 245 | maxVal--; | 260 | // Implement the algorithm in section C.2.12 |
| 261 | u32 m[3]; | ||
| 262 | u32 q[3]; | ||
| 263 | u32 Q; | ||
| 264 | |||
| 265 | // Read the trit encoded block according to | ||
| 266 | // table C.2.15 | ||
| 267 | m[0] = bits.ReadBits(nBitsPerValue); | ||
| 268 | Q = bits.ReadBits<3>(); | ||
| 269 | m[1] = bits.ReadBits(nBitsPerValue); | ||
| 270 | Q |= bits.ReadBits<2>() << 3; | ||
| 271 | m[2] = bits.ReadBits(nBitsPerValue); | ||
| 272 | Q |= bits.ReadBits<2>() << 5; | ||
| 273 | |||
| 274 | Bits<u32> Qb(Q); | ||
| 275 | if (Qb(1, 2) == 3 && Qb(5, 6) == 0) { | ||
| 276 | q[0] = q[1] = 4; | ||
| 277 | q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]); | ||
| 278 | } else { | ||
| 279 | u32 C = 0; | ||
| 280 | if (Qb(1, 2) == 3) { | ||
| 281 | q[2] = 4; | ||
| 282 | C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0]; | ||
| 283 | } else { | ||
| 284 | q[2] = Qb(5, 6); | ||
| 285 | C = Qb(0, 4); | ||
| 246 | } | 286 | } |
| 247 | return IntegerEncodedValue(eIntegerEncoding_JustBits, 0); | ||
| 248 | } | ||
| 249 | |||
| 250 | // Fills result with the values that are encoded in the given | ||
| 251 | // bitstream. We must know beforehand what the maximum possible | ||
| 252 | // value is, and how many values we're decoding. | ||
| 253 | static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, | ||
| 254 | InputBitStream& bits, uint32_t maxRange, uint32_t nValues) { | ||
| 255 | // Determine encoding parameters | ||
| 256 | IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); | ||
| 257 | |||
| 258 | // Start decoding | ||
| 259 | uint32_t nValsDecoded = 0; | ||
| 260 | while (nValsDecoded < nValues) { | ||
| 261 | switch (val.GetEncoding()) { | ||
| 262 | case eIntegerEncoding_Quint: | ||
| 263 | DecodeQuintBlock(bits, result, val.BaseBitLength()); | ||
| 264 | nValsDecoded += 3; | ||
| 265 | break; | ||
| 266 | 287 | ||
| 267 | case eIntegerEncoding_Trit: | 288 | Bits<u32> Cb(C); |
| 268 | DecodeTritBlock(bits, result, val.BaseBitLength()); | 289 | if (Cb(0, 2) == 5) { |
| 269 | nValsDecoded += 5; | 290 | q[1] = 4; |
| 270 | break; | 291 | q[0] = Cb(3, 4); |
| 271 | 292 | } else { | |
| 272 | case eIntegerEncoding_JustBits: | 293 | q[1] = Cb(3, 4); |
| 273 | val.SetBitValue(bits.ReadBits(val.BaseBitLength())); | 294 | q[0] = Cb(0, 2); |
| 274 | result.push_back(val); | ||
| 275 | nValsDecoded++; | ||
| 276 | break; | ||
| 277 | } | ||
| 278 | } | 295 | } |
| 279 | } | 296 | } |
| 280 | 297 | ||
| 281 | private: | 298 | for (std::size_t i = 0; i < 3; ++i) { |
| 282 | static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, | 299 | IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Qus32, nBitsPerValue); |
| 283 | uint32_t nBitsPerValue) { | 300 | val.bit_value = m[i]; |
| 284 | // Implement the algorithm in section C.2.12 | 301 | val.qus32_value = q[i]; |
| 285 | uint32_t m[5]; | 302 | } |
| 286 | uint32_t t[5]; | 303 | } |
| 287 | uint32_t T; | 304 | |
| 288 | 305 | // Returns a new instance of this struct that corresponds to the | |
| 289 | // Read the trit encoded block according to | 306 | // can take no more than maxval values |
| 290 | // table C.2.14 | 307 | static constexpr IntegerEncodedValue CreateEncoding(u32 maxVal) { |
| 291 | m[0] = bits.ReadBits(nBitsPerValue); | 308 | while (maxVal > 0) { |
| 292 | T = bits.ReadBits(2); | 309 | u32 check = maxVal + 1; |
| 293 | m[1] = bits.ReadBits(nBitsPerValue); | 310 | |
| 294 | T |= bits.ReadBits(2) << 2; | 311 | // Is maxVal a power of two? |
| 295 | m[2] = bits.ReadBits(nBitsPerValue); | 312 | if (!(check & (check - 1))) { |
| 296 | T |= bits.ReadBit() << 4; | 313 | return IntegerEncodedValue(IntegerEncoding::JustBits, Popcnt(maxVal)); |
| 297 | m[3] = bits.ReadBits(nBitsPerValue); | ||
| 298 | T |= bits.ReadBits(2) << 5; | ||
| 299 | m[4] = bits.ReadBits(nBitsPerValue); | ||
| 300 | T |= bits.ReadBit() << 7; | ||
| 301 | |||
| 302 | uint32_t C = 0; | ||
| 303 | |||
| 304 | Bits<uint32_t> Tb(T); | ||
| 305 | if (Tb(2, 4) == 7) { | ||
| 306 | C = (Tb(5, 7) << 2) | Tb(0, 1); | ||
| 307 | t[4] = t[3] = 2; | ||
| 308 | } else { | ||
| 309 | C = Tb(0, 4); | ||
| 310 | if (Tb(5, 6) == 3) { | ||
| 311 | t[4] = 2; | ||
| 312 | t[3] = Tb[7]; | ||
| 313 | } else { | ||
| 314 | t[4] = Tb[7]; | ||
| 315 | t[3] = Tb(5, 6); | ||
| 316 | } | ||
| 317 | } | 314 | } |
| 318 | 315 | ||
| 319 | Bits<uint32_t> Cb(C); | 316 | // Is maxVal of the type 3*2^n - 1? |
| 320 | if (Cb(0, 1) == 3) { | 317 | if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { |
| 321 | t[2] = 2; | 318 | return IntegerEncodedValue(IntegerEncoding::Trit, Popcnt(check / 3 - 1)); |
| 322 | t[1] = Cb[4]; | ||
| 323 | t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]); | ||
| 324 | } else if (Cb(2, 3) == 3) { | ||
| 325 | t[2] = 2; | ||
| 326 | t[1] = 2; | ||
| 327 | t[0] = Cb(0, 1); | ||
| 328 | } else { | ||
| 329 | t[2] = Cb[4]; | ||
| 330 | t[1] = Cb(2, 3); | ||
| 331 | t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]); | ||
| 332 | } | 319 | } |
| 333 | 320 | ||
| 334 | for (uint32_t i = 0; i < 5; i++) { | 321 | // Is maxVal of the type 5*2^n - 1? |
| 335 | IntegerEncodedValue val(eIntegerEncoding_Trit, nBitsPerValue); | 322 | if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { |
| 336 | val.SetBitValue(m[i]); | 323 | return IntegerEncodedValue(IntegerEncoding::Qus32, Popcnt(check / 5 - 1)); |
| 337 | val.SetTritValue(t[i]); | ||
| 338 | result.push_back(val); | ||
| 339 | } | 324 | } |
| 325 | |||
| 326 | // Apparently it can't be represented with a bounded integer sequence... | ||
| 327 | // just iterate. | ||
| 328 | maxVal--; | ||
| 340 | } | 329 | } |
| 330 | return IntegerEncodedValue(IntegerEncoding::JustBits, 0); | ||
| 331 | } | ||
| 341 | 332 | ||
| 342 | static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, | 333 | static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() { |
| 343 | uint32_t nBitsPerValue) { | 334 | std::array<IntegerEncodedValue, 256> encodings{}; |
| 344 | // Implement the algorithm in section C.2.12 | 335 | for (std::size_t i = 0; i < encodings.size(); ++i) { |
| 345 | uint32_t m[3]; | 336 | encodings[i] = CreateEncoding(static_cast<u32>(i)); |
| 346 | uint32_t q[3]; | 337 | } |
| 347 | uint32_t Q; | 338 | return encodings; |
| 348 | 339 | } | |
| 349 | // Read the trit encoded block according to | ||
| 350 | // table C.2.15 | ||
| 351 | m[0] = bits.ReadBits(nBitsPerValue); | ||
| 352 | Q = bits.ReadBits(3); | ||
| 353 | m[1] = bits.ReadBits(nBitsPerValue); | ||
| 354 | Q |= bits.ReadBits(2) << 3; | ||
| 355 | m[2] = bits.ReadBits(nBitsPerValue); | ||
| 356 | Q |= bits.ReadBits(2) << 5; | ||
| 357 | |||
| 358 | Bits<uint32_t> Qb(Q); | ||
| 359 | if (Qb(1, 2) == 3 && Qb(5, 6) == 0) { | ||
| 360 | q[0] = q[1] = 4; | ||
| 361 | q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]); | ||
| 362 | } else { | ||
| 363 | uint32_t C = 0; | ||
| 364 | if (Qb(1, 2) == 3) { | ||
| 365 | q[2] = 4; | ||
| 366 | C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0]; | ||
| 367 | } else { | ||
| 368 | q[2] = Qb(5, 6); | ||
| 369 | C = Qb(0, 4); | ||
| 370 | } | ||
| 371 | 340 | ||
| 372 | Bits<uint32_t> Cb(C); | 341 | static constexpr std::array EncodingsValues = MakeEncodedValues(); |
| 373 | if (Cb(0, 2) == 5) { | 342 | |
| 374 | q[1] = 4; | 343 | // Fills result with the values that are encoded in the given |
| 375 | q[0] = Cb(3, 4); | 344 | // bitstream. We must know beforehand what the maximum possible |
| 376 | } else { | 345 | // value is, and how many values we're decoding. |
| 377 | q[1] = Cb(3, 4); | 346 | static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits, |
| 378 | q[0] = Cb(0, 2); | 347 | u32 maxRange, u32 nValues) { |
| 379 | } | 348 | // Determine encoding parameters |
| 380 | } | 349 | IntegerEncodedValue val = EncodingsValues[maxRange]; |
| 350 | |||
| 351 | // Start decoding | ||
| 352 | u32 nValsDecoded = 0; | ||
| 353 | while (nValsDecoded < nValues) { | ||
| 354 | switch (val.encoding) { | ||
| 355 | case IntegerEncoding::Qus32: | ||
| 356 | DecodeQus32Block(bits, result, val.num_bits); | ||
| 357 | nValsDecoded += 3; | ||
| 358 | break; | ||
| 359 | |||
| 360 | case IntegerEncoding::Trit: | ||
| 361 | DecodeTritBlock(bits, result, val.num_bits); | ||
| 362 | nValsDecoded += 5; | ||
| 363 | break; | ||
| 381 | 364 | ||
| 382 | for (uint32_t i = 0; i < 3; i++) { | 365 | case IntegerEncoding::JustBits: |
| 383 | IntegerEncodedValue val(eIntegerEncoding_Quint, nBitsPerValue); | 366 | val.bit_value = bits.ReadBits(val.num_bits); |
| 384 | val.m_BitValue = m[i]; | ||
| 385 | val.m_QuintValue = q[i]; | ||
| 386 | result.push_back(val); | 367 | result.push_back(val); |
| 368 | nValsDecoded++; | ||
| 369 | break; | ||
| 387 | } | 370 | } |
| 388 | } | 371 | } |
| 389 | }; | 372 | } |
| 390 | 373 | ||
| 391 | namespace ASTCC { | 374 | namespace ASTCC { |
| 392 | 375 | ||
| 393 | struct TexelWeightParams { | 376 | struct TexelWeightParams { |
| 394 | uint32_t m_Width = 0; | 377 | u32 m_Width = 0; |
| 395 | uint32_t m_Height = 0; | 378 | u32 m_Height = 0; |
| 396 | bool m_bDualPlane = false; | 379 | bool m_bDualPlane = false; |
| 397 | uint32_t m_MaxWeight = 0; | 380 | u32 m_MaxWeight = 0; |
| 398 | bool m_bError = false; | 381 | bool m_bError = false; |
| 399 | bool m_bVoidExtentLDR = false; | 382 | bool m_bVoidExtentLDR = false; |
| 400 | bool m_bVoidExtentHDR = false; | 383 | bool m_bVoidExtentHDR = false; |
| 401 | 384 | ||
| 402 | uint32_t GetPackedBitSize() const { | 385 | u32 GetPackedBitSize() const { |
| 403 | // How many indices do we have? | 386 | // How many indices do we have? |
| 404 | uint32_t nIdxs = m_Height * m_Width; | 387 | u32 nIdxs = m_Height * m_Width; |
| 405 | if (m_bDualPlane) { | 388 | if (m_bDualPlane) { |
| 406 | nIdxs *= 2; | 389 | nIdxs *= 2; |
| 407 | } | 390 | } |
| 408 | 391 | ||
| 409 | return IntegerEncodedValue::CreateEncoding(m_MaxWeight).GetBitLength(nIdxs); | 392 | return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs); |
| 410 | } | 393 | } |
| 411 | 394 | ||
| 412 | uint32_t GetNumWeightValues() const { | 395 | u32 GetNumWeightValues() const { |
| 413 | uint32_t ret = m_Width * m_Height; | 396 | u32 ret = m_Width * m_Height; |
| 414 | if (m_bDualPlane) { | 397 | if (m_bDualPlane) { |
| 415 | ret *= 2; | 398 | ret *= 2; |
| 416 | } | 399 | } |
| @@ -422,7 +405,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 422 | TexelWeightParams params; | 405 | TexelWeightParams params; |
| 423 | 406 | ||
| 424 | // Read the entire block mode all at once | 407 | // Read the entire block mode all at once |
| 425 | uint16_t modeBits = static_cast<uint16_t>(strm.ReadBits(11)); | 408 | u16 modeBits = static_cast<u16>(strm.ReadBits<11>()); |
| 426 | 409 | ||
| 427 | // Does this match the void extent block mode? | 410 | // Does this match the void extent block mode? |
| 428 | if ((modeBits & 0x01FF) == 0x1FC) { | 411 | if ((modeBits & 0x01FF) == 0x1FC) { |
| @@ -457,7 +440,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 457 | // of the block mode. Layout is determined by a number | 440 | // of the block mode. Layout is determined by a number |
| 458 | // between 0 and 9 corresponding to table C.2.8 of the | 441 | // between 0 and 9 corresponding to table C.2.8 of the |
| 459 | // ASTC spec. | 442 | // ASTC spec. |
| 460 | uint32_t layout = 0; | 443 | u32 layout = 0; |
| 461 | 444 | ||
| 462 | if ((modeBits & 0x1) || (modeBits & 0x2)) { | 445 | if ((modeBits & 0x1) || (modeBits & 0x2)) { |
| 463 | // layout is in [0-4] | 446 | // layout is in [0-4] |
| @@ -509,7 +492,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 509 | assert(layout < 10); | 492 | assert(layout < 10); |
| 510 | 493 | ||
| 511 | // Determine R | 494 | // Determine R |
| 512 | uint32_t R = !!(modeBits & 0x10); | 495 | u32 R = !!(modeBits & 0x10); |
| 513 | if (layout < 5) { | 496 | if (layout < 5) { |
| 514 | R |= (modeBits & 0x3) << 1; | 497 | R |= (modeBits & 0x3) << 1; |
| 515 | } else { | 498 | } else { |
| @@ -520,54 +503,54 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 520 | // Determine width & height | 503 | // Determine width & height |
| 521 | switch (layout) { | 504 | switch (layout) { |
| 522 | case 0: { | 505 | case 0: { |
| 523 | uint32_t A = (modeBits >> 5) & 0x3; | 506 | u32 A = (modeBits >> 5) & 0x3; |
| 524 | uint32_t B = (modeBits >> 7) & 0x3; | 507 | u32 B = (modeBits >> 7) & 0x3; |
| 525 | params.m_Width = B + 4; | 508 | params.m_Width = B + 4; |
| 526 | params.m_Height = A + 2; | 509 | params.m_Height = A + 2; |
| 527 | break; | 510 | break; |
| 528 | } | 511 | } |
| 529 | 512 | ||
| 530 | case 1: { | 513 | case 1: { |
| 531 | uint32_t A = (modeBits >> 5) & 0x3; | 514 | u32 A = (modeBits >> 5) & 0x3; |
| 532 | uint32_t B = (modeBits >> 7) & 0x3; | 515 | u32 B = (modeBits >> 7) & 0x3; |
| 533 | params.m_Width = B + 8; | 516 | params.m_Width = B + 8; |
| 534 | params.m_Height = A + 2; | 517 | params.m_Height = A + 2; |
| 535 | break; | 518 | break; |
| 536 | } | 519 | } |
| 537 | 520 | ||
| 538 | case 2: { | 521 | case 2: { |
| 539 | uint32_t A = (modeBits >> 5) & 0x3; | 522 | u32 A = (modeBits >> 5) & 0x3; |
| 540 | uint32_t B = (modeBits >> 7) & 0x3; | 523 | u32 B = (modeBits >> 7) & 0x3; |
| 541 | params.m_Width = A + 2; | 524 | params.m_Width = A + 2; |
| 542 | params.m_Height = B + 8; | 525 | params.m_Height = B + 8; |
| 543 | break; | 526 | break; |
| 544 | } | 527 | } |
| 545 | 528 | ||
| 546 | case 3: { | 529 | case 3: { |
| 547 | uint32_t A = (modeBits >> 5) & 0x3; | 530 | u32 A = (modeBits >> 5) & 0x3; |
| 548 | uint32_t B = (modeBits >> 7) & 0x1; | 531 | u32 B = (modeBits >> 7) & 0x1; |
| 549 | params.m_Width = A + 2; | 532 | params.m_Width = A + 2; |
| 550 | params.m_Height = B + 6; | 533 | params.m_Height = B + 6; |
| 551 | break; | 534 | break; |
| 552 | } | 535 | } |
| 553 | 536 | ||
| 554 | case 4: { | 537 | case 4: { |
| 555 | uint32_t A = (modeBits >> 5) & 0x3; | 538 | u32 A = (modeBits >> 5) & 0x3; |
| 556 | uint32_t B = (modeBits >> 7) & 0x1; | 539 | u32 B = (modeBits >> 7) & 0x1; |
| 557 | params.m_Width = B + 2; | 540 | params.m_Width = B + 2; |
| 558 | params.m_Height = A + 2; | 541 | params.m_Height = A + 2; |
| 559 | break; | 542 | break; |
| 560 | } | 543 | } |
| 561 | 544 | ||
| 562 | case 5: { | 545 | case 5: { |
| 563 | uint32_t A = (modeBits >> 5) & 0x3; | 546 | u32 A = (modeBits >> 5) & 0x3; |
| 564 | params.m_Width = 12; | 547 | params.m_Width = 12; |
| 565 | params.m_Height = A + 2; | 548 | params.m_Height = A + 2; |
| 566 | break; | 549 | break; |
| 567 | } | 550 | } |
| 568 | 551 | ||
| 569 | case 6: { | 552 | case 6: { |
| 570 | uint32_t A = (modeBits >> 5) & 0x3; | 553 | u32 A = (modeBits >> 5) & 0x3; |
| 571 | params.m_Width = A + 2; | 554 | params.m_Width = A + 2; |
| 572 | params.m_Height = 12; | 555 | params.m_Height = 12; |
| 573 | break; | 556 | break; |
| @@ -586,15 +569,15 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 586 | } | 569 | } |
| 587 | 570 | ||
| 588 | case 9: { | 571 | case 9: { |
| 589 | uint32_t A = (modeBits >> 5) & 0x3; | 572 | u32 A = (modeBits >> 5) & 0x3; |
| 590 | uint32_t B = (modeBits >> 9) & 0x3; | 573 | u32 B = (modeBits >> 9) & 0x3; |
| 591 | params.m_Width = A + 6; | 574 | params.m_Width = A + 6; |
| 592 | params.m_Height = B + 6; | 575 | params.m_Height = B + 6; |
| 593 | break; | 576 | break; |
| 594 | } | 577 | } |
| 595 | 578 | ||
| 596 | default: | 579 | default: |
| 597 | assert(!"Don't know this layout..."); | 580 | assert(false && "Don't know this layout..."); |
| 598 | params.m_bError = true; | 581 | params.m_bError = true; |
| 599 | break; | 582 | break; |
| 600 | } | 583 | } |
| @@ -605,10 +588,10 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 605 | bool H = (layout != 9) && (modeBits & 0x200); | 588 | bool H = (layout != 9) && (modeBits & 0x200); |
| 606 | 589 | ||
| 607 | if (H) { | 590 | if (H) { |
| 608 | const uint32_t maxWeights[6] = {9, 11, 15, 19, 23, 31}; | 591 | const u32 maxWeights[6] = {9, 11, 15, 19, 23, 31}; |
| 609 | params.m_MaxWeight = maxWeights[R - 2]; | 592 | params.m_MaxWeight = maxWeights[R - 2]; |
| 610 | } else { | 593 | } else { |
| 611 | const uint32_t maxWeights[6] = {1, 2, 3, 4, 5, 7}; | 594 | const u32 maxWeights[6] = {1, 2, 3, 4, 5, 7}; |
| 612 | params.m_MaxWeight = maxWeights[R - 2]; | 595 | params.m_MaxWeight = maxWeights[R - 2]; |
| 613 | } | 596 | } |
| 614 | 597 | ||
| @@ -617,32 +600,32 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 617 | return params; | 600 | return params; |
| 618 | } | 601 | } |
| 619 | 602 | ||
| 620 | static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, | 603 | static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth, |
| 621 | uint32_t blockHeight) { | 604 | u32 blockHeight) { |
| 622 | // Don't actually care about the void extent, just read the bits... | 605 | // Don't actually care about the void extent, just read the bits... |
| 623 | for (int i = 0; i < 4; ++i) { | 606 | for (s32 i = 0; i < 4; ++i) { |
| 624 | strm.ReadBits(13); | 607 | strm.ReadBits<13>(); |
| 625 | } | 608 | } |
| 626 | 609 | ||
| 627 | // Decode the RGBA components and renormalize them to the range [0, 255] | 610 | // Decode the RGBA components and renormalize them to the range [0, 255] |
| 628 | uint16_t r = static_cast<uint16_t>(strm.ReadBits(16)); | 611 | u16 r = static_cast<u16>(strm.ReadBits<16>()); |
| 629 | uint16_t g = static_cast<uint16_t>(strm.ReadBits(16)); | 612 | u16 g = static_cast<u16>(strm.ReadBits<16>()); |
| 630 | uint16_t b = static_cast<uint16_t>(strm.ReadBits(16)); | 613 | u16 b = static_cast<u16>(strm.ReadBits<16>()); |
| 631 | uint16_t a = static_cast<uint16_t>(strm.ReadBits(16)); | 614 | u16 a = static_cast<u16>(strm.ReadBits<16>()); |
| 632 | 615 | ||
| 633 | uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 | | 616 | u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 | |
| 634 | (static_cast<uint32_t>(a) & 0xFF00) << 16; | 617 | (static_cast<u32>(a) & 0xFF00) << 16; |
| 635 | 618 | ||
| 636 | for (uint32_t j = 0; j < blockHeight; j++) { | 619 | for (u32 j = 0; j < blockHeight; j++) { |
| 637 | for (uint32_t i = 0; i < blockWidth; i++) { | 620 | for (u32 i = 0; i < blockWidth; i++) { |
| 638 | outBuf[j * blockWidth + i] = rgba; | 621 | outBuf[j * blockWidth + i] = rgba; |
| 639 | } | 622 | } |
| 640 | } | 623 | } |
| 641 | } | 624 | } |
| 642 | 625 | ||
| 643 | static void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeight) { | 626 | static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { |
| 644 | for (uint32_t j = 0; j < blockHeight; j++) { | 627 | for (u32 j = 0; j < blockHeight; j++) { |
| 645 | for (uint32_t i = 0; i < blockWidth; i++) { | 628 | for (u32 i = 0; i < blockWidth; i++) { |
| 646 | outBuf[j * blockWidth + i] = 0xFFFF00FF; | 629 | outBuf[j * blockWidth + i] = 0xFFFF00FF; |
| 647 | } | 630 | } |
| 648 | } | 631 | } |
| @@ -651,18 +634,18 @@ static void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeigh | |||
| 651 | // Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)] | 634 | // Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)] |
| 652 | // is the same as [(numBits - 1):0] and repeats all the way down. | 635 | // is the same as [(numBits - 1):0] and repeats all the way down. |
| 653 | template <typename IntType> | 636 | template <typename IntType> |
| 654 | static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { | 637 | static IntType Replicate(IntType val, u32 numBits, u32 toBit) { |
| 655 | if (numBits == 0) | 638 | if (numBits == 0) |
| 656 | return 0; | 639 | return 0; |
| 657 | if (toBit == 0) | 640 | if (toBit == 0) |
| 658 | return 0; | 641 | return 0; |
| 659 | IntType v = val & static_cast<IntType>((1 << numBits) - 1); | 642 | IntType v = val & static_cast<IntType>((1 << numBits) - 1); |
| 660 | IntType res = v; | 643 | IntType res = v; |
| 661 | uint32_t reslen = numBits; | 644 | u32 reslen = numBits; |
| 662 | while (reslen < toBit) { | 645 | while (reslen < toBit) { |
| 663 | uint32_t comp = 0; | 646 | u32 comp = 0; |
| 664 | if (numBits > toBit - reslen) { | 647 | if (numBits > toBit - reslen) { |
| 665 | uint32_t newshift = toBit - reslen; | 648 | u32 newshift = toBit - reslen; |
| 666 | comp = numBits - newshift; | 649 | comp = numBits - newshift; |
| 667 | numBits = newshift; | 650 | numBits = newshift; |
| 668 | } | 651 | } |
| @@ -675,14 +658,14 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { | |||
| 675 | 658 | ||
| 676 | class Pixel { | 659 | class Pixel { |
| 677 | protected: | 660 | protected: |
| 678 | using ChannelType = int16_t; | 661 | using ChannelType = s16; |
| 679 | uint8_t m_BitDepth[4] = {8, 8, 8, 8}; | 662 | u8 m_BitDepth[4] = {8, 8, 8, 8}; |
| 680 | int16_t color[4] = {}; | 663 | s16 color[4] = {}; |
| 681 | 664 | ||
| 682 | public: | 665 | public: |
| 683 | Pixel() = default; | 666 | Pixel() = default; |
| 684 | Pixel(uint32_t a, uint32_t r, uint32_t g, uint32_t b, unsigned bitDepth = 8) | 667 | Pixel(u32 a, u32 r, u32 g, u32 b, u32 bitDepth = 8) |
| 685 | : m_BitDepth{uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth)}, | 668 | : m_BitDepth{u8(bitDepth), u8(bitDepth), u8(bitDepth), u8(bitDepth)}, |
| 686 | color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), | 669 | color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), |
| 687 | static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} | 670 | static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} |
| 688 | 671 | ||
| @@ -691,22 +674,22 @@ public: | |||
| 691 | // significant bits when going from larger to smaller bit depth | 674 | // significant bits when going from larger to smaller bit depth |
| 692 | // or by repeating the most significant bits when going from | 675 | // or by repeating the most significant bits when going from |
| 693 | // smaller to larger bit depths. | 676 | // smaller to larger bit depths. |
| 694 | void ChangeBitDepth(const uint8_t (&depth)[4]) { | 677 | void ChangeBitDepth(const u8 (&depth)[4]) { |
| 695 | for (uint32_t i = 0; i < 4; i++) { | 678 | for (u32 i = 0; i < 4; i++) { |
| 696 | Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); | 679 | Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); |
| 697 | m_BitDepth[i] = depth[i]; | 680 | m_BitDepth[i] = depth[i]; |
| 698 | } | 681 | } |
| 699 | } | 682 | } |
| 700 | 683 | ||
| 701 | template <typename IntType> | 684 | template <typename IntType> |
| 702 | static float ConvertChannelToFloat(IntType channel, uint8_t bitDepth) { | 685 | static float ConvertChannelToFloat(IntType channel, u8 bitDepth) { |
| 703 | float denominator = static_cast<float>((1 << bitDepth) - 1); | 686 | float denominator = static_cast<float>((1 << bitDepth) - 1); |
| 704 | return static_cast<float>(channel) / denominator; | 687 | return static_cast<float>(channel) / denominator; |
| 705 | } | 688 | } |
| 706 | 689 | ||
| 707 | // Changes the bit depth of a single component. See the comment | 690 | // Changes the bit depth of a single component. See the comment |
| 708 | // above for how we do this. | 691 | // above for how we do this. |
| 709 | static ChannelType ChangeBitDepth(Pixel::ChannelType val, uint8_t oldDepth, uint8_t newDepth) { | 692 | static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) { |
| 710 | assert(newDepth <= 8); | 693 | assert(newDepth <= 8); |
| 711 | assert(oldDepth <= 8); | 694 | assert(oldDepth <= 8); |
| 712 | 695 | ||
| @@ -722,16 +705,15 @@ public: | |||
| 722 | if (newDepth == 0) { | 705 | if (newDepth == 0) { |
| 723 | return 0xFF; | 706 | return 0xFF; |
| 724 | } else { | 707 | } else { |
| 725 | uint8_t bitsWasted = static_cast<uint8_t>(oldDepth - newDepth); | 708 | u8 bitsWasted = static_cast<u8>(oldDepth - newDepth); |
| 726 | uint16_t v = static_cast<uint16_t>(val); | 709 | u16 v = static_cast<u16>(val); |
| 727 | v = static_cast<uint16_t>((v + (1 << (bitsWasted - 1))) >> bitsWasted); | 710 | v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); |
| 728 | v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), | 711 | v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1)); |
| 729 | static_cast<uint16_t>((1 << newDepth) - 1)); | 712 | return static_cast<u8>(v); |
| 730 | return static_cast<uint8_t>(v); | ||
| 731 | } | 713 | } |
| 732 | } | 714 | } |
| 733 | 715 | ||
| 734 | assert(!"We shouldn't get here."); | 716 | assert(false && "We shouldn't get here."); |
| 735 | return 0; | 717 | return 0; |
| 736 | } | 718 | } |
| 737 | 719 | ||
| @@ -759,15 +741,15 @@ public: | |||
| 759 | ChannelType& B() { | 741 | ChannelType& B() { |
| 760 | return color[3]; | 742 | return color[3]; |
| 761 | } | 743 | } |
| 762 | const ChannelType& Component(uint32_t idx) const { | 744 | const ChannelType& Component(u32 idx) const { |
| 763 | return color[idx]; | 745 | return color[idx]; |
| 764 | } | 746 | } |
| 765 | ChannelType& Component(uint32_t idx) { | 747 | ChannelType& Component(u32 idx) { |
| 766 | return color[idx]; | 748 | return color[idx]; |
| 767 | } | 749 | } |
| 768 | 750 | ||
| 769 | void GetBitDepth(uint8_t (&outDepth)[4]) const { | 751 | void GetBitDepth(u8 (&outDepth)[4]) const { |
| 770 | for (int i = 0; i < 4; i++) { | 752 | for (s32 i = 0; i < 4; i++) { |
| 771 | outDepth[i] = m_BitDepth[i]; | 753 | outDepth[i] = m_BitDepth[i]; |
| 772 | } | 754 | } |
| 773 | } | 755 | } |
| @@ -776,12 +758,12 @@ public: | |||
| 776 | // and then pack each channel into an R8G8B8A8 32-bit integer. We assume | 758 | // and then pack each channel into an R8G8B8A8 32-bit integer. We assume |
| 777 | // that the architecture is little-endian, so the alpha channel will end | 759 | // that the architecture is little-endian, so the alpha channel will end |
| 778 | // up in the most-significant byte. | 760 | // up in the most-significant byte. |
| 779 | uint32_t Pack() const { | 761 | u32 Pack() const { |
| 780 | Pixel eightBit(*this); | 762 | Pixel eightBit(*this); |
| 781 | const uint8_t eightBitDepth[4] = {8, 8, 8, 8}; | 763 | const u8 eightBitDepth[4] = {8, 8, 8, 8}; |
| 782 | eightBit.ChangeBitDepth(eightBitDepth); | 764 | eightBit.ChangeBitDepth(eightBitDepth); |
| 783 | 765 | ||
| 784 | uint32_t r = 0; | 766 | u32 r = 0; |
| 785 | r |= eightBit.A(); | 767 | r |= eightBit.A(); |
| 786 | r <<= 8; | 768 | r <<= 8; |
| 787 | r |= eightBit.B(); | 769 | r |= eightBit.B(); |
| @@ -794,7 +776,7 @@ public: | |||
| 794 | 776 | ||
| 795 | // Clamps the pixel to the range [0,255] | 777 | // Clamps the pixel to the range [0,255] |
| 796 | void ClampByte() { | 778 | void ClampByte() { |
| 797 | for (uint32_t i = 0; i < 4; i++) { | 779 | for (u32 i = 0; i < 4; i++) { |
| 798 | color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); | 780 | color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); |
| 799 | } | 781 | } |
| 800 | } | 782 | } |
| @@ -804,24 +786,24 @@ public: | |||
| 804 | } | 786 | } |
| 805 | }; | 787 | }; |
| 806 | 788 | ||
| 807 | static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* modes, | 789 | static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nPartitions, |
| 808 | const uint32_t nPartitions, const uint32_t nBitsForColorData) { | 790 | const u32 nBitsForColorData) { |
| 809 | // First figure out how many color values we have | 791 | // First figure out how many color values we have |
| 810 | uint32_t nValues = 0; | 792 | u32 nValues = 0; |
| 811 | for (uint32_t i = 0; i < nPartitions; i++) { | 793 | for (u32 i = 0; i < nPartitions; i++) { |
| 812 | nValues += ((modes[i] >> 2) + 1) << 1; | 794 | nValues += ((modes[i] >> 2) + 1) << 1; |
| 813 | } | 795 | } |
| 814 | 796 | ||
| 815 | // Then based on the number of values and the remaining number of bits, | 797 | // Then based on the number of values and the remaining number of bits, |
| 816 | // figure out the max value for each of them... | 798 | // figure out the max value for each of them... |
| 817 | uint32_t range = 256; | 799 | u32 range = 256; |
| 818 | while (--range > 0) { | 800 | while (--range > 0) { |
| 819 | IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(range); | 801 | IntegerEncodedValue val = EncodingsValues[range]; |
| 820 | uint32_t bitLength = val.GetBitLength(nValues); | 802 | u32 bitLength = val.GetBitLength(nValues); |
| 821 | if (bitLength <= nBitsForColorData) { | 803 | if (bitLength <= nBitsForColorData) { |
| 822 | // Find the smallest possible range that matches the given encoding | 804 | // Find the smallest possible range that matches the given encoding |
| 823 | while (--range > 0) { | 805 | while (--range > 0) { |
| 824 | IntegerEncodedValue newval = IntegerEncodedValue::CreateEncoding(range); | 806 | IntegerEncodedValue newval = EncodingsValues[range]; |
| 825 | if (!newval.MatchesEncoding(val)) { | 807 | if (!newval.MatchesEncoding(val)) { |
| 826 | break; | 808 | break; |
| 827 | } | 809 | } |
| @@ -835,12 +817,14 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 835 | 817 | ||
| 836 | // We now have enough to decode our integer sequence. | 818 | // We now have enough to decode our integer sequence. |
| 837 | std::vector<IntegerEncodedValue> decodedColorValues; | 819 | std::vector<IntegerEncodedValue> decodedColorValues; |
| 820 | decodedColorValues.reserve(32); | ||
| 821 | |||
| 838 | InputBitStream colorStream(data); | 822 | InputBitStream colorStream(data); |
| 839 | IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); | 823 | DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); |
| 840 | 824 | ||
| 841 | // Once we have the decoded values, we need to dequantize them to the 0-255 range | 825 | // Once we have the decoded values, we need to dequantize them to the 0-255 range |
| 842 | // This procedure is outlined in ASTC spec C.2.13 | 826 | // This procedure is outlined in ASTC spec C.2.13 |
| 843 | uint32_t outIdx = 0; | 827 | u32 outIdx = 0; |
| 844 | for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) { | 828 | for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) { |
| 845 | // Have we already decoded all that we need? | 829 | // Have we already decoded all that we need? |
| 846 | if (outIdx >= nValues) { | 830 | if (outIdx >= nValues) { |
| @@ -848,25 +832,25 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 848 | } | 832 | } |
| 849 | 833 | ||
| 850 | const IntegerEncodedValue& val = *itr; | 834 | const IntegerEncodedValue& val = *itr; |
| 851 | uint32_t bitlen = val.BaseBitLength(); | 835 | u32 bitlen = val.num_bits; |
| 852 | uint32_t bitval = val.GetBitValue(); | 836 | u32 bitval = val.bit_value; |
| 853 | 837 | ||
| 854 | assert(bitlen >= 1); | 838 | assert(bitlen >= 1); |
| 855 | 839 | ||
| 856 | uint32_t A = 0, B = 0, C = 0, D = 0; | 840 | u32 A = 0, B = 0, C = 0, D = 0; |
| 857 | // A is just the lsb replicated 9 times. | 841 | // A is just the lsb replicated 9 times. |
| 858 | A = Replicate(bitval & 1, 1, 9); | 842 | A = Replicate(bitval & 1, 1, 9); |
| 859 | 843 | ||
| 860 | switch (val.GetEncoding()) { | 844 | switch (val.encoding) { |
| 861 | // Replicate bits | 845 | // Replicate bits |
| 862 | case eIntegerEncoding_JustBits: | 846 | case IntegerEncoding::JustBits: |
| 863 | out[outIdx++] = Replicate(bitval, bitlen, 8); | 847 | out[outIdx++] = Replicate(bitval, bitlen, 8); |
| 864 | break; | 848 | break; |
| 865 | 849 | ||
| 866 | // Use algorithm in C.2.13 | 850 | // Use algorithm in C.2.13 |
| 867 | case eIntegerEncoding_Trit: { | 851 | case IntegerEncoding::Trit: { |
| 868 | 852 | ||
| 869 | D = val.GetTritValue(); | 853 | D = val.trit_value; |
| 870 | 854 | ||
| 871 | switch (bitlen) { | 855 | switch (bitlen) { |
| 872 | case 1: { | 856 | case 1: { |
| @@ -876,48 +860,48 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 876 | case 2: { | 860 | case 2: { |
| 877 | C = 93; | 861 | C = 93; |
| 878 | // B = b000b0bb0 | 862 | // B = b000b0bb0 |
| 879 | uint32_t b = (bitval >> 1) & 1; | 863 | u32 b = (bitval >> 1) & 1; |
| 880 | B = (b << 8) | (b << 4) | (b << 2) | (b << 1); | 864 | B = (b << 8) | (b << 4) | (b << 2) | (b << 1); |
| 881 | } break; | 865 | } break; |
| 882 | 866 | ||
| 883 | case 3: { | 867 | case 3: { |
| 884 | C = 44; | 868 | C = 44; |
| 885 | // B = cb000cbcb | 869 | // B = cb000cbcb |
| 886 | uint32_t cb = (bitval >> 1) & 3; | 870 | u32 cb = (bitval >> 1) & 3; |
| 887 | B = (cb << 7) | (cb << 2) | cb; | 871 | B = (cb << 7) | (cb << 2) | cb; |
| 888 | } break; | 872 | } break; |
| 889 | 873 | ||
| 890 | case 4: { | 874 | case 4: { |
| 891 | C = 22; | 875 | C = 22; |
| 892 | // B = dcb000dcb | 876 | // B = dcb000dcb |
| 893 | uint32_t dcb = (bitval >> 1) & 7; | 877 | u32 dcb = (bitval >> 1) & 7; |
| 894 | B = (dcb << 6) | dcb; | 878 | B = (dcb << 6) | dcb; |
| 895 | } break; | 879 | } break; |
| 896 | 880 | ||
| 897 | case 5: { | 881 | case 5: { |
| 898 | C = 11; | 882 | C = 11; |
| 899 | // B = edcb000ed | 883 | // B = edcb000ed |
| 900 | uint32_t edcb = (bitval >> 1) & 0xF; | 884 | u32 edcb = (bitval >> 1) & 0xF; |
| 901 | B = (edcb << 5) | (edcb >> 2); | 885 | B = (edcb << 5) | (edcb >> 2); |
| 902 | } break; | 886 | } break; |
| 903 | 887 | ||
| 904 | case 6: { | 888 | case 6: { |
| 905 | C = 5; | 889 | C = 5; |
| 906 | // B = fedcb000f | 890 | // B = fedcb000f |
| 907 | uint32_t fedcb = (bitval >> 1) & 0x1F; | 891 | u32 fedcb = (bitval >> 1) & 0x1F; |
| 908 | B = (fedcb << 4) | (fedcb >> 4); | 892 | B = (fedcb << 4) | (fedcb >> 4); |
| 909 | } break; | 893 | } break; |
| 910 | 894 | ||
| 911 | default: | 895 | default: |
| 912 | assert(!"Unsupported trit encoding for color values!"); | 896 | assert(false && "Unsupported trit encoding for color values!"); |
| 913 | break; | 897 | break; |
| 914 | } // switch(bitlen) | 898 | } // switch(bitlen) |
| 915 | } // case eIntegerEncoding_Trit | 899 | } // case IntegerEncoding::Trit |
| 916 | break; | 900 | break; |
| 917 | 901 | ||
| 918 | case eIntegerEncoding_Quint: { | 902 | case IntegerEncoding::Qus32: { |
| 919 | 903 | ||
| 920 | D = val.GetQuintValue(); | 904 | D = val.qus32_value; |
| 921 | 905 | ||
| 922 | switch (bitlen) { | 906 | switch (bitlen) { |
| 923 | case 1: { | 907 | case 1: { |
| @@ -927,41 +911,41 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 927 | case 2: { | 911 | case 2: { |
| 928 | C = 54; | 912 | C = 54; |
| 929 | // B = b0000bb00 | 913 | // B = b0000bb00 |
| 930 | uint32_t b = (bitval >> 1) & 1; | 914 | u32 b = (bitval >> 1) & 1; |
| 931 | B = (b << 8) | (b << 3) | (b << 2); | 915 | B = (b << 8) | (b << 3) | (b << 2); |
| 932 | } break; | 916 | } break; |
| 933 | 917 | ||
| 934 | case 3: { | 918 | case 3: { |
| 935 | C = 26; | 919 | C = 26; |
| 936 | // B = cb0000cbc | 920 | // B = cb0000cbc |
| 937 | uint32_t cb = (bitval >> 1) & 3; | 921 | u32 cb = (bitval >> 1) & 3; |
| 938 | B = (cb << 7) | (cb << 1) | (cb >> 1); | 922 | B = (cb << 7) | (cb << 1) | (cb >> 1); |
| 939 | } break; | 923 | } break; |
| 940 | 924 | ||
| 941 | case 4: { | 925 | case 4: { |
| 942 | C = 13; | 926 | C = 13; |
| 943 | // B = dcb0000dc | 927 | // B = dcb0000dc |
| 944 | uint32_t dcb = (bitval >> 1) & 7; | 928 | u32 dcb = (bitval >> 1) & 7; |
| 945 | B = (dcb << 6) | (dcb >> 1); | 929 | B = (dcb << 6) | (dcb >> 1); |
| 946 | } break; | 930 | } break; |
| 947 | 931 | ||
| 948 | case 5: { | 932 | case 5: { |
| 949 | C = 6; | 933 | C = 6; |
| 950 | // B = edcb0000e | 934 | // B = edcb0000e |
| 951 | uint32_t edcb = (bitval >> 1) & 0xF; | 935 | u32 edcb = (bitval >> 1) & 0xF; |
| 952 | B = (edcb << 5) | (edcb >> 3); | 936 | B = (edcb << 5) | (edcb >> 3); |
| 953 | } break; | 937 | } break; |
| 954 | 938 | ||
| 955 | default: | 939 | default: |
| 956 | assert(!"Unsupported quint encoding for color values!"); | 940 | assert(false && "Unsupported quint encoding for color values!"); |
| 957 | break; | 941 | break; |
| 958 | } // switch(bitlen) | 942 | } // switch(bitlen) |
| 959 | } // case eIntegerEncoding_Quint | 943 | } // case IntegerEncoding::Qus32 |
| 960 | break; | 944 | break; |
| 961 | } // switch(val.GetEncoding()) | 945 | } // switch(val.encoding) |
| 962 | 946 | ||
| 963 | if (val.GetEncoding() != eIntegerEncoding_JustBits) { | 947 | if (val.encoding != IntegerEncoding::JustBits) { |
| 964 | uint32_t T = D * C + B; | 948 | u32 T = D * C + B; |
| 965 | T ^= A; | 949 | T ^= A; |
| 966 | T = (A & 0x80) | (T >> 2); | 950 | T = (A & 0x80) | (T >> 2); |
| 967 | out[outIdx++] = T; | 951 | out[outIdx++] = T; |
| @@ -969,31 +953,31 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 969 | } | 953 | } |
| 970 | 954 | ||
| 971 | // Make sure that each of our values is in the proper range... | 955 | // Make sure that each of our values is in the proper range... |
| 972 | for (uint32_t i = 0; i < nValues; i++) { | 956 | for (u32 i = 0; i < nValues; i++) { |
| 973 | assert(out[i] <= 255); | 957 | assert(out[i] <= 255); |
| 974 | } | 958 | } |
| 975 | } | 959 | } |
| 976 | 960 | ||
| 977 | static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { | 961 | static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { |
| 978 | uint32_t bitval = val.GetBitValue(); | 962 | u32 bitval = val.bit_value; |
| 979 | uint32_t bitlen = val.BaseBitLength(); | 963 | u32 bitlen = val.num_bits; |
| 980 | 964 | ||
| 981 | uint32_t A = Replicate(bitval & 1, 1, 7); | 965 | u32 A = Replicate(bitval & 1, 1, 7); |
| 982 | uint32_t B = 0, C = 0, D = 0; | 966 | u32 B = 0, C = 0, D = 0; |
| 983 | 967 | ||
| 984 | uint32_t result = 0; | 968 | u32 result = 0; |
| 985 | switch (val.GetEncoding()) { | 969 | switch (val.encoding) { |
| 986 | case eIntegerEncoding_JustBits: | 970 | case IntegerEncoding::JustBits: |
| 987 | result = Replicate(bitval, bitlen, 6); | 971 | result = Replicate(bitval, bitlen, 6); |
| 988 | break; | 972 | break; |
| 989 | 973 | ||
| 990 | case eIntegerEncoding_Trit: { | 974 | case IntegerEncoding::Trit: { |
| 991 | D = val.GetTritValue(); | 975 | D = val.trit_value; |
| 992 | assert(D < 3); | 976 | assert(D < 3); |
| 993 | 977 | ||
| 994 | switch (bitlen) { | 978 | switch (bitlen) { |
| 995 | case 0: { | 979 | case 0: { |
| 996 | uint32_t results[3] = {0, 32, 63}; | 980 | u32 results[3] = {0, 32, 63}; |
| 997 | result = results[D]; | 981 | result = results[D]; |
| 998 | } break; | 982 | } break; |
| 999 | 983 | ||
| @@ -1003,29 +987,29 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { | |||
| 1003 | 987 | ||
| 1004 | case 2: { | 988 | case 2: { |
| 1005 | C = 23; | 989 | C = 23; |
| 1006 | uint32_t b = (bitval >> 1) & 1; | 990 | u32 b = (bitval >> 1) & 1; |
| 1007 | B = (b << 6) | (b << 2) | b; | 991 | B = (b << 6) | (b << 2) | b; |
| 1008 | } break; | 992 | } break; |
| 1009 | 993 | ||
| 1010 | case 3: { | 994 | case 3: { |
| 1011 | C = 11; | 995 | C = 11; |
| 1012 | uint32_t cb = (bitval >> 1) & 3; | 996 | u32 cb = (bitval >> 1) & 3; |
| 1013 | B = (cb << 5) | cb; | 997 | B = (cb << 5) | cb; |
| 1014 | } break; | 998 | } break; |
| 1015 | 999 | ||
| 1016 | default: | 1000 | default: |
| 1017 | assert(!"Invalid trit encoding for texel weight"); | 1001 | assert(false && "Invalid trit encoding for texel weight"); |
| 1018 | break; | 1002 | break; |
| 1019 | } | 1003 | } |
| 1020 | } break; | 1004 | } break; |
| 1021 | 1005 | ||
| 1022 | case eIntegerEncoding_Quint: { | 1006 | case IntegerEncoding::Qus32: { |
| 1023 | D = val.GetQuintValue(); | 1007 | D = val.qus32_value; |
| 1024 | assert(D < 5); | 1008 | assert(D < 5); |
| 1025 | 1009 | ||
| 1026 | switch (bitlen) { | 1010 | switch (bitlen) { |
| 1027 | case 0: { | 1011 | case 0: { |
| 1028 | uint32_t results[5] = {0, 16, 32, 47, 63}; | 1012 | u32 results[5] = {0, 16, 32, 47, 63}; |
| 1029 | result = results[D]; | 1013 | result = results[D]; |
| 1030 | } break; | 1014 | } break; |
| 1031 | 1015 | ||
| @@ -1035,18 +1019,18 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { | |||
| 1035 | 1019 | ||
| 1036 | case 2: { | 1020 | case 2: { |
| 1037 | C = 13; | 1021 | C = 13; |
| 1038 | uint32_t b = (bitval >> 1) & 1; | 1022 | u32 b = (bitval >> 1) & 1; |
| 1039 | B = (b << 6) | (b << 1); | 1023 | B = (b << 6) | (b << 1); |
| 1040 | } break; | 1024 | } break; |
| 1041 | 1025 | ||
| 1042 | default: | 1026 | default: |
| 1043 | assert(!"Invalid quint encoding for texel weight"); | 1027 | assert(false && "Invalid quint encoding for texel weight"); |
| 1044 | break; | 1028 | break; |
| 1045 | } | 1029 | } |
| 1046 | } break; | 1030 | } break; |
| 1047 | } | 1031 | } |
| 1048 | 1032 | ||
| 1049 | if (val.GetEncoding() != eIntegerEncoding_JustBits && bitlen > 0) { | 1033 | if (val.encoding != IntegerEncoding::JustBits && bitlen > 0) { |
| 1050 | // Decode the value... | 1034 | // Decode the value... |
| 1051 | result = D * C + B; | 1035 | result = D * C + B; |
| 1052 | result ^= A; | 1036 | result ^= A; |
| @@ -1063,12 +1047,11 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { | |||
| 1063 | return result; | 1047 | return result; |
| 1064 | } | 1048 | } |
| 1065 | 1049 | ||
| 1066 | static void UnquantizeTexelWeights(uint32_t out[2][144], | 1050 | static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights, |
| 1067 | const std::vector<IntegerEncodedValue>& weights, | 1051 | const TexelWeightParams& params, const u32 blockWidth, |
| 1068 | const TexelWeightParams& params, const uint32_t blockWidth, | 1052 | const u32 blockHeight) { |
| 1069 | const uint32_t blockHeight) { | 1053 | u32 weightIdx = 0; |
| 1070 | uint32_t weightIdx = 0; | 1054 | u32 unquantized[2][144]; |
| 1071 | uint32_t unquantized[2][144]; | ||
| 1072 | 1055 | ||
| 1073 | for (auto itr = weights.begin(); itr != weights.end(); ++itr) { | 1056 | for (auto itr = weights.begin(); itr != weights.end(); ++itr) { |
| 1074 | unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr); | 1057 | unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr); |
| @@ -1086,34 +1069,34 @@ static void UnquantizeTexelWeights(uint32_t out[2][144], | |||
| 1086 | } | 1069 | } |
| 1087 | 1070 | ||
| 1088 | // Do infill if necessary (Section C.2.18) ... | 1071 | // Do infill if necessary (Section C.2.18) ... |
| 1089 | uint32_t Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1); | 1072 | u32 Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1); |
| 1090 | uint32_t Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1); | 1073 | u32 Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1); |
| 1091 | 1074 | ||
| 1092 | const uint32_t kPlaneScale = params.m_bDualPlane ? 2U : 1U; | 1075 | const u32 kPlaneScale = params.m_bDualPlane ? 2U : 1U; |
| 1093 | for (uint32_t plane = 0; plane < kPlaneScale; plane++) | 1076 | for (u32 plane = 0; plane < kPlaneScale; plane++) |
| 1094 | for (uint32_t t = 0; t < blockHeight; t++) | 1077 | for (u32 t = 0; t < blockHeight; t++) |
| 1095 | for (uint32_t s = 0; s < blockWidth; s++) { | 1078 | for (u32 s = 0; s < blockWidth; s++) { |
| 1096 | uint32_t cs = Ds * s; | 1079 | u32 cs = Ds * s; |
| 1097 | uint32_t ct = Dt * t; | 1080 | u32 ct = Dt * t; |
| 1098 | 1081 | ||
| 1099 | uint32_t gs = (cs * (params.m_Width - 1) + 32) >> 6; | 1082 | u32 gs = (cs * (params.m_Width - 1) + 32) >> 6; |
| 1100 | uint32_t gt = (ct * (params.m_Height - 1) + 32) >> 6; | 1083 | u32 gt = (ct * (params.m_Height - 1) + 32) >> 6; |
| 1101 | 1084 | ||
| 1102 | uint32_t js = gs >> 4; | 1085 | u32 js = gs >> 4; |
| 1103 | uint32_t fs = gs & 0xF; | 1086 | u32 fs = gs & 0xF; |
| 1104 | 1087 | ||
| 1105 | uint32_t jt = gt >> 4; | 1088 | u32 jt = gt >> 4; |
| 1106 | uint32_t ft = gt & 0x0F; | 1089 | u32 ft = gt & 0x0F; |
| 1107 | 1090 | ||
| 1108 | uint32_t w11 = (fs * ft + 8) >> 4; | 1091 | u32 w11 = (fs * ft + 8) >> 4; |
| 1109 | uint32_t w10 = ft - w11; | 1092 | u32 w10 = ft - w11; |
| 1110 | uint32_t w01 = fs - w11; | 1093 | u32 w01 = fs - w11; |
| 1111 | uint32_t w00 = 16 - fs - ft + w11; | 1094 | u32 w00 = 16 - fs - ft + w11; |
| 1112 | 1095 | ||
| 1113 | uint32_t v0 = js + jt * params.m_Width; | 1096 | u32 v0 = js + jt * params.m_Width; |
| 1114 | 1097 | ||
| 1115 | #define FIND_TEXEL(tidx, bidx) \ | 1098 | #define FIND_TEXEL(tidx, bidx) \ |
| 1116 | uint32_t p##bidx = 0; \ | 1099 | u32 p##bidx = 0; \ |
| 1117 | do { \ | 1100 | do { \ |
| 1118 | if ((tidx) < (params.m_Width * params.m_Height)) { \ | 1101 | if ((tidx) < (params.m_Width * params.m_Height)) { \ |
| 1119 | p##bidx = unquantized[plane][(tidx)]; \ | 1102 | p##bidx = unquantized[plane][(tidx)]; \ |
| @@ -1133,7 +1116,7 @@ static void UnquantizeTexelWeights(uint32_t out[2][144], | |||
| 1133 | } | 1116 | } |
| 1134 | 1117 | ||
| 1135 | // Transfers a bit as described in C.2.14 | 1118 | // Transfers a bit as described in C.2.14 |
| 1136 | static inline void BitTransferSigned(int32_t& a, int32_t& b) { | 1119 | static inline void BitTransferSigned(s32& a, s32& b) { |
| 1137 | b >>= 1; | 1120 | b >>= 1; |
| 1138 | b |= a & 0x80; | 1121 | b |= a & 0x80; |
| 1139 | a >>= 1; | 1122 | a >>= 1; |
| @@ -1144,14 +1127,14 @@ static inline void BitTransferSigned(int32_t& a, int32_t& b) { | |||
| 1144 | 1127 | ||
| 1145 | // Adds more precision to the blue channel as described | 1128 | // Adds more precision to the blue channel as described |
| 1146 | // in C.2.14 | 1129 | // in C.2.14 |
| 1147 | static inline Pixel BlueContract(int32_t a, int32_t r, int32_t g, int32_t b) { | 1130 | static inline Pixel BlueContract(s32 a, s32 r, s32 g, s32 b) { |
| 1148 | return Pixel(static_cast<int16_t>(a), static_cast<int16_t>((r + b) >> 1), | 1131 | return Pixel(static_cast<s16>(a), static_cast<s16>((r + b) >> 1), |
| 1149 | static_cast<int16_t>((g + b) >> 1), static_cast<int16_t>(b)); | 1132 | static_cast<s16>((g + b) >> 1), static_cast<s16>(b)); |
| 1150 | } | 1133 | } |
| 1151 | 1134 | ||
| 1152 | // Partition selection functions as specified in | 1135 | // Partition selection functions as specified in |
| 1153 | // C.2.21 | 1136 | // C.2.21 |
| 1154 | static inline uint32_t hash52(uint32_t p) { | 1137 | static inline u32 hash52(u32 p) { |
| 1155 | p ^= p >> 15; | 1138 | p ^= p >> 15; |
| 1156 | p -= p << 17; | 1139 | p -= p << 17; |
| 1157 | p += p << 7; | 1140 | p += p << 7; |
| @@ -1165,8 +1148,7 @@ static inline uint32_t hash52(uint32_t p) { | |||
| 1165 | return p; | 1148 | return p; |
| 1166 | } | 1149 | } |
| 1167 | 1150 | ||
| 1168 | static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, | 1151 | static u32 SelectPartition(s32 seed, s32 x, s32 y, s32 z, s32 partitionCount, s32 smallBlock) { |
| 1169 | int32_t partitionCount, int32_t smallBlock) { | ||
| 1170 | if (1 == partitionCount) | 1152 | if (1 == partitionCount) |
| 1171 | return 0; | 1153 | return 0; |
| 1172 | 1154 | ||
| @@ -1178,34 +1160,34 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, | |||
| 1178 | 1160 | ||
| 1179 | seed += (partitionCount - 1) * 1024; | 1161 | seed += (partitionCount - 1) * 1024; |
| 1180 | 1162 | ||
| 1181 | uint32_t rnum = hash52(static_cast<uint32_t>(seed)); | 1163 | u32 rnum = hash52(static_cast<u32>(seed)); |
| 1182 | uint8_t seed1 = static_cast<uint8_t>(rnum & 0xF); | 1164 | u8 seed1 = static_cast<u8>(rnum & 0xF); |
| 1183 | uint8_t seed2 = static_cast<uint8_t>((rnum >> 4) & 0xF); | 1165 | u8 seed2 = static_cast<u8>((rnum >> 4) & 0xF); |
| 1184 | uint8_t seed3 = static_cast<uint8_t>((rnum >> 8) & 0xF); | 1166 | u8 seed3 = static_cast<u8>((rnum >> 8) & 0xF); |
| 1185 | uint8_t seed4 = static_cast<uint8_t>((rnum >> 12) & 0xF); | 1167 | u8 seed4 = static_cast<u8>((rnum >> 12) & 0xF); |
| 1186 | uint8_t seed5 = static_cast<uint8_t>((rnum >> 16) & 0xF); | 1168 | u8 seed5 = static_cast<u8>((rnum >> 16) & 0xF); |
| 1187 | uint8_t seed6 = static_cast<uint8_t>((rnum >> 20) & 0xF); | 1169 | u8 seed6 = static_cast<u8>((rnum >> 20) & 0xF); |
| 1188 | uint8_t seed7 = static_cast<uint8_t>((rnum >> 24) & 0xF); | 1170 | u8 seed7 = static_cast<u8>((rnum >> 24) & 0xF); |
| 1189 | uint8_t seed8 = static_cast<uint8_t>((rnum >> 28) & 0xF); | 1171 | u8 seed8 = static_cast<u8>((rnum >> 28) & 0xF); |
| 1190 | uint8_t seed9 = static_cast<uint8_t>((rnum >> 18) & 0xF); | 1172 | u8 seed9 = static_cast<u8>((rnum >> 18) & 0xF); |
| 1191 | uint8_t seed10 = static_cast<uint8_t>((rnum >> 22) & 0xF); | 1173 | u8 seed10 = static_cast<u8>((rnum >> 22) & 0xF); |
| 1192 | uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF); | 1174 | u8 seed11 = static_cast<u8>((rnum >> 26) & 0xF); |
| 1193 | uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF); | 1175 | u8 seed12 = static_cast<u8>(((rnum >> 30) | (rnum << 2)) & 0xF); |
| 1194 | 1176 | ||
| 1195 | seed1 = static_cast<uint8_t>(seed1 * seed1); | 1177 | seed1 = static_cast<u8>(seed1 * seed1); |
| 1196 | seed2 = static_cast<uint8_t>(seed2 * seed2); | 1178 | seed2 = static_cast<u8>(seed2 * seed2); |
| 1197 | seed3 = static_cast<uint8_t>(seed3 * seed3); | 1179 | seed3 = static_cast<u8>(seed3 * seed3); |
| 1198 | seed4 = static_cast<uint8_t>(seed4 * seed4); | 1180 | seed4 = static_cast<u8>(seed4 * seed4); |
| 1199 | seed5 = static_cast<uint8_t>(seed5 * seed5); | 1181 | seed5 = static_cast<u8>(seed5 * seed5); |
| 1200 | seed6 = static_cast<uint8_t>(seed6 * seed6); | 1182 | seed6 = static_cast<u8>(seed6 * seed6); |
| 1201 | seed7 = static_cast<uint8_t>(seed7 * seed7); | 1183 | seed7 = static_cast<u8>(seed7 * seed7); |
| 1202 | seed8 = static_cast<uint8_t>(seed8 * seed8); | 1184 | seed8 = static_cast<u8>(seed8 * seed8); |
| 1203 | seed9 = static_cast<uint8_t>(seed9 * seed9); | 1185 | seed9 = static_cast<u8>(seed9 * seed9); |
| 1204 | seed10 = static_cast<uint8_t>(seed10 * seed10); | 1186 | seed10 = static_cast<u8>(seed10 * seed10); |
| 1205 | seed11 = static_cast<uint8_t>(seed11 * seed11); | 1187 | seed11 = static_cast<u8>(seed11 * seed11); |
| 1206 | seed12 = static_cast<uint8_t>(seed12 * seed12); | 1188 | seed12 = static_cast<u8>(seed12 * seed12); |
| 1207 | 1189 | ||
| 1208 | int32_t sh1, sh2, sh3; | 1190 | s32 sh1, sh2, sh3; |
| 1209 | if (seed & 1) { | 1191 | if (seed & 1) { |
| 1210 | sh1 = (seed & 2) ? 4 : 5; | 1192 | sh1 = (seed & 2) ? 4 : 5; |
| 1211 | sh2 = (partitionCount == 3) ? 6 : 5; | 1193 | sh2 = (partitionCount == 3) ? 6 : 5; |
| @@ -1215,23 +1197,23 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, | |||
| 1215 | } | 1197 | } |
| 1216 | sh3 = (seed & 0x10) ? sh1 : sh2; | 1198 | sh3 = (seed & 0x10) ? sh1 : sh2; |
| 1217 | 1199 | ||
| 1218 | seed1 = static_cast<uint8_t>(seed1 >> sh1); | 1200 | seed1 = static_cast<u8>(seed1 >> sh1); |
| 1219 | seed2 = static_cast<uint8_t>(seed2 >> sh2); | 1201 | seed2 = static_cast<u8>(seed2 >> sh2); |
| 1220 | seed3 = static_cast<uint8_t>(seed3 >> sh1); | 1202 | seed3 = static_cast<u8>(seed3 >> sh1); |
| 1221 | seed4 = static_cast<uint8_t>(seed4 >> sh2); | 1203 | seed4 = static_cast<u8>(seed4 >> sh2); |
| 1222 | seed5 = static_cast<uint8_t>(seed5 >> sh1); | 1204 | seed5 = static_cast<u8>(seed5 >> sh1); |
| 1223 | seed6 = static_cast<uint8_t>(seed6 >> sh2); | 1205 | seed6 = static_cast<u8>(seed6 >> sh2); |
| 1224 | seed7 = static_cast<uint8_t>(seed7 >> sh1); | 1206 | seed7 = static_cast<u8>(seed7 >> sh1); |
| 1225 | seed8 = static_cast<uint8_t>(seed8 >> sh2); | 1207 | seed8 = static_cast<u8>(seed8 >> sh2); |
| 1226 | seed9 = static_cast<uint8_t>(seed9 >> sh3); | 1208 | seed9 = static_cast<u8>(seed9 >> sh3); |
| 1227 | seed10 = static_cast<uint8_t>(seed10 >> sh3); | 1209 | seed10 = static_cast<u8>(seed10 >> sh3); |
| 1228 | seed11 = static_cast<uint8_t>(seed11 >> sh3); | 1210 | seed11 = static_cast<u8>(seed11 >> sh3); |
| 1229 | seed12 = static_cast<uint8_t>(seed12 >> sh3); | 1211 | seed12 = static_cast<u8>(seed12 >> sh3); |
| 1230 | 1212 | ||
| 1231 | int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); | 1213 | s32 a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); |
| 1232 | int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); | 1214 | s32 b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); |
| 1233 | int32_t c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); | 1215 | s32 c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); |
| 1234 | int32_t d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); | 1216 | s32 d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); |
| 1235 | 1217 | ||
| 1236 | a &= 0x3F; | 1218 | a &= 0x3F; |
| 1237 | b &= 0x3F; | 1219 | b &= 0x3F; |
| @@ -1252,27 +1234,26 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, | |||
| 1252 | return 3; | 1234 | return 3; |
| 1253 | } | 1235 | } |
| 1254 | 1236 | ||
| 1255 | static inline uint32_t Select2DPartition(int32_t seed, int32_t x, int32_t y, int32_t partitionCount, | 1237 | static inline u32 Select2DPartition(s32 seed, s32 x, s32 y, s32 partitionCount, s32 smallBlock) { |
| 1256 | int32_t smallBlock) { | ||
| 1257 | return SelectPartition(seed, x, y, 0, partitionCount, smallBlock); | 1238 | return SelectPartition(seed, x, y, 0, partitionCount, smallBlock); |
| 1258 | } | 1239 | } |
| 1259 | 1240 | ||
| 1260 | // Section C.2.14 | 1241 | // Section C.2.14 |
| 1261 | static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValues, | 1242 | static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues, |
| 1262 | uint32_t colorEndpointMode) { | 1243 | u32 colorEndpos32Mode) { |
| 1263 | #define READ_UINT_VALUES(N) \ | 1244 | #define READ_UINT_VALUES(N) \ |
| 1264 | uint32_t v[N]; \ | 1245 | u32 v[N]; \ |
| 1265 | for (uint32_t i = 0; i < N; i++) { \ | 1246 | for (u32 i = 0; i < N; i++) { \ |
| 1266 | v[i] = *(colorValues++); \ | 1247 | v[i] = *(colorValues++); \ |
| 1267 | } | 1248 | } |
| 1268 | 1249 | ||
| 1269 | #define READ_INT_VALUES(N) \ | 1250 | #define READ_INT_VALUES(N) \ |
| 1270 | int32_t v[N]; \ | 1251 | s32 v[N]; \ |
| 1271 | for (uint32_t i = 0; i < N; i++) { \ | 1252 | for (u32 i = 0; i < N; i++) { \ |
| 1272 | v[i] = static_cast<int32_t>(*(colorValues++)); \ | 1253 | v[i] = static_cast<s32>(*(colorValues++)); \ |
| 1273 | } | 1254 | } |
| 1274 | 1255 | ||
| 1275 | switch (colorEndpointMode) { | 1256 | switch (colorEndpos32Mode) { |
| 1276 | case 0: { | 1257 | case 0: { |
| 1277 | READ_UINT_VALUES(2) | 1258 | READ_UINT_VALUES(2) |
| 1278 | ep1 = Pixel(0xFF, v[0], v[0], v[0]); | 1259 | ep1 = Pixel(0xFF, v[0], v[0], v[0]); |
| @@ -1281,8 +1262,8 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue | |||
| 1281 | 1262 | ||
| 1282 | case 1: { | 1263 | case 1: { |
| 1283 | READ_UINT_VALUES(2) | 1264 | READ_UINT_VALUES(2) |
| 1284 | uint32_t L0 = (v[0] >> 2) | (v[1] & 0xC0); | 1265 | u32 L0 = (v[0] >> 2) | (v[1] & 0xC0); |
| 1285 | uint32_t L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU); | 1266 | u32 L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU); |
| 1286 | ep1 = Pixel(0xFF, L0, L0, L0); | 1267 | ep1 = Pixel(0xFF, L0, L0, L0); |
| 1287 | ep2 = Pixel(0xFF, L1, L1, L1); | 1268 | ep2 = Pixel(0xFF, L1, L1, L1); |
| 1288 | } break; | 1269 | } break; |
| @@ -1371,7 +1352,7 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue | |||
| 1371 | } break; | 1352 | } break; |
| 1372 | 1353 | ||
| 1373 | default: | 1354 | default: |
| 1374 | assert(!"Unsupported color endpoint mode (is it HDR?)"); | 1355 | assert(false && "Unsupported color endpoint mode (is it HDR?)"); |
| 1375 | break; | 1356 | break; |
| 1376 | } | 1357 | } |
| 1377 | 1358 | ||
| @@ -1379,14 +1360,14 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue | |||
| 1379 | #undef READ_INT_VALUES | 1360 | #undef READ_INT_VALUES |
| 1380 | } | 1361 | } |
| 1381 | 1362 | ||
| 1382 | static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | 1363 | static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight, |
| 1383 | const uint32_t blockHeight, uint32_t* outBuf) { | 1364 | u32* outBuf) { |
| 1384 | InputBitStream strm(inBuf); | 1365 | InputBitStream strm(inBuf); |
| 1385 | TexelWeightParams weightParams = DecodeBlockInfo(strm); | 1366 | TexelWeightParams weightParams = DecodeBlockInfo(strm); |
| 1386 | 1367 | ||
| 1387 | // Was there an error? | 1368 | // Was there an error? |
| 1388 | if (weightParams.m_bError) { | 1369 | if (weightParams.m_bError) { |
| 1389 | assert(!"Invalid block mode"); | 1370 | assert(false && "Invalid block mode"); |
| 1390 | FillError(outBuf, blockWidth, blockHeight); | 1371 | FillError(outBuf, blockWidth, blockHeight); |
| 1391 | return; | 1372 | return; |
| 1392 | } | 1373 | } |
| @@ -1397,63 +1378,63 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1397 | } | 1378 | } |
| 1398 | 1379 | ||
| 1399 | if (weightParams.m_bVoidExtentHDR) { | 1380 | if (weightParams.m_bVoidExtentHDR) { |
| 1400 | assert(!"HDR void extent blocks are unsupported!"); | 1381 | assert(false && "HDR void extent blocks are unsupported!"); |
| 1401 | FillError(outBuf, blockWidth, blockHeight); | 1382 | FillError(outBuf, blockWidth, blockHeight); |
| 1402 | return; | 1383 | return; |
| 1403 | } | 1384 | } |
| 1404 | 1385 | ||
| 1405 | if (weightParams.m_Width > blockWidth) { | 1386 | if (weightParams.m_Width > blockWidth) { |
| 1406 | assert(!"Texel weight grid width should be smaller than block width"); | 1387 | assert(false && "Texel weight grid width should be smaller than block width"); |
| 1407 | FillError(outBuf, blockWidth, blockHeight); | 1388 | FillError(outBuf, blockWidth, blockHeight); |
| 1408 | return; | 1389 | return; |
| 1409 | } | 1390 | } |
| 1410 | 1391 | ||
| 1411 | if (weightParams.m_Height > blockHeight) { | 1392 | if (weightParams.m_Height > blockHeight) { |
| 1412 | assert(!"Texel weight grid height should be smaller than block height"); | 1393 | assert(false && "Texel weight grid height should be smaller than block height"); |
| 1413 | FillError(outBuf, blockWidth, blockHeight); | 1394 | FillError(outBuf, blockWidth, blockHeight); |
| 1414 | return; | 1395 | return; |
| 1415 | } | 1396 | } |
| 1416 | 1397 | ||
| 1417 | // Read num partitions | 1398 | // Read num partitions |
| 1418 | uint32_t nPartitions = strm.ReadBits(2) + 1; | 1399 | u32 nPartitions = strm.ReadBits<2>() + 1; |
| 1419 | assert(nPartitions <= 4); | 1400 | assert(nPartitions <= 4); |
| 1420 | 1401 | ||
| 1421 | if (nPartitions == 4 && weightParams.m_bDualPlane) { | 1402 | if (nPartitions == 4 && weightParams.m_bDualPlane) { |
| 1422 | assert(!"Dual plane mode is incompatible with four partition blocks"); | 1403 | assert(false && "Dual plane mode is incompatible with four partition blocks"); |
| 1423 | FillError(outBuf, blockWidth, blockHeight); | 1404 | FillError(outBuf, blockWidth, blockHeight); |
| 1424 | return; | 1405 | return; |
| 1425 | } | 1406 | } |
| 1426 | 1407 | ||
| 1427 | // Based on the number of partitions, read the color endpoint mode for | 1408 | // Based on the number of partitions, read the color endpos32 mode for |
| 1428 | // each partition. | 1409 | // each partition. |
| 1429 | 1410 | ||
| 1430 | // Determine partitions, partition index, and color endpoint modes | 1411 | // Determine partitions, partition index, and color endpos32 modes |
| 1431 | int32_t planeIdx = -1; | 1412 | s32 planeIdx = -1; |
| 1432 | uint32_t partitionIndex; | 1413 | u32 partitionIndex; |
| 1433 | uint32_t colorEndpointMode[4] = {0, 0, 0, 0}; | 1414 | u32 colorEndpos32Mode[4] = {0, 0, 0, 0}; |
| 1434 | 1415 | ||
| 1435 | // Define color data. | 1416 | // Define color data. |
| 1436 | uint8_t colorEndpointData[16]; | 1417 | u8 colorEndpos32Data[16]; |
| 1437 | memset(colorEndpointData, 0, sizeof(colorEndpointData)); | 1418 | memset(colorEndpos32Data, 0, sizeof(colorEndpos32Data)); |
| 1438 | OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); | 1419 | OutputBitStream colorEndpos32Stream(colorEndpos32Data, 16 * 8, 0); |
| 1439 | 1420 | ||
| 1440 | // Read extra config data... | 1421 | // Read extra config data... |
| 1441 | uint32_t baseCEM = 0; | 1422 | u32 baseCEM = 0; |
| 1442 | if (nPartitions == 1) { | 1423 | if (nPartitions == 1) { |
| 1443 | colorEndpointMode[0] = strm.ReadBits(4); | 1424 | colorEndpos32Mode[0] = strm.ReadBits<4>(); |
| 1444 | partitionIndex = 0; | 1425 | partitionIndex = 0; |
| 1445 | } else { | 1426 | } else { |
| 1446 | partitionIndex = strm.ReadBits(10); | 1427 | partitionIndex = strm.ReadBits<10>(); |
| 1447 | baseCEM = strm.ReadBits(6); | 1428 | baseCEM = strm.ReadBits<6>(); |
| 1448 | } | 1429 | } |
| 1449 | uint32_t baseMode = (baseCEM & 3); | 1430 | u32 baseMode = (baseCEM & 3); |
| 1450 | 1431 | ||
| 1451 | // Remaining bits are color endpoint data... | 1432 | // Remaining bits are color endpos32 data... |
| 1452 | uint32_t nWeightBits = weightParams.GetPackedBitSize(); | 1433 | u32 nWeightBits = weightParams.GetPackedBitSize(); |
| 1453 | int32_t remainingBits = 128 - nWeightBits - strm.GetBitsRead(); | 1434 | s32 remainingBits = 128 - nWeightBits - static_cast<s32>(strm.GetBitsRead()); |
| 1454 | 1435 | ||
| 1455 | // Consider extra bits prior to texel data... | 1436 | // Consider extra bits prior to texel data... |
| 1456 | uint32_t extraCEMbits = 0; | 1437 | u32 extraCEMbits = 0; |
| 1457 | if (baseMode) { | 1438 | if (baseMode) { |
| 1458 | switch (nPartitions) { | 1439 | switch (nPartitions) { |
| 1459 | case 2: | 1440 | case 2: |
| @@ -1473,18 +1454,18 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1473 | remainingBits -= extraCEMbits; | 1454 | remainingBits -= extraCEMbits; |
| 1474 | 1455 | ||
| 1475 | // Do we have a dual plane situation? | 1456 | // Do we have a dual plane situation? |
| 1476 | uint32_t planeSelectorBits = 0; | 1457 | u32 planeSelectorBits = 0; |
| 1477 | if (weightParams.m_bDualPlane) { | 1458 | if (weightParams.m_bDualPlane) { |
| 1478 | planeSelectorBits = 2; | 1459 | planeSelectorBits = 2; |
| 1479 | } | 1460 | } |
| 1480 | remainingBits -= planeSelectorBits; | 1461 | remainingBits -= planeSelectorBits; |
| 1481 | 1462 | ||
| 1482 | // Read color data... | 1463 | // Read color data... |
| 1483 | uint32_t colorDataBits = remainingBits; | 1464 | u32 colorDataBits = remainingBits; |
| 1484 | while (remainingBits > 0) { | 1465 | while (remainingBits > 0) { |
| 1485 | uint32_t nb = std::min(remainingBits, 8); | 1466 | u32 nb = std::min(remainingBits, 8); |
| 1486 | uint32_t b = strm.ReadBits(nb); | 1467 | u32 b = strm.ReadBits(nb); |
| 1487 | colorEndpointStream.WriteBits(b, nb); | 1468 | colorEndpos32Stream.WriteBits(b, nb); |
| 1488 | remainingBits -= 8; | 1469 | remainingBits -= 8; |
| 1489 | } | 1470 | } |
| 1490 | 1471 | ||
| @@ -1493,64 +1474,64 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1493 | 1474 | ||
| 1494 | // Read the rest of the CEM | 1475 | // Read the rest of the CEM |
| 1495 | if (baseMode) { | 1476 | if (baseMode) { |
| 1496 | uint32_t extraCEM = strm.ReadBits(extraCEMbits); | 1477 | u32 extraCEM = strm.ReadBits(extraCEMbits); |
| 1497 | uint32_t CEM = (extraCEM << 6) | baseCEM; | 1478 | u32 CEM = (extraCEM << 6) | baseCEM; |
| 1498 | CEM >>= 2; | 1479 | CEM >>= 2; |
| 1499 | 1480 | ||
| 1500 | bool C[4] = {0}; | 1481 | bool C[4] = {0}; |
| 1501 | for (uint32_t i = 0; i < nPartitions; i++) { | 1482 | for (u32 i = 0; i < nPartitions; i++) { |
| 1502 | C[i] = CEM & 1; | 1483 | C[i] = CEM & 1; |
| 1503 | CEM >>= 1; | 1484 | CEM >>= 1; |
| 1504 | } | 1485 | } |
| 1505 | 1486 | ||
| 1506 | uint8_t M[4] = {0}; | 1487 | u8 M[4] = {0}; |
| 1507 | for (uint32_t i = 0; i < nPartitions; i++) { | 1488 | for (u32 i = 0; i < nPartitions; i++) { |
| 1508 | M[i] = CEM & 3; | 1489 | M[i] = CEM & 3; |
| 1509 | CEM >>= 2; | 1490 | CEM >>= 2; |
| 1510 | assert(M[i] <= 3); | 1491 | assert(M[i] <= 3); |
| 1511 | } | 1492 | } |
| 1512 | 1493 | ||
| 1513 | for (uint32_t i = 0; i < nPartitions; i++) { | 1494 | for (u32 i = 0; i < nPartitions; i++) { |
| 1514 | colorEndpointMode[i] = baseMode; | 1495 | colorEndpos32Mode[i] = baseMode; |
| 1515 | if (!(C[i])) | 1496 | if (!(C[i])) |
| 1516 | colorEndpointMode[i] -= 1; | 1497 | colorEndpos32Mode[i] -= 1; |
| 1517 | colorEndpointMode[i] <<= 2; | 1498 | colorEndpos32Mode[i] <<= 2; |
| 1518 | colorEndpointMode[i] |= M[i]; | 1499 | colorEndpos32Mode[i] |= M[i]; |
| 1519 | } | 1500 | } |
| 1520 | } else if (nPartitions > 1) { | 1501 | } else if (nPartitions > 1) { |
| 1521 | uint32_t CEM = baseCEM >> 2; | 1502 | u32 CEM = baseCEM >> 2; |
| 1522 | for (uint32_t i = 0; i < nPartitions; i++) { | 1503 | for (u32 i = 0; i < nPartitions; i++) { |
| 1523 | colorEndpointMode[i] = CEM; | 1504 | colorEndpos32Mode[i] = CEM; |
| 1524 | } | 1505 | } |
| 1525 | } | 1506 | } |
| 1526 | 1507 | ||
| 1527 | // Make sure everything up till here is sane. | 1508 | // Make sure everything up till here is sane. |
| 1528 | for (uint32_t i = 0; i < nPartitions; i++) { | 1509 | for (u32 i = 0; i < nPartitions; i++) { |
| 1529 | assert(colorEndpointMode[i] < 16); | 1510 | assert(colorEndpos32Mode[i] < 16); |
| 1530 | } | 1511 | } |
| 1531 | assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128); | 1512 | assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128); |
| 1532 | 1513 | ||
| 1533 | // Decode both color data and texel weight data | 1514 | // Decode both color data and texel weight data |
| 1534 | uint32_t colorValues[32]; // Four values, two endpoints, four maximum paritions | 1515 | u32 colorValues[32]; // Four values, two endpos32s, four maximum paritions |
| 1535 | DecodeColorValues(colorValues, colorEndpointData, colorEndpointMode, nPartitions, | 1516 | DecodeColorValues(colorValues, colorEndpos32Data, colorEndpos32Mode, nPartitions, |
| 1536 | colorDataBits); | 1517 | colorDataBits); |
| 1537 | 1518 | ||
| 1538 | Pixel endpoints[4][2]; | 1519 | Pixel endpos32s[4][2]; |
| 1539 | const uint32_t* colorValuesPtr = colorValues; | 1520 | const u32* colorValuesPtr = colorValues; |
| 1540 | for (uint32_t i = 0; i < nPartitions; i++) { | 1521 | for (u32 i = 0; i < nPartitions; i++) { |
| 1541 | ComputeEndpoints(endpoints[i][0], endpoints[i][1], colorValuesPtr, colorEndpointMode[i]); | 1522 | ComputeEndpos32s(endpos32s[i][0], endpos32s[i][1], colorValuesPtr, colorEndpos32Mode[i]); |
| 1542 | } | 1523 | } |
| 1543 | 1524 | ||
| 1544 | // Read the texel weight data.. | 1525 | // Read the texel weight data.. |
| 1545 | uint8_t texelWeightData[16]; | 1526 | u8 texelWeightData[16]; |
| 1546 | memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); | 1527 | memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); |
| 1547 | 1528 | ||
| 1548 | // Reverse everything | 1529 | // Reverse everything |
| 1549 | for (uint32_t i = 0; i < 8; i++) { | 1530 | for (u32 i = 0; i < 8; i++) { |
| 1550 | // Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits | 1531 | // Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits |
| 1551 | #define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32 | 1532 | #define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32 |
| 1552 | unsigned char a = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[i])); | 1533 | u8 a = static_cast<u8>(REVERSE_BYTE(texelWeightData[i])); |
| 1553 | unsigned char b = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[15 - i])); | 1534 | u8 b = static_cast<u8>(REVERSE_BYTE(texelWeightData[15 - i])); |
| 1554 | #undef REVERSE_BYTE | 1535 | #undef REVERSE_BYTE |
| 1555 | 1536 | ||
| 1556 | texelWeightData[i] = b; | 1537 | texelWeightData[i] = b; |
| @@ -1558,50 +1539,51 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1558 | } | 1539 | } |
| 1559 | 1540 | ||
| 1560 | // Make sure that higher non-texel bits are set to zero | 1541 | // Make sure that higher non-texel bits are set to zero |
| 1561 | const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; | 1542 | const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; |
| 1562 | texelWeightData[clearByteStart - 1] = | 1543 | texelWeightData[clearByteStart - 1] = |
| 1563 | texelWeightData[clearByteStart - 1] & | 1544 | texelWeightData[clearByteStart - 1] & |
| 1564 | static_cast<uint8_t>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); | 1545 | static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); |
| 1565 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); | 1546 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); |
| 1566 | 1547 | ||
| 1567 | std::vector<IntegerEncodedValue> texelWeightValues; | 1548 | std::vector<IntegerEncodedValue> texelWeightValues; |
| 1549 | texelWeightValues.reserve(64); | ||
| 1550 | |||
| 1568 | InputBitStream weightStream(texelWeightData); | 1551 | InputBitStream weightStream(texelWeightData); |
| 1569 | 1552 | ||
| 1570 | IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, | 1553 | DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, |
| 1571 | weightParams.m_MaxWeight, | 1554 | weightParams.GetNumWeightValues()); |
| 1572 | weightParams.GetNumWeightValues()); | ||
| 1573 | 1555 | ||
| 1574 | // Blocks can be at most 12x12, so we can have as many as 144 weights | 1556 | // Blocks can be at most 12x12, so we can have as many as 144 weights |
| 1575 | uint32_t weights[2][144]; | 1557 | u32 weights[2][144]; |
| 1576 | UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight); | 1558 | UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight); |
| 1577 | 1559 | ||
| 1578 | // Now that we have endpoints and weights, we can interpolate and generate | 1560 | // Now that we have endpos32s and weights, we can s32erpolate and generate |
| 1579 | // the proper decoding... | 1561 | // the proper decoding... |
| 1580 | for (uint32_t j = 0; j < blockHeight; j++) | 1562 | for (u32 j = 0; j < blockHeight; j++) |
| 1581 | for (uint32_t i = 0; i < blockWidth; i++) { | 1563 | for (u32 i = 0; i < blockWidth; i++) { |
| 1582 | uint32_t partition = Select2DPartition(partitionIndex, i, j, nPartitions, | 1564 | u32 partition = Select2DPartition(partitionIndex, i, j, nPartitions, |
| 1583 | (blockHeight * blockWidth) < 32); | 1565 | (blockHeight * blockWidth) < 32); |
| 1584 | assert(partition < nPartitions); | 1566 | assert(partition < nPartitions); |
| 1585 | 1567 | ||
| 1586 | Pixel p; | 1568 | Pixel p; |
| 1587 | for (uint32_t c = 0; c < 4; c++) { | 1569 | for (u32 c = 0; c < 4; c++) { |
| 1588 | uint32_t C0 = endpoints[partition][0].Component(c); | 1570 | u32 C0 = endpos32s[partition][0].Component(c); |
| 1589 | C0 = Replicate(C0, 8, 16); | 1571 | C0 = Replicate(C0, 8, 16); |
| 1590 | uint32_t C1 = endpoints[partition][1].Component(c); | 1572 | u32 C1 = endpos32s[partition][1].Component(c); |
| 1591 | C1 = Replicate(C1, 8, 16); | 1573 | C1 = Replicate(C1, 8, 16); |
| 1592 | 1574 | ||
| 1593 | uint32_t plane = 0; | 1575 | u32 plane = 0; |
| 1594 | if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { | 1576 | if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { |
| 1595 | plane = 1; | 1577 | plane = 1; |
| 1596 | } | 1578 | } |
| 1597 | 1579 | ||
| 1598 | uint32_t weight = weights[plane][j * blockWidth + i]; | 1580 | u32 weight = weights[plane][j * blockWidth + i]; |
| 1599 | uint32_t C = (C0 * (64 - weight) + C1 * weight + 32) / 64; | 1581 | u32 C = (C0 * (64 - weight) + C1 * weight + 32) / 64; |
| 1600 | if (C == 65535) { | 1582 | if (C == 65535) { |
| 1601 | p.Component(c) = 255; | 1583 | p.Component(c) = 255; |
| 1602 | } else { | 1584 | } else { |
| 1603 | double Cf = static_cast<double>(C); | 1585 | double Cf = static_cast<double>(C); |
| 1604 | p.Component(c) = static_cast<uint16_t>(255.0 * (Cf / 65536.0) + 0.5); | 1586 | p.Component(c) = static_cast<u16>(255.0 * (Cf / 65536.0) + 0.5); |
| 1605 | } | 1587 | } |
| 1606 | } | 1588 | } |
| 1607 | 1589 | ||
| @@ -1613,26 +1595,26 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1613 | 1595 | ||
| 1614 | namespace Tegra::Texture::ASTC { | 1596 | namespace Tegra::Texture::ASTC { |
| 1615 | 1597 | ||
| 1616 | std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, | 1598 | std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width, |
| 1617 | uint32_t depth, uint32_t block_width, uint32_t block_height) { | 1599 | u32 block_height) { |
| 1618 | uint32_t blockIdx = 0; | 1600 | u32 blockIdx = 0; |
| 1619 | std::size_t depth_offset = 0; | 1601 | std::size_t depth_offset = 0; |
| 1620 | std::vector<uint8_t> outData(height * width * depth * 4); | 1602 | std::vector<u8> outData(height * width * depth * 4); |
| 1621 | for (uint32_t k = 0; k < depth; k++) { | 1603 | for (u32 k = 0; k < depth; k++) { |
| 1622 | for (uint32_t j = 0; j < height; j += block_height) { | 1604 | for (u32 j = 0; j < height; j += block_height) { |
| 1623 | for (uint32_t i = 0; i < width; i += block_width) { | 1605 | for (u32 i = 0; i < width; i += block_width) { |
| 1624 | 1606 | ||
| 1625 | const uint8_t* blockPtr = data + blockIdx * 16; | 1607 | const u8* blockPtr = data + blockIdx * 16; |
| 1626 | 1608 | ||
| 1627 | // Blocks can be at most 12x12 | 1609 | // Blocks can be at most 12x12 |
| 1628 | uint32_t uncompData[144]; | 1610 | u32 uncompData[144]; |
| 1629 | ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); | 1611 | ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); |
| 1630 | 1612 | ||
| 1631 | uint32_t decompWidth = std::min(block_width, width - i); | 1613 | u32 decompWidth = std::min(block_width, width - i); |
| 1632 | uint32_t decompHeight = std::min(block_height, height - j); | 1614 | u32 decompHeight = std::min(block_height, height - j); |
| 1633 | 1615 | ||
| 1634 | uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4; | 1616 | u8* outRow = depth_offset + outData.data() + (j * width + i) * 4; |
| 1635 | for (uint32_t jj = 0; jj < decompHeight; jj++) { | 1617 | for (u32 jj = 0; jj < decompHeight; jj++) { |
| 1636 | memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); | 1618 | memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); |
| 1637 | } | 1619 | } |
| 1638 | 1620 | ||
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 8e82c6748..7edc4abe1 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 9 | #include "common/bit_field.h" | 9 | #include "common/bit_field.h" |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "core/settings.h" | ||
| 11 | 12 | ||
| 12 | namespace Tegra::Texture { | 13 | namespace Tegra::Texture { |
| 13 | 14 | ||
| @@ -294,6 +295,14 @@ enum class TextureMipmapFilter : u32 { | |||
| 294 | Linear = 3, | 295 | Linear = 3, |
| 295 | }; | 296 | }; |
| 296 | 297 | ||
| 298 | enum class Anisotropy { | ||
| 299 | Default, | ||
| 300 | Filter2x, | ||
| 301 | Filter4x, | ||
| 302 | Filter8x, | ||
| 303 | Filter16x, | ||
| 304 | }; | ||
| 305 | |||
| 297 | struct TSCEntry { | 306 | struct TSCEntry { |
| 298 | union { | 307 | union { |
| 299 | struct { | 308 | struct { |
| @@ -328,7 +337,22 @@ struct TSCEntry { | |||
| 328 | }; | 337 | }; |
| 329 | 338 | ||
| 330 | float GetMaxAnisotropy() const { | 339 | float GetMaxAnisotropy() const { |
| 331 | return static_cast<float>(1U << max_anisotropy); | 340 | const u32 min_value = [] { |
| 341 | switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) { | ||
| 342 | default: | ||
| 343 | case Anisotropy::Default: | ||
| 344 | return 1U; | ||
| 345 | case Anisotropy::Filter2x: | ||
| 346 | return 2U; | ||
| 347 | case Anisotropy::Filter4x: | ||
| 348 | return 4U; | ||
| 349 | case Anisotropy::Filter8x: | ||
| 350 | return 8U; | ||
| 351 | case Anisotropy::Filter16x: | ||
| 352 | return 16U; | ||
| 353 | } | ||
| 354 | }(); | ||
| 355 | return static_cast<float>(std::max(1U << max_anisotropy, min_value)); | ||
| 332 | } | 356 | } |
| 333 | 357 | ||
| 334 | float GetMinLod() const { | 358 | float GetMinLod() const { |
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index b841e63fa..d34b47b3f 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt | |||
| @@ -42,6 +42,9 @@ add_executable(yuzu | |||
| 42 | configuration/configure_graphics.cpp | 42 | configuration/configure_graphics.cpp |
| 43 | configuration/configure_graphics.h | 43 | configuration/configure_graphics.h |
| 44 | configuration/configure_graphics.ui | 44 | configuration/configure_graphics.ui |
| 45 | configuration/configure_graphics_advanced.cpp | ||
| 46 | configuration/configure_graphics_advanced.h | ||
| 47 | configuration/configure_graphics_advanced.ui | ||
| 45 | configuration/configure_hotkeys.cpp | 48 | configuration/configure_hotkeys.cpp |
| 46 | configuration/configure_hotkeys.h | 49 | configuration/configure_hotkeys.h |
| 47 | configuration/configure_hotkeys.ui | 50 | configuration/configure_hotkeys.ui |
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 55a37fffa..c3dbb1a88 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp | |||
| @@ -9,6 +9,9 @@ | |||
| 9 | #include <QKeyEvent> | 9 | #include <QKeyEvent> |
| 10 | #include <QMessageBox> | 10 | #include <QMessageBox> |
| 11 | #include <QOffscreenSurface> | 11 | #include <QOffscreenSurface> |
| 12 | #include <QOpenGLContext> | ||
| 13 | #include <QOpenGLFunctions> | ||
| 14 | #include <QOpenGLFunctions_4_3_Core> | ||
| 12 | #include <QOpenGLWindow> | 15 | #include <QOpenGLWindow> |
| 13 | #include <QPainter> | 16 | #include <QPainter> |
| 14 | #include <QScreen> | 17 | #include <QScreen> |
| @@ -23,9 +26,10 @@ | |||
| 23 | #include "common/assert.h" | 26 | #include "common/assert.h" |
| 24 | #include "common/microprofile.h" | 27 | #include "common/microprofile.h" |
| 25 | #include "common/scm_rev.h" | 28 | #include "common/scm_rev.h" |
| 29 | #include "common/scope_exit.h" | ||
| 26 | #include "core/core.h" | 30 | #include "core/core.h" |
| 27 | #include "core/frontend/framebuffer_layout.h" | 31 | #include "core/frontend/framebuffer_layout.h" |
| 28 | #include "core/frontend/scope_acquire_window_context.h" | 32 | #include "core/frontend/scope_acquire_context.h" |
| 29 | #include "core/settings.h" | 33 | #include "core/settings.h" |
| 30 | #include "input_common/keyboard.h" | 34 | #include "input_common/keyboard.h" |
| 31 | #include "input_common/main.h" | 35 | #include "input_common/main.h" |
| @@ -35,15 +39,27 @@ | |||
| 35 | #include "yuzu/bootmanager.h" | 39 | #include "yuzu/bootmanager.h" |
| 36 | #include "yuzu/main.h" | 40 | #include "yuzu/main.h" |
| 37 | 41 | ||
| 38 | EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} | 42 | EmuThread::EmuThread(GRenderWindow& window) |
| 43 | : shared_context{window.CreateSharedContext()}, | ||
| 44 | context{(Settings::values.use_asynchronous_gpu_emulation && shared_context) ? *shared_context | ||
| 45 | : window} {} | ||
| 39 | 46 | ||
| 40 | EmuThread::~EmuThread() = default; | 47 | EmuThread::~EmuThread() = default; |
| 41 | 48 | ||
| 42 | void EmuThread::run() { | 49 | static GMainWindow* GetMainWindow() { |
| 43 | render_window->MakeCurrent(); | 50 | for (QWidget* w : qApp->topLevelWidgets()) { |
| 51 | if (GMainWindow* main = qobject_cast<GMainWindow*>(w)) { | ||
| 52 | return main; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | return nullptr; | ||
| 56 | } | ||
| 44 | 57 | ||
| 58 | void EmuThread::run() { | ||
| 45 | MicroProfileOnThreadCreate("EmuThread"); | 59 | MicroProfileOnThreadCreate("EmuThread"); |
| 46 | 60 | ||
| 61 | Core::Frontend::ScopeAcquireContext acquire_context{context}; | ||
| 62 | |||
| 47 | emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); | 63 | emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); |
| 48 | 64 | ||
| 49 | Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources( | 65 | Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources( |
| @@ -53,11 +69,6 @@ void EmuThread::run() { | |||
| 53 | 69 | ||
| 54 | emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); | 70 | emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); |
| 55 | 71 | ||
| 56 | if (Settings::values.use_asynchronous_gpu_emulation) { | ||
| 57 | // Release OpenGL context for the GPU thread | ||
| 58 | render_window->DoneCurrent(); | ||
| 59 | } | ||
| 60 | |||
| 61 | // Holds whether the cpu was running during the last iteration, | 72 | // Holds whether the cpu was running during the last iteration, |
| 62 | // so that the DebugModeLeft signal can be emitted before the | 73 | // so that the DebugModeLeft signal can be emitted before the |
| 63 | // next execution step | 74 | // next execution step |
| @@ -98,190 +109,202 @@ void EmuThread::run() { | |||
| 98 | #if MICROPROFILE_ENABLED | 109 | #if MICROPROFILE_ENABLED |
| 99 | MicroProfileOnThreadExit(); | 110 | MicroProfileOnThreadExit(); |
| 100 | #endif | 111 | #endif |
| 101 | |||
| 102 | render_window->moveContext(); | ||
| 103 | } | 112 | } |
| 104 | 113 | ||
| 105 | class GGLContext : public Core::Frontend::GraphicsContext { | 114 | class GGLContext : public Core::Frontend::GraphicsContext { |
| 106 | public: | 115 | public: |
| 107 | explicit GGLContext(QOpenGLContext* shared_context) : shared_context{shared_context} { | 116 | explicit GGLContext(QOpenGLContext* shared_context) |
| 108 | context.setFormat(shared_context->format()); | 117 | : context(new QOpenGLContext(shared_context->parent())), |
| 109 | context.setShareContext(shared_context); | 118 | surface(new QOffscreenSurface(nullptr)) { |
| 110 | context.create(); | 119 | |
| 120 | // disable vsync for any shared contexts | ||
| 121 | auto format = shared_context->format(); | ||
| 122 | format.setSwapInterval(0); | ||
| 123 | |||
| 124 | context->setShareContext(shared_context); | ||
| 125 | context->setFormat(format); | ||
| 126 | context->create(); | ||
| 127 | surface->setParent(shared_context->parent()); | ||
| 128 | surface->setFormat(format); | ||
| 129 | surface->create(); | ||
| 111 | } | 130 | } |
| 112 | 131 | ||
| 113 | void MakeCurrent() override { | 132 | void MakeCurrent() override { |
| 114 | context.makeCurrent(shared_context->surface()); | 133 | context->makeCurrent(surface); |
| 115 | } | 134 | } |
| 116 | 135 | ||
| 117 | void DoneCurrent() override { | 136 | void DoneCurrent() override { |
| 118 | context.doneCurrent(); | 137 | context->doneCurrent(); |
| 119 | } | 138 | } |
| 120 | 139 | ||
| 121 | void SwapBuffers() override {} | ||
| 122 | |||
| 123 | private: | 140 | private: |
| 124 | QOpenGLContext* shared_context; | 141 | QOpenGLContext* context; |
| 125 | QOpenGLContext context; | 142 | QOffscreenSurface* surface; |
| 126 | }; | 143 | }; |
| 127 | 144 | ||
| 128 | class GWidgetInternal : public QWindow { | 145 | class ChildRenderWindow : public QWindow { |
| 129 | public: | 146 | public: |
| 130 | GWidgetInternal(GRenderWindow* parent) : parent(parent) {} | 147 | ChildRenderWindow(QWindow* parent, QWidget* event_handler) |
| 131 | virtual ~GWidgetInternal() = default; | 148 | : QWindow{parent}, event_handler{event_handler} {} |
| 132 | 149 | ||
| 133 | void resizeEvent(QResizeEvent* ev) override { | 150 | virtual ~ChildRenderWindow() = default; |
| 134 | parent->OnClientAreaResized(ev->size().width(), ev->size().height()); | ||
| 135 | parent->OnFramebufferSizeChanged(); | ||
| 136 | } | ||
| 137 | 151 | ||
| 138 | void keyPressEvent(QKeyEvent* event) override { | 152 | virtual void Present() = 0; |
| 139 | InputCommon::GetKeyboard()->PressKey(event->key()); | ||
| 140 | } | ||
| 141 | 153 | ||
| 142 | void keyReleaseEvent(QKeyEvent* event) override { | 154 | protected: |
| 143 | InputCommon::GetKeyboard()->ReleaseKey(event->key()); | 155 | bool event(QEvent* event) override { |
| 156 | switch (event->type()) { | ||
| 157 | case QEvent::UpdateRequest: | ||
| 158 | Present(); | ||
| 159 | return true; | ||
| 160 | case QEvent::MouseButtonPress: | ||
| 161 | case QEvent::MouseButtonRelease: | ||
| 162 | case QEvent::MouseButtonDblClick: | ||
| 163 | case QEvent::MouseMove: | ||
| 164 | case QEvent::KeyPress: | ||
| 165 | case QEvent::KeyRelease: | ||
| 166 | case QEvent::FocusIn: | ||
| 167 | case QEvent::FocusOut: | ||
| 168 | case QEvent::FocusAboutToChange: | ||
| 169 | case QEvent::Enter: | ||
| 170 | case QEvent::Leave: | ||
| 171 | case QEvent::Wheel: | ||
| 172 | case QEvent::TabletMove: | ||
| 173 | case QEvent::TabletPress: | ||
| 174 | case QEvent::TabletRelease: | ||
| 175 | case QEvent::TabletEnterProximity: | ||
| 176 | case QEvent::TabletLeaveProximity: | ||
| 177 | case QEvent::TouchBegin: | ||
| 178 | case QEvent::TouchUpdate: | ||
| 179 | case QEvent::TouchEnd: | ||
| 180 | case QEvent::InputMethodQuery: | ||
| 181 | case QEvent::TouchCancel: | ||
| 182 | return QCoreApplication::sendEvent(event_handler, event); | ||
| 183 | case QEvent::Drop: | ||
| 184 | GetMainWindow()->DropAction(static_cast<QDropEvent*>(event)); | ||
| 185 | return true; | ||
| 186 | case QEvent::DragResponse: | ||
| 187 | case QEvent::DragEnter: | ||
| 188 | case QEvent::DragLeave: | ||
| 189 | case QEvent::DragMove: | ||
| 190 | GetMainWindow()->AcceptDropEvent(static_cast<QDropEvent*>(event)); | ||
| 191 | return true; | ||
| 192 | default: | ||
| 193 | return QWindow::event(event); | ||
| 194 | } | ||
| 144 | } | 195 | } |
| 145 | 196 | ||
| 146 | void mousePressEvent(QMouseEvent* event) override { | 197 | void exposeEvent(QExposeEvent* event) override { |
| 147 | if (event->source() == Qt::MouseEventSynthesizedBySystem) | 198 | QWindow::requestUpdate(); |
| 148 | return; // touch input is handled in TouchBeginEvent | 199 | QWindow::exposeEvent(event); |
| 149 | |||
| 150 | const auto pos{event->pos()}; | ||
| 151 | if (event->button() == Qt::LeftButton) { | ||
| 152 | const auto [x, y] = parent->ScaleTouch(pos); | ||
| 153 | parent->TouchPressed(x, y); | ||
| 154 | } else if (event->button() == Qt::RightButton) { | ||
| 155 | InputCommon::GetMotionEmu()->BeginTilt(pos.x(), pos.y()); | ||
| 156 | } | ||
| 157 | } | 200 | } |
| 158 | 201 | ||
| 159 | void mouseMoveEvent(QMouseEvent* event) override { | 202 | private: |
| 160 | if (event->source() == Qt::MouseEventSynthesizedBySystem) | 203 | QWidget* event_handler{}; |
| 161 | return; // touch input is handled in TouchUpdateEvent | 204 | }; |
| 162 | 205 | ||
| 163 | const auto pos{event->pos()}; | 206 | class OpenGLWindow final : public ChildRenderWindow { |
| 164 | const auto [x, y] = parent->ScaleTouch(pos); | 207 | public: |
| 165 | parent->TouchMoved(x, y); | 208 | OpenGLWindow(QWindow* parent, QWidget* event_handler, QOpenGLContext* shared_context) |
| 166 | InputCommon::GetMotionEmu()->Tilt(pos.x(), pos.y()); | 209 | : ChildRenderWindow{parent, event_handler}, |
| 167 | } | 210 | context(new QOpenGLContext(shared_context->parent())) { |
| 168 | 211 | ||
| 169 | void mouseReleaseEvent(QMouseEvent* event) override { | 212 | // disable vsync for any shared contexts |
| 170 | if (event->source() == Qt::MouseEventSynthesizedBySystem) | 213 | auto format = shared_context->format(); |
| 171 | return; // touch input is handled in TouchEndEvent | 214 | format.setSwapInterval(Settings::values.use_vsync ? 1 : 0); |
| 215 | this->setFormat(format); | ||
| 172 | 216 | ||
| 173 | if (event->button() == Qt::LeftButton) | 217 | context->setShareContext(shared_context); |
| 174 | parent->TouchReleased(); | 218 | context->setScreen(this->screen()); |
| 175 | else if (event->button() == Qt::RightButton) | 219 | context->setFormat(format); |
| 176 | InputCommon::GetMotionEmu()->EndTilt(); | 220 | context->create(); |
| 177 | } | ||
| 178 | 221 | ||
| 179 | void DisablePainting() { | 222 | setSurfaceType(QWindow::OpenGLSurface); |
| 180 | do_painting = false; | ||
| 181 | } | ||
| 182 | 223 | ||
| 183 | void EnablePainting() { | 224 | // TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground, |
| 184 | do_painting = true; | 225 | // WA_DontShowOnScreen, WA_DeleteOnClose |
| 185 | } | 226 | } |
| 186 | 227 | ||
| 187 | std::pair<unsigned, unsigned> GetSize() const { | 228 | ~OpenGLWindow() override { |
| 188 | return std::make_pair(width(), height()); | 229 | context->doneCurrent(); |
| 189 | } | 230 | } |
| 190 | 231 | ||
| 191 | protected: | 232 | void Present() override { |
| 192 | bool IsPaintingEnabled() const { | 233 | if (!isExposed()) { |
| 193 | return do_painting; | 234 | return; |
| 235 | } | ||
| 236 | |||
| 237 | context->makeCurrent(this); | ||
| 238 | Core::System::GetInstance().Renderer().TryPresent(100); | ||
| 239 | context->swapBuffers(this); | ||
| 240 | auto f = context->versionFunctions<QOpenGLFunctions_4_3_Core>(); | ||
| 241 | f->glFinish(); | ||
| 242 | QWindow::requestUpdate(); | ||
| 194 | } | 243 | } |
| 195 | 244 | ||
| 196 | private: | 245 | private: |
| 197 | GRenderWindow* parent; | 246 | QOpenGLContext* context{}; |
| 198 | bool do_painting = false; | ||
| 199 | }; | ||
| 200 | |||
| 201 | // This class overrides paintEvent and resizeEvent to prevent the GUI thread from stealing GL | ||
| 202 | // context. | ||
| 203 | // The corresponding functionality is handled in EmuThread instead | ||
| 204 | class GGLWidgetInternal final : public GWidgetInternal, public QOpenGLWindow { | ||
| 205 | public: | ||
| 206 | GGLWidgetInternal(GRenderWindow* parent, QOpenGLContext* shared_context) | ||
| 207 | : GWidgetInternal(parent), QOpenGLWindow(shared_context) {} | ||
| 208 | ~GGLWidgetInternal() override = default; | ||
| 209 | |||
| 210 | void paintEvent(QPaintEvent* ev) override { | ||
| 211 | if (IsPaintingEnabled()) { | ||
| 212 | QPainter painter(this); | ||
| 213 | } | ||
| 214 | } | ||
| 215 | }; | 247 | }; |
| 216 | 248 | ||
| 217 | #ifdef HAS_VULKAN | 249 | #ifdef HAS_VULKAN |
| 218 | class GVKWidgetInternal final : public GWidgetInternal { | 250 | class VulkanWindow final : public ChildRenderWindow { |
| 219 | public: | 251 | public: |
| 220 | GVKWidgetInternal(GRenderWindow* parent, QVulkanInstance* instance) : GWidgetInternal(parent) { | 252 | VulkanWindow(QWindow* parent, QWidget* event_handler, QVulkanInstance* instance) |
| 253 | : ChildRenderWindow{parent, event_handler} { | ||
| 221 | setSurfaceType(QSurface::SurfaceType::VulkanSurface); | 254 | setSurfaceType(QSurface::SurfaceType::VulkanSurface); |
| 222 | setVulkanInstance(instance); | 255 | setVulkanInstance(instance); |
| 223 | } | 256 | } |
| 224 | ~GVKWidgetInternal() override = default; | 257 | |
| 258 | ~VulkanWindow() override = default; | ||
| 259 | |||
| 260 | void Present() override { | ||
| 261 | // TODO(bunnei): ImplementMe | ||
| 262 | } | ||
| 263 | |||
| 264 | private: | ||
| 265 | QWidget* event_handler{}; | ||
| 225 | }; | 266 | }; |
| 226 | #endif | 267 | #endif |
| 227 | 268 | ||
| 228 | GRenderWindow::GRenderWindow(GMainWindow* parent, EmuThread* emu_thread) | 269 | GRenderWindow::GRenderWindow(QWidget* parent_, EmuThread* emu_thread) |
| 229 | : QWidget(parent), emu_thread(emu_thread) { | 270 | : QWidget(parent_), emu_thread(emu_thread) { |
| 230 | setWindowTitle(QStringLiteral("yuzu %1 | %2-%3") | 271 | setWindowTitle(QStringLiteral("yuzu %1 | %2-%3") |
| 231 | .arg(QString::fromUtf8(Common::g_build_name), | 272 | .arg(QString::fromUtf8(Common::g_build_name), |
| 232 | QString::fromUtf8(Common::g_scm_branch), | 273 | QString::fromUtf8(Common::g_scm_branch), |
| 233 | QString::fromUtf8(Common::g_scm_desc))); | 274 | QString::fromUtf8(Common::g_scm_desc))); |
| 234 | setAttribute(Qt::WA_AcceptTouchEvents); | 275 | setAttribute(Qt::WA_AcceptTouchEvents); |
| 235 | 276 | auto layout = new QHBoxLayout(this); | |
| 277 | layout->setMargin(0); | ||
| 278 | setLayout(layout); | ||
| 236 | InputCommon::Init(); | 279 | InputCommon::Init(); |
| 280 | |||
| 281 | GMainWindow* parent = GetMainWindow(); | ||
| 237 | connect(this, &GRenderWindow::FirstFrameDisplayed, parent, &GMainWindow::OnLoadComplete); | 282 | connect(this, &GRenderWindow::FirstFrameDisplayed, parent, &GMainWindow::OnLoadComplete); |
| 238 | } | 283 | } |
| 239 | 284 | ||
| 240 | GRenderWindow::~GRenderWindow() { | 285 | GRenderWindow::~GRenderWindow() { |
| 241 | InputCommon::Shutdown(); | 286 | InputCommon::Shutdown(); |
| 242 | |||
| 243 | // Avoid an unordered destruction that generates a segfault | ||
| 244 | delete child; | ||
| 245 | } | 287 | } |
| 246 | 288 | ||
| 247 | void GRenderWindow::moveContext() { | 289 | void GRenderWindow::MakeCurrent() { |
| 248 | if (!context) { | 290 | if (core_context) { |
| 249 | return; | 291 | core_context->MakeCurrent(); |
| 250 | } | 292 | } |
| 251 | DoneCurrent(); | ||
| 252 | |||
| 253 | // If the thread started running, move the GL Context to the new thread. Otherwise, move it | ||
| 254 | // back. | ||
| 255 | auto thread = (QThread::currentThread() == qApp->thread() && emu_thread != nullptr) | ||
| 256 | ? emu_thread | ||
| 257 | : qApp->thread(); | ||
| 258 | context->moveToThread(thread); | ||
| 259 | } | 293 | } |
| 260 | 294 | ||
| 261 | void GRenderWindow::SwapBuffers() { | 295 | void GRenderWindow::DoneCurrent() { |
| 262 | if (context) { | 296 | if (core_context) { |
| 263 | context->swapBuffers(child); | 297 | core_context->DoneCurrent(); |
| 264 | } | 298 | } |
| 299 | } | ||
| 300 | |||
| 301 | void GRenderWindow::PollEvents() { | ||
| 265 | if (!first_frame) { | 302 | if (!first_frame) { |
| 266 | first_frame = true; | 303 | first_frame = true; |
| 267 | emit FirstFrameDisplayed(); | 304 | emit FirstFrameDisplayed(); |
| 268 | } | 305 | } |
| 269 | } | 306 | } |
| 270 | 307 | ||
| 271 | void GRenderWindow::MakeCurrent() { | ||
| 272 | if (context) { | ||
| 273 | context->makeCurrent(child); | ||
| 274 | } | ||
| 275 | } | ||
| 276 | |||
| 277 | void GRenderWindow::DoneCurrent() { | ||
| 278 | if (context) { | ||
| 279 | context->doneCurrent(); | ||
| 280 | } | ||
| 281 | } | ||
| 282 | |||
| 283 | void GRenderWindow::PollEvents() {} | ||
| 284 | |||
| 285 | bool GRenderWindow::IsShown() const { | 308 | bool GRenderWindow::IsShown() const { |
| 286 | return !isMinimized(); | 309 | return !isMinimized(); |
| 287 | } | 310 | } |
| @@ -291,7 +314,7 @@ void GRenderWindow::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* i | |||
| 291 | #ifdef HAS_VULKAN | 314 | #ifdef HAS_VULKAN |
| 292 | const auto instance_proc_addr = vk_instance->getInstanceProcAddr("vkGetInstanceProcAddr"); | 315 | const auto instance_proc_addr = vk_instance->getInstanceProcAddr("vkGetInstanceProcAddr"); |
| 293 | const VkInstance instance_copy = vk_instance->vkInstance(); | 316 | const VkInstance instance_copy = vk_instance->vkInstance(); |
| 294 | const VkSurfaceKHR surface_copy = vk_instance->surfaceForWindow(child); | 317 | const VkSurfaceKHR surface_copy = vk_instance->surfaceForWindow(child_window); |
| 295 | 318 | ||
| 296 | std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr)); | 319 | std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr)); |
| 297 | std::memcpy(instance, &instance_copy, sizeof(instance_copy)); | 320 | std::memcpy(instance, &instance_copy, sizeof(instance_copy)); |
| @@ -309,21 +332,10 @@ void GRenderWindow::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* i | |||
| 309 | void GRenderWindow::OnFramebufferSizeChanged() { | 332 | void GRenderWindow::OnFramebufferSizeChanged() { |
| 310 | // Screen changes potentially incur a change in screen DPI, hence we should update the | 333 | // Screen changes potentially incur a change in screen DPI, hence we should update the |
| 311 | // framebuffer size | 334 | // framebuffer size |
| 312 | const qreal pixelRatio{GetWindowPixelRatio()}; | 335 | const qreal pixel_ratio = windowPixelRatio(); |
| 313 | const auto size{child->GetSize()}; | 336 | const u32 width = this->width() * pixel_ratio; |
| 314 | UpdateCurrentFramebufferLayout(size.first * pixelRatio, size.second * pixelRatio); | 337 | const u32 height = this->height() * pixel_ratio; |
| 315 | } | 338 | UpdateCurrentFramebufferLayout(width, height); |
| 316 | |||
| 317 | void GRenderWindow::ForwardKeyPressEvent(QKeyEvent* event) { | ||
| 318 | if (child) { | ||
| 319 | child->keyPressEvent(event); | ||
| 320 | } | ||
| 321 | } | ||
| 322 | |||
| 323 | void GRenderWindow::ForwardKeyReleaseEvent(QKeyEvent* event) { | ||
| 324 | if (child) { | ||
| 325 | child->keyReleaseEvent(event); | ||
| 326 | } | ||
| 327 | } | 339 | } |
| 328 | 340 | ||
| 329 | void GRenderWindow::BackupGeometry() { | 341 | void GRenderWindow::BackupGeometry() { |
| @@ -351,13 +363,12 @@ QByteArray GRenderWindow::saveGeometry() { | |||
| 351 | return geometry; | 363 | return geometry; |
| 352 | } | 364 | } |
| 353 | 365 | ||
| 354 | qreal GRenderWindow::GetWindowPixelRatio() const { | 366 | qreal GRenderWindow::windowPixelRatio() const { |
| 355 | // windowHandle() might not be accessible until the window is displayed to screen. | 367 | return devicePixelRatio(); |
| 356 | return windowHandle() ? windowHandle()->screen()->devicePixelRatio() : 1.0f; | ||
| 357 | } | 368 | } |
| 358 | 369 | ||
| 359 | std::pair<u32, u32> GRenderWindow::ScaleTouch(const QPointF pos) const { | 370 | std::pair<u32, u32> GRenderWindow::ScaleTouch(const QPointF pos) const { |
| 360 | const qreal pixel_ratio{GetWindowPixelRatio()}; | 371 | const qreal pixel_ratio = windowPixelRatio(); |
| 361 | return {static_cast<u32>(std::max(std::round(pos.x() * pixel_ratio), qreal{0.0})), | 372 | return {static_cast<u32>(std::max(std::round(pos.x() * pixel_ratio), qreal{0.0})), |
| 362 | static_cast<u32>(std::max(std::round(pos.y() * pixel_ratio), qreal{0.0}))}; | 373 | static_cast<u32>(std::max(std::round(pos.y() * pixel_ratio), qreal{0.0}))}; |
| 363 | } | 374 | } |
| @@ -367,6 +378,47 @@ void GRenderWindow::closeEvent(QCloseEvent* event) { | |||
| 367 | QWidget::closeEvent(event); | 378 | QWidget::closeEvent(event); |
| 368 | } | 379 | } |
| 369 | 380 | ||
| 381 | void GRenderWindow::keyPressEvent(QKeyEvent* event) { | ||
| 382 | InputCommon::GetKeyboard()->PressKey(event->key()); | ||
| 383 | } | ||
| 384 | |||
| 385 | void GRenderWindow::keyReleaseEvent(QKeyEvent* event) { | ||
| 386 | InputCommon::GetKeyboard()->ReleaseKey(event->key()); | ||
| 387 | } | ||
| 388 | |||
| 389 | void GRenderWindow::mousePressEvent(QMouseEvent* event) { | ||
| 390 | if (event->source() == Qt::MouseEventSynthesizedBySystem) | ||
| 391 | return; // touch input is handled in TouchBeginEvent | ||
| 392 | |||
| 393 | auto pos = event->pos(); | ||
| 394 | if (event->button() == Qt::LeftButton) { | ||
| 395 | const auto [x, y] = ScaleTouch(pos); | ||
| 396 | this->TouchPressed(x, y); | ||
| 397 | } else if (event->button() == Qt::RightButton) { | ||
| 398 | InputCommon::GetMotionEmu()->BeginTilt(pos.x(), pos.y()); | ||
| 399 | } | ||
| 400 | } | ||
| 401 | |||
| 402 | void GRenderWindow::mouseMoveEvent(QMouseEvent* event) { | ||
| 403 | if (event->source() == Qt::MouseEventSynthesizedBySystem) | ||
| 404 | return; // touch input is handled in TouchUpdateEvent | ||
| 405 | |||
| 406 | auto pos = event->pos(); | ||
| 407 | const auto [x, y] = ScaleTouch(pos); | ||
| 408 | this->TouchMoved(x, y); | ||
| 409 | InputCommon::GetMotionEmu()->Tilt(pos.x(), pos.y()); | ||
| 410 | } | ||
| 411 | |||
| 412 | void GRenderWindow::mouseReleaseEvent(QMouseEvent* event) { | ||
| 413 | if (event->source() == Qt::MouseEventSynthesizedBySystem) | ||
| 414 | return; // touch input is handled in TouchEndEvent | ||
| 415 | |||
| 416 | if (event->button() == Qt::LeftButton) | ||
| 417 | this->TouchReleased(); | ||
| 418 | else if (event->button() == Qt::RightButton) | ||
| 419 | InputCommon::GetMotionEmu()->EndTilt(); | ||
| 420 | } | ||
| 421 | |||
| 370 | void GRenderWindow::TouchBeginEvent(const QTouchEvent* event) { | 422 | void GRenderWindow::TouchBeginEvent(const QTouchEvent* event) { |
| 371 | // TouchBegin always has exactly one touch point, so take the .first() | 423 | // TouchBegin always has exactly one touch point, so take the .first() |
| 372 | const auto [x, y] = ScaleTouch(event->touchPoints().first().pos()); | 424 | const auto [x, y] = ScaleTouch(event->touchPoints().first().pos()); |
| @@ -415,26 +467,20 @@ void GRenderWindow::focusOutEvent(QFocusEvent* event) { | |||
| 415 | InputCommon::GetKeyboard()->ReleaseAllKeys(); | 467 | InputCommon::GetKeyboard()->ReleaseAllKeys(); |
| 416 | } | 468 | } |
| 417 | 469 | ||
| 418 | void GRenderWindow::OnClientAreaResized(u32 width, u32 height) { | 470 | void GRenderWindow::resizeEvent(QResizeEvent* event) { |
| 419 | NotifyClientAreaSizeChanged(std::make_pair(width, height)); | 471 | QWidget::resizeEvent(event); |
| 472 | OnFramebufferSizeChanged(); | ||
| 420 | } | 473 | } |
| 421 | 474 | ||
| 422 | std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const { | 475 | std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const { |
| 423 | return std::make_unique<GGLContext>(context.get()); | 476 | if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) { |
| 477 | return std::make_unique<GGLContext>(QOpenGLContext::globalShareContext()); | ||
| 478 | } | ||
| 479 | return {}; | ||
| 424 | } | 480 | } |
| 425 | 481 | ||
| 426 | bool GRenderWindow::InitRenderTarget() { | 482 | bool GRenderWindow::InitRenderTarget() { |
| 427 | shared_context.reset(); | 483 | ReleaseRenderTarget(); |
| 428 | context.reset(); | ||
| 429 | if (child) { | ||
| 430 | delete child; | ||
| 431 | } | ||
| 432 | if (container) { | ||
| 433 | delete container; | ||
| 434 | } | ||
| 435 | if (layout()) { | ||
| 436 | delete layout(); | ||
| 437 | } | ||
| 438 | 484 | ||
| 439 | first_frame = false; | 485 | first_frame = false; |
| 440 | 486 | ||
| @@ -451,13 +497,6 @@ bool GRenderWindow::InitRenderTarget() { | |||
| 451 | break; | 497 | break; |
| 452 | } | 498 | } |
| 453 | 499 | ||
| 454 | container = QWidget::createWindowContainer(child, this); | ||
| 455 | QBoxLayout* layout = new QHBoxLayout(this); | ||
| 456 | |||
| 457 | layout->addWidget(container); | ||
| 458 | layout->setMargin(0); | ||
| 459 | setLayout(layout); | ||
| 460 | |||
| 461 | // Reset minimum required size to avoid resizing issues on the main window after restarting. | 500 | // Reset minimum required size to avoid resizing issues on the main window after restarting. |
| 462 | setMinimumSize(1, 1); | 501 | setMinimumSize(1, 1); |
| 463 | 502 | ||
| @@ -467,14 +506,9 @@ bool GRenderWindow::InitRenderTarget() { | |||
| 467 | hide(); | 506 | hide(); |
| 468 | 507 | ||
| 469 | resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); | 508 | resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); |
| 470 | child->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); | ||
| 471 | container->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); | ||
| 472 | 509 | ||
| 473 | OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); | 510 | OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); |
| 474 | |||
| 475 | OnFramebufferSizeChanged(); | 511 | OnFramebufferSizeChanged(); |
| 476 | NotifyClientAreaSizeChanged(child->GetSize()); | ||
| 477 | |||
| 478 | BackupGeometry(); | 512 | BackupGeometry(); |
| 479 | 513 | ||
| 480 | if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) { | 514 | if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) { |
| @@ -486,6 +520,14 @@ bool GRenderWindow::InitRenderTarget() { | |||
| 486 | return true; | 520 | return true; |
| 487 | } | 521 | } |
| 488 | 522 | ||
| 523 | void GRenderWindow::ReleaseRenderTarget() { | ||
| 524 | if (child_widget) { | ||
| 525 | layout()->removeWidget(child_widget); | ||
| 526 | delete child_widget; | ||
| 527 | child_widget = nullptr; | ||
| 528 | } | ||
| 529 | } | ||
| 530 | |||
| 489 | void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_path) { | 531 | void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_path) { |
| 490 | auto& renderer = Core::System::GetInstance().Renderer(); | 532 | auto& renderer = Core::System::GetInstance().Renderer(); |
| 491 | 533 | ||
| @@ -521,16 +563,19 @@ bool GRenderWindow::InitializeOpenGL() { | |||
| 521 | fmt.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); | 563 | fmt.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); |
| 522 | // TODO: expose a setting for buffer value (ie default/single/double/triple) | 564 | // TODO: expose a setting for buffer value (ie default/single/double/triple) |
| 523 | fmt.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); | 565 | fmt.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); |
| 524 | shared_context = std::make_unique<QOpenGLContext>(); | 566 | fmt.setSwapInterval(0); |
| 525 | shared_context->setFormat(fmt); | 567 | QSurfaceFormat::setDefaultFormat(fmt); |
| 526 | shared_context->create(); | 568 | |
| 527 | context = std::make_unique<QOpenGLContext>(); | 569 | GMainWindow* parent = GetMainWindow(); |
| 528 | context->setShareContext(shared_context.get()); | 570 | QWindow* parent_win_handle = parent ? parent->windowHandle() : nullptr; |
| 529 | context->setFormat(fmt); | 571 | child_window = new OpenGLWindow(parent_win_handle, this, QOpenGLContext::globalShareContext()); |
| 530 | context->create(); | 572 | child_window->create(); |
| 531 | fmt.setSwapInterval(false); | 573 | child_widget = createWindowContainer(child_window, this); |
| 532 | 574 | child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); | |
| 533 | child = new GGLWidgetInternal(this, shared_context.get()); | 575 | layout()->addWidget(child_widget); |
| 576 | |||
| 577 | core_context = CreateSharedContext(); | ||
| 578 | |||
| 534 | return true; | 579 | return true; |
| 535 | } | 580 | } |
| 536 | 581 | ||
| @@ -559,7 +604,14 @@ bool GRenderWindow::InitializeVulkan() { | |||
| 559 | return false; | 604 | return false; |
| 560 | } | 605 | } |
| 561 | 606 | ||
| 562 | child = new GVKWidgetInternal(this, vk_instance.get()); | 607 | GMainWindow* parent = GetMainWindow(); |
| 608 | QWindow* parent_win_handle = parent ? parent->windowHandle() : nullptr; | ||
| 609 | child_window = new VulkanWindow(parent_win_handle, this, vk_instance.get()); | ||
| 610 | child_window->create(); | ||
| 611 | child_widget = createWindowContainer(child_window, this); | ||
| 612 | child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); | ||
| 613 | layout()->addWidget(child_widget); | ||
| 614 | |||
| 563 | return true; | 615 | return true; |
| 564 | #else | 616 | #else |
| 565 | QMessageBox::critical(this, tr("Vulkan not available!"), | 617 | QMessageBox::critical(this, tr("Vulkan not available!"), |
| @@ -569,7 +621,7 @@ bool GRenderWindow::InitializeVulkan() { | |||
| 569 | } | 621 | } |
| 570 | 622 | ||
| 571 | bool GRenderWindow::LoadOpenGL() { | 623 | bool GRenderWindow::LoadOpenGL() { |
| 572 | Core::Frontend::ScopeAcquireWindowContext acquire_context{*this}; | 624 | Core::Frontend::ScopeAcquireContext acquire_context{*this}; |
| 573 | if (!gladLoadGL()) { | 625 | if (!gladLoadGL()) { |
| 574 | QMessageBox::critical(this, tr("Error while initializing OpenGL 4.3!"), | 626 | QMessageBox::critical(this, tr("Error while initializing OpenGL 4.3!"), |
| 575 | tr("Your GPU may not support OpenGL 4.3, or you do not have the " | 627 | tr("Your GPU may not support OpenGL 4.3, or you do not have the " |
| @@ -621,12 +673,10 @@ QStringList GRenderWindow::GetUnsupportedGLExtensions() const { | |||
| 621 | 673 | ||
| 622 | void GRenderWindow::OnEmulationStarting(EmuThread* emu_thread) { | 674 | void GRenderWindow::OnEmulationStarting(EmuThread* emu_thread) { |
| 623 | this->emu_thread = emu_thread; | 675 | this->emu_thread = emu_thread; |
| 624 | child->DisablePainting(); | ||
| 625 | } | 676 | } |
| 626 | 677 | ||
| 627 | void GRenderWindow::OnEmulationStopping() { | 678 | void GRenderWindow::OnEmulationStopping() { |
| 628 | emu_thread = nullptr; | 679 | emu_thread = nullptr; |
| 629 | child->EnablePainting(); | ||
| 630 | } | 680 | } |
| 631 | 681 | ||
| 632 | void GRenderWindow::showEvent(QShowEvent* event) { | 682 | void GRenderWindow::showEvent(QShowEvent* event) { |
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index 71a2fa321..79b030304 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h | |||
| @@ -11,11 +11,13 @@ | |||
| 11 | #include <QImage> | 11 | #include <QImage> |
| 12 | #include <QThread> | 12 | #include <QThread> |
| 13 | #include <QWidget> | 13 | #include <QWidget> |
| 14 | #include <QWindow> | ||
| 14 | 15 | ||
| 15 | #include "common/thread.h" | 16 | #include "common/thread.h" |
| 16 | #include "core/core.h" | 17 | #include "core/core.h" |
| 17 | #include "core/frontend/emu_window.h" | 18 | #include "core/frontend/emu_window.h" |
| 18 | 19 | ||
| 20 | class GRenderWindow; | ||
| 19 | class QKeyEvent; | 21 | class QKeyEvent; |
| 20 | class QScreen; | 22 | class QScreen; |
| 21 | class QTouchEvent; | 23 | class QTouchEvent; |
| @@ -26,14 +28,6 @@ class QOpenGLContext; | |||
| 26 | class QVulkanInstance; | 28 | class QVulkanInstance; |
| 27 | #endif | 29 | #endif |
| 28 | 30 | ||
| 29 | class GWidgetInternal; | ||
| 30 | class GGLWidgetInternal; | ||
| 31 | class GVKWidgetInternal; | ||
| 32 | class GMainWindow; | ||
| 33 | class GRenderWindow; | ||
| 34 | class QSurface; | ||
| 35 | class QOpenGLContext; | ||
| 36 | |||
| 37 | namespace VideoCore { | 31 | namespace VideoCore { |
| 38 | enum class LoadCallbackStage; | 32 | enum class LoadCallbackStage; |
| 39 | } | 33 | } |
| @@ -42,7 +36,7 @@ class EmuThread final : public QThread { | |||
| 42 | Q_OBJECT | 36 | Q_OBJECT |
| 43 | 37 | ||
| 44 | public: | 38 | public: |
| 45 | explicit EmuThread(GRenderWindow* render_window); | 39 | explicit EmuThread(GRenderWindow& window); |
| 46 | ~EmuThread() override; | 40 | ~EmuThread() override; |
| 47 | 41 | ||
| 48 | /** | 42 | /** |
| @@ -96,7 +90,11 @@ private: | |||
| 96 | std::mutex running_mutex; | 90 | std::mutex running_mutex; |
| 97 | std::condition_variable running_cv; | 91 | std::condition_variable running_cv; |
| 98 | 92 | ||
| 99 | GRenderWindow* render_window; | 93 | /// Only used in asynchronous GPU mode |
| 94 | std::unique_ptr<Core::Frontend::GraphicsContext> shared_context; | ||
| 95 | |||
| 96 | /// This is shared_context in asynchronous GPU mode, core_context in synchronous GPU mode | ||
| 97 | Core::Frontend::GraphicsContext& context; | ||
| 100 | 98 | ||
| 101 | signals: | 99 | signals: |
| 102 | /** | 100 | /** |
| @@ -126,11 +124,10 @@ class GRenderWindow : public QWidget, public Core::Frontend::EmuWindow { | |||
| 126 | Q_OBJECT | 124 | Q_OBJECT |
| 127 | 125 | ||
| 128 | public: | 126 | public: |
| 129 | GRenderWindow(GMainWindow* parent, EmuThread* emu_thread); | 127 | GRenderWindow(QWidget* parent, EmuThread* emu_thread); |
| 130 | ~GRenderWindow() override; | 128 | ~GRenderWindow() override; |
| 131 | 129 | ||
| 132 | // EmuWindow implementation | 130 | // EmuWindow implementation. |
| 133 | void SwapBuffers() override; | ||
| 134 | void MakeCurrent() override; | 131 | void MakeCurrent() override; |
| 135 | void DoneCurrent() override; | 132 | void DoneCurrent() override; |
| 136 | void PollEvents() override; | 133 | void PollEvents() override; |
| @@ -139,30 +136,36 @@ public: | |||
| 139 | void* surface) const override; | 136 | void* surface) const override; |
| 140 | std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; | 137 | std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; |
| 141 | 138 | ||
| 142 | void ForwardKeyPressEvent(QKeyEvent* event); | ||
| 143 | void ForwardKeyReleaseEvent(QKeyEvent* event); | ||
| 144 | |||
| 145 | void BackupGeometry(); | 139 | void BackupGeometry(); |
| 146 | void RestoreGeometry(); | 140 | void RestoreGeometry(); |
| 147 | void restoreGeometry(const QByteArray& geometry); // overridden | 141 | void restoreGeometry(const QByteArray& geometry); // overridden |
| 148 | QByteArray saveGeometry(); // overridden | 142 | QByteArray saveGeometry(); // overridden |
| 149 | 143 | ||
| 150 | qreal GetWindowPixelRatio() const; | 144 | qreal windowPixelRatio() const; |
| 151 | std::pair<u32, u32> ScaleTouch(QPointF pos) const; | ||
| 152 | 145 | ||
| 153 | void closeEvent(QCloseEvent* event) override; | 146 | void closeEvent(QCloseEvent* event) override; |
| 147 | |||
| 148 | void resizeEvent(QResizeEvent* event) override; | ||
| 149 | |||
| 150 | void keyPressEvent(QKeyEvent* event) override; | ||
| 151 | void keyReleaseEvent(QKeyEvent* event) override; | ||
| 152 | |||
| 153 | void mousePressEvent(QMouseEvent* event) override; | ||
| 154 | void mouseMoveEvent(QMouseEvent* event) override; | ||
| 155 | void mouseReleaseEvent(QMouseEvent* event) override; | ||
| 156 | |||
| 154 | bool event(QEvent* event) override; | 157 | bool event(QEvent* event) override; |
| 155 | void focusOutEvent(QFocusEvent* event) override; | ||
| 156 | 158 | ||
| 157 | void OnClientAreaResized(u32 width, u32 height); | 159 | void focusOutEvent(QFocusEvent* event) override; |
| 158 | 160 | ||
| 159 | bool InitRenderTarget(); | 161 | bool InitRenderTarget(); |
| 160 | 162 | ||
| 163 | /// Destroy the previous run's child_widget which should also destroy the child_window | ||
| 164 | void ReleaseRenderTarget(); | ||
| 165 | |||
| 161 | void CaptureScreenshot(u32 res_scale, const QString& screenshot_path); | 166 | void CaptureScreenshot(u32 res_scale, const QString& screenshot_path); |
| 162 | 167 | ||
| 163 | public slots: | 168 | public slots: |
| 164 | void moveContext(); // overridden | ||
| 165 | |||
| 166 | void OnEmulationStarting(EmuThread* emu_thread); | 169 | void OnEmulationStarting(EmuThread* emu_thread); |
| 167 | void OnEmulationStopping(); | 170 | void OnEmulationStopping(); |
| 168 | void OnFramebufferSizeChanged(); | 171 | void OnFramebufferSizeChanged(); |
| @@ -173,6 +176,7 @@ signals: | |||
| 173 | void FirstFrameDisplayed(); | 176 | void FirstFrameDisplayed(); |
| 174 | 177 | ||
| 175 | private: | 178 | private: |
| 179 | std::pair<u32, u32> ScaleTouch(QPointF pos) const; | ||
| 176 | void TouchBeginEvent(const QTouchEvent* event); | 180 | void TouchBeginEvent(const QTouchEvent* event); |
| 177 | void TouchUpdateEvent(const QTouchEvent* event); | 181 | void TouchUpdateEvent(const QTouchEvent* event); |
| 178 | void TouchEndEvent(); | 182 | void TouchEndEvent(); |
| @@ -184,15 +188,9 @@ private: | |||
| 184 | bool LoadOpenGL(); | 188 | bool LoadOpenGL(); |
| 185 | QStringList GetUnsupportedGLExtensions() const; | 189 | QStringList GetUnsupportedGLExtensions() const; |
| 186 | 190 | ||
| 187 | QWidget* container = nullptr; | ||
| 188 | GWidgetInternal* child = nullptr; | ||
| 189 | |||
| 190 | EmuThread* emu_thread; | 191 | EmuThread* emu_thread; |
| 191 | // Context that backs the GGLWidgetInternal (and will be used by core to render) | 192 | |
| 192 | std::unique_ptr<QOpenGLContext> context; | 193 | std::unique_ptr<GraphicsContext> core_context; |
| 193 | // Context that will be shared between all newly created contexts. This should never be made | ||
| 194 | // current | ||
| 195 | std::unique_ptr<QOpenGLContext> shared_context; | ||
| 196 | 194 | ||
| 197 | #ifdef HAS_VULKAN | 195 | #ifdef HAS_VULKAN |
| 198 | std::unique_ptr<QVulkanInstance> vk_instance; | 196 | std::unique_ptr<QVulkanInstance> vk_instance; |
| @@ -202,6 +200,15 @@ private: | |||
| 202 | QImage screenshot_image; | 200 | QImage screenshot_image; |
| 203 | 201 | ||
| 204 | QByteArray geometry; | 202 | QByteArray geometry; |
| 203 | |||
| 204 | /// Native window handle that backs this presentation widget | ||
| 205 | QWindow* child_window = nullptr; | ||
| 206 | |||
| 207 | /// In order to embed the window into GRenderWindow, you need to use createWindowContainer to | ||
| 208 | /// put the child_window into a widget then add it to the layout. This child_widget can be | ||
| 209 | /// parented to GRenderWindow and use Qt's lifetime system | ||
| 210 | QWidget* child_widget = nullptr; | ||
| 211 | |||
| 205 | bool first_frame = false; | 212 | bool first_frame = false; |
| 206 | 213 | ||
| 207 | protected: | 214 | protected: |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 6209fff75..3b9ab38dd 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -539,7 +539,7 @@ void Config::ReadDebuggingValues() { | |||
| 539 | void Config::ReadServiceValues() { | 539 | void Config::ReadServiceValues() { |
| 540 | qt_config->beginGroup(QStringLiteral("Services")); | 540 | qt_config->beginGroup(QStringLiteral("Services")); |
| 541 | Settings::values.bcat_backend = | 541 | Settings::values.bcat_backend = |
| 542 | ReadSetting(QStringLiteral("bcat_backend"), QStringLiteral("boxcat")) | 542 | ReadSetting(QStringLiteral("bcat_backend"), QStringLiteral("null")) |
| 543 | .toString() | 543 | .toString() |
| 544 | .toStdString(); | 544 | .toStdString(); |
| 545 | Settings::values.bcat_boxcat_local = | 545 | Settings::values.bcat_boxcat_local = |
| @@ -631,6 +631,7 @@ void Config::ReadRendererValues() { | |||
| 631 | Settings::values.resolution_factor = | 631 | Settings::values.resolution_factor = |
| 632 | ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat(); | 632 | ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat(); |
| 633 | Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); | 633 | Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); |
| 634 | Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt(); | ||
| 634 | Settings::values.use_frame_limit = | 635 | Settings::values.use_frame_limit = |
| 635 | ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); | 636 | ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); |
| 636 | Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); | 637 | Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); |
| @@ -640,6 +641,7 @@ void Config::ReadRendererValues() { | |||
| 640 | ReadSetting(QStringLiteral("use_accurate_gpu_emulation"), false).toBool(); | 641 | ReadSetting(QStringLiteral("use_accurate_gpu_emulation"), false).toBool(); |
| 641 | Settings::values.use_asynchronous_gpu_emulation = | 642 | Settings::values.use_asynchronous_gpu_emulation = |
| 642 | ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); | 643 | ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); |
| 644 | Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); | ||
| 643 | Settings::values.force_30fps_mode = | 645 | Settings::values.force_30fps_mode = |
| 644 | ReadSetting(QStringLiteral("force_30fps_mode"), false).toBool(); | 646 | ReadSetting(QStringLiteral("force_30fps_mode"), false).toBool(); |
| 645 | 647 | ||
| @@ -680,6 +682,8 @@ void Config::ReadSystemValues() { | |||
| 680 | 682 | ||
| 681 | Settings::values.language_index = ReadSetting(QStringLiteral("language_index"), 1).toInt(); | 683 | Settings::values.language_index = ReadSetting(QStringLiteral("language_index"), 1).toInt(); |
| 682 | 684 | ||
| 685 | Settings::values.region_index = ReadSetting(QStringLiteral("region_index"), 1).toInt(); | ||
| 686 | |||
| 683 | const auto rng_seed_enabled = ReadSetting(QStringLiteral("rng_seed_enabled"), false).toBool(); | 687 | const auto rng_seed_enabled = ReadSetting(QStringLiteral("rng_seed_enabled"), false).toBool(); |
| 684 | if (rng_seed_enabled) { | 688 | if (rng_seed_enabled) { |
| 685 | Settings::values.rng_seed = ReadSetting(QStringLiteral("rng_seed"), 0).toULongLong(); | 689 | Settings::values.rng_seed = ReadSetting(QStringLiteral("rng_seed"), 0).toULongLong(); |
| @@ -696,6 +700,8 @@ void Config::ReadSystemValues() { | |||
| 696 | Settings::values.custom_rtc = std::nullopt; | 700 | Settings::values.custom_rtc = std::nullopt; |
| 697 | } | 701 | } |
| 698 | 702 | ||
| 703 | Settings::values.sound_index = ReadSetting(QStringLiteral("sound_index"), 1).toInt(); | ||
| 704 | |||
| 699 | qt_config->endGroup(); | 705 | qt_config->endGroup(); |
| 700 | } | 706 | } |
| 701 | 707 | ||
| @@ -1066,6 +1072,7 @@ void Config::SaveRendererValues() { | |||
| 1066 | WriteSetting(QStringLiteral("resolution_factor"), | 1072 | WriteSetting(QStringLiteral("resolution_factor"), |
| 1067 | static_cast<double>(Settings::values.resolution_factor), 1.0); | 1073 | static_cast<double>(Settings::values.resolution_factor), 1.0); |
| 1068 | WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); | 1074 | WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); |
| 1075 | WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0); | ||
| 1069 | WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); | 1076 | WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); |
| 1070 | WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); | 1077 | WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); |
| 1071 | WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, | 1078 | WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, |
| @@ -1074,6 +1081,7 @@ void Config::SaveRendererValues() { | |||
| 1074 | Settings::values.use_accurate_gpu_emulation, false); | 1081 | Settings::values.use_accurate_gpu_emulation, false); |
| 1075 | WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), | 1082 | WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), |
| 1076 | Settings::values.use_asynchronous_gpu_emulation, false); | 1083 | Settings::values.use_asynchronous_gpu_emulation, false); |
| 1084 | WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); | ||
| 1077 | WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false); | 1085 | WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false); |
| 1078 | 1086 | ||
| 1079 | // Cast to double because Qt's written float values are not human-readable | 1087 | // Cast to double because Qt's written float values are not human-readable |
| @@ -1110,6 +1118,7 @@ void Config::SaveSystemValues() { | |||
| 1110 | WriteSetting(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, false); | 1118 | WriteSetting(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, false); |
| 1111 | WriteSetting(QStringLiteral("current_user"), Settings::values.current_user, 0); | 1119 | WriteSetting(QStringLiteral("current_user"), Settings::values.current_user, 0); |
| 1112 | WriteSetting(QStringLiteral("language_index"), Settings::values.language_index, 1); | 1120 | WriteSetting(QStringLiteral("language_index"), Settings::values.language_index, 1); |
| 1121 | WriteSetting(QStringLiteral("region_index"), Settings::values.region_index, 1); | ||
| 1113 | 1122 | ||
| 1114 | WriteSetting(QStringLiteral("rng_seed_enabled"), Settings::values.rng_seed.has_value(), false); | 1123 | WriteSetting(QStringLiteral("rng_seed_enabled"), Settings::values.rng_seed.has_value(), false); |
| 1115 | WriteSetting(QStringLiteral("rng_seed"), Settings::values.rng_seed.value_or(0), 0); | 1124 | WriteSetting(QStringLiteral("rng_seed"), Settings::values.rng_seed.value_or(0), 0); |
| @@ -1121,6 +1130,8 @@ void Config::SaveSystemValues() { | |||
| 1121 | Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()), | 1130 | Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()), |
| 1122 | 0); | 1131 | 0); |
| 1123 | 1132 | ||
| 1133 | WriteSetting(QStringLiteral("sound_index"), Settings::values.sound_index, 1); | ||
| 1134 | |||
| 1124 | qt_config->endGroup(); | 1135 | qt_config->endGroup(); |
| 1125 | } | 1136 | } |
| 1126 | 1137 | ||
diff --git a/src/yuzu/configuration/configure.ui b/src/yuzu/configuration/configure.ui index 67b990f1a..9aec1bd09 100644 --- a/src/yuzu/configuration/configure.ui +++ b/src/yuzu/configuration/configure.ui | |||
| @@ -83,6 +83,11 @@ | |||
| 83 | <string>Graphics</string> | 83 | <string>Graphics</string> |
| 84 | </attribute> | 84 | </attribute> |
| 85 | </widget> | 85 | </widget> |
| 86 | <widget class="ConfigureGraphicsAdvanced" name="graphicsAdvancedTab"> | ||
| 87 | <attribute name="title"> | ||
| 88 | <string>GraphicsAdvanced</string> | ||
| 89 | </attribute> | ||
| 90 | </widget> | ||
| 86 | <widget class="ConfigureAudio" name="audioTab"> | 91 | <widget class="ConfigureAudio" name="audioTab"> |
| 87 | <attribute name="title"> | 92 | <attribute name="title"> |
| 88 | <string>Audio</string> | 93 | <string>Audio</string> |
| @@ -160,6 +165,12 @@ | |||
| 160 | <container>1</container> | 165 | <container>1</container> |
| 161 | </customwidget> | 166 | </customwidget> |
| 162 | <customwidget> | 167 | <customwidget> |
| 168 | <class>ConfigureGraphicsAdvanced</class> | ||
| 169 | <extends>QWidget</extends> | ||
| 170 | <header>configuration/configure_graphics_advanced.h</header> | ||
| 171 | <container>1</container> | ||
| 172 | </customwidget> | ||
| 173 | <customwidget> | ||
| 163 | <class>ConfigureWeb</class> | 174 | <class>ConfigureWeb</class> |
| 164 | <extends>QWidget</extends> | 175 | <extends>QWidget</extends> |
| 165 | <header>configuration/configure_web.h</header> | 176 | <header>configuration/configure_web.h</header> |
diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp index db3b19352..df4473b46 100644 --- a/src/yuzu/configuration/configure_dialog.cpp +++ b/src/yuzu/configuration/configure_dialog.cpp | |||
| @@ -41,6 +41,7 @@ void ConfigureDialog::ApplyConfiguration() { | |||
| 41 | ui->inputTab->ApplyConfiguration(); | 41 | ui->inputTab->ApplyConfiguration(); |
| 42 | ui->hotkeysTab->ApplyConfiguration(registry); | 42 | ui->hotkeysTab->ApplyConfiguration(registry); |
| 43 | ui->graphicsTab->ApplyConfiguration(); | 43 | ui->graphicsTab->ApplyConfiguration(); |
| 44 | ui->graphicsAdvancedTab->ApplyConfiguration(); | ||
| 44 | ui->audioTab->ApplyConfiguration(); | 45 | ui->audioTab->ApplyConfiguration(); |
| 45 | ui->debugTab->ApplyConfiguration(); | 46 | ui->debugTab->ApplyConfiguration(); |
| 46 | ui->webTab->ApplyConfiguration(); | 47 | ui->webTab->ApplyConfiguration(); |
| @@ -76,7 +77,7 @@ void ConfigureDialog::PopulateSelectionList() { | |||
| 76 | const std::array<std::pair<QString, QList<QWidget*>>, 5> items{ | 77 | const std::array<std::pair<QString, QList<QWidget*>>, 5> items{ |
| 77 | {{tr("General"), {ui->generalTab, ui->webTab, ui->debugTab, ui->uiTab}}, | 78 | {{tr("General"), {ui->generalTab, ui->webTab, ui->debugTab, ui->uiTab}}, |
| 78 | {tr("System"), {ui->systemTab, ui->profileManagerTab, ui->serviceTab, ui->filesystemTab}}, | 79 | {tr("System"), {ui->systemTab, ui->profileManagerTab, ui->serviceTab, ui->filesystemTab}}, |
| 79 | {tr("Graphics"), {ui->graphicsTab}}, | 80 | {tr("Graphics"), {ui->graphicsTab, ui->graphicsAdvancedTab}}, |
| 80 | {tr("Audio"), {ui->audioTab}}, | 81 | {tr("Audio"), {ui->audioTab}}, |
| 81 | {tr("Controls"), {ui->inputTab, ui->hotkeysTab}}}, | 82 | {tr("Controls"), {ui->inputTab, ui->hotkeysTab}}}, |
| 82 | }; | 83 | }; |
| @@ -105,6 +106,7 @@ void ConfigureDialog::UpdateVisibleTabs() { | |||
| 105 | {ui->inputTab, tr("Input")}, | 106 | {ui->inputTab, tr("Input")}, |
| 106 | {ui->hotkeysTab, tr("Hotkeys")}, | 107 | {ui->hotkeysTab, tr("Hotkeys")}, |
| 107 | {ui->graphicsTab, tr("Graphics")}, | 108 | {ui->graphicsTab, tr("Graphics")}, |
| 109 | {ui->graphicsAdvancedTab, tr("Advanced")}, | ||
| 108 | {ui->audioTab, tr("Audio")}, | 110 | {ui->audioTab, tr("Audio")}, |
| 109 | {ui->debugTab, tr("Debug")}, | 111 | {ui->debugTab, tr("Debug")}, |
| 110 | {ui->webTab, tr("Web")}, | 112 | {ui->webTab, tr("Web")}, |
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index ea899c080..a821c7b3c 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp | |||
| @@ -100,11 +100,8 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 100 | ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); | 100 | ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); |
| 101 | ui->use_disk_shader_cache->setEnabled(runtime_lock); | 101 | ui->use_disk_shader_cache->setEnabled(runtime_lock); |
| 102 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); | 102 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); |
| 103 | ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); | ||
| 104 | ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); | 103 | ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); |
| 105 | ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation); | 104 | ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation); |
| 106 | ui->force_30fps_mode->setEnabled(runtime_lock); | ||
| 107 | ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); | ||
| 108 | UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, | 105 | UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, |
| 109 | Settings::values.bg_blue)); | 106 | Settings::values.bg_blue)); |
| 110 | UpdateDeviceComboBox(); | 107 | UpdateDeviceComboBox(); |
| @@ -117,10 +114,8 @@ void ConfigureGraphics::ApplyConfiguration() { | |||
| 117 | ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); | 114 | ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); |
| 118 | Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); | 115 | Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); |
| 119 | Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); | 116 | Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); |
| 120 | Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); | ||
| 121 | Settings::values.use_asynchronous_gpu_emulation = | 117 | Settings::values.use_asynchronous_gpu_emulation = |
| 122 | ui->use_asynchronous_gpu_emulation->isChecked(); | 118 | ui->use_asynchronous_gpu_emulation->isChecked(); |
| 123 | Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); | ||
| 124 | Settings::values.bg_red = static_cast<float>(bg_color.redF()); | 119 | Settings::values.bg_red = static_cast<float>(bg_color.redF()); |
| 125 | Settings::values.bg_green = static_cast<float>(bg_color.greenF()); | 120 | Settings::values.bg_green = static_cast<float>(bg_color.greenF()); |
| 126 | Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); | 121 | Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); |
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index db60426ab..c816d6108 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui | |||
| @@ -85,20 +85,6 @@ | |||
| 85 | </widget> | 85 | </widget> |
| 86 | </item> | 86 | </item> |
| 87 | <item> | 87 | <item> |
| 88 | <widget class="QCheckBox" name="use_accurate_gpu_emulation"> | ||
| 89 | <property name="text"> | ||
| 90 | <string>Use accurate GPU emulation (slow)</string> | ||
| 91 | </property> | ||
| 92 | </widget> | ||
| 93 | </item> | ||
| 94 | <item> | ||
| 95 | <widget class="QCheckBox" name="force_30fps_mode"> | ||
| 96 | <property name="text"> | ||
| 97 | <string>Force 30 FPS mode</string> | ||
| 98 | </property> | ||
| 99 | </widget> | ||
| 100 | </item> | ||
| 101 | <item> | ||
| 102 | <layout class="QHBoxLayout" name="horizontalLayout_2"> | 88 | <layout class="QHBoxLayout" name="horizontalLayout_2"> |
| 103 | <item> | 89 | <item> |
| 104 | <widget class="QLabel" name="label"> | 90 | <widget class="QLabel" name="label"> |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp new file mode 100644 index 000000000..b9f429f84 --- /dev/null +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp | |||
| @@ -0,0 +1,48 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "core/core.h" | ||
| 6 | #include "core/settings.h" | ||
| 7 | #include "ui_configure_graphics_advanced.h" | ||
| 8 | #include "yuzu/configuration/configure_graphics_advanced.h" | ||
| 9 | |||
| 10 | ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent) | ||
| 11 | : QWidget(parent), ui(new Ui::ConfigureGraphicsAdvanced) { | ||
| 12 | |||
| 13 | ui->setupUi(this); | ||
| 14 | |||
| 15 | SetConfiguration(); | ||
| 16 | } | ||
| 17 | |||
| 18 | ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default; | ||
| 19 | |||
| 20 | void ConfigureGraphicsAdvanced::SetConfiguration() { | ||
| 21 | const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); | ||
| 22 | ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); | ||
| 23 | ui->use_vsync->setEnabled(runtime_lock); | ||
| 24 | ui->use_vsync->setChecked(Settings::values.use_vsync); | ||
| 25 | ui->force_30fps_mode->setEnabled(runtime_lock); | ||
| 26 | ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); | ||
| 27 | ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); | ||
| 28 | ui->anisotropic_filtering_combobox->setCurrentIndex(Settings::values.max_anisotropy); | ||
| 29 | } | ||
| 30 | |||
| 31 | void ConfigureGraphicsAdvanced::ApplyConfiguration() { | ||
| 32 | Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); | ||
| 33 | Settings::values.use_vsync = ui->use_vsync->isChecked(); | ||
| 34 | Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); | ||
| 35 | Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); | ||
| 36 | } | ||
| 37 | |||
| 38 | void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) { | ||
| 39 | if (event->type() == QEvent::LanguageChange) { | ||
| 40 | RetranslateUI(); | ||
| 41 | } | ||
| 42 | |||
| 43 | QWidget::changeEvent(event); | ||
| 44 | } | ||
| 45 | |||
| 46 | void ConfigureGraphicsAdvanced::RetranslateUI() { | ||
| 47 | ui->retranslateUi(this); | ||
| 48 | } | ||
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h new file mode 100644 index 000000000..bbc9d4355 --- /dev/null +++ b/src/yuzu/configuration/configure_graphics_advanced.h | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <QWidget> | ||
| 9 | |||
| 10 | namespace Ui { | ||
| 11 | class ConfigureGraphicsAdvanced; | ||
| 12 | } | ||
| 13 | |||
| 14 | class ConfigureGraphicsAdvanced : public QWidget { | ||
| 15 | Q_OBJECT | ||
| 16 | |||
| 17 | public: | ||
| 18 | explicit ConfigureGraphicsAdvanced(QWidget* parent = nullptr); | ||
| 19 | ~ConfigureGraphicsAdvanced() override; | ||
| 20 | |||
| 21 | void ApplyConfiguration(); | ||
| 22 | |||
| 23 | private: | ||
| 24 | void changeEvent(QEvent* event) override; | ||
| 25 | void RetranslateUI(); | ||
| 26 | |||
| 27 | void SetConfiguration(); | ||
| 28 | |||
| 29 | std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; | ||
| 30 | }; | ||
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui new file mode 100644 index 000000000..42eec278e --- /dev/null +++ b/src/yuzu/configuration/configure_graphics_advanced.ui | |||
| @@ -0,0 +1,111 @@ | |||
| 1 | <?xml version="1.0" encoding="UTF-8"?> | ||
| 2 | <ui version="4.0"> | ||
| 3 | <class>ConfigureGraphicsAdvanced</class> | ||
| 4 | <widget class="QWidget" name="ConfigureGraphicsAdvanced"> | ||
| 5 | <property name="geometry"> | ||
| 6 | <rect> | ||
| 7 | <x>0</x> | ||
| 8 | <y>0</y> | ||
| 9 | <width>400</width> | ||
| 10 | <height>321</height> | ||
| 11 | </rect> | ||
| 12 | </property> | ||
| 13 | <property name="windowTitle"> | ||
| 14 | <string>Form</string> | ||
| 15 | </property> | ||
| 16 | <layout class="QVBoxLayout" name="verticalLayout_1"> | ||
| 17 | <item> | ||
| 18 | <layout class="QVBoxLayout" name="verticalLayout_2"> | ||
| 19 | <item> | ||
| 20 | <widget class="QGroupBox" name="groupBox_1"> | ||
| 21 | <property name="title"> | ||
| 22 | <string>Advanced Graphics Settings</string> | ||
| 23 | </property> | ||
| 24 | <layout class="QVBoxLayout" name="verticalLayout_3"> | ||
| 25 | <item> | ||
| 26 | <widget class="QCheckBox" name="use_accurate_gpu_emulation"> | ||
| 27 | <property name="text"> | ||
| 28 | <string>Use accurate GPU emulation (slow)</string> | ||
| 29 | </property> | ||
| 30 | </widget> | ||
| 31 | </item> | ||
| 32 | <item> | ||
| 33 | <widget class="QCheckBox" name="use_vsync"> | ||
| 34 | <property name="toolTip"> | ||
| 35 | <string>VSync prevents the screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string> | ||
| 36 | </property> | ||
| 37 | <property name="text"> | ||
| 38 | <string>Use VSync (OpenGL only)</string> | ||
| 39 | </property> | ||
| 40 | </widget> | ||
| 41 | </item> | ||
| 42 | <item> | ||
| 43 | <widget class="QCheckBox" name="force_30fps_mode"> | ||
| 44 | <property name="text"> | ||
| 45 | <string>Force 30 FPS mode</string> | ||
| 46 | </property> | ||
| 47 | </widget> | ||
| 48 | </item> | ||
| 49 | <item> | ||
| 50 | <layout class="QHBoxLayout" name="horizontalLayout_1"> | ||
| 51 | <item> | ||
| 52 | <widget class="QLabel" name="af_label"> | ||
| 53 | <property name="text"> | ||
| 54 | <string>Anisotropic Filtering:</string> | ||
| 55 | </property> | ||
| 56 | </widget> | ||
| 57 | </item> | ||
| 58 | <item> | ||
| 59 | <widget class="QComboBox" name="anisotropic_filtering_combobox"> | ||
| 60 | <item> | ||
| 61 | <property name="text"> | ||
| 62 | <string>Default</string> | ||
| 63 | </property> | ||
| 64 | </item> | ||
| 65 | <item> | ||
| 66 | <property name="text"> | ||
| 67 | <string>2x</string> | ||
| 68 | </property> | ||
| 69 | </item> | ||
| 70 | <item> | ||
| 71 | <property name="text"> | ||
| 72 | <string>4x</string> | ||
| 73 | </property> | ||
| 74 | </item> | ||
| 75 | <item> | ||
| 76 | <property name="text"> | ||
| 77 | <string>8x</string> | ||
| 78 | </property> | ||
| 79 | </item> | ||
| 80 | <item> | ||
| 81 | <property name="text"> | ||
| 82 | <string>16x</string> | ||
| 83 | </property> | ||
| 84 | </item> | ||
| 85 | </widget> | ||
| 86 | </item> | ||
| 87 | </layout> | ||
| 88 | </item> | ||
| 89 | </layout> | ||
| 90 | </widget> | ||
| 91 | </item> | ||
| 92 | </layout> | ||
| 93 | </item> | ||
| 94 | <item> | ||
| 95 | <spacer name="verticalSpacer"> | ||
| 96 | <property name="orientation"> | ||
| 97 | <enum>Qt::Vertical</enum> | ||
| 98 | </property> | ||
| 99 | <property name="sizeHint" stdset="0"> | ||
| 100 | <size> | ||
| 101 | <width>20</width> | ||
| 102 | <height>40</height> | ||
| 103 | </size> | ||
| 104 | </property> | ||
| 105 | </spacer> | ||
| 106 | </item> | ||
| 107 | </layout> | ||
| 108 | </widget> | ||
| 109 | <resources/> | ||
| 110 | <connections/> | ||
| 111 | </ui> | ||
diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp index e1b52f8d9..f49cd4c8f 100644 --- a/src/yuzu/configuration/configure_system.cpp +++ b/src/yuzu/configuration/configure_system.cpp | |||
| @@ -56,6 +56,8 @@ void ConfigureSystem::SetConfiguration() { | |||
| 56 | enabled = !Core::System::GetInstance().IsPoweredOn(); | 56 | enabled = !Core::System::GetInstance().IsPoweredOn(); |
| 57 | 57 | ||
| 58 | ui->combo_language->setCurrentIndex(Settings::values.language_index); | 58 | ui->combo_language->setCurrentIndex(Settings::values.language_index); |
| 59 | ui->combo_region->setCurrentIndex(Settings::values.region_index); | ||
| 60 | ui->combo_sound->setCurrentIndex(Settings::values.sound_index); | ||
| 59 | 61 | ||
| 60 | ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value()); | 62 | ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value()); |
| 61 | ui->rng_seed_edit->setEnabled(Settings::values.rng_seed.has_value()); | 63 | ui->rng_seed_edit->setEnabled(Settings::values.rng_seed.has_value()); |
| @@ -81,6 +83,8 @@ void ConfigureSystem::ApplyConfiguration() { | |||
| 81 | } | 83 | } |
| 82 | 84 | ||
| 83 | Settings::values.language_index = ui->combo_language->currentIndex(); | 85 | Settings::values.language_index = ui->combo_language->currentIndex(); |
| 86 | Settings::values.region_index = ui->combo_region->currentIndex(); | ||
| 87 | Settings::values.sound_index = ui->combo_sound->currentIndex(); | ||
| 84 | 88 | ||
| 85 | if (ui->rng_seed_checkbox->isChecked()) { | 89 | if (ui->rng_seed_checkbox->isChecked()) { |
| 86 | Settings::values.rng_seed = ui->rng_seed_edit->text().toULongLong(nullptr, 16); | 90 | Settings::values.rng_seed = ui->rng_seed_edit->text().toULongLong(nullptr, 16); |
diff --git a/src/yuzu/configuration/configure_system.h b/src/yuzu/configuration/configure_system.h index 1eab3781d..d8fa2d2cc 100644 --- a/src/yuzu/configuration/configure_system.h +++ b/src/yuzu/configuration/configure_system.h | |||
| @@ -36,5 +36,6 @@ private: | |||
| 36 | bool enabled = false; | 36 | bool enabled = false; |
| 37 | 37 | ||
| 38 | int language_index = 0; | 38 | int language_index = 0; |
| 39 | int region_index = 0; | ||
| 39 | int sound_index = 0; | 40 | int sound_index = 0; |
| 40 | }; | 41 | }; |
diff --git a/src/yuzu/configuration/configure_system.ui b/src/yuzu/configuration/configure_system.ui index 65745a2f8..4e2c7e76e 100644 --- a/src/yuzu/configuration/configure_system.ui +++ b/src/yuzu/configuration/configure_system.ui | |||
| @@ -22,14 +22,14 @@ | |||
| 22 | <string>System Settings</string> | 22 | <string>System Settings</string> |
| 23 | </property> | 23 | </property> |
| 24 | <layout class="QGridLayout" name="gridLayout"> | 24 | <layout class="QGridLayout" name="gridLayout"> |
| 25 | <item row="1" column="0"> | 25 | <item row="2" column="0"> |
| 26 | <widget class="QLabel" name="label_sound"> | 26 | <widget class="QLabel" name="label_sound"> |
| 27 | <property name="text"> | 27 | <property name="text"> |
| 28 | <string>Sound output mode</string> | 28 | <string>Sound output mode</string> |
| 29 | </property> | 29 | </property> |
| 30 | </widget> | 30 | </widget> |
| 31 | </item> | 31 | </item> |
| 32 | <item row="2" column="0"> | 32 | <item row="3" column="0"> |
| 33 | <widget class="QLabel" name="label_console_id"> | 33 | <widget class="QLabel" name="label_console_id"> |
| 34 | <property name="text"> | 34 | <property name="text"> |
| 35 | <string>Console ID:</string> | 35 | <string>Console ID:</string> |
| @@ -128,14 +128,60 @@ | |||
| 128 | </item> | 128 | </item> |
| 129 | </widget> | 129 | </widget> |
| 130 | </item> | 130 | </item> |
| 131 | <item row="4" column="0"> | 131 | <item row="1" column="0"> |
| 132 | <widget class="QLabel" name="label_region"> | ||
| 133 | <property name="text"> | ||
| 134 | <string>Region:</string> | ||
| 135 | </property> | ||
| 136 | </widget> | ||
| 137 | </item> | ||
| 138 | <item row="1" column="1"> | ||
| 139 | <widget class="QComboBox" name="combo_region"> | ||
| 140 | <item> | ||
| 141 | <property name="text"> | ||
| 142 | <string>Japan</string> | ||
| 143 | </property> | ||
| 144 | </item> | ||
| 145 | <item> | ||
| 146 | <property name="text"> | ||
| 147 | <string>USA</string> | ||
| 148 | </property> | ||
| 149 | </item> | ||
| 150 | <item> | ||
| 151 | <property name="text"> | ||
| 152 | <string>Europe</string> | ||
| 153 | </property> | ||
| 154 | </item> | ||
| 155 | <item> | ||
| 156 | <property name="text"> | ||
| 157 | <string>Australia</string> | ||
| 158 | </property> | ||
| 159 | </item> | ||
| 160 | <item> | ||
| 161 | <property name="text"> | ||
| 162 | <string>China</string> | ||
| 163 | </property> | ||
| 164 | </item> | ||
| 165 | <item> | ||
| 166 | <property name="text"> | ||
| 167 | <string>Korea</string> | ||
| 168 | </property> | ||
| 169 | </item> | ||
| 170 | <item> | ||
| 171 | <property name="text"> | ||
| 172 | <string>Taiwan</string> | ||
| 173 | </property> | ||
| 174 | </item> | ||
| 175 | </widget> | ||
| 176 | </item> | ||
| 177 | <item row="5" column="0"> | ||
| 132 | <widget class="QCheckBox" name="rng_seed_checkbox"> | 178 | <widget class="QCheckBox" name="rng_seed_checkbox"> |
| 133 | <property name="text"> | 179 | <property name="text"> |
| 134 | <string>RNG Seed</string> | 180 | <string>RNG Seed</string> |
| 135 | </property> | 181 | </property> |
| 136 | </widget> | 182 | </widget> |
| 137 | </item> | 183 | </item> |
| 138 | <item row="1" column="1"> | 184 | <item row="2" column="1"> |
| 139 | <widget class="QComboBox" name="combo_sound"> | 185 | <widget class="QComboBox" name="combo_sound"> |
| 140 | <item> | 186 | <item> |
| 141 | <property name="text"> | 187 | <property name="text"> |
| @@ -161,7 +207,7 @@ | |||
| 161 | </property> | 207 | </property> |
| 162 | </widget> | 208 | </widget> |
| 163 | </item> | 209 | </item> |
| 164 | <item row="2" column="1"> | 210 | <item row="3" column="1"> |
| 165 | <widget class="QPushButton" name="button_regenerate_console_id"> | 211 | <widget class="QPushButton" name="button_regenerate_console_id"> |
| 166 | <property name="sizePolicy"> | 212 | <property name="sizePolicy"> |
| 167 | <sizepolicy hsizetype="Fixed" vsizetype="Fixed"> | 213 | <sizepolicy hsizetype="Fixed" vsizetype="Fixed"> |
| @@ -177,14 +223,14 @@ | |||
| 177 | </property> | 223 | </property> |
| 178 | </widget> | 224 | </widget> |
| 179 | </item> | 225 | </item> |
| 180 | <item row="3" column="0"> | 226 | <item row="4" column="0"> |
| 181 | <widget class="QCheckBox" name="custom_rtc_checkbox"> | 227 | <widget class="QCheckBox" name="custom_rtc_checkbox"> |
| 182 | <property name="text"> | 228 | <property name="text"> |
| 183 | <string>Custom RTC</string> | 229 | <string>Custom RTC</string> |
| 184 | </property> | 230 | </property> |
| 185 | </widget> | 231 | </widget> |
| 186 | </item> | 232 | </item> |
| 187 | <item row="3" column="1"> | 233 | <item row="4" column="1"> |
| 188 | <widget class="QDateTimeEdit" name="custom_rtc_edit"> | 234 | <widget class="QDateTimeEdit" name="custom_rtc_edit"> |
| 189 | <property name="minimumDate"> | 235 | <property name="minimumDate"> |
| 190 | <date> | 236 | <date> |
| @@ -198,7 +244,7 @@ | |||
| 198 | </property> | 244 | </property> |
| 199 | </widget> | 245 | </widget> |
| 200 | </item> | 246 | </item> |
| 201 | <item row="4" column="1"> | 247 | <item row="5" column="1"> |
| 202 | <widget class="QLineEdit" name="rng_seed_edit"> | 248 | <widget class="QLineEdit" name="rng_seed_edit"> |
| 203 | <property name="sizePolicy"> | 249 | <property name="sizePolicy"> |
| 204 | <sizepolicy hsizetype="Minimum" vsizetype="Fixed"> | 250 | <sizepolicy hsizetype="Minimum" vsizetype="Fixed"> |
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index 3f1a94627..c1ea25fb8 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp | |||
| @@ -116,7 +116,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() cons | |||
| 116 | 116 | ||
| 117 | constexpr std::size_t BaseRegister = 29; | 117 | constexpr std::size_t BaseRegister = 29; |
| 118 | auto& memory = Core::System::GetInstance().Memory(); | 118 | auto& memory = Core::System::GetInstance().Memory(); |
| 119 | u64 base_pointer = thread.GetContext().cpu_registers[BaseRegister]; | 119 | u64 base_pointer = thread.GetContext64().cpu_registers[BaseRegister]; |
| 120 | 120 | ||
| 121 | while (base_pointer != 0) { | 121 | while (base_pointer != 0) { |
| 122 | const u64 lr = memory.Read64(base_pointer + sizeof(u64)); | 122 | const u64 lr = memory.Read64(base_pointer + sizeof(u64)); |
| @@ -240,7 +240,7 @@ QString WaitTreeThread::GetText() const { | |||
| 240 | break; | 240 | break; |
| 241 | } | 241 | } |
| 242 | 242 | ||
| 243 | const auto& context = thread.GetContext(); | 243 | const auto& context = thread.GetContext64(); |
| 244 | const QString pc_info = tr(" PC = 0x%1 LR = 0x%2") | 244 | const QString pc_info = tr(" PC = 0x%1 LR = 0x%2") |
| 245 | .arg(context.pc, 8, 16, QLatin1Char{'0'}) | 245 | .arg(context.pc, 8, 16, QLatin1Char{'0'}) |
| 246 | .arg(context.cpu_registers[30], 8, 16, QLatin1Char{'0'}); | 246 | .arg(context.cpu_registers[30], 8, 16, QLatin1Char{'0'}); |
diff --git a/src/yuzu/loading_screen.cpp b/src/yuzu/loading_screen.cpp index 4f2bfab48..2a6483370 100644 --- a/src/yuzu/loading_screen.cpp +++ b/src/yuzu/loading_screen.cpp | |||
| @@ -34,18 +34,6 @@ constexpr char PROGRESSBAR_STYLE_PREPARE[] = R"( | |||
| 34 | QProgressBar {} | 34 | QProgressBar {} |
| 35 | QProgressBar::chunk {})"; | 35 | QProgressBar::chunk {})"; |
| 36 | 36 | ||
| 37 | constexpr char PROGRESSBAR_STYLE_DECOMPILE[] = R"( | ||
| 38 | QProgressBar { | ||
| 39 | background-color: black; | ||
| 40 | border: 2px solid white; | ||
| 41 | border-radius: 4px; | ||
| 42 | padding: 2px; | ||
| 43 | } | ||
| 44 | QProgressBar::chunk { | ||
| 45 | background-color: #0ab9e6; | ||
| 46 | width: 1px; | ||
| 47 | })"; | ||
| 48 | |||
| 49 | constexpr char PROGRESSBAR_STYLE_BUILD[] = R"( | 37 | constexpr char PROGRESSBAR_STYLE_BUILD[] = R"( |
| 50 | QProgressBar { | 38 | QProgressBar { |
| 51 | background-color: black; | 39 | background-color: black; |
| @@ -100,13 +88,11 @@ LoadingScreen::LoadingScreen(QWidget* parent) | |||
| 100 | 88 | ||
| 101 | stage_translations = { | 89 | stage_translations = { |
| 102 | {VideoCore::LoadCallbackStage::Prepare, tr("Loading...")}, | 90 | {VideoCore::LoadCallbackStage::Prepare, tr("Loading...")}, |
| 103 | {VideoCore::LoadCallbackStage::Decompile, tr("Preparing Shaders %1 / %2")}, | ||
| 104 | {VideoCore::LoadCallbackStage::Build, tr("Loading Shaders %1 / %2")}, | 91 | {VideoCore::LoadCallbackStage::Build, tr("Loading Shaders %1 / %2")}, |
| 105 | {VideoCore::LoadCallbackStage::Complete, tr("Launching...")}, | 92 | {VideoCore::LoadCallbackStage::Complete, tr("Launching...")}, |
| 106 | }; | 93 | }; |
| 107 | progressbar_style = { | 94 | progressbar_style = { |
| 108 | {VideoCore::LoadCallbackStage::Prepare, PROGRESSBAR_STYLE_PREPARE}, | 95 | {VideoCore::LoadCallbackStage::Prepare, PROGRESSBAR_STYLE_PREPARE}, |
| 109 | {VideoCore::LoadCallbackStage::Decompile, PROGRESSBAR_STYLE_DECOMPILE}, | ||
| 110 | {VideoCore::LoadCallbackStage::Build, PROGRESSBAR_STYLE_BUILD}, | 96 | {VideoCore::LoadCallbackStage::Build, PROGRESSBAR_STYLE_BUILD}, |
| 111 | {VideoCore::LoadCallbackStage::Complete, PROGRESSBAR_STYLE_COMPLETE}, | 97 | {VideoCore::LoadCallbackStage::Complete, PROGRESSBAR_STYLE_COMPLETE}, |
| 112 | }; | 98 | }; |
| @@ -192,8 +178,7 @@ void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size | |||
| 192 | } | 178 | } |
| 193 | 179 | ||
| 194 | // update labels and progress bar | 180 | // update labels and progress bar |
| 195 | if (stage == VideoCore::LoadCallbackStage::Decompile || | 181 | if (stage == VideoCore::LoadCallbackStage::Build) { |
| 196 | stage == VideoCore::LoadCallbackStage::Build) { | ||
| 197 | ui->stage->setText(stage_translations[stage].arg(value).arg(total)); | 182 | ui->stage->setText(stage_translations[stage].arg(value).arg(total)); |
| 198 | } else { | 183 | } else { |
| 199 | ui->stage->setText(stage_translations[stage]); | 184 | ui->stage->setText(stage_translations[stage]); |
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 1be61bd48..4769a612e 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include "core/file_sys/vfs.h" | 20 | #include "core/file_sys/vfs.h" |
| 21 | #include "core/file_sys/vfs_real.h" | 21 | #include "core/file_sys/vfs_real.h" |
| 22 | #include "core/frontend/applets/general_frontend.h" | 22 | #include "core/frontend/applets/general_frontend.h" |
| 23 | #include "core/frontend/scope_acquire_window_context.h" | ||
| 24 | #include "core/hle/service/acc/profile_manager.h" | 23 | #include "core/hle/service/acc/profile_manager.h" |
| 25 | #include "core/hle/service/am/applet_ae.h" | 24 | #include "core/hle/service/am/applet_ae.h" |
| 26 | #include "core/hle/service/am/applet_oe.h" | 25 | #include "core/hle/service/am/applet_oe.h" |
| @@ -985,11 +984,8 @@ void GMainWindow::BootGame(const QString& filename) { | |||
| 985 | return; | 984 | return; |
| 986 | 985 | ||
| 987 | // Create and start the emulation thread | 986 | // Create and start the emulation thread |
| 988 | emu_thread = std::make_unique<EmuThread>(render_window); | 987 | emu_thread = std::make_unique<EmuThread>(*render_window); |
| 989 | emit EmulationStarting(emu_thread.get()); | 988 | emit EmulationStarting(emu_thread.get()); |
| 990 | if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) { | ||
| 991 | render_window->moveContext(); | ||
| 992 | } | ||
| 993 | emu_thread->start(); | 989 | emu_thread->start(); |
| 994 | 990 | ||
| 995 | connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame); | 991 | connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame); |
| @@ -1087,6 +1083,9 @@ void GMainWindow::ShutdownGame() { | |||
| 1087 | emulation_running = false; | 1083 | emulation_running = false; |
| 1088 | 1084 | ||
| 1089 | game_path.clear(); | 1085 | game_path.clear(); |
| 1086 | |||
| 1087 | // When closing the game, destroy the GLWindow to clear the context after the game is closed | ||
| 1088 | render_window->ReleaseRenderTarget(); | ||
| 1090 | } | 1089 | } |
| 1091 | 1090 | ||
| 1092 | void GMainWindow::StoreRecentFile(const QString& filename) { | 1091 | void GMainWindow::StoreRecentFile(const QString& filename) { |
| @@ -2210,48 +2209,47 @@ void GMainWindow::closeEvent(QCloseEvent* event) { | |||
| 2210 | QWidget::closeEvent(event); | 2209 | QWidget::closeEvent(event); |
| 2211 | } | 2210 | } |
| 2212 | 2211 | ||
| 2213 | void GMainWindow::keyPressEvent(QKeyEvent* event) { | 2212 | static bool IsSingleFileDropEvent(const QMimeData* mime) { |
| 2214 | if (render_window) { | 2213 | return mime->hasUrls() && mime->urls().length() == 1; |
| 2215 | render_window->ForwardKeyPressEvent(event); | ||
| 2216 | } | ||
| 2217 | } | 2214 | } |
| 2218 | 2215 | ||
| 2219 | void GMainWindow::keyReleaseEvent(QKeyEvent* event) { | 2216 | void GMainWindow::AcceptDropEvent(QDropEvent* event) { |
| 2220 | if (render_window) { | 2217 | if (IsSingleFileDropEvent(event->mimeData())) { |
| 2221 | render_window->ForwardKeyReleaseEvent(event); | 2218 | event->setDropAction(Qt::DropAction::LinkAction); |
| 2219 | event->accept(); | ||
| 2222 | } | 2220 | } |
| 2223 | } | 2221 | } |
| 2224 | 2222 | ||
| 2225 | static bool IsSingleFileDropEvent(QDropEvent* event) { | 2223 | bool GMainWindow::DropAction(QDropEvent* event) { |
| 2226 | const QMimeData* mimeData = event->mimeData(); | 2224 | if (!IsSingleFileDropEvent(event->mimeData())) { |
| 2227 | return mimeData->hasUrls() && mimeData->urls().length() == 1; | 2225 | return false; |
| 2228 | } | ||
| 2229 | |||
| 2230 | void GMainWindow::dropEvent(QDropEvent* event) { | ||
| 2231 | if (!IsSingleFileDropEvent(event)) { | ||
| 2232 | return; | ||
| 2233 | } | 2226 | } |
| 2234 | 2227 | ||
| 2235 | const QMimeData* mime_data = event->mimeData(); | 2228 | const QMimeData* mime_data = event->mimeData(); |
| 2236 | const QString filename = mime_data->urls().at(0).toLocalFile(); | 2229 | const QString& filename = mime_data->urls().at(0).toLocalFile(); |
| 2237 | 2230 | ||
| 2238 | if (emulation_running && QFileInfo(filename).suffix() == QStringLiteral("bin")) { | 2231 | if (emulation_running && QFileInfo(filename).suffix() == QStringLiteral("bin")) { |
| 2232 | // Amiibo | ||
| 2239 | LoadAmiibo(filename); | 2233 | LoadAmiibo(filename); |
| 2240 | } else { | 2234 | } else { |
| 2235 | // Game | ||
| 2241 | if (ConfirmChangeGame()) { | 2236 | if (ConfirmChangeGame()) { |
| 2242 | BootGame(filename); | 2237 | BootGame(filename); |
| 2243 | } | 2238 | } |
| 2244 | } | 2239 | } |
| 2240 | return true; | ||
| 2241 | } | ||
| 2242 | |||
| 2243 | void GMainWindow::dropEvent(QDropEvent* event) { | ||
| 2244 | DropAction(event); | ||
| 2245 | } | 2245 | } |
| 2246 | 2246 | ||
| 2247 | void GMainWindow::dragEnterEvent(QDragEnterEvent* event) { | 2247 | void GMainWindow::dragEnterEvent(QDragEnterEvent* event) { |
| 2248 | if (IsSingleFileDropEvent(event)) { | 2248 | AcceptDropEvent(event); |
| 2249 | event->acceptProposedAction(); | ||
| 2250 | } | ||
| 2251 | } | 2249 | } |
| 2252 | 2250 | ||
| 2253 | void GMainWindow::dragMoveEvent(QDragMoveEvent* event) { | 2251 | void GMainWindow::dragMoveEvent(QDragMoveEvent* event) { |
| 2254 | event->acceptProposedAction(); | 2252 | AcceptDropEvent(event); |
| 2255 | } | 2253 | } |
| 2256 | 2254 | ||
| 2257 | bool GMainWindow::ConfirmChangeGame() { | 2255 | bool GMainWindow::ConfirmChangeGame() { |
| @@ -2372,6 +2370,7 @@ int main(int argc, char* argv[]) { | |||
| 2372 | 2370 | ||
| 2373 | // Enables the core to make the qt created contexts current on std::threads | 2371 | // Enables the core to make the qt created contexts current on std::threads |
| 2374 | QCoreApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity); | 2372 | QCoreApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity); |
| 2373 | QCoreApplication::setAttribute(Qt::AA_ShareOpenGLContexts); | ||
| 2375 | QApplication app(argc, argv); | 2374 | QApplication app(argc, argv); |
| 2376 | 2375 | ||
| 2377 | // Qt changes the locale and causes issues in float conversion using std::to_string() when | 2376 | // Qt changes the locale and causes issues in float conversion using std::to_string() when |
diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 8eba2172c..a67125567 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h | |||
| @@ -78,6 +78,9 @@ public: | |||
| 78 | 78 | ||
| 79 | std::unique_ptr<DiscordRPC::DiscordInterface> discord_rpc; | 79 | std::unique_ptr<DiscordRPC::DiscordInterface> discord_rpc; |
| 80 | 80 | ||
| 81 | bool DropAction(QDropEvent* event); | ||
| 82 | void AcceptDropEvent(QDropEvent* event); | ||
| 83 | |||
| 81 | signals: | 84 | signals: |
| 82 | 85 | ||
| 83 | /** | 86 | /** |
| @@ -264,8 +267,4 @@ protected: | |||
| 264 | void dropEvent(QDropEvent* event) override; | 267 | void dropEvent(QDropEvent* event) override; |
| 265 | void dragEnterEvent(QDragEnterEvent* event) override; | 268 | void dragEnterEvent(QDragEnterEvent* event) override; |
| 266 | void dragMoveEvent(QDragMoveEvent* event) override; | 269 | void dragMoveEvent(QDragMoveEvent* event) override; |
| 267 | |||
| 268 | // Overrides used to forward signals to the render window when the focus moves out. | ||
| 269 | void keyPressEvent(QKeyEvent* event) override; | ||
| 270 | void keyReleaseEvent(QKeyEvent* event) override; | ||
| 271 | }; | 270 | }; |
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 96f1ce3af..f4cd905c9 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -381,6 +381,8 @@ void Config::ReadValues() { | |||
| 381 | static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); | 381 | static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); |
| 382 | Settings::values.aspect_ratio = | 382 | Settings::values.aspect_ratio = |
| 383 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); | 383 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); |
| 384 | Settings::values.max_anisotropy = | ||
| 385 | static_cast<int>(sdl2_config->GetInteger("Renderer", "max_anisotropy", 0)); | ||
| 384 | Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); | 386 | Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); |
| 385 | Settings::values.frame_limit = | 387 | Settings::values.frame_limit = |
| 386 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); | 388 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); |
| @@ -390,6 +392,8 @@ void Config::ReadValues() { | |||
| 390 | sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); | 392 | sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); |
| 391 | Settings::values.use_asynchronous_gpu_emulation = | 393 | Settings::values.use_asynchronous_gpu_emulation = |
| 392 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); | 394 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); |
| 395 | Settings::values.use_vsync = | ||
| 396 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1)); | ||
| 393 | 397 | ||
| 394 | Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0)); | 398 | Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0)); |
| 395 | Settings::values.bg_green = | 399 | Settings::values.bg_green = |
| @@ -448,7 +452,7 @@ void Config::ReadValues() { | |||
| 448 | Settings::values.yuzu_token = sdl2_config->Get("WebService", "yuzu_token", ""); | 452 | Settings::values.yuzu_token = sdl2_config->Get("WebService", "yuzu_token", ""); |
| 449 | 453 | ||
| 450 | // Services | 454 | // Services |
| 451 | Settings::values.bcat_backend = sdl2_config->Get("Services", "bcat_backend", "boxcat"); | 455 | Settings::values.bcat_backend = sdl2_config->Get("Services", "bcat_backend", "null"); |
| 452 | Settings::values.bcat_boxcat_local = | 456 | Settings::values.bcat_boxcat_local = |
| 453 | sdl2_config->GetBoolean("Services", "bcat_boxcat_local", false); | 457 | sdl2_config->GetBoolean("Services", "bcat_boxcat_local", false); |
| 454 | } | 458 | } |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 8a2b658cd..d63d7a58e 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -84,7 +84,7 @@ touch_device= | |||
| 84 | # from any cemuhook compatible motion program. | 84 | # from any cemuhook compatible motion program. |
| 85 | 85 | ||
| 86 | # IPv4 address of the udp input server (Default "127.0.0.1") | 86 | # IPv4 address of the udp input server (Default "127.0.0.1") |
| 87 | udp_input_address= | 87 | udp_input_address=127.0.0.1 |
| 88 | 88 | ||
| 89 | # Port of the udp input server. (Default 26760) | 89 | # Port of the udp input server. (Default 26760) |
| 90 | udp_input_port= | 90 | udp_input_port= |
| @@ -126,6 +126,10 @@ resolution_factor = | |||
| 126 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window | 126 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window |
| 127 | aspect_ratio = | 127 | aspect_ratio = |
| 128 | 128 | ||
| 129 | # Anisotropic filtering | ||
| 130 | # 0: Default, 1: 2x, 2: 4x, 3: 8x, 4: 16x | ||
| 131 | max_anisotropy = | ||
| 132 | |||
| 129 | # Whether to enable V-Sync (caps the framerate at 60FPS) or not. | 133 | # Whether to enable V-Sync (caps the framerate at 60FPS) or not. |
| 130 | # 0 (default): Off, 1: On | 134 | # 0 (default): Off, 1: On |
| 131 | use_vsync = | 135 | use_vsync = |
| @@ -150,6 +154,11 @@ use_accurate_gpu_emulation = | |||
| 150 | # 0 : Off (slow), 1 (default): On (fast) | 154 | # 0 : Off (slow), 1 (default): On (fast) |
| 151 | use_asynchronous_gpu_emulation = | 155 | use_asynchronous_gpu_emulation = |
| 152 | 156 | ||
| 157 | # Forces VSync on the display thread. Usually doesn't impact performance, but on some drivers it can | ||
| 158 | # so only turn this off if you notice a speed difference. | ||
| 159 | # 0: Off, 1 (default): On | ||
| 160 | use_vsync = | ||
| 161 | |||
| 153 | # The clear color for the renderer. What shows up on the sides of the bottom screen. | 162 | # The clear color for the renderer. What shows up on the sides of the bottom screen. |
| 154 | # Must be in range of 0.0-1.0. Defaults to 1.0 for all. | 163 | # Must be in range of 0.0-1.0. Defaults to 1.0 for all. |
| 155 | bg_red = | 164 | bg_red = |
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp index e96139885..19584360c 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp | |||
| @@ -13,7 +13,7 @@ | |||
| 13 | #include "input_common/sdl/sdl.h" | 13 | #include "input_common/sdl/sdl.h" |
| 14 | #include "yuzu_cmd/emu_window/emu_window_sdl2.h" | 14 | #include "yuzu_cmd/emu_window/emu_window_sdl2.h" |
| 15 | 15 | ||
| 16 | EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) { | 16 | EmuWindow_SDL2::EmuWindow_SDL2(Core::System& system, bool fullscreen) : system{system} { |
| 17 | if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK) < 0) { | 17 | if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK) < 0) { |
| 18 | LOG_CRITICAL(Frontend, "Failed to initialize SDL2! Exiting..."); | 18 | LOG_CRITICAL(Frontend, "Failed to initialize SDL2! Exiting..."); |
| 19 | exit(1); | 19 | exit(1); |
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.h b/src/yuzu_cmd/emu_window/emu_window_sdl2.h index b38f56661..fffac4252 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.h | |||
| @@ -10,9 +10,13 @@ | |||
| 10 | 10 | ||
| 11 | struct SDL_Window; | 11 | struct SDL_Window; |
| 12 | 12 | ||
| 13 | namespace Core { | ||
| 14 | class System; | ||
| 15 | } | ||
| 16 | |||
| 13 | class EmuWindow_SDL2 : public Core::Frontend::EmuWindow { | 17 | class EmuWindow_SDL2 : public Core::Frontend::EmuWindow { |
| 14 | public: | 18 | public: |
| 15 | explicit EmuWindow_SDL2(bool fullscreen); | 19 | explicit EmuWindow_SDL2(Core::System& system, bool fullscreen); |
| 16 | ~EmuWindow_SDL2(); | 20 | ~EmuWindow_SDL2(); |
| 17 | 21 | ||
| 18 | /// Polls window events | 22 | /// Polls window events |
| @@ -24,6 +28,9 @@ public: | |||
| 24 | /// Returns if window is shown (not minimized) | 28 | /// Returns if window is shown (not minimized) |
| 25 | bool IsShown() const override; | 29 | bool IsShown() const override; |
| 26 | 30 | ||
| 31 | /// Presents the next frame | ||
| 32 | virtual void Present() = 0; | ||
| 33 | |||
| 27 | protected: | 34 | protected: |
| 28 | /// Called by PollEvents when a key is pressed or released. | 35 | /// Called by PollEvents when a key is pressed or released. |
| 29 | void OnKeyEvent(int key, u8 state); | 36 | void OnKeyEvent(int key, u8 state); |
| @@ -55,6 +62,9 @@ protected: | |||
| 55 | /// Called when a configuration change affects the minimal size of the window | 62 | /// Called when a configuration change affects the minimal size of the window |
| 56 | void OnMinimalClientAreaChangeRequest(std::pair<unsigned, unsigned> minimal_size) override; | 63 | void OnMinimalClientAreaChangeRequest(std::pair<unsigned, unsigned> minimal_size) override; |
| 57 | 64 | ||
| 65 | /// Instance of the system, used to access renderer for the presentation thread | ||
| 66 | Core::System& system; | ||
| 67 | |||
| 58 | /// Is the window still open? | 68 | /// Is the window still open? |
| 59 | bool is_open = true; | 69 | bool is_open = true; |
| 60 | 70 | ||
| @@ -62,7 +72,7 @@ protected: | |||
| 62 | bool is_shown = true; | 72 | bool is_shown = true; |
| 63 | 73 | ||
| 64 | /// Internal SDL2 render window | 74 | /// Internal SDL2 render window |
| 65 | SDL_Window* render_window; | 75 | SDL_Window* render_window{}; |
| 66 | 76 | ||
| 67 | /// Keeps track of how often to update the title bar during gameplay | 77 | /// Keeps track of how often to update the title bar during gameplay |
| 68 | u32 last_time = 0; | 78 | u32 last_time = 0; |
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index 7ffa0ac09..c0d373477 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp | |||
| @@ -13,24 +13,25 @@ | |||
| 13 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| 14 | #include "common/scm_rev.h" | 14 | #include "common/scm_rev.h" |
| 15 | #include "common/string_util.h" | 15 | #include "common/string_util.h" |
| 16 | #include "core/core.h" | ||
| 16 | #include "core/settings.h" | 17 | #include "core/settings.h" |
| 17 | #include "input_common/keyboard.h" | 18 | #include "input_common/keyboard.h" |
| 18 | #include "input_common/main.h" | 19 | #include "input_common/main.h" |
| 19 | #include "input_common/motion_emu.h" | 20 | #include "input_common/motion_emu.h" |
| 21 | #include "video_core/renderer_base.h" | ||
| 20 | #include "yuzu_cmd/emu_window/emu_window_sdl2_gl.h" | 22 | #include "yuzu_cmd/emu_window/emu_window_sdl2_gl.h" |
| 21 | 23 | ||
| 22 | class SDLGLContext : public Core::Frontend::GraphicsContext { | 24 | class SDLGLContext : public Core::Frontend::GraphicsContext { |
| 23 | public: | 25 | public: |
| 24 | explicit SDLGLContext() { | 26 | explicit SDLGLContext() { |
| 25 | // create a hidden window to make the shared context against | 27 | // create a hidden window to make the shared context against |
| 26 | window = SDL_CreateWindow("", SDL_WINDOWPOS_UNDEFINED, // x position | 28 | window = SDL_CreateWindow(NULL, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 0, 0, |
| 27 | SDL_WINDOWPOS_UNDEFINED, // y position | 29 | SDL_WINDOW_HIDDEN | SDL_WINDOW_OPENGL); |
| 28 | Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height, | ||
| 29 | SDL_WINDOW_OPENGL | SDL_WINDOW_HIDDEN); | ||
| 30 | context = SDL_GL_CreateContext(window); | 30 | context = SDL_GL_CreateContext(window); |
| 31 | } | 31 | } |
| 32 | 32 | ||
| 33 | ~SDLGLContext() { | 33 | ~SDLGLContext() { |
| 34 | DoneCurrent(); | ||
| 34 | SDL_GL_DeleteContext(context); | 35 | SDL_GL_DeleteContext(context); |
| 35 | SDL_DestroyWindow(window); | 36 | SDL_DestroyWindow(window); |
| 36 | } | 37 | } |
| @@ -43,8 +44,6 @@ public: | |||
| 43 | SDL_GL_MakeCurrent(window, nullptr); | 44 | SDL_GL_MakeCurrent(window, nullptr); |
| 44 | } | 45 | } |
| 45 | 46 | ||
| 46 | void SwapBuffers() override {} | ||
| 47 | |||
| 48 | private: | 47 | private: |
| 49 | SDL_Window* window; | 48 | SDL_Window* window; |
| 50 | SDL_GLContext context; | 49 | SDL_GLContext context; |
| @@ -80,7 +79,8 @@ bool EmuWindow_SDL2_GL::SupportsRequiredGLExtensions() { | |||
| 80 | return unsupported_ext.empty(); | 79 | return unsupported_ext.empty(); |
| 81 | } | 80 | } |
| 82 | 81 | ||
| 83 | EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(bool fullscreen) : EmuWindow_SDL2(fullscreen) { | 82 | EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system, bool fullscreen) |
| 83 | : EmuWindow_SDL2{system, fullscreen} { | ||
| 84 | SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4); | 84 | SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4); |
| 85 | SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3); | 85 | SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3); |
| 86 | SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_COMPATIBILITY); | 86 | SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_COMPATIBILITY); |
| @@ -90,6 +90,7 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(bool fullscreen) : EmuWindow_SDL2(fullscree | |||
| 90 | SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); | 90 | SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); |
| 91 | SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); | 91 | SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); |
| 92 | SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); | 92 | SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); |
| 93 | SDL_GL_SetSwapInterval(0); | ||
| 93 | 94 | ||
| 94 | std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, | 95 | std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, |
| 95 | Common::g_scm_branch, Common::g_scm_desc); | 96 | Common::g_scm_branch, Common::g_scm_desc); |
| @@ -105,13 +106,22 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(bool fullscreen) : EmuWindow_SDL2(fullscree | |||
| 105 | exit(1); | 106 | exit(1); |
| 106 | } | 107 | } |
| 107 | 108 | ||
| 109 | dummy_window = SDL_CreateWindow(NULL, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 0, 0, | ||
| 110 | SDL_WINDOW_HIDDEN | SDL_WINDOW_OPENGL); | ||
| 111 | |||
| 108 | if (fullscreen) { | 112 | if (fullscreen) { |
| 109 | Fullscreen(); | 113 | Fullscreen(); |
| 110 | } | 114 | } |
| 111 | gl_context = SDL_GL_CreateContext(render_window); | ||
| 112 | 115 | ||
| 113 | if (gl_context == nullptr) { | 116 | window_context = SDL_GL_CreateContext(render_window); |
| 114 | LOG_CRITICAL(Frontend, "Failed to create SDL2 GL context! {}", SDL_GetError()); | 117 | core_context = CreateSharedContext(); |
| 118 | |||
| 119 | if (window_context == nullptr) { | ||
| 120 | LOG_CRITICAL(Frontend, "Failed to create SDL2 GL context: {}", SDL_GetError()); | ||
| 121 | exit(1); | ||
| 122 | } | ||
| 123 | if (core_context == nullptr) { | ||
| 124 | LOG_CRITICAL(Frontend, "Failed to create shared SDL2 GL context: {}", SDL_GetError()); | ||
| 115 | exit(1); | 125 | exit(1); |
| 116 | } | 126 | } |
| 117 | 127 | ||
| @@ -128,28 +138,22 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(bool fullscreen) : EmuWindow_SDL2(fullscree | |||
| 128 | OnResize(); | 138 | OnResize(); |
| 129 | OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); | 139 | OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); |
| 130 | SDL_PumpEvents(); | 140 | SDL_PumpEvents(); |
| 131 | SDL_GL_SetSwapInterval(false); | ||
| 132 | LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch, | 141 | LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch, |
| 133 | Common::g_scm_desc); | 142 | Common::g_scm_desc); |
| 134 | Settings::LogSettings(); | 143 | Settings::LogSettings(); |
| 135 | |||
| 136 | DoneCurrent(); | ||
| 137 | } | 144 | } |
| 138 | 145 | ||
| 139 | EmuWindow_SDL2_GL::~EmuWindow_SDL2_GL() { | 146 | EmuWindow_SDL2_GL::~EmuWindow_SDL2_GL() { |
| 140 | SDL_GL_DeleteContext(gl_context); | 147 | core_context.reset(); |
| 141 | } | 148 | SDL_GL_DeleteContext(window_context); |
| 142 | |||
| 143 | void EmuWindow_SDL2_GL::SwapBuffers() { | ||
| 144 | SDL_GL_SwapWindow(render_window); | ||
| 145 | } | 149 | } |
| 146 | 150 | ||
| 147 | void EmuWindow_SDL2_GL::MakeCurrent() { | 151 | void EmuWindow_SDL2_GL::MakeCurrent() { |
| 148 | SDL_GL_MakeCurrent(render_window, gl_context); | 152 | core_context->MakeCurrent(); |
| 149 | } | 153 | } |
| 150 | 154 | ||
| 151 | void EmuWindow_SDL2_GL::DoneCurrent() { | 155 | void EmuWindow_SDL2_GL::DoneCurrent() { |
| 152 | SDL_GL_MakeCurrent(render_window, nullptr); | 156 | core_context->DoneCurrent(); |
| 153 | } | 157 | } |
| 154 | 158 | ||
| 155 | void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, | 159 | void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, |
| @@ -161,3 +165,13 @@ void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, voi | |||
| 161 | std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const { | 165 | std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const { |
| 162 | return std::make_unique<SDLGLContext>(); | 166 | return std::make_unique<SDLGLContext>(); |
| 163 | } | 167 | } |
| 168 | |||
| 169 | void EmuWindow_SDL2_GL::Present() { | ||
| 170 | SDL_GL_MakeCurrent(render_window, window_context); | ||
| 171 | SDL_GL_SetSwapInterval(Settings::values.use_vsync ? 1 : 0); | ||
| 172 | while (IsOpen()) { | ||
| 173 | system.Renderer().TryPresent(100); | ||
| 174 | SDL_GL_SwapWindow(render_window); | ||
| 175 | } | ||
| 176 | SDL_GL_MakeCurrent(render_window, nullptr); | ||
| 177 | } | ||
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h index c753085a8..b80669ff0 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h | |||
| @@ -10,17 +10,12 @@ | |||
| 10 | 10 | ||
| 11 | class EmuWindow_SDL2_GL final : public EmuWindow_SDL2 { | 11 | class EmuWindow_SDL2_GL final : public EmuWindow_SDL2 { |
| 12 | public: | 12 | public: |
| 13 | explicit EmuWindow_SDL2_GL(bool fullscreen); | 13 | explicit EmuWindow_SDL2_GL(Core::System& system, bool fullscreen); |
| 14 | ~EmuWindow_SDL2_GL(); | 14 | ~EmuWindow_SDL2_GL(); |
| 15 | 15 | ||
| 16 | /// Swap buffers to display the next frame | ||
| 17 | void SwapBuffers() override; | ||
| 18 | |||
| 19 | /// Makes the graphics context current for the caller thread | ||
| 20 | void MakeCurrent() override; | 16 | void MakeCurrent() override; |
| 21 | |||
| 22 | /// Releases the GL context from the caller thread | ||
| 23 | void DoneCurrent() override; | 17 | void DoneCurrent() override; |
| 18 | void Present() override; | ||
| 24 | 19 | ||
| 25 | /// Ignored in OpenGL | 20 | /// Ignored in OpenGL |
| 26 | void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, | 21 | void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, |
| @@ -29,10 +24,17 @@ public: | |||
| 29 | std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; | 24 | std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; |
| 30 | 25 | ||
| 31 | private: | 26 | private: |
| 27 | /// Fake hidden window for the core context | ||
| 28 | SDL_Window* dummy_window{}; | ||
| 29 | |||
| 32 | /// Whether the GPU and driver supports the OpenGL extension required | 30 | /// Whether the GPU and driver supports the OpenGL extension required |
| 33 | bool SupportsRequiredGLExtensions(); | 31 | bool SupportsRequiredGLExtensions(); |
| 34 | 32 | ||
| 35 | using SDL_GLContext = void*; | 33 | using SDL_GLContext = void*; |
| 34 | |||
| 36 | /// The OpenGL context associated with the window | 35 | /// The OpenGL context associated with the window |
| 37 | SDL_GLContext gl_context; | 36 | SDL_GLContext window_context; |
| 37 | |||
| 38 | /// The OpenGL context associated with the core | ||
| 39 | std::unique_ptr<Core::Frontend::GraphicsContext> core_context; | ||
| 38 | }; | 40 | }; |
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp index a203f0da9..abcc58165 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp | |||
| @@ -15,7 +15,8 @@ | |||
| 15 | #include "core/settings.h" | 15 | #include "core/settings.h" |
| 16 | #include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h" | 16 | #include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h" |
| 17 | 17 | ||
| 18 | EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(bool fullscreen) : EmuWindow_SDL2(fullscreen) { | 18 | EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen) |
| 19 | : EmuWindow_SDL2{system, fullscreen} { | ||
| 19 | if (SDL_Vulkan_LoadLibrary(nullptr) != 0) { | 20 | if (SDL_Vulkan_LoadLibrary(nullptr) != 0) { |
| 20 | LOG_CRITICAL(Frontend, "SDL failed to load the Vulkan library: {}", SDL_GetError()); | 21 | LOG_CRITICAL(Frontend, "SDL failed to load the Vulkan library: {}", SDL_GetError()); |
| 21 | exit(EXIT_FAILURE); | 22 | exit(EXIT_FAILURE); |
| @@ -110,8 +111,6 @@ EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() { | |||
| 110 | vkDestroyInstance(vk_instance, nullptr); | 111 | vkDestroyInstance(vk_instance, nullptr); |
| 111 | } | 112 | } |
| 112 | 113 | ||
| 113 | void EmuWindow_SDL2_VK::SwapBuffers() {} | ||
| 114 | |||
| 115 | void EmuWindow_SDL2_VK::MakeCurrent() { | 114 | void EmuWindow_SDL2_VK::MakeCurrent() { |
| 116 | // Unused on Vulkan | 115 | // Unused on Vulkan |
| 117 | } | 116 | } |
| @@ -160,3 +159,7 @@ bool EmuWindow_SDL2_VK::UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanc | |||
| 160 | return layer.layerName == std::string("VK_LAYER_LUNARG_standard_validation"); | 159 | return layer.layerName == std::string("VK_LAYER_LUNARG_standard_validation"); |
| 161 | }) != layers.end(); | 160 | }) != layers.end(); |
| 162 | } | 161 | } |
| 162 | |||
| 163 | void EmuWindow_SDL2_VK::Present() { | ||
| 164 | // TODO (bunnei): ImplementMe | ||
| 165 | } | ||
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h index 2a7c06a24..1eb8c0868 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h | |||
| @@ -10,19 +10,12 @@ | |||
| 10 | 10 | ||
| 11 | class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 { | 11 | class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 { |
| 12 | public: | 12 | public: |
| 13 | explicit EmuWindow_SDL2_VK(bool fullscreen); | 13 | explicit EmuWindow_SDL2_VK(Core::System& system, bool fullscreen); |
| 14 | ~EmuWindow_SDL2_VK(); | 14 | ~EmuWindow_SDL2_VK(); |
| 15 | 15 | ||
| 16 | /// Swap buffers to display the next frame | ||
| 17 | void SwapBuffers() override; | ||
| 18 | |||
| 19 | /// Makes the graphics context current for the caller thread | ||
| 20 | void MakeCurrent() override; | 16 | void MakeCurrent() override; |
| 21 | |||
| 22 | /// Releases the GL context from the caller thread | ||
| 23 | void DoneCurrent() override; | 17 | void DoneCurrent() override; |
| 24 | 18 | void Present() override; | |
| 25 | /// Retrieves Vulkan specific handlers from the window | ||
| 26 | void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, | 19 | void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, |
| 27 | void* surface) const override; | 20 | void* surface) const override; |
| 28 | 21 | ||
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 325795321..babf4c3a4 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp | |||
| @@ -177,14 +177,16 @@ int main(int argc, char** argv) { | |||
| 177 | Settings::values.use_gdbstub = use_gdbstub; | 177 | Settings::values.use_gdbstub = use_gdbstub; |
| 178 | Settings::Apply(); | 178 | Settings::Apply(); |
| 179 | 179 | ||
| 180 | Core::System& system{Core::System::GetInstance()}; | ||
| 181 | |||
| 180 | std::unique_ptr<EmuWindow_SDL2> emu_window; | 182 | std::unique_ptr<EmuWindow_SDL2> emu_window; |
| 181 | switch (Settings::values.renderer_backend) { | 183 | switch (Settings::values.renderer_backend) { |
| 182 | case Settings::RendererBackend::OpenGL: | 184 | case Settings::RendererBackend::OpenGL: |
| 183 | emu_window = std::make_unique<EmuWindow_SDL2_GL>(fullscreen); | 185 | emu_window = std::make_unique<EmuWindow_SDL2_GL>(system, fullscreen); |
| 184 | break; | 186 | break; |
| 185 | case Settings::RendererBackend::Vulkan: | 187 | case Settings::RendererBackend::Vulkan: |
| 186 | #ifdef HAS_VULKAN | 188 | #ifdef HAS_VULKAN |
| 187 | emu_window = std::make_unique<EmuWindow_SDL2_VK>(fullscreen); | 189 | emu_window = std::make_unique<EmuWindow_SDL2_VK>(system, fullscreen); |
| 188 | break; | 190 | break; |
| 189 | #else | 191 | #else |
| 190 | LOG_CRITICAL(Frontend, "Vulkan backend has not been compiled!"); | 192 | LOG_CRITICAL(Frontend, "Vulkan backend has not been compiled!"); |
| @@ -192,12 +194,6 @@ int main(int argc, char** argv) { | |||
| 192 | #endif | 194 | #endif |
| 193 | } | 195 | } |
| 194 | 196 | ||
| 195 | if (!Settings::values.use_multi_core) { | ||
| 196 | // Single core mode must acquire OpenGL context for entire emulation session | ||
| 197 | emu_window->MakeCurrent(); | ||
| 198 | } | ||
| 199 | |||
| 200 | Core::System& system{Core::System::GetInstance()}; | ||
| 201 | system.SetContentProvider(std::make_unique<FileSys::ContentProviderUnion>()); | 197 | system.SetContentProvider(std::make_unique<FileSys::ContentProviderUnion>()); |
| 202 | system.SetFilesystem(std::make_shared<FileSys::RealVfsFilesystem>()); | 198 | system.SetFilesystem(std::make_shared<FileSys::RealVfsFilesystem>()); |
| 203 | system.GetFileSystemController().CreateFactories(*system.GetFilesystem()); | 199 | system.GetFileSystemController().CreateFactories(*system.GetFilesystem()); |
| @@ -234,12 +230,23 @@ int main(int argc, char** argv) { | |||
| 234 | 230 | ||
| 235 | system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); | 231 | system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); |
| 236 | 232 | ||
| 237 | emu_window->MakeCurrent(); | ||
| 238 | system.Renderer().Rasterizer().LoadDiskResources(); | 233 | system.Renderer().Rasterizer().LoadDiskResources(); |
| 239 | 234 | ||
| 235 | // Acquire render context for duration of the thread if this is the rendering thread | ||
| 236 | if (!Settings::values.use_asynchronous_gpu_emulation) { | ||
| 237 | emu_window->MakeCurrent(); | ||
| 238 | } | ||
| 239 | SCOPE_EXIT({ | ||
| 240 | if (!Settings::values.use_asynchronous_gpu_emulation) { | ||
| 241 | emu_window->DoneCurrent(); | ||
| 242 | } | ||
| 243 | }); | ||
| 244 | |||
| 245 | std::thread render_thread([&emu_window] { emu_window->Present(); }); | ||
| 240 | while (emu_window->IsOpen()) { | 246 | while (emu_window->IsOpen()) { |
| 241 | system.RunLoop(); | 247 | system.RunLoop(); |
| 242 | } | 248 | } |
| 249 | render_thread.join(); | ||
| 243 | 250 | ||
| 244 | system.Shutdown(); | 251 | system.Shutdown(); |
| 245 | 252 | ||
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index 0ac93b62a..ee2591c8f 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp | |||
| @@ -120,6 +120,8 @@ void Config::ReadValues() { | |||
| 120 | static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); | 120 | static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); |
| 121 | Settings::values.aspect_ratio = | 121 | Settings::values.aspect_ratio = |
| 122 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); | 122 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); |
| 123 | Settings::values.max_anisotropy = | ||
| 124 | static_cast<int>(sdl2_config->GetInteger("Renderer", "max_anisotropy", 0)); | ||
| 123 | Settings::values.use_frame_limit = false; | 125 | Settings::values.use_frame_limit = false; |
| 124 | Settings::values.frame_limit = 100; | 126 | Settings::values.frame_limit = 100; |
| 125 | Settings::values.use_disk_shader_cache = | 127 | Settings::values.use_disk_shader_cache = |
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h index 8d93f7b88..ca203b64d 100644 --- a/src/yuzu_tester/default_ini.h +++ b/src/yuzu_tester/default_ini.h | |||
| @@ -30,6 +30,10 @@ resolution_factor = | |||
| 30 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window | 30 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window |
| 31 | aspect_ratio = | 31 | aspect_ratio = |
| 32 | 32 | ||
| 33 | # Anisotropic filtering | ||
| 34 | # 0: Default, 1: 2x, 2: 4x, 3: 8x, 4: 16x | ||
| 35 | max_anisotropy = | ||
| 36 | |||
| 33 | # Whether to enable V-Sync (caps the framerate at 60FPS) or not. | 37 | # Whether to enable V-Sync (caps the framerate at 60FPS) or not. |
| 34 | # 0 (default): Off, 1: On | 38 | # 0 (default): Off, 1: On |
| 35 | use_vsync = | 39 | use_vsync = |
diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp index f2cc4a797..a1bdb1a12 100644 --- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp +++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp | |||
| @@ -112,10 +112,6 @@ EmuWindow_SDL2_Hide::~EmuWindow_SDL2_Hide() { | |||
| 112 | SDL_Quit(); | 112 | SDL_Quit(); |
| 113 | } | 113 | } |
| 114 | 114 | ||
| 115 | void EmuWindow_SDL2_Hide::SwapBuffers() { | ||
| 116 | SDL_GL_SwapWindow(render_window); | ||
| 117 | } | ||
| 118 | |||
| 119 | void EmuWindow_SDL2_Hide::PollEvents() {} | 115 | void EmuWindow_SDL2_Hide::PollEvents() {} |
| 120 | 116 | ||
| 121 | void EmuWindow_SDL2_Hide::MakeCurrent() { | 117 | void EmuWindow_SDL2_Hide::MakeCurrent() { |
diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h index c7fccc002..b13e15309 100644 --- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h +++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h | |||
| @@ -13,9 +13,6 @@ public: | |||
| 13 | explicit EmuWindow_SDL2_Hide(); | 13 | explicit EmuWindow_SDL2_Hide(); |
| 14 | ~EmuWindow_SDL2_Hide(); | 14 | ~EmuWindow_SDL2_Hide(); |
| 15 | 15 | ||
| 16 | /// Swap buffers to display the next frame | ||
| 17 | void SwapBuffers() override; | ||
| 18 | |||
| 19 | /// Polls window events | 16 | /// Polls window events |
| 20 | void PollEvents() override; | 17 | void PollEvents() override; |
| 21 | 18 | ||