diff options
Diffstat (limited to 'src')
300 files changed, 13639 insertions, 6513 deletions
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index 9a0939883..da50a0bbc 100644 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp | |||
| @@ -73,13 +73,15 @@ private: | |||
| 73 | EffectInStatus info{}; | 73 | EffectInStatus info{}; |
| 74 | }; | 74 | }; |
| 75 | AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params, | 75 | AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params, |
| 76 | Kernel::SharedPtr<Kernel::WritableEvent> buffer_event) | 76 | Kernel::SharedPtr<Kernel::WritableEvent> buffer_event, |
| 77 | std::size_t instance_number) | ||
| 77 | : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count), | 78 | : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count), |
| 78 | effects(params.effect_count) { | 79 | effects(params.effect_count) { |
| 79 | 80 | ||
| 80 | audio_out = std::make_unique<AudioCore::AudioOut>(); | 81 | audio_out = std::make_unique<AudioCore::AudioOut>(); |
| 81 | stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, | 82 | stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, |
| 82 | "AudioRenderer", [=]() { buffer_event->Signal(); }); | 83 | fmt::format("AudioRenderer-Instance{}", instance_number), |
| 84 | [=]() { buffer_event->Signal(); }); | ||
| 83 | audio_out->StartStream(stream); | 85 | audio_out->StartStream(stream); |
| 84 | 86 | ||
| 85 | QueueMixedBuffer(0); | 87 | QueueMixedBuffer(0); |
| @@ -217,13 +219,15 @@ std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(std::size_t sample_co | |||
| 217 | if (offset == samples.size()) { | 219 | if (offset == samples.size()) { |
| 218 | offset = 0; | 220 | offset = 0; |
| 219 | 221 | ||
| 220 | if (!wave_buffer.is_looping) { | 222 | if (!wave_buffer.is_looping && wave_buffer.buffer_sz) { |
| 221 | SetWaveIndex(wave_index + 1); | 223 | SetWaveIndex(wave_index + 1); |
| 222 | } | 224 | } |
| 223 | 225 | ||
| 224 | out_status.wave_buffer_consumed++; | 226 | if (wave_buffer.buffer_sz) { |
| 227 | out_status.wave_buffer_consumed++; | ||
| 228 | } | ||
| 225 | 229 | ||
| 226 | if (wave_buffer.end_of_stream) { | 230 | if (wave_buffer.end_of_stream || wave_buffer.buffer_sz == 0) { |
| 227 | info.play_state = PlayState::Paused; | 231 | info.play_state = PlayState::Paused; |
| 228 | } | 232 | } |
| 229 | } | 233 | } |
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index b2e5d336c..45afbe759 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h | |||
| @@ -215,7 +215,8 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size | |||
| 215 | class AudioRenderer { | 215 | class AudioRenderer { |
| 216 | public: | 216 | public: |
| 217 | AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params, | 217 | AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params, |
| 218 | Kernel::SharedPtr<Kernel::WritableEvent> buffer_event); | 218 | Kernel::SharedPtr<Kernel::WritableEvent> buffer_event, |
| 219 | std::size_t instance_number); | ||
| 219 | ~AudioRenderer(); | 220 | ~AudioRenderer(); |
| 220 | 221 | ||
| 221 | std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params); | 222 | std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params); |
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 198b3fe07..01abdb3bb 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -44,6 +44,7 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 44 | "${VIDEO_CORE}/shader/decode/half_set.cpp" | 44 | "${VIDEO_CORE}/shader/decode/half_set.cpp" |
| 45 | "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" | 45 | "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" |
| 46 | "${VIDEO_CORE}/shader/decode/hfma2.cpp" | 46 | "${VIDEO_CORE}/shader/decode/hfma2.cpp" |
| 47 | "${VIDEO_CORE}/shader/decode/image.cpp" | ||
| 47 | "${VIDEO_CORE}/shader/decode/integer_set.cpp" | 48 | "${VIDEO_CORE}/shader/decode/integer_set.cpp" |
| 48 | "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" | 49 | "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" |
| 49 | "${VIDEO_CORE}/shader/decode/memory.cpp" | 50 | "${VIDEO_CORE}/shader/decode/memory.cpp" |
| @@ -54,7 +55,10 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 54 | "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" | 55 | "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" |
| 55 | "${VIDEO_CORE}/shader/decode/shift.cpp" | 56 | "${VIDEO_CORE}/shader/decode/shift.cpp" |
| 56 | "${VIDEO_CORE}/shader/decode/video.cpp" | 57 | "${VIDEO_CORE}/shader/decode/video.cpp" |
| 58 | "${VIDEO_CORE}/shader/decode/warp.cpp" | ||
| 57 | "${VIDEO_CORE}/shader/decode/xmad.cpp" | 59 | "${VIDEO_CORE}/shader/decode/xmad.cpp" |
| 60 | "${VIDEO_CORE}/shader/control_flow.cpp" | ||
| 61 | "${VIDEO_CORE}/shader/control_flow.h" | ||
| 58 | "${VIDEO_CORE}/shader/decode.cpp" | 62 | "${VIDEO_CORE}/shader/decode.cpp" |
| 59 | "${VIDEO_CORE}/shader/node.h" | 63 | "${VIDEO_CORE}/shader/node.h" |
| 60 | "${VIDEO_CORE}/shader/node_helper.cpp" | 64 | "${VIDEO_CORE}/shader/node_helper.cpp" |
| @@ -74,6 +78,7 @@ add_library(common STATIC | |||
| 74 | assert.h | 78 | assert.h |
| 75 | detached_tasks.cpp | 79 | detached_tasks.cpp |
| 76 | detached_tasks.h | 80 | detached_tasks.h |
| 81 | binary_find.h | ||
| 77 | bit_field.h | 82 | bit_field.h |
| 78 | bit_util.h | 83 | bit_util.h |
| 79 | cityhash.cpp | 84 | cityhash.cpp |
diff --git a/src/common/alignment.h b/src/common/alignment.h index d94a2291f..88d5d3a65 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | #pragma once | 3 | #pragma once |
| 4 | 4 | ||
| 5 | #include <cstddef> | 5 | #include <cstddef> |
| 6 | #include <memory> | ||
| 6 | #include <type_traits> | 7 | #include <type_traits> |
| 7 | 8 | ||
| 8 | namespace Common { | 9 | namespace Common { |
| @@ -20,6 +21,12 @@ constexpr T AlignDown(T value, std::size_t size) { | |||
| 20 | } | 21 | } |
| 21 | 22 | ||
| 22 | template <typename T> | 23 | template <typename T> |
| 24 | constexpr T AlignBits(T value, std::size_t align) { | ||
| 25 | static_assert(std::is_unsigned_v<T>, "T must be an unsigned value."); | ||
| 26 | return static_cast<T>((value + ((1ULL << align) - 1)) >> align << align); | ||
| 27 | } | ||
| 28 | |||
| 29 | template <typename T> | ||
| 23 | constexpr bool Is4KBAligned(T value) { | 30 | constexpr bool Is4KBAligned(T value) { |
| 24 | static_assert(std::is_unsigned_v<T>, "T must be an unsigned value."); | 31 | static_assert(std::is_unsigned_v<T>, "T must be an unsigned value."); |
| 25 | return (value & 0xFFF) == 0; | 32 | return (value & 0xFFF) == 0; |
| @@ -31,4 +38,63 @@ constexpr bool IsWordAligned(T value) { | |||
| 31 | return (value & 0b11) == 0; | 38 | return (value & 0b11) == 0; |
| 32 | } | 39 | } |
| 33 | 40 | ||
| 41 | template <typename T, std::size_t Align = 16> | ||
| 42 | class AlignmentAllocator { | ||
| 43 | public: | ||
| 44 | using value_type = T; | ||
| 45 | using size_type = std::size_t; | ||
| 46 | using difference_type = std::ptrdiff_t; | ||
| 47 | |||
| 48 | using pointer = T*; | ||
| 49 | using const_pointer = const T*; | ||
| 50 | |||
| 51 | using reference = T&; | ||
| 52 | using const_reference = const T&; | ||
| 53 | |||
| 54 | public: | ||
| 55 | pointer address(reference r) noexcept { | ||
| 56 | return std::addressof(r); | ||
| 57 | } | ||
| 58 | |||
| 59 | const_pointer address(const_reference r) const noexcept { | ||
| 60 | return std::addressof(r); | ||
| 61 | } | ||
| 62 | |||
| 63 | pointer allocate(size_type n) { | ||
| 64 | return static_cast<pointer>(::operator new (n, std::align_val_t{Align})); | ||
| 65 | } | ||
| 66 | |||
| 67 | void deallocate(pointer p, size_type) { | ||
| 68 | ::operator delete (p, std::align_val_t{Align}); | ||
| 69 | } | ||
| 70 | |||
| 71 | void construct(pointer p, const value_type& wert) { | ||
| 72 | new (p) value_type(wert); | ||
| 73 | } | ||
| 74 | |||
| 75 | void destroy(pointer p) { | ||
| 76 | p->~value_type(); | ||
| 77 | } | ||
| 78 | |||
| 79 | size_type max_size() const noexcept { | ||
| 80 | return size_type(-1) / sizeof(value_type); | ||
| 81 | } | ||
| 82 | |||
| 83 | template <typename T2> | ||
| 84 | struct rebind { | ||
| 85 | using other = AlignmentAllocator<T2, Align>; | ||
| 86 | }; | ||
| 87 | |||
| 88 | bool operator!=(const AlignmentAllocator<T, Align>& other) const noexcept { | ||
| 89 | return !(*this == other); | ||
| 90 | } | ||
| 91 | |||
| 92 | // Returns true if and only if storage allocated from *this | ||
| 93 | // can be deallocated from other, and vice versa. | ||
| 94 | // Always returns true for stateless allocators. | ||
| 95 | bool operator==(const AlignmentAllocator<T, Align>& other) const noexcept { | ||
| 96 | return true; | ||
| 97 | } | ||
| 98 | }; | ||
| 99 | |||
| 34 | } // namespace Common | 100 | } // namespace Common |
diff --git a/src/common/binary_find.h b/src/common/binary_find.h new file mode 100644 index 000000000..5cc523bf9 --- /dev/null +++ b/src/common/binary_find.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | // Copyright 2019 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | |||
| 9 | namespace Common { | ||
| 10 | |||
| 11 | template <class ForwardIt, class T, class Compare = std::less<>> | ||
| 12 | ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { | ||
| 13 | // Note: BOTH type T and the type after ForwardIt is dereferenced | ||
| 14 | // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. | ||
| 15 | // This is stricter than lower_bound requirement (see above) | ||
| 16 | |||
| 17 | first = std::lower_bound(first, last, value, comp); | ||
| 18 | return first != last && !comp(value, *first) ? first : last; | ||
| 19 | } | ||
| 20 | |||
| 21 | } // namespace Common | ||
diff --git a/src/common/bit_util.h b/src/common/bit_util.h index d032df413..6f7d5a947 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h | |||
| @@ -97,4 +97,48 @@ inline u32 CountTrailingZeroes64(u64 value) { | |||
| 97 | } | 97 | } |
| 98 | #endif | 98 | #endif |
| 99 | 99 | ||
| 100 | #ifdef _MSC_VER | ||
| 101 | |||
| 102 | inline u32 MostSignificantBit32(const u32 value) { | ||
| 103 | unsigned long result; | ||
| 104 | _BitScanReverse(&result, value); | ||
| 105 | return static_cast<u32>(result); | ||
| 106 | } | ||
| 107 | |||
| 108 | inline u32 MostSignificantBit64(const u64 value) { | ||
| 109 | unsigned long result; | ||
| 110 | _BitScanReverse64(&result, value); | ||
| 111 | return static_cast<u32>(result); | ||
| 112 | } | ||
| 113 | |||
| 114 | #else | ||
| 115 | |||
| 116 | inline u32 MostSignificantBit32(const u32 value) { | ||
| 117 | return 31U - static_cast<u32>(__builtin_clz(value)); | ||
| 118 | } | ||
| 119 | |||
| 120 | inline u32 MostSignificantBit64(const u64 value) { | ||
| 121 | return 63U - static_cast<u32>(__builtin_clzll(value)); | ||
| 122 | } | ||
| 123 | |||
| 124 | #endif | ||
| 125 | |||
| 126 | inline u32 Log2Floor32(const u32 value) { | ||
| 127 | return MostSignificantBit32(value); | ||
| 128 | } | ||
| 129 | |||
| 130 | inline u32 Log2Ceil32(const u32 value) { | ||
| 131 | const u32 log2_f = Log2Floor32(value); | ||
| 132 | return log2_f + ((value ^ (1U << log2_f)) != 0U); | ||
| 133 | } | ||
| 134 | |||
| 135 | inline u32 Log2Floor64(const u64 value) { | ||
| 136 | return MostSignificantBit64(value); | ||
| 137 | } | ||
| 138 | |||
| 139 | inline u32 Log2Ceil64(const u64 value) { | ||
| 140 | const u64 log2_f = static_cast<u64>(Log2Floor64(value)); | ||
| 141 | return static_cast<u32>(log2_f + ((value ^ (1ULL << log2_f)) != 0ULL)); | ||
| 142 | } | ||
| 143 | |||
| 100 | } // namespace Common | 144 | } // namespace Common |
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 8b0d34da6..04ecac959 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <algorithm> | ||
| 7 | #include <string> | 8 | #include <string> |
| 8 | 9 | ||
| 9 | #if !defined(ARCHITECTURE_x86_64) | 10 | #if !defined(ARCHITECTURE_x86_64) |
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index cb77b99ee..877a9e353 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt | |||
| @@ -70,6 +70,8 @@ add_library(core STATIC | |||
| 70 | file_sys/sdmc_factory.h | 70 | file_sys/sdmc_factory.h |
| 71 | file_sys/submission_package.cpp | 71 | file_sys/submission_package.cpp |
| 72 | file_sys/submission_package.h | 72 | file_sys/submission_package.h |
| 73 | file_sys/system_archive/mii_model.cpp | ||
| 74 | file_sys/system_archive/mii_model.h | ||
| 73 | file_sys/system_archive/ng_word.cpp | 75 | file_sys/system_archive/ng_word.cpp |
| 74 | file_sys/system_archive/ng_word.h | 76 | file_sys/system_archive/ng_word.h |
| 75 | file_sys/system_archive/system_archive.cpp | 77 | file_sys/system_archive/system_archive.cpp |
| @@ -111,6 +113,8 @@ add_library(core STATIC | |||
| 111 | frontend/scope_acquire_window_context.h | 113 | frontend/scope_acquire_window_context.h |
| 112 | gdbstub/gdbstub.cpp | 114 | gdbstub/gdbstub.cpp |
| 113 | gdbstub/gdbstub.h | 115 | gdbstub/gdbstub.h |
| 116 | hardware_interrupt_manager.cpp | ||
| 117 | hardware_interrupt_manager.h | ||
| 114 | hle/ipc.h | 118 | hle/ipc.h |
| 115 | hle/ipc_helpers.h | 119 | hle/ipc_helpers.h |
| 116 | hle/kernel/address_arbiter.cpp | 120 | hle/kernel/address_arbiter.cpp |
| @@ -175,6 +179,7 @@ add_library(core STATIC | |||
| 175 | hle/service/acc/acc_u0.h | 179 | hle/service/acc/acc_u0.h |
| 176 | hle/service/acc/acc_u1.cpp | 180 | hle/service/acc/acc_u1.cpp |
| 177 | hle/service/acc/acc_u1.h | 181 | hle/service/acc/acc_u1.h |
| 182 | hle/service/acc/errors.h | ||
| 178 | hle/service/acc/profile_manager.cpp | 183 | hle/service/acc/profile_manager.cpp |
| 179 | hle/service/acc/profile_manager.h | 184 | hle/service/acc/profile_manager.h |
| 180 | hle/service/am/am.cpp | 185 | hle/service/am/am.cpp |
| @@ -207,6 +212,8 @@ add_library(core STATIC | |||
| 207 | hle/service/aoc/aoc_u.h | 212 | hle/service/aoc/aoc_u.h |
| 208 | hle/service/apm/apm.cpp | 213 | hle/service/apm/apm.cpp |
| 209 | hle/service/apm/apm.h | 214 | hle/service/apm/apm.h |
| 215 | hle/service/apm/controller.cpp | ||
| 216 | hle/service/apm/controller.h | ||
| 210 | hle/service/apm/interface.cpp | 217 | hle/service/apm/interface.cpp |
| 211 | hle/service/apm/interface.h | 218 | hle/service/apm/interface.h |
| 212 | hle/service/audio/audctl.cpp | 219 | hle/service/audio/audctl.cpp |
| @@ -270,6 +277,7 @@ add_library(core STATIC | |||
| 270 | hle/service/filesystem/fsp_srv.h | 277 | hle/service/filesystem/fsp_srv.h |
| 271 | hle/service/fgm/fgm.cpp | 278 | hle/service/fgm/fgm.cpp |
| 272 | hle/service/fgm/fgm.h | 279 | hle/service/fgm/fgm.h |
| 280 | hle/service/friend/errors.h | ||
| 273 | hle/service/friend/friend.cpp | 281 | hle/service/friend/friend.cpp |
| 274 | hle/service/friend/friend.h | 282 | hle/service/friend/friend.h |
| 275 | hle/service/friend/interface.cpp | 283 | hle/service/friend/interface.cpp |
| @@ -291,6 +299,7 @@ add_library(core STATIC | |||
| 291 | hle/service/hid/irs.h | 299 | hle/service/hid/irs.h |
| 292 | hle/service/hid/xcd.cpp | 300 | hle/service/hid/xcd.cpp |
| 293 | hle/service/hid/xcd.h | 301 | hle/service/hid/xcd.h |
| 302 | hle/service/hid/errors.h | ||
| 294 | hle/service/hid/controllers/controller_base.cpp | 303 | hle/service/hid/controllers/controller_base.cpp |
| 295 | hle/service/hid/controllers/controller_base.h | 304 | hle/service/hid/controllers/controller_base.h |
| 296 | hle/service/hid/controllers/debug_pad.cpp | 305 | hle/service/hid/controllers/debug_pad.cpp |
| @@ -367,6 +376,7 @@ add_library(core STATIC | |||
| 367 | hle/service/nvdrv/devices/nvmap.h | 376 | hle/service/nvdrv/devices/nvmap.h |
| 368 | hle/service/nvdrv/interface.cpp | 377 | hle/service/nvdrv/interface.cpp |
| 369 | hle/service/nvdrv/interface.h | 378 | hle/service/nvdrv/interface.h |
| 379 | hle/service/nvdrv/nvdata.h | ||
| 370 | hle/service/nvdrv/nvdrv.cpp | 380 | hle/service/nvdrv/nvdrv.cpp |
| 371 | hle/service/nvdrv/nvdrv.h | 381 | hle/service/nvdrv/nvdrv.h |
| 372 | hle/service/nvdrv/nvmemp.cpp | 382 | hle/service/nvdrv/nvmemp.cpp |
| @@ -429,6 +439,8 @@ add_library(core STATIC | |||
| 429 | hle/service/time/interface.h | 439 | hle/service/time/interface.h |
| 430 | hle/service/time/time.cpp | 440 | hle/service/time/time.cpp |
| 431 | hle/service/time/time.h | 441 | hle/service/time/time.h |
| 442 | hle/service/time/time_sharedmemory.cpp | ||
| 443 | hle/service/time/time_sharedmemory.h | ||
| 432 | hle/service/usb/usb.cpp | 444 | hle/service/usb/usb.cpp |
| 433 | hle/service/usb/usb.h | 445 | hle/service/usb/usb.h |
| 434 | hle/service/vi/display/vi_display.cpp | 446 | hle/service/vi/display/vi_display.cpp |
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index c6691a8e1..45e94e625 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h | |||
| @@ -44,13 +44,6 @@ public: | |||
| 44 | /// Step CPU by one instruction | 44 | /// Step CPU by one instruction |
| 45 | virtual void Step() = 0; | 45 | virtual void Step() = 0; |
| 46 | 46 | ||
| 47 | /// Maps a backing memory region for the CPU | ||
| 48 | virtual void MapBackingMemory(VAddr address, std::size_t size, u8* memory, | ||
| 49 | Kernel::VMAPermission perms) = 0; | ||
| 50 | |||
| 51 | /// Unmaps a region of memory that was previously mapped using MapBackingMemory | ||
| 52 | virtual void UnmapMemory(VAddr address, std::size_t size) = 0; | ||
| 53 | |||
| 54 | /// Clear all instruction cache | 47 | /// Clear all instruction cache |
| 55 | virtual void ClearInstructionCache() = 0; | 48 | virtual void ClearInstructionCache() = 0; |
| 56 | 49 | ||
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index 44307fa19..f1506b372 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp | |||
| @@ -177,15 +177,6 @@ ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, | |||
| 177 | 177 | ||
| 178 | ARM_Dynarmic::~ARM_Dynarmic() = default; | 178 | ARM_Dynarmic::~ARM_Dynarmic() = default; |
| 179 | 179 | ||
| 180 | void ARM_Dynarmic::MapBackingMemory(u64 address, std::size_t size, u8* memory, | ||
| 181 | Kernel::VMAPermission perms) { | ||
| 182 | inner_unicorn.MapBackingMemory(address, size, memory, perms); | ||
| 183 | } | ||
| 184 | |||
| 185 | void ARM_Dynarmic::UnmapMemory(u64 address, std::size_t size) { | ||
| 186 | inner_unicorn.UnmapMemory(address, size); | ||
| 187 | } | ||
| 188 | |||
| 189 | void ARM_Dynarmic::SetPC(u64 pc) { | 180 | void ARM_Dynarmic::SetPC(u64 pc) { |
| 190 | jit->SetPC(pc); | 181 | jit->SetPC(pc); |
| 191 | } | 182 | } |
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h index b701e97a3..504d46c68 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic.h | |||
| @@ -23,9 +23,6 @@ public: | |||
| 23 | ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); | 23 | ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); |
| 24 | ~ARM_Dynarmic() override; | 24 | ~ARM_Dynarmic() override; |
| 25 | 25 | ||
| 26 | void MapBackingMemory(VAddr address, std::size_t size, u8* memory, | ||
| 27 | Kernel::VMAPermission perms) override; | ||
| 28 | void UnmapMemory(u64 address, std::size_t size) override; | ||
| 29 | void SetPC(u64 pc) override; | 26 | void SetPC(u64 pc) override; |
| 30 | u64 GetPC() const override; | 27 | u64 GetPC() const override; |
| 31 | u64 GetReg(int index) const override; | 28 | u64 GetReg(int index) const override; |
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index 4e07fe8b5..97d5c2a8a 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp | |||
| @@ -50,11 +50,14 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_ | |||
| 50 | 50 | ||
| 51 | static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value, | 51 | static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value, |
| 52 | void* user_data) { | 52 | void* user_data) { |
| 53 | auto* const system = static_cast<System*>(user_data); | ||
| 54 | |||
| 53 | ARM_Interface::ThreadContext ctx{}; | 55 | ARM_Interface::ThreadContext ctx{}; |
| 54 | Core::CurrentArmInterface().SaveContext(ctx); | 56 | system->CurrentArmInterface().SaveContext(ctx); |
| 55 | ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr, | 57 | ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr, |
| 56 | ctx.pc, ctx.cpu_registers[30]); | 58 | ctx.pc, ctx.cpu_registers[30]); |
| 57 | return {}; | 59 | |
| 60 | return false; | ||
| 58 | } | 61 | } |
| 59 | 62 | ||
| 60 | ARM_Unicorn::ARM_Unicorn(System& system) : system{system} { | 63 | ARM_Unicorn::ARM_Unicorn(System& system) : system{system} { |
| @@ -65,7 +68,7 @@ ARM_Unicorn::ARM_Unicorn(System& system) : system{system} { | |||
| 65 | 68 | ||
| 66 | uc_hook hook{}; | 69 | uc_hook hook{}; |
| 67 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1)); | 70 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1)); |
| 68 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1)); | 71 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, &system, 0, -1)); |
| 69 | if (GDBStub::IsServerEnabled()) { | 72 | if (GDBStub::IsServerEnabled()) { |
| 70 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1)); | 73 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1)); |
| 71 | last_bkpt_hit = false; | 74 | last_bkpt_hit = false; |
| @@ -76,15 +79,6 @@ ARM_Unicorn::~ARM_Unicorn() { | |||
| 76 | CHECKED(uc_close(uc)); | 79 | CHECKED(uc_close(uc)); |
| 77 | } | 80 | } |
| 78 | 81 | ||
| 79 | void ARM_Unicorn::MapBackingMemory(VAddr address, std::size_t size, u8* memory, | ||
| 80 | Kernel::VMAPermission perms) { | ||
| 81 | CHECKED(uc_mem_map_ptr(uc, address, size, static_cast<u32>(perms), memory)); | ||
| 82 | } | ||
| 83 | |||
| 84 | void ARM_Unicorn::UnmapMemory(VAddr address, std::size_t size) { | ||
| 85 | CHECKED(uc_mem_unmap(uc, address, size)); | ||
| 86 | } | ||
| 87 | |||
| 88 | void ARM_Unicorn::SetPC(u64 pc) { | 82 | void ARM_Unicorn::SetPC(u64 pc) { |
| 89 | CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &pc)); | 83 | CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &pc)); |
| 90 | } | 84 | } |
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index 34e974b4d..fe2ffd70c 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h | |||
| @@ -18,9 +18,6 @@ public: | |||
| 18 | explicit ARM_Unicorn(System& system); | 18 | explicit ARM_Unicorn(System& system); |
| 19 | ~ARM_Unicorn() override; | 19 | ~ARM_Unicorn() override; |
| 20 | 20 | ||
| 21 | void MapBackingMemory(VAddr address, std::size_t size, u8* memory, | ||
| 22 | Kernel::VMAPermission perms) override; | ||
| 23 | void UnmapMemory(VAddr address, std::size_t size) override; | ||
| 24 | void SetPC(u64 pc) override; | 21 | void SetPC(u64 pc) override; |
| 25 | u64 GetPC() const override; | 22 | u64 GetPC() const override; |
| 26 | u64 GetReg(int index) const override; | 23 | u64 GetReg(int index) const override; |
diff --git a/src/core/core.cpp b/src/core/core.cpp index 262411db8..3d0978cbf 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -19,12 +19,14 @@ | |||
| 19 | #include "core/file_sys/vfs_concat.h" | 19 | #include "core/file_sys/vfs_concat.h" |
| 20 | #include "core/file_sys/vfs_real.h" | 20 | #include "core/file_sys/vfs_real.h" |
| 21 | #include "core/gdbstub/gdbstub.h" | 21 | #include "core/gdbstub/gdbstub.h" |
| 22 | #include "core/hardware_interrupt_manager.h" | ||
| 22 | #include "core/hle/kernel/client_port.h" | 23 | #include "core/hle/kernel/client_port.h" |
| 23 | #include "core/hle/kernel/kernel.h" | 24 | #include "core/hle/kernel/kernel.h" |
| 24 | #include "core/hle/kernel/process.h" | 25 | #include "core/hle/kernel/process.h" |
| 25 | #include "core/hle/kernel/scheduler.h" | 26 | #include "core/hle/kernel/scheduler.h" |
| 26 | #include "core/hle/kernel/thread.h" | 27 | #include "core/hle/kernel/thread.h" |
| 27 | #include "core/hle/service/am/applets/applets.h" | 28 | #include "core/hle/service/am/applets/applets.h" |
| 29 | #include "core/hle/service/apm/controller.h" | ||
| 28 | #include "core/hle/service/glue/manager.h" | 30 | #include "core/hle/service/glue/manager.h" |
| 29 | #include "core/hle/service/service.h" | 31 | #include "core/hle/service/service.h" |
| 30 | #include "core/hle/service/sm/sm.h" | 32 | #include "core/hle/service/sm/sm.h" |
| @@ -102,7 +104,8 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, | |||
| 102 | return vfs->OpenFile(path, FileSys::Mode::Read); | 104 | return vfs->OpenFile(path, FileSys::Mode::Read); |
| 103 | } | 105 | } |
| 104 | struct System::Impl { | 106 | struct System::Impl { |
| 105 | explicit Impl(System& system) : kernel{system}, cpu_core_manager{system}, reporter{system} {} | 107 | explicit Impl(System& system) |
| 108 | : kernel{system}, cpu_core_manager{system}, applet_manager{system}, reporter{system} {} | ||
| 106 | 109 | ||
| 107 | Cpu& CurrentCpuCore() { | 110 | Cpu& CurrentCpuCore() { |
| 108 | return cpu_core_manager.GetCurrentCore(); | 111 | return cpu_core_manager.GetCurrentCore(); |
| @@ -143,14 +146,14 @@ struct System::Impl { | |||
| 143 | telemetry_session = std::make_unique<Core::TelemetrySession>(); | 146 | telemetry_session = std::make_unique<Core::TelemetrySession>(); |
| 144 | service_manager = std::make_shared<Service::SM::ServiceManager>(); | 147 | service_manager = std::make_shared<Service::SM::ServiceManager>(); |
| 145 | 148 | ||
| 146 | Service::Init(service_manager, system, *virtual_filesystem); | 149 | Service::Init(service_manager, system); |
| 147 | GDBStub::Init(); | 150 | GDBStub::Init(); |
| 148 | 151 | ||
| 149 | renderer = VideoCore::CreateRenderer(emu_window, system); | 152 | renderer = VideoCore::CreateRenderer(emu_window, system); |
| 150 | if (!renderer->Init()) { | 153 | if (!renderer->Init()) { |
| 151 | return ResultStatus::ErrorVideoCore; | 154 | return ResultStatus::ErrorVideoCore; |
| 152 | } | 155 | } |
| 153 | 156 | interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system); | |
| 154 | gpu_core = VideoCore::CreateGPU(system); | 157 | gpu_core = VideoCore::CreateGPU(system); |
| 155 | 158 | ||
| 156 | is_powered_on = true; | 159 | is_powered_on = true; |
| @@ -297,6 +300,7 @@ struct System::Impl { | |||
| 297 | std::unique_ptr<VideoCore::RendererBase> renderer; | 300 | std::unique_ptr<VideoCore::RendererBase> renderer; |
| 298 | std::unique_ptr<Tegra::GPU> gpu_core; | 301 | std::unique_ptr<Tegra::GPU> gpu_core; |
| 299 | std::shared_ptr<Tegra::DebugContext> debug_context; | 302 | std::shared_ptr<Tegra::DebugContext> debug_context; |
| 303 | std::unique_ptr<Core::Hardware::InterruptManager> interrupt_manager; | ||
| 300 | CpuCoreManager cpu_core_manager; | 304 | CpuCoreManager cpu_core_manager; |
| 301 | bool is_powered_on = false; | 305 | bool is_powered_on = false; |
| 302 | 306 | ||
| @@ -306,6 +310,9 @@ struct System::Impl { | |||
| 306 | /// Frontend applets | 310 | /// Frontend applets |
| 307 | Service::AM::Applets::AppletManager applet_manager; | 311 | Service::AM::Applets::AppletManager applet_manager; |
| 308 | 312 | ||
| 313 | /// APM (Performance) services | ||
| 314 | Service::APM::Controller apm_controller{core_timing}; | ||
| 315 | |||
| 309 | /// Glue services | 316 | /// Glue services |
| 310 | Service::Glue::ARPManager arp_manager; | 317 | Service::Glue::ARPManager arp_manager; |
| 311 | 318 | ||
| @@ -440,6 +447,14 @@ const Tegra::GPU& System::GPU() const { | |||
| 440 | return *impl->gpu_core; | 447 | return *impl->gpu_core; |
| 441 | } | 448 | } |
| 442 | 449 | ||
| 450 | Core::Hardware::InterruptManager& System::InterruptManager() { | ||
| 451 | return *impl->interrupt_manager; | ||
| 452 | } | ||
| 453 | |||
| 454 | const Core::Hardware::InterruptManager& System::InterruptManager() const { | ||
| 455 | return *impl->interrupt_manager; | ||
| 456 | } | ||
| 457 | |||
| 443 | VideoCore::RendererBase& System::Renderer() { | 458 | VideoCore::RendererBase& System::Renderer() { |
| 444 | return *impl->renderer; | 459 | return *impl->renderer; |
| 445 | } | 460 | } |
| @@ -568,6 +583,14 @@ const Service::Glue::ARPManager& System::GetARPManager() const { | |||
| 568 | return impl->arp_manager; | 583 | return impl->arp_manager; |
| 569 | } | 584 | } |
| 570 | 585 | ||
| 586 | Service::APM::Controller& System::GetAPMController() { | ||
| 587 | return impl->apm_controller; | ||
| 588 | } | ||
| 589 | |||
| 590 | const Service::APM::Controller& System::GetAPMController() const { | ||
| 591 | return impl->apm_controller; | ||
| 592 | } | ||
| 593 | |||
| 571 | System::ResultStatus System::Init(Frontend::EmuWindow& emu_window) { | 594 | System::ResultStatus System::Init(Frontend::EmuWindow& emu_window) { |
| 572 | return impl->Init(*this, emu_window); | 595 | return impl->Init(*this, emu_window); |
| 573 | } | 596 | } |
diff --git a/src/core/core.h b/src/core/core.h index 70adb7af9..0138d93b0 100644 --- a/src/core/core.h +++ b/src/core/core.h | |||
| @@ -43,6 +43,10 @@ struct AppletFrontendSet; | |||
| 43 | class AppletManager; | 43 | class AppletManager; |
| 44 | } // namespace AM::Applets | 44 | } // namespace AM::Applets |
| 45 | 45 | ||
| 46 | namespace APM { | ||
| 47 | class Controller; | ||
| 48 | } | ||
| 49 | |||
| 46 | namespace Glue { | 50 | namespace Glue { |
| 47 | class ARPManager; | 51 | class ARPManager; |
| 48 | } | 52 | } |
| @@ -66,6 +70,10 @@ namespace Core::Timing { | |||
| 66 | class CoreTiming; | 70 | class CoreTiming; |
| 67 | } | 71 | } |
| 68 | 72 | ||
| 73 | namespace Core::Hardware { | ||
| 74 | class InterruptManager; | ||
| 75 | } | ||
| 76 | |||
| 69 | namespace Core { | 77 | namespace Core { |
| 70 | 78 | ||
| 71 | class ARM_Interface; | 79 | class ARM_Interface; |
| @@ -230,6 +238,12 @@ public: | |||
| 230 | /// Provides a constant reference to the core timing instance. | 238 | /// Provides a constant reference to the core timing instance. |
| 231 | const Timing::CoreTiming& CoreTiming() const; | 239 | const Timing::CoreTiming& CoreTiming() const; |
| 232 | 240 | ||
| 241 | /// Provides a reference to the interrupt manager instance. | ||
| 242 | Core::Hardware::InterruptManager& InterruptManager(); | ||
| 243 | |||
| 244 | /// Provides a constant reference to the interrupt manager instance. | ||
| 245 | const Core::Hardware::InterruptManager& InterruptManager() const; | ||
| 246 | |||
| 233 | /// Provides a reference to the kernel instance. | 247 | /// Provides a reference to the kernel instance. |
| 234 | Kernel::KernelCore& Kernel(); | 248 | Kernel::KernelCore& Kernel(); |
| 235 | 249 | ||
| @@ -296,6 +310,10 @@ public: | |||
| 296 | 310 | ||
| 297 | const Service::Glue::ARPManager& GetARPManager() const; | 311 | const Service::Glue::ARPManager& GetARPManager() const; |
| 298 | 312 | ||
| 313 | Service::APM::Controller& GetAPMController(); | ||
| 314 | |||
| 315 | const Service::APM::Controller& GetAPMController() const; | ||
| 316 | |||
| 299 | private: | 317 | private: |
| 300 | System(); | 318 | System(); |
| 301 | 319 | ||
| @@ -319,10 +337,6 @@ private: | |||
| 319 | static System s_instance; | 337 | static System s_instance; |
| 320 | }; | 338 | }; |
| 321 | 339 | ||
| 322 | inline ARM_Interface& CurrentArmInterface() { | ||
| 323 | return System::GetInstance().CurrentArmInterface(); | ||
| 324 | } | ||
| 325 | |||
| 326 | inline Kernel::Process* CurrentProcess() { | 340 | inline Kernel::Process* CurrentProcess() { |
| 327 | return System::GetInstance().CurrentProcess(); | 341 | return System::GetInstance().CurrentProcess(); |
| 328 | } | 342 | } |
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp index ba63c3e61..21c410e34 100644 --- a/src/core/core_cpu.cpp +++ b/src/core/core_cpu.cpp | |||
| @@ -53,16 +53,12 @@ bool CpuBarrier::Rendezvous() { | |||
| 53 | Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, | 53 | Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, |
| 54 | std::size_t core_index) | 54 | std::size_t core_index) |
| 55 | : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} { | 55 | : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} { |
| 56 | if (Settings::values.use_cpu_jit) { | ||
| 57 | #ifdef ARCHITECTURE_x86_64 | 56 | #ifdef ARCHITECTURE_x86_64 |
| 58 | arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index); | 57 | arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index); |
| 59 | #else | 58 | #else |
| 60 | arm_interface = std::make_unique<ARM_Unicorn>(system); | 59 | arm_interface = std::make_unique<ARM_Unicorn>(system); |
| 61 | LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); | 60 | LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); |
| 62 | #endif | 61 | #endif |
| 63 | } else { | ||
| 64 | arm_interface = std::make_unique<ARM_Unicorn>(system); | ||
| 65 | } | ||
| 66 | 62 | ||
| 67 | scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface); | 63 | scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface); |
| 68 | } | 64 | } |
| @@ -70,15 +66,12 @@ Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_ba | |||
| 70 | Cpu::~Cpu() = default; | 66 | Cpu::~Cpu() = default; |
| 71 | 67 | ||
| 72 | std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(std::size_t num_cores) { | 68 | std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(std::size_t num_cores) { |
| 73 | if (Settings::values.use_cpu_jit) { | ||
| 74 | #ifdef ARCHITECTURE_x86_64 | 69 | #ifdef ARCHITECTURE_x86_64 |
| 75 | return std::make_unique<DynarmicExclusiveMonitor>(num_cores); | 70 | return std::make_unique<DynarmicExclusiveMonitor>(num_cores); |
| 76 | #else | 71 | #else |
| 77 | return nullptr; // TODO(merry): Passthrough exclusive monitor | 72 | // TODO(merry): Passthrough exclusive monitor |
| 73 | return nullptr; | ||
| 78 | #endif | 74 | #endif |
| 79 | } else { | ||
| 80 | return nullptr; // TODO(merry): Passthrough exclusive monitor | ||
| 81 | } | ||
| 82 | } | 75 | } |
| 83 | 76 | ||
| 84 | void Cpu::RunLoop(bool tight_loop) { | 77 | void Cpu::RunLoop(bool tight_loop) { |
diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp index 6dd633363..46aceec3d 100644 --- a/src/core/crypto/key_manager.cpp +++ b/src/core/crypto/key_manager.cpp | |||
| @@ -37,6 +37,7 @@ | |||
| 37 | namespace Core::Crypto { | 37 | namespace Core::Crypto { |
| 38 | 38 | ||
| 39 | constexpr u64 CURRENT_CRYPTO_REVISION = 0x5; | 39 | constexpr u64 CURRENT_CRYPTO_REVISION = 0x5; |
| 40 | constexpr u64 FULL_TICKET_SIZE = 0x400; | ||
| 40 | 41 | ||
| 41 | using namespace Common; | 42 | using namespace Common; |
| 42 | 43 | ||
| @@ -55,6 +56,99 @@ const std::map<std::pair<S128KeyType, u64>, std::string> KEYS_VARIABLE_LENGTH{ | |||
| 55 | {{S128KeyType::KeyblobMAC, 0}, "keyblob_mac_key_"}, | 56 | {{S128KeyType::KeyblobMAC, 0}, "keyblob_mac_key_"}, |
| 56 | }; | 57 | }; |
| 57 | 58 | ||
| 59 | namespace { | ||
| 60 | template <std::size_t Size> | ||
| 61 | bool IsAllZeroArray(const std::array<u8, Size>& array) { | ||
| 62 | return std::all_of(array.begin(), array.end(), [](const auto& elem) { return elem == 0; }); | ||
| 63 | } | ||
| 64 | } // namespace | ||
| 65 | |||
| 66 | u64 GetSignatureTypeDataSize(SignatureType type) { | ||
| 67 | switch (type) { | ||
| 68 | case SignatureType::RSA_4096_SHA1: | ||
| 69 | case SignatureType::RSA_4096_SHA256: | ||
| 70 | return 0x200; | ||
| 71 | case SignatureType::RSA_2048_SHA1: | ||
| 72 | case SignatureType::RSA_2048_SHA256: | ||
| 73 | return 0x100; | ||
| 74 | case SignatureType::ECDSA_SHA1: | ||
| 75 | case SignatureType::ECDSA_SHA256: | ||
| 76 | return 0x3C; | ||
| 77 | } | ||
| 78 | UNREACHABLE(); | ||
| 79 | } | ||
| 80 | |||
| 81 | u64 GetSignatureTypePaddingSize(SignatureType type) { | ||
| 82 | switch (type) { | ||
| 83 | case SignatureType::RSA_4096_SHA1: | ||
| 84 | case SignatureType::RSA_4096_SHA256: | ||
| 85 | case SignatureType::RSA_2048_SHA1: | ||
| 86 | case SignatureType::RSA_2048_SHA256: | ||
| 87 | return 0x3C; | ||
| 88 | case SignatureType::ECDSA_SHA1: | ||
| 89 | case SignatureType::ECDSA_SHA256: | ||
| 90 | return 0x40; | ||
| 91 | } | ||
| 92 | UNREACHABLE(); | ||
| 93 | } | ||
| 94 | |||
| 95 | SignatureType Ticket::GetSignatureType() const { | ||
| 96 | if (auto ticket = std::get_if<RSA4096Ticket>(&data)) { | ||
| 97 | return ticket->sig_type; | ||
| 98 | } | ||
| 99 | if (auto ticket = std::get_if<RSA2048Ticket>(&data)) { | ||
| 100 | return ticket->sig_type; | ||
| 101 | } | ||
| 102 | if (auto ticket = std::get_if<ECDSATicket>(&data)) { | ||
| 103 | return ticket->sig_type; | ||
| 104 | } | ||
| 105 | |||
| 106 | UNREACHABLE(); | ||
| 107 | } | ||
| 108 | |||
| 109 | TicketData& Ticket::GetData() { | ||
| 110 | if (auto ticket = std::get_if<RSA4096Ticket>(&data)) { | ||
| 111 | return ticket->data; | ||
| 112 | } | ||
| 113 | if (auto ticket = std::get_if<RSA2048Ticket>(&data)) { | ||
| 114 | return ticket->data; | ||
| 115 | } | ||
| 116 | if (auto ticket = std::get_if<ECDSATicket>(&data)) { | ||
| 117 | return ticket->data; | ||
| 118 | } | ||
| 119 | |||
| 120 | UNREACHABLE(); | ||
| 121 | } | ||
| 122 | |||
| 123 | const TicketData& Ticket::GetData() const { | ||
| 124 | if (auto ticket = std::get_if<RSA4096Ticket>(&data)) { | ||
| 125 | return ticket->data; | ||
| 126 | } | ||
| 127 | if (auto ticket = std::get_if<RSA2048Ticket>(&data)) { | ||
| 128 | return ticket->data; | ||
| 129 | } | ||
| 130 | if (auto ticket = std::get_if<ECDSATicket>(&data)) { | ||
| 131 | return ticket->data; | ||
| 132 | } | ||
| 133 | |||
| 134 | UNREACHABLE(); | ||
| 135 | } | ||
| 136 | |||
| 137 | u64 Ticket::GetSize() const { | ||
| 138 | const auto sig_type = GetSignatureType(); | ||
| 139 | |||
| 140 | return sizeof(SignatureType) + GetSignatureTypeDataSize(sig_type) + | ||
| 141 | GetSignatureTypePaddingSize(sig_type) + sizeof(TicketData); | ||
| 142 | } | ||
| 143 | |||
| 144 | Ticket Ticket::SynthesizeCommon(Key128 title_key, const std::array<u8, 16>& rights_id) { | ||
| 145 | RSA2048Ticket out{}; | ||
| 146 | out.sig_type = SignatureType::RSA_2048_SHA256; | ||
| 147 | out.data.rights_id = rights_id; | ||
| 148 | out.data.title_key_common = title_key; | ||
| 149 | return Ticket{out}; | ||
| 150 | } | ||
| 151 | |||
| 58 | Key128 GenerateKeyEncryptionKey(Key128 source, Key128 master, Key128 kek_seed, Key128 key_seed) { | 152 | Key128 GenerateKeyEncryptionKey(Key128 source, Key128 master, Key128 kek_seed, Key128 key_seed) { |
| 59 | Key128 out{}; | 153 | Key128 out{}; |
| 60 | 154 | ||
| @@ -135,6 +229,27 @@ void KeyManager::DeriveGeneralPurposeKeys(std::size_t crypto_revision) { | |||
| 135 | } | 229 | } |
| 136 | } | 230 | } |
| 137 | 231 | ||
| 232 | RSAKeyPair<2048> KeyManager::GetETicketRSAKey() const { | ||
| 233 | if (IsAllZeroArray(eticket_extended_kek) || !HasKey(S128KeyType::ETicketRSAKek)) | ||
| 234 | return {}; | ||
| 235 | |||
| 236 | const auto eticket_final = GetKey(S128KeyType::ETicketRSAKek); | ||
| 237 | |||
| 238 | std::vector<u8> extended_iv(eticket_extended_kek.begin(), eticket_extended_kek.begin() + 0x10); | ||
| 239 | std::array<u8, 0x230> extended_dec{}; | ||
| 240 | AESCipher<Key128> rsa_1(eticket_final, Mode::CTR); | ||
| 241 | rsa_1.SetIV(extended_iv); | ||
| 242 | rsa_1.Transcode(eticket_extended_kek.data() + 0x10, eticket_extended_kek.size() - 0x10, | ||
| 243 | extended_dec.data(), Op::Decrypt); | ||
| 244 | |||
| 245 | RSAKeyPair<2048> rsa_key{}; | ||
| 246 | std::memcpy(rsa_key.decryption_key.data(), extended_dec.data(), rsa_key.decryption_key.size()); | ||
| 247 | std::memcpy(rsa_key.modulus.data(), extended_dec.data() + 0x100, rsa_key.modulus.size()); | ||
| 248 | std::memcpy(rsa_key.exponent.data(), extended_dec.data() + 0x200, rsa_key.exponent.size()); | ||
| 249 | |||
| 250 | return rsa_key; | ||
| 251 | } | ||
| 252 | |||
| 138 | Key128 DeriveKeyblobMACKey(const Key128& keyblob_key, const Key128& mac_source) { | 253 | Key128 DeriveKeyblobMACKey(const Key128& keyblob_key, const Key128& mac_source) { |
| 139 | AESCipher<Key128> mac_cipher(keyblob_key, Mode::ECB); | 254 | AESCipher<Key128> mac_cipher(keyblob_key, Mode::ECB); |
| 140 | Key128 mac_key{}; | 255 | Key128 mac_key{}; |
| @@ -237,7 +352,7 @@ Loader::ResultStatus DeriveSDKeys(std::array<Key256, 2>& sd_keys, KeyManager& ke | |||
| 237 | return Loader::ResultStatus::Success; | 352 | return Loader::ResultStatus::Success; |
| 238 | } | 353 | } |
| 239 | 354 | ||
| 240 | std::vector<TicketRaw> GetTicketblob(const FileUtil::IOFile& ticket_save) { | 355 | std::vector<Ticket> GetTicketblob(const FileUtil::IOFile& ticket_save) { |
| 241 | if (!ticket_save.IsOpen()) | 356 | if (!ticket_save.IsOpen()) |
| 242 | return {}; | 357 | return {}; |
| 243 | 358 | ||
| @@ -246,14 +361,14 @@ std::vector<TicketRaw> GetTicketblob(const FileUtil::IOFile& ticket_save) { | |||
| 246 | return {}; | 361 | return {}; |
| 247 | } | 362 | } |
| 248 | 363 | ||
| 249 | std::vector<TicketRaw> out; | 364 | std::vector<Ticket> out; |
| 250 | for (std::size_t offset = 0; offset + 0x4 < buffer.size(); ++offset) { | 365 | for (std::size_t offset = 0; offset + 0x4 < buffer.size(); ++offset) { |
| 251 | if (buffer[offset] == 0x4 && buffer[offset + 1] == 0x0 && buffer[offset + 2] == 0x1 && | 366 | if (buffer[offset] == 0x4 && buffer[offset + 1] == 0x0 && buffer[offset + 2] == 0x1 && |
| 252 | buffer[offset + 3] == 0x0) { | 367 | buffer[offset + 3] == 0x0) { |
| 253 | out.emplace_back(); | 368 | out.emplace_back(); |
| 254 | auto& next = out.back(); | 369 | auto& next = out.back(); |
| 255 | std::memcpy(&next, buffer.data() + offset, sizeof(TicketRaw)); | 370 | std::memcpy(&next, buffer.data() + offset, sizeof(Ticket)); |
| 256 | offset += next.size(); | 371 | offset += FULL_TICKET_SIZE; |
| 257 | } | 372 | } |
| 258 | } | 373 | } |
| 259 | 374 | ||
| @@ -305,29 +420,23 @@ static std::optional<u64> FindTicketOffset(const std::array<u8, size>& data) { | |||
| 305 | return offset; | 420 | return offset; |
| 306 | } | 421 | } |
| 307 | 422 | ||
| 308 | std::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket, | 423 | std::optional<std::pair<Key128, Key128>> ParseTicket(const Ticket& ticket, |
| 309 | const RSAKeyPair<2048>& key) { | 424 | const RSAKeyPair<2048>& key) { |
| 310 | u32 cert_authority; | 425 | const auto issuer = ticket.GetData().issuer; |
| 311 | std::memcpy(&cert_authority, ticket.data() + 0x140, sizeof(cert_authority)); | 426 | if (issuer == std::array<u8, 0x40>{}) |
| 312 | if (cert_authority == 0) | ||
| 313 | return {}; | 427 | return {}; |
| 314 | if (cert_authority != Common::MakeMagic('R', 'o', 'o', 't')) { | 428 | if (issuer[0] != 'R' || issuer[1] != 'o' || issuer[2] != 'o' || issuer[3] != 't') { |
| 315 | LOG_INFO(Crypto, | 429 | LOG_INFO(Crypto, "Attempting to parse ticket with non-standard certificate authority."); |
| 316 | "Attempting to parse ticket with non-standard certificate authority {:08X}.", | ||
| 317 | cert_authority); | ||
| 318 | } | 430 | } |
| 319 | 431 | ||
| 320 | Key128 rights_id; | 432 | Key128 rights_id = ticket.GetData().rights_id; |
| 321 | std::memcpy(rights_id.data(), ticket.data() + 0x2A0, sizeof(Key128)); | ||
| 322 | 433 | ||
| 323 | if (rights_id == Key128{}) | 434 | if (rights_id == Key128{}) |
| 324 | return {}; | 435 | return {}; |
| 325 | 436 | ||
| 326 | Key128 key_temp{}; | 437 | if (!std::any_of(ticket.GetData().title_key_common_pad.begin(), |
| 327 | 438 | ticket.GetData().title_key_common_pad.end(), [](u8 b) { return b != 0; })) { | |
| 328 | if (!std::any_of(ticket.begin() + 0x190, ticket.begin() + 0x280, [](u8 b) { return b != 0; })) { | 439 | return std::make_pair(rights_id, ticket.GetData().title_key_common); |
| 329 | std::memcpy(key_temp.data(), ticket.data() + 0x180, key_temp.size()); | ||
| 330 | return std::make_pair(rights_id, key_temp); | ||
| 331 | } | 440 | } |
| 332 | 441 | ||
| 333 | mbedtls_mpi D; // RSA Private Exponent | 442 | mbedtls_mpi D; // RSA Private Exponent |
| @@ -342,7 +451,7 @@ std::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket, | |||
| 342 | 451 | ||
| 343 | mbedtls_mpi_read_binary(&D, key.decryption_key.data(), key.decryption_key.size()); | 452 | mbedtls_mpi_read_binary(&D, key.decryption_key.data(), key.decryption_key.size()); |
| 344 | mbedtls_mpi_read_binary(&N, key.modulus.data(), key.modulus.size()); | 453 | mbedtls_mpi_read_binary(&N, key.modulus.data(), key.modulus.size()); |
| 345 | mbedtls_mpi_read_binary(&S, ticket.data() + 0x180, 0x100); | 454 | mbedtls_mpi_read_binary(&S, ticket.GetData().title_key_block.data(), 0x100); |
| 346 | 455 | ||
| 347 | mbedtls_mpi_exp_mod(&M, &S, &D, &N, nullptr); | 456 | mbedtls_mpi_exp_mod(&M, &S, &D, &N, nullptr); |
| 348 | 457 | ||
| @@ -366,6 +475,7 @@ std::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket, | |||
| 366 | return {}; | 475 | return {}; |
| 367 | ASSERT(*offset > 0); | 476 | ASSERT(*offset > 0); |
| 368 | 477 | ||
| 478 | Key128 key_temp{}; | ||
| 369 | std::memcpy(key_temp.data(), m_2.data() + *offset, key_temp.size()); | 479 | std::memcpy(key_temp.data(), m_2.data() + *offset, key_temp.size()); |
| 370 | 480 | ||
| 371 | return std::make_pair(rights_id, key_temp); | 481 | return std::make_pair(rights_id, key_temp); |
| @@ -450,6 +560,8 @@ void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) { | |||
| 450 | 560 | ||
| 451 | const auto index = std::stoul(out[0].substr(18, 2), nullptr, 16); | 561 | const auto index = std::stoul(out[0].substr(18, 2), nullptr, 16); |
| 452 | encrypted_keyblobs[index] = Common::HexStringToArray<0xB0>(out[1]); | 562 | encrypted_keyblobs[index] = Common::HexStringToArray<0xB0>(out[1]); |
| 563 | } else if (out[0].compare(0, 20, "eticket_extended_kek") == 0) { | ||
| 564 | eticket_extended_kek = Common::HexStringToArray<576>(out[1]); | ||
| 453 | } else { | 565 | } else { |
| 454 | for (const auto& kv : KEYS_VARIABLE_LENGTH) { | 566 | for (const auto& kv : KEYS_VARIABLE_LENGTH) { |
| 455 | if (!ValidCryptoRevisionString(out[0], kv.second.size(), 2)) | 567 | if (!ValidCryptoRevisionString(out[0], kv.second.size(), 2)) |
| @@ -862,20 +974,19 @@ void KeyManager::DeriveETicket(PartitionDataManager& data) { | |||
| 862 | // Titlekeys | 974 | // Titlekeys |
| 863 | data.DecryptProdInfo(GetBISKey(0)); | 975 | data.DecryptProdInfo(GetBISKey(0)); |
| 864 | 976 | ||
| 865 | const auto eticket_extended_kek = data.GetETicketExtendedKek(); | 977 | eticket_extended_kek = data.GetETicketExtendedKek(); |
| 978 | WriteKeyToFile(KeyCategory::Console, "eticket_extended_kek", eticket_extended_kek); | ||
| 979 | PopulateTickets(); | ||
| 980 | } | ||
| 866 | 981 | ||
| 867 | std::vector<u8> extended_iv(0x10); | 982 | void KeyManager::PopulateTickets() { |
| 868 | std::memcpy(extended_iv.data(), eticket_extended_kek.data(), extended_iv.size()); | 983 | const auto rsa_key = GetETicketRSAKey(); |
| 869 | std::array<u8, 0x230> extended_dec{}; | ||
| 870 | AESCipher<Key128> rsa_1(eticket_final, Mode::CTR); | ||
| 871 | rsa_1.SetIV(extended_iv); | ||
| 872 | rsa_1.Transcode(eticket_extended_kek.data() + 0x10, eticket_extended_kek.size() - 0x10, | ||
| 873 | extended_dec.data(), Op::Decrypt); | ||
| 874 | 984 | ||
| 875 | RSAKeyPair<2048> rsa_key{}; | 985 | if (rsa_key == RSAKeyPair<2048>{}) |
| 876 | std::memcpy(rsa_key.decryption_key.data(), extended_dec.data(), rsa_key.decryption_key.size()); | 986 | return; |
| 877 | std::memcpy(rsa_key.modulus.data(), extended_dec.data() + 0x100, rsa_key.modulus.size()); | 987 | |
| 878 | std::memcpy(rsa_key.exponent.data(), extended_dec.data() + 0x200, rsa_key.exponent.size()); | 988 | if (!common_tickets.empty() && !personal_tickets.empty()) |
| 989 | return; | ||
| 879 | 990 | ||
| 880 | const FileUtil::IOFile save1(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) + | 991 | const FileUtil::IOFile save1(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) + |
| 881 | "/system/save/80000000000000e1", | 992 | "/system/save/80000000000000e1", |
| @@ -886,19 +997,41 @@ void KeyManager::DeriveETicket(PartitionDataManager& data) { | |||
| 886 | 997 | ||
| 887 | const auto blob2 = GetTicketblob(save2); | 998 | const auto blob2 = GetTicketblob(save2); |
| 888 | auto res = GetTicketblob(save1); | 999 | auto res = GetTicketblob(save1); |
| 1000 | const auto idx = res.size(); | ||
| 889 | res.insert(res.end(), blob2.begin(), blob2.end()); | 1001 | res.insert(res.end(), blob2.begin(), blob2.end()); |
| 890 | 1002 | ||
| 891 | for (const auto& raw : res) { | 1003 | for (std::size_t i = 0; i < res.size(); ++i) { |
| 892 | const auto pair = ParseTicket(raw, rsa_key); | 1004 | const auto common = i < idx; |
| 1005 | const auto pair = ParseTicket(res[i], rsa_key); | ||
| 893 | if (!pair) | 1006 | if (!pair) |
| 894 | continue; | 1007 | continue; |
| 895 | const auto& [rid, key] = *pair; | 1008 | const auto& [rid, key] = *pair; |
| 896 | u128 rights_id; | 1009 | u128 rights_id; |
| 897 | std::memcpy(rights_id.data(), rid.data(), rid.size()); | 1010 | std::memcpy(rights_id.data(), rid.data(), rid.size()); |
| 1011 | |||
| 1012 | if (common) { | ||
| 1013 | common_tickets[rights_id] = res[i]; | ||
| 1014 | } else { | ||
| 1015 | personal_tickets[rights_id] = res[i]; | ||
| 1016 | } | ||
| 1017 | |||
| 898 | SetKey(S128KeyType::Titlekey, key, rights_id[1], rights_id[0]); | 1018 | SetKey(S128KeyType::Titlekey, key, rights_id[1], rights_id[0]); |
| 899 | } | 1019 | } |
| 900 | } | 1020 | } |
| 901 | 1021 | ||
| 1022 | void KeyManager::SynthesizeTickets() { | ||
| 1023 | for (const auto& key : s128_keys) { | ||
| 1024 | if (key.first.type != S128KeyType::Titlekey) { | ||
| 1025 | continue; | ||
| 1026 | } | ||
| 1027 | u128 rights_id{key.first.field1, key.first.field2}; | ||
| 1028 | Key128 rights_id_2; | ||
| 1029 | std::memcpy(rights_id_2.data(), rights_id.data(), rights_id_2.size()); | ||
| 1030 | const auto ticket = Ticket::SynthesizeCommon(key.second, rights_id_2); | ||
| 1031 | common_tickets.insert_or_assign(rights_id, ticket); | ||
| 1032 | } | ||
| 1033 | } | ||
| 1034 | |||
| 902 | void KeyManager::SetKeyWrapped(S128KeyType id, Key128 key, u64 field1, u64 field2) { | 1035 | void KeyManager::SetKeyWrapped(S128KeyType id, Key128 key, u64 field1, u64 field2) { |
| 903 | if (key == Key128{}) | 1036 | if (key == Key128{}) |
| 904 | return; | 1037 | return; |
| @@ -997,6 +1130,46 @@ void KeyManager::PopulateFromPartitionData(PartitionDataManager& data) { | |||
| 997 | DeriveBase(); | 1130 | DeriveBase(); |
| 998 | } | 1131 | } |
| 999 | 1132 | ||
| 1133 | const std::map<u128, Ticket>& KeyManager::GetCommonTickets() const { | ||
| 1134 | return common_tickets; | ||
| 1135 | } | ||
| 1136 | |||
| 1137 | const std::map<u128, Ticket>& KeyManager::GetPersonalizedTickets() const { | ||
| 1138 | return personal_tickets; | ||
| 1139 | } | ||
| 1140 | |||
| 1141 | bool KeyManager::AddTicketCommon(Ticket raw) { | ||
| 1142 | const auto rsa_key = GetETicketRSAKey(); | ||
| 1143 | if (rsa_key == RSAKeyPair<2048>{}) | ||
| 1144 | return false; | ||
| 1145 | |||
| 1146 | const auto pair = ParseTicket(raw, rsa_key); | ||
| 1147 | if (!pair) | ||
| 1148 | return false; | ||
| 1149 | const auto& [rid, key] = *pair; | ||
| 1150 | u128 rights_id; | ||
| 1151 | std::memcpy(rights_id.data(), rid.data(), rid.size()); | ||
| 1152 | common_tickets[rights_id] = raw; | ||
| 1153 | SetKey(S128KeyType::Titlekey, key, rights_id[1], rights_id[0]); | ||
| 1154 | return true; | ||
| 1155 | } | ||
| 1156 | |||
| 1157 | bool KeyManager::AddTicketPersonalized(Ticket raw) { | ||
| 1158 | const auto rsa_key = GetETicketRSAKey(); | ||
| 1159 | if (rsa_key == RSAKeyPair<2048>{}) | ||
| 1160 | return false; | ||
| 1161 | |||
| 1162 | const auto pair = ParseTicket(raw, rsa_key); | ||
| 1163 | if (!pair) | ||
| 1164 | return false; | ||
| 1165 | const auto& [rid, key] = *pair; | ||
| 1166 | u128 rights_id; | ||
| 1167 | std::memcpy(rights_id.data(), rid.data(), rid.size()); | ||
| 1168 | common_tickets[rights_id] = raw; | ||
| 1169 | SetKey(S128KeyType::Titlekey, key, rights_id[1], rights_id[0]); | ||
| 1170 | return true; | ||
| 1171 | } | ||
| 1172 | |||
| 1000 | const boost::container::flat_map<std::string, KeyIndex<S128KeyType>> KeyManager::s128_file_id = { | 1173 | const boost::container::flat_map<std::string, KeyIndex<S128KeyType>> KeyManager::s128_file_id = { |
| 1001 | {"eticket_rsa_kek", {S128KeyType::ETicketRSAKek, 0, 0}}, | 1174 | {"eticket_rsa_kek", {S128KeyType::ETicketRSAKek, 0, 0}}, |
| 1002 | {"eticket_rsa_kek_source", | 1175 | {"eticket_rsa_kek_source", |
diff --git a/src/core/crypto/key_manager.h b/src/core/crypto/key_manager.h index 22f268c65..7265c4171 100644 --- a/src/core/crypto/key_manager.h +++ b/src/core/crypto/key_manager.h | |||
| @@ -9,8 +9,10 @@ | |||
| 9 | #include <optional> | 9 | #include <optional> |
| 10 | #include <string> | 10 | #include <string> |
| 11 | 11 | ||
| 12 | #include <variant> | ||
| 12 | #include <boost/container/flat_map.hpp> | 13 | #include <boost/container/flat_map.hpp> |
| 13 | #include <fmt/format.h> | 14 | #include <fmt/format.h> |
| 15 | #include "common/common_funcs.h" | ||
| 14 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 15 | #include "core/crypto/partition_data_manager.h" | 17 | #include "core/crypto/partition_data_manager.h" |
| 16 | #include "core/file_sys/vfs_types.h" | 18 | #include "core/file_sys/vfs_types.h" |
| @@ -30,7 +32,79 @@ constexpr u64 TICKET_FILE_TITLEKEY_OFFSET = 0x180; | |||
| 30 | using Key128 = std::array<u8, 0x10>; | 32 | using Key128 = std::array<u8, 0x10>; |
| 31 | using Key256 = std::array<u8, 0x20>; | 33 | using Key256 = std::array<u8, 0x20>; |
| 32 | using SHA256Hash = std::array<u8, 0x20>; | 34 | using SHA256Hash = std::array<u8, 0x20>; |
| 33 | using TicketRaw = std::array<u8, 0x400>; | 35 | |
| 36 | enum class SignatureType { | ||
| 37 | RSA_4096_SHA1 = 0x10000, | ||
| 38 | RSA_2048_SHA1 = 0x10001, | ||
| 39 | ECDSA_SHA1 = 0x10002, | ||
| 40 | RSA_4096_SHA256 = 0x10003, | ||
| 41 | RSA_2048_SHA256 = 0x10004, | ||
| 42 | ECDSA_SHA256 = 0x10005, | ||
| 43 | }; | ||
| 44 | |||
| 45 | u64 GetSignatureTypeDataSize(SignatureType type); | ||
| 46 | u64 GetSignatureTypePaddingSize(SignatureType type); | ||
| 47 | |||
| 48 | enum class TitleKeyType : u8 { | ||
| 49 | Common = 0, | ||
| 50 | Personalized = 1, | ||
| 51 | }; | ||
| 52 | |||
| 53 | struct TicketData { | ||
| 54 | std::array<u8, 0x40> issuer; | ||
| 55 | union { | ||
| 56 | std::array<u8, 0x100> title_key_block; | ||
| 57 | |||
| 58 | struct { | ||
| 59 | Key128 title_key_common; | ||
| 60 | std::array<u8, 0xF0> title_key_common_pad; | ||
| 61 | }; | ||
| 62 | }; | ||
| 63 | |||
| 64 | INSERT_PADDING_BYTES(0x1); | ||
| 65 | TitleKeyType type; | ||
| 66 | INSERT_PADDING_BYTES(0x3); | ||
| 67 | u8 revision; | ||
| 68 | INSERT_PADDING_BYTES(0xA); | ||
| 69 | u64 ticket_id; | ||
| 70 | u64 device_id; | ||
| 71 | std::array<u8, 0x10> rights_id; | ||
| 72 | u32 account_id; | ||
| 73 | INSERT_PADDING_BYTES(0x14C); | ||
| 74 | }; | ||
| 75 | static_assert(sizeof(TicketData) == 0x2C0, "TicketData has incorrect size."); | ||
| 76 | |||
| 77 | struct RSA4096Ticket { | ||
| 78 | SignatureType sig_type; | ||
| 79 | std::array<u8, 0x200> sig_data; | ||
| 80 | INSERT_PADDING_BYTES(0x3C); | ||
| 81 | TicketData data; | ||
| 82 | }; | ||
| 83 | |||
| 84 | struct RSA2048Ticket { | ||
| 85 | SignatureType sig_type; | ||
| 86 | std::array<u8, 0x100> sig_data; | ||
| 87 | INSERT_PADDING_BYTES(0x3C); | ||
| 88 | TicketData data; | ||
| 89 | }; | ||
| 90 | |||
| 91 | struct ECDSATicket { | ||
| 92 | SignatureType sig_type; | ||
| 93 | std::array<u8, 0x3C> sig_data; | ||
| 94 | INSERT_PADDING_BYTES(0x40); | ||
| 95 | TicketData data; | ||
| 96 | }; | ||
| 97 | |||
| 98 | struct Ticket { | ||
| 99 | std::variant<RSA4096Ticket, RSA2048Ticket, ECDSATicket> data; | ||
| 100 | |||
| 101 | SignatureType GetSignatureType() const; | ||
| 102 | TicketData& GetData(); | ||
| 103 | const TicketData& GetData() const; | ||
| 104 | u64 GetSize() const; | ||
| 105 | |||
| 106 | static Ticket SynthesizeCommon(Key128 title_key, const std::array<u8, 0x10>& rights_id); | ||
| 107 | }; | ||
| 34 | 108 | ||
| 35 | static_assert(sizeof(Key128) == 16, "Key128 must be 128 bytes big."); | 109 | static_assert(sizeof(Key128) == 16, "Key128 must be 128 bytes big."); |
| 36 | static_assert(sizeof(Key256) == 32, "Key256 must be 256 bytes big."); | 110 | static_assert(sizeof(Key256) == 32, "Key256 must be 256 bytes big."); |
| @@ -43,6 +117,19 @@ struct RSAKeyPair { | |||
| 43 | std::array<u8, 4> exponent; | 117 | std::array<u8, 4> exponent; |
| 44 | }; | 118 | }; |
| 45 | 119 | ||
| 120 | template <size_t bit_size, size_t byte_size> | ||
| 121 | bool operator==(const RSAKeyPair<bit_size, byte_size>& lhs, | ||
| 122 | const RSAKeyPair<bit_size, byte_size>& rhs) { | ||
| 123 | return std::tie(lhs.encryption_key, lhs.decryption_key, lhs.modulus, lhs.exponent) == | ||
| 124 | std::tie(rhs.encryption_key, rhs.decryption_key, rhs.modulus, rhs.exponent); | ||
| 125 | } | ||
| 126 | |||
| 127 | template <size_t bit_size, size_t byte_size> | ||
| 128 | bool operator!=(const RSAKeyPair<bit_size, byte_size>& lhs, | ||
| 129 | const RSAKeyPair<bit_size, byte_size>& rhs) { | ||
| 130 | return !(lhs == rhs); | ||
| 131 | } | ||
| 132 | |||
| 46 | enum class KeyCategory : u8 { | 133 | enum class KeyCategory : u8 { |
| 47 | Standard, | 134 | Standard, |
| 48 | Title, | 135 | Title, |
| @@ -151,22 +238,35 @@ public: | |||
| 151 | 238 | ||
| 152 | static bool KeyFileExists(bool title); | 239 | static bool KeyFileExists(bool title); |
| 153 | 240 | ||
| 154 | // Call before using the sd seed to attempt to derive it if it dosen't exist. Needs system save | 241 | // Call before using the sd seed to attempt to derive it if it dosen't exist. Needs system |
| 155 | // 8*43 and the private file to exist. | 242 | // save 8*43 and the private file to exist. |
| 156 | void DeriveSDSeedLazy(); | 243 | void DeriveSDSeedLazy(); |
| 157 | 244 | ||
| 158 | bool BaseDeriveNecessary() const; | 245 | bool BaseDeriveNecessary() const; |
| 159 | void DeriveBase(); | 246 | void DeriveBase(); |
| 160 | void DeriveETicket(PartitionDataManager& data); | 247 | void DeriveETicket(PartitionDataManager& data); |
| 248 | void PopulateTickets(); | ||
| 249 | void SynthesizeTickets(); | ||
| 161 | 250 | ||
| 162 | void PopulateFromPartitionData(PartitionDataManager& data); | 251 | void PopulateFromPartitionData(PartitionDataManager& data); |
| 163 | 252 | ||
| 253 | const std::map<u128, Ticket>& GetCommonTickets() const; | ||
| 254 | const std::map<u128, Ticket>& GetPersonalizedTickets() const; | ||
| 255 | |||
| 256 | bool AddTicketCommon(Ticket raw); | ||
| 257 | bool AddTicketPersonalized(Ticket raw); | ||
| 258 | |||
| 164 | private: | 259 | private: |
| 165 | std::map<KeyIndex<S128KeyType>, Key128> s128_keys; | 260 | std::map<KeyIndex<S128KeyType>, Key128> s128_keys; |
| 166 | std::map<KeyIndex<S256KeyType>, Key256> s256_keys; | 261 | std::map<KeyIndex<S256KeyType>, Key256> s256_keys; |
| 167 | 262 | ||
| 263 | // Map from rights ID to ticket | ||
| 264 | std::map<u128, Ticket> common_tickets; | ||
| 265 | std::map<u128, Ticket> personal_tickets; | ||
| 266 | |||
| 168 | std::array<std::array<u8, 0xB0>, 0x20> encrypted_keyblobs{}; | 267 | std::array<std::array<u8, 0xB0>, 0x20> encrypted_keyblobs{}; |
| 169 | std::array<std::array<u8, 0x90>, 0x20> keyblobs{}; | 268 | std::array<std::array<u8, 0x90>, 0x20> keyblobs{}; |
| 269 | std::array<u8, 576> eticket_extended_kek{}; | ||
| 170 | 270 | ||
| 171 | bool dev_mode; | 271 | bool dev_mode; |
| 172 | void LoadFromFile(const std::string& filename, bool is_title_keys); | 272 | void LoadFromFile(const std::string& filename, bool is_title_keys); |
| @@ -178,6 +278,8 @@ private: | |||
| 178 | 278 | ||
| 179 | void DeriveGeneralPurposeKeys(std::size_t crypto_revision); | 279 | void DeriveGeneralPurposeKeys(std::size_t crypto_revision); |
| 180 | 280 | ||
| 281 | RSAKeyPair<2048> GetETicketRSAKey() const; | ||
| 282 | |||
| 181 | void SetKeyWrapped(S128KeyType id, Key128 key, u64 field1 = 0, u64 field2 = 0); | 283 | void SetKeyWrapped(S128KeyType id, Key128 key, u64 field1 = 0, u64 field2 = 0); |
| 182 | void SetKeyWrapped(S256KeyType id, Key256 key, u64 field1 = 0, u64 field2 = 0); | 284 | void SetKeyWrapped(S256KeyType id, Key256 key, u64 field1 = 0, u64 field2 = 0); |
| 183 | 285 | ||
| @@ -195,11 +297,11 @@ std::array<u8, 0x90> DecryptKeyblob(const std::array<u8, 0xB0>& encrypted_keyblo | |||
| 195 | std::optional<Key128> DeriveSDSeed(); | 297 | std::optional<Key128> DeriveSDSeed(); |
| 196 | Loader::ResultStatus DeriveSDKeys(std::array<Key256, 2>& sd_keys, KeyManager& keys); | 298 | Loader::ResultStatus DeriveSDKeys(std::array<Key256, 2>& sd_keys, KeyManager& keys); |
| 197 | 299 | ||
| 198 | std::vector<TicketRaw> GetTicketblob(const FileUtil::IOFile& ticket_save); | 300 | std::vector<Ticket> GetTicketblob(const FileUtil::IOFile& ticket_save); |
| 199 | 301 | ||
| 200 | // Returns a pair of {rights_id, titlekey}. Fails if the ticket has no certificate authority (offset | 302 | // Returns a pair of {rights_id, titlekey}. Fails if the ticket has no certificate authority |
| 201 | // 0x140-0x144 is zero) | 303 | // (offset 0x140-0x144 is zero) |
| 202 | std::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket, | 304 | std::optional<std::pair<Key128, Key128>> ParseTicket(const Ticket& ticket, |
| 203 | const RSAKeyPair<2048>& eticket_extended_key); | 305 | const RSAKeyPair<2048>& eticket_extended_key); |
| 204 | 306 | ||
| 205 | } // namespace Core::Crypto | 307 | } // namespace Core::Crypto |
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp index eb76174c5..7310b3602 100644 --- a/src/core/file_sys/program_metadata.cpp +++ b/src/core/file_sys/program_metadata.cpp | |||
| @@ -94,6 +94,10 @@ u64 ProgramMetadata::GetFilesystemPermissions() const { | |||
| 94 | return aci_file_access.permissions; | 94 | return aci_file_access.permissions; |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | u32 ProgramMetadata::GetSystemResourceSize() const { | ||
| 98 | return npdm_header.system_resource_size; | ||
| 99 | } | ||
| 100 | |||
| 97 | const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const { | 101 | const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const { |
| 98 | return aci_kernel_capabilities; | 102 | return aci_kernel_capabilities; |
| 99 | } | 103 | } |
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h index 43bf2820a..88ec97d85 100644 --- a/src/core/file_sys/program_metadata.h +++ b/src/core/file_sys/program_metadata.h | |||
| @@ -58,6 +58,7 @@ public: | |||
| 58 | u32 GetMainThreadStackSize() const; | 58 | u32 GetMainThreadStackSize() const; |
| 59 | u64 GetTitleID() const; | 59 | u64 GetTitleID() const; |
| 60 | u64 GetFilesystemPermissions() const; | 60 | u64 GetFilesystemPermissions() const; |
| 61 | u32 GetSystemResourceSize() const; | ||
| 61 | const KernelCapabilityDescriptors& GetKernelCapabilities() const; | 62 | const KernelCapabilityDescriptors& GetKernelCapabilities() const; |
| 62 | 63 | ||
| 63 | void Print() const; | 64 | void Print() const; |
| @@ -76,7 +77,8 @@ private: | |||
| 76 | u8 reserved_3; | 77 | u8 reserved_3; |
| 77 | u8 main_thread_priority; | 78 | u8 main_thread_priority; |
| 78 | u8 main_thread_cpu; | 79 | u8 main_thread_cpu; |
| 79 | std::array<u8, 8> reserved_4; | 80 | std::array<u8, 4> reserved_4; |
| 81 | u32_le system_resource_size; | ||
| 80 | u32_le process_category; | 82 | u32_le process_category; |
| 81 | u32_le main_stack_size; | 83 | u32_le main_stack_size; |
| 82 | std::array<u8, 0x10> application_name; | 84 | std::array<u8, 0x10> application_name; |
diff --git a/src/core/file_sys/system_archive/mii_model.cpp b/src/core/file_sys/system_archive/mii_model.cpp new file mode 100644 index 000000000..6a9add87c --- /dev/null +++ b/src/core/file_sys/system_archive/mii_model.cpp | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | // Copyright 2019 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "core/file_sys/system_archive/mii_model.h" | ||
| 6 | #include "core/file_sys/vfs_vector.h" | ||
| 7 | |||
| 8 | namespace FileSys::SystemArchive { | ||
| 9 | |||
| 10 | namespace MiiModelData { | ||
| 11 | |||
| 12 | constexpr std::array<u8, 0x10> NFTR_STANDARD{'N', 'F', 'T', 'R', 0x01, 0x00, 0x00, 0x00, | ||
| 13 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; | ||
| 14 | constexpr std::array<u8, 0x10> NFSR_STANDARD{'N', 'F', 'S', 'R', 0x01, 0x00, 0x00, 0x00, | ||
| 15 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; | ||
| 16 | |||
| 17 | constexpr auto TEXTURE_LOW_LINEAR = NFTR_STANDARD; | ||
| 18 | constexpr auto TEXTURE_LOW_SRGB = NFTR_STANDARD; | ||
| 19 | constexpr auto TEXTURE_MID_LINEAR = NFTR_STANDARD; | ||
| 20 | constexpr auto TEXTURE_MID_SRGB = NFTR_STANDARD; | ||
| 21 | constexpr auto SHAPE_HIGH = NFSR_STANDARD; | ||
| 22 | constexpr auto SHAPE_MID = NFSR_STANDARD; | ||
| 23 | |||
| 24 | } // namespace MiiModelData | ||
| 25 | |||
| 26 | VirtualDir MiiModel() { | ||
| 27 | auto out = std::make_shared<VectorVfsDirectory>(std::vector<VirtualFile>{}, | ||
| 28 | std::vector<VirtualDir>{}, "data"); | ||
| 29 | |||
| 30 | out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::TEXTURE_LOW_LINEAR.size()>>( | ||
| 31 | MiiModelData::TEXTURE_LOW_LINEAR, "NXTextureLowLinear.dat")); | ||
| 32 | out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::TEXTURE_LOW_SRGB.size()>>( | ||
| 33 | MiiModelData::TEXTURE_LOW_SRGB, "NXTextureLowSRGB.dat")); | ||
| 34 | out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::TEXTURE_MID_LINEAR.size()>>( | ||
| 35 | MiiModelData::TEXTURE_MID_LINEAR, "NXTextureMidLinear.dat")); | ||
| 36 | out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::TEXTURE_MID_SRGB.size()>>( | ||
| 37 | MiiModelData::TEXTURE_MID_SRGB, "NXTextureMidSRGB.dat")); | ||
| 38 | out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::SHAPE_HIGH.size()>>( | ||
| 39 | MiiModelData::SHAPE_HIGH, "ShapeHigh.dat")); | ||
| 40 | out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::SHAPE_MID.size()>>( | ||
| 41 | MiiModelData::SHAPE_MID, "ShapeMid.dat")); | ||
| 42 | |||
| 43 | return std::move(out); | ||
| 44 | } | ||
| 45 | |||
| 46 | } // namespace FileSys::SystemArchive | ||
diff --git a/src/core/file_sys/system_archive/mii_model.h b/src/core/file_sys/system_archive/mii_model.h new file mode 100644 index 000000000..6c2d9398b --- /dev/null +++ b/src/core/file_sys/system_archive/mii_model.h | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | // Copyright 2019 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "core/file_sys/vfs_types.h" | ||
| 8 | |||
| 9 | namespace FileSys::SystemArchive { | ||
| 10 | |||
| 11 | VirtualDir MiiModel(); | ||
| 12 | |||
| 13 | } // namespace FileSys::SystemArchive | ||
diff --git a/src/core/file_sys/system_archive/system_archive.cpp b/src/core/file_sys/system_archive/system_archive.cpp index c9722ed77..6d8445383 100644 --- a/src/core/file_sys/system_archive/system_archive.cpp +++ b/src/core/file_sys/system_archive/system_archive.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/logging/log.h" | 5 | #include "common/logging/log.h" |
| 6 | #include "core/file_sys/romfs.h" | 6 | #include "core/file_sys/romfs.h" |
| 7 | #include "core/file_sys/system_archive/mii_model.h" | ||
| 7 | #include "core/file_sys/system_archive/ng_word.h" | 8 | #include "core/file_sys/system_archive/ng_word.h" |
| 8 | #include "core/file_sys/system_archive/system_archive.h" | 9 | #include "core/file_sys/system_archive/system_archive.h" |
| 9 | #include "core/file_sys/system_archive/system_version.h" | 10 | #include "core/file_sys/system_archive/system_version.h" |
| @@ -24,7 +25,7 @@ struct SystemArchiveDescriptor { | |||
| 24 | constexpr std::array<SystemArchiveDescriptor, SYSTEM_ARCHIVE_COUNT> SYSTEM_ARCHIVES{{ | 25 | constexpr std::array<SystemArchiveDescriptor, SYSTEM_ARCHIVE_COUNT> SYSTEM_ARCHIVES{{ |
| 25 | {0x0100000000000800, "CertStore", nullptr}, | 26 | {0x0100000000000800, "CertStore", nullptr}, |
| 26 | {0x0100000000000801, "ErrorMessage", nullptr}, | 27 | {0x0100000000000801, "ErrorMessage", nullptr}, |
| 27 | {0x0100000000000802, "MiiModel", nullptr}, | 28 | {0x0100000000000802, "MiiModel", &MiiModel}, |
| 28 | {0x0100000000000803, "BrowserDll", nullptr}, | 29 | {0x0100000000000803, "BrowserDll", nullptr}, |
| 29 | {0x0100000000000804, "Help", nullptr}, | 30 | {0x0100000000000804, "Help", nullptr}, |
| 30 | {0x0100000000000805, "SharedFont", nullptr}, | 31 | {0x0100000000000805, "SharedFont", nullptr}, |
diff --git a/src/core/hardware_interrupt_manager.cpp b/src/core/hardware_interrupt_manager.cpp new file mode 100644 index 000000000..c2115db2d --- /dev/null +++ b/src/core/hardware_interrupt_manager.cpp | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2019 Yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "core/core.h" | ||
| 6 | #include "core/core_timing.h" | ||
| 7 | #include "core/hardware_interrupt_manager.h" | ||
| 8 | #include "core/hle/service/nvdrv/interface.h" | ||
| 9 | #include "core/hle/service/sm/sm.h" | ||
| 10 | |||
| 11 | namespace Core::Hardware { | ||
| 12 | |||
| 13 | InterruptManager::InterruptManager(Core::System& system_in) : system(system_in) { | ||
| 14 | gpu_interrupt_event = | ||
| 15 | system.CoreTiming().RegisterEvent("GPUInterrupt", [this](u64 message, s64) { | ||
| 16 | auto nvdrv = system.ServiceManager().GetService<Service::Nvidia::NVDRV>("nvdrv"); | ||
| 17 | const u32 syncpt = static_cast<u32>(message >> 32); | ||
| 18 | const u32 value = static_cast<u32>(message); | ||
| 19 | nvdrv->SignalGPUInterruptSyncpt(syncpt, value); | ||
| 20 | }); | ||
| 21 | } | ||
| 22 | |||
| 23 | InterruptManager::~InterruptManager() = default; | ||
| 24 | |||
| 25 | void InterruptManager::GPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) { | ||
| 26 | const u64 msg = (static_cast<u64>(syncpoint_id) << 32ULL) | value; | ||
| 27 | system.CoreTiming().ScheduleEvent(10, gpu_interrupt_event, msg); | ||
| 28 | } | ||
| 29 | |||
| 30 | } // namespace Core::Hardware | ||
diff --git a/src/core/hardware_interrupt_manager.h b/src/core/hardware_interrupt_manager.h new file mode 100644 index 000000000..494db883a --- /dev/null +++ b/src/core/hardware_interrupt_manager.h | |||
| @@ -0,0 +1,31 @@ | |||
| 1 | // Copyright 2019 Yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Core { | ||
| 10 | class System; | ||
| 11 | } | ||
| 12 | |||
| 13 | namespace Core::Timing { | ||
| 14 | struct EventType; | ||
| 15 | } | ||
| 16 | |||
| 17 | namespace Core::Hardware { | ||
| 18 | |||
| 19 | class InterruptManager { | ||
| 20 | public: | ||
| 21 | explicit InterruptManager(Core::System& system); | ||
| 22 | ~InterruptManager(); | ||
| 23 | |||
| 24 | void GPUInterruptSyncpt(u32 syncpoint_id, u32 value); | ||
| 25 | |||
| 26 | private: | ||
| 27 | Core::System& system; | ||
| 28 | Core::Timing::EventType* gpu_interrupt_event{}; | ||
| 29 | }; | ||
| 30 | |||
| 31 | } // namespace Core::Hardware | ||
diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h index 879957dcb..d8ad54030 100644 --- a/src/core/hle/kernel/code_set.h +++ b/src/core/hle/kernel/code_set.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "core/hle/kernel/physical_memory.h" | ||
| 11 | 12 | ||
| 12 | namespace Kernel { | 13 | namespace Kernel { |
| 13 | 14 | ||
| @@ -77,7 +78,7 @@ struct CodeSet final { | |||
| 77 | } | 78 | } |
| 78 | 79 | ||
| 79 | /// The overall data that backs this code set. | 80 | /// The overall data that backs this code set. |
| 80 | std::vector<u8> memory; | 81 | Kernel::PhysicalMemory memory; |
| 81 | 82 | ||
| 82 | /// The segments that comprise this code set. | 83 | /// The segments that comprise this code set. |
| 83 | std::array<Segment, 3> segments; | 84 | std::array<Segment, 3> segments; |
diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h new file mode 100644 index 000000000..090565310 --- /dev/null +++ b/src/core/hle/kernel/physical_memory.h | |||
| @@ -0,0 +1,19 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/alignment.h" | ||
| 8 | |||
| 9 | namespace Kernel { | ||
| 10 | |||
| 11 | // This encapsulation serves 2 purposes: | ||
| 12 | // - First, to encapsulate host physical memory under a single type and set an | ||
| 13 | // standard for managing it. | ||
| 14 | // - Second to ensure all host backing memory used is aligned to 256 bytes due | ||
| 15 | // to strict alignment restrictions on GPU memory. | ||
| 16 | |||
| 17 | using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>; | ||
| 18 | |||
| 19 | } // namespace Kernel | ||
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 7cfc513a1..e80a12ac3 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <bitset> | ||
| 6 | #include <memory> | 7 | #include <memory> |
| 7 | #include <random> | 8 | #include <random> |
| 8 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| @@ -48,8 +49,58 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) { | |||
| 48 | } | 49 | } |
| 49 | } // Anonymous namespace | 50 | } // Anonymous namespace |
| 50 | 51 | ||
| 51 | SharedPtr<Process> Process::Create(Core::System& system, std::string name, | 52 | // Represents a page used for thread-local storage. |
| 52 | Process::ProcessType type) { | 53 | // |
| 54 | // Each TLS page contains slots that may be used by processes and threads. | ||
| 55 | // Every process and thread is created with a slot in some arbitrary page | ||
| 56 | // (whichever page happens to have an available slot). | ||
| 57 | class TLSPage { | ||
| 58 | public: | ||
| 59 | static constexpr std::size_t num_slot_entries = Memory::PAGE_SIZE / Memory::TLS_ENTRY_SIZE; | ||
| 60 | |||
| 61 | explicit TLSPage(VAddr address) : base_address{address} {} | ||
| 62 | |||
| 63 | bool HasAvailableSlots() const { | ||
| 64 | return !is_slot_used.all(); | ||
| 65 | } | ||
| 66 | |||
| 67 | VAddr GetBaseAddress() const { | ||
| 68 | return base_address; | ||
| 69 | } | ||
| 70 | |||
| 71 | std::optional<VAddr> ReserveSlot() { | ||
| 72 | for (std::size_t i = 0; i < is_slot_used.size(); i++) { | ||
| 73 | if (is_slot_used[i]) { | ||
| 74 | continue; | ||
| 75 | } | ||
| 76 | |||
| 77 | is_slot_used[i] = true; | ||
| 78 | return base_address + (i * Memory::TLS_ENTRY_SIZE); | ||
| 79 | } | ||
| 80 | |||
| 81 | return std::nullopt; | ||
| 82 | } | ||
| 83 | |||
| 84 | void ReleaseSlot(VAddr address) { | ||
| 85 | // Ensure that all given addresses are consistent with how TLS pages | ||
| 86 | // are intended to be used when releasing slots. | ||
| 87 | ASSERT(IsWithinPage(address)); | ||
| 88 | ASSERT((address % Memory::TLS_ENTRY_SIZE) == 0); | ||
| 89 | |||
| 90 | const std::size_t index = (address - base_address) / Memory::TLS_ENTRY_SIZE; | ||
| 91 | is_slot_used[index] = false; | ||
| 92 | } | ||
| 93 | |||
| 94 | private: | ||
| 95 | bool IsWithinPage(VAddr address) const { | ||
| 96 | return base_address <= address && address < base_address + Memory::PAGE_SIZE; | ||
| 97 | } | ||
| 98 | |||
| 99 | VAddr base_address; | ||
| 100 | std::bitset<num_slot_entries> is_slot_used; | ||
| 101 | }; | ||
| 102 | |||
| 103 | SharedPtr<Process> Process::Create(Core::System& system, std::string name, ProcessType type) { | ||
| 53 | auto& kernel = system.Kernel(); | 104 | auto& kernel = system.Kernel(); |
| 54 | 105 | ||
| 55 | SharedPtr<Process> process(new Process(system)); | 106 | SharedPtr<Process> process(new Process(system)); |
| @@ -78,20 +129,17 @@ u64 Process::GetTotalPhysicalMemoryAvailable() const { | |||
| 78 | return vm_manager.GetTotalPhysicalMemoryAvailable(); | 129 | return vm_manager.GetTotalPhysicalMemoryAvailable(); |
| 79 | } | 130 | } |
| 80 | 131 | ||
| 81 | u64 Process::GetTotalPhysicalMemoryAvailableWithoutMmHeap() const { | 132 | u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const { |
| 82 | // TODO: Subtract the personal heap size from this when the | 133 | return GetTotalPhysicalMemoryAvailable() - GetSystemResourceSize(); |
| 83 | // personal heap is implemented. | ||
| 84 | return GetTotalPhysicalMemoryAvailable(); | ||
| 85 | } | 134 | } |
| 86 | 135 | ||
| 87 | u64 Process::GetTotalPhysicalMemoryUsed() const { | 136 | u64 Process::GetTotalPhysicalMemoryUsed() const { |
| 88 | return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size; | 137 | return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size + |
| 138 | GetSystemResourceUsage(); | ||
| 89 | } | 139 | } |
| 90 | 140 | ||
| 91 | u64 Process::GetTotalPhysicalMemoryUsedWithoutMmHeap() const { | 141 | u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const { |
| 92 | // TODO: Subtract the personal heap size from this when the | 142 | return GetTotalPhysicalMemoryUsed() - GetSystemResourceUsage(); |
| 93 | // personal heap is implemented. | ||
| 94 | return GetTotalPhysicalMemoryUsed(); | ||
| 95 | } | 143 | } |
| 96 | 144 | ||
| 97 | void Process::RegisterThread(const Thread* thread) { | 145 | void Process::RegisterThread(const Thread* thread) { |
| @@ -121,6 +169,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { | |||
| 121 | program_id = metadata.GetTitleID(); | 169 | program_id = metadata.GetTitleID(); |
| 122 | ideal_core = metadata.GetMainThreadCore(); | 170 | ideal_core = metadata.GetMainThreadCore(); |
| 123 | is_64bit_process = metadata.Is64BitProgram(); | 171 | is_64bit_process = metadata.Is64BitProgram(); |
| 172 | system_resource_size = metadata.GetSystemResourceSize(); | ||
| 124 | 173 | ||
| 125 | vm_manager.Reset(metadata.GetAddressSpaceType()); | 174 | vm_manager.Reset(metadata.GetAddressSpaceType()); |
| 126 | 175 | ||
| @@ -135,19 +184,11 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { | |||
| 135 | } | 184 | } |
| 136 | 185 | ||
| 137 | void Process::Run(s32 main_thread_priority, u64 stack_size) { | 186 | void Process::Run(s32 main_thread_priority, u64 stack_size) { |
| 138 | // The kernel always ensures that the given stack size is page aligned. | 187 | AllocateMainThreadStack(stack_size); |
| 139 | main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); | 188 | tls_region_address = CreateTLSRegion(); |
| 140 | |||
| 141 | // Allocate and map the main thread stack | ||
| 142 | // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part | ||
| 143 | // of the user address space. | ||
| 144 | const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size; | ||
| 145 | vm_manager | ||
| 146 | .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size), | ||
| 147 | 0, main_thread_stack_size, MemoryState::Stack) | ||
| 148 | .Unwrap(); | ||
| 149 | 189 | ||
| 150 | vm_manager.LogLayout(); | 190 | vm_manager.LogLayout(); |
| 191 | |||
| 151 | ChangeStatus(ProcessStatus::Running); | 192 | ChangeStatus(ProcessStatus::Running); |
| 152 | 193 | ||
| 153 | SetupMainThread(*this, kernel, main_thread_priority); | 194 | SetupMainThread(*this, kernel, main_thread_priority); |
| @@ -177,69 +218,66 @@ void Process::PrepareForTermination() { | |||
| 177 | stop_threads(system.Scheduler(2).GetThreadList()); | 218 | stop_threads(system.Scheduler(2).GetThreadList()); |
| 178 | stop_threads(system.Scheduler(3).GetThreadList()); | 219 | stop_threads(system.Scheduler(3).GetThreadList()); |
| 179 | 220 | ||
| 221 | FreeTLSRegion(tls_region_address); | ||
| 222 | tls_region_address = 0; | ||
| 223 | |||
| 180 | ChangeStatus(ProcessStatus::Exited); | 224 | ChangeStatus(ProcessStatus::Exited); |
| 181 | } | 225 | } |
| 182 | 226 | ||
| 183 | /** | 227 | /** |
| 184 | * Finds a free location for the TLS section of a thread. | 228 | * Attempts to find a TLS page that contains a free slot for |
| 185 | * @param tls_slots The TLS page array of the thread's owner process. | 229 | * use by a thread. |
| 186 | * Returns a tuple of (page, slot, alloc_needed) where: | 230 | * |
| 187 | * page: The index of the first allocated TLS page that has free slots. | 231 | * @returns If a page with an available slot is found, then an iterator |
| 188 | * slot: The index of the first free slot in the indicated page. | 232 | * pointing to the page is returned. Otherwise the end iterator |
| 189 | * alloc_needed: Whether there's a need to allocate a new TLS page (All pages are full). | 233 | * is returned instead. |
| 190 | */ | 234 | */ |
| 191 | static std::tuple<std::size_t, std::size_t, bool> FindFreeThreadLocalSlot( | 235 | static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) { |
| 192 | const std::vector<std::bitset<8>>& tls_slots) { | 236 | return std::find_if(tls_pages.begin(), tls_pages.end(), |
| 193 | // Iterate over all the allocated pages, and try to find one where not all slots are used. | 237 | [](const auto& page) { return page.HasAvailableSlots(); }); |
| 194 | for (std::size_t page = 0; page < tls_slots.size(); ++page) { | ||
| 195 | const auto& page_tls_slots = tls_slots[page]; | ||
| 196 | if (!page_tls_slots.all()) { | ||
| 197 | // We found a page with at least one free slot, find which slot it is | ||
| 198 | for (std::size_t slot = 0; slot < page_tls_slots.size(); ++slot) { | ||
| 199 | if (!page_tls_slots.test(slot)) { | ||
| 200 | return std::make_tuple(page, slot, false); | ||
| 201 | } | ||
| 202 | } | ||
| 203 | } | ||
| 204 | } | ||
| 205 | |||
| 206 | return std::make_tuple(0, 0, true); | ||
| 207 | } | 238 | } |
| 208 | 239 | ||
| 209 | VAddr Process::MarkNextAvailableTLSSlotAsUsed(Thread& thread) { | 240 | VAddr Process::CreateTLSRegion() { |
| 210 | auto [available_page, available_slot, needs_allocation] = FindFreeThreadLocalSlot(tls_slots); | 241 | auto tls_page_iter = FindTLSPageWithAvailableSlots(tls_pages); |
| 211 | const VAddr tls_begin = vm_manager.GetTLSIORegionBaseAddress(); | ||
| 212 | 242 | ||
| 213 | if (needs_allocation) { | 243 | if (tls_page_iter == tls_pages.cend()) { |
| 214 | tls_slots.emplace_back(0); // The page is completely available at the start | 244 | const auto region_address = |
| 215 | available_page = tls_slots.size() - 1; | 245 | vm_manager.FindFreeRegion(vm_manager.GetTLSIORegionBaseAddress(), |
| 216 | available_slot = 0; // Use the first slot in the new page | 246 | vm_manager.GetTLSIORegionEndAddress(), Memory::PAGE_SIZE); |
| 247 | ASSERT(region_address.Succeeded()); | ||
| 217 | 248 | ||
| 218 | // Allocate some memory from the end of the linear heap for this region. | 249 | const auto map_result = vm_manager.MapMemoryBlock( |
| 219 | auto& tls_memory = thread.GetTLSMemory(); | 250 | *region_address, std::make_shared<PhysicalMemory>(Memory::PAGE_SIZE), 0, |
| 220 | tls_memory->insert(tls_memory->end(), Memory::PAGE_SIZE, 0); | 251 | Memory::PAGE_SIZE, MemoryState::ThreadLocal); |
| 252 | ASSERT(map_result.Succeeded()); | ||
| 221 | 253 | ||
| 222 | vm_manager.RefreshMemoryBlockMappings(tls_memory.get()); | 254 | tls_pages.emplace_back(*region_address); |
| 223 | 255 | ||
| 224 | vm_manager.MapMemoryBlock(tls_begin + available_page * Memory::PAGE_SIZE, tls_memory, 0, | 256 | const auto reserve_result = tls_pages.back().ReserveSlot(); |
| 225 | Memory::PAGE_SIZE, MemoryState::ThreadLocal); | 257 | ASSERT(reserve_result.has_value()); |
| 226 | } | ||
| 227 | 258 | ||
| 228 | tls_slots[available_page].set(available_slot); | 259 | return *reserve_result; |
| 260 | } | ||
| 229 | 261 | ||
| 230 | return tls_begin + available_page * Memory::PAGE_SIZE + available_slot * Memory::TLS_ENTRY_SIZE; | 262 | return *tls_page_iter->ReserveSlot(); |
| 231 | } | 263 | } |
| 232 | 264 | ||
| 233 | void Process::FreeTLSSlot(VAddr tls_address) { | 265 | void Process::FreeTLSRegion(VAddr tls_address) { |
| 234 | const VAddr tls_base = tls_address - vm_manager.GetTLSIORegionBaseAddress(); | 266 | const VAddr aligned_address = Common::AlignDown(tls_address, Memory::PAGE_SIZE); |
| 235 | const VAddr tls_page = tls_base / Memory::PAGE_SIZE; | 267 | auto iter = |
| 236 | const VAddr tls_slot = (tls_base % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE; | 268 | std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) { |
| 269 | return page.GetBaseAddress() == aligned_address; | ||
| 270 | }); | ||
| 237 | 271 | ||
| 238 | tls_slots[tls_page].reset(tls_slot); | 272 | // Something has gone very wrong if we're freeing a region |
| 273 | // with no actual page available. | ||
| 274 | ASSERT(iter != tls_pages.cend()); | ||
| 275 | |||
| 276 | iter->ReleaseSlot(tls_address); | ||
| 239 | } | 277 | } |
| 240 | 278 | ||
| 241 | void Process::LoadModule(CodeSet module_, VAddr base_addr) { | 279 | void Process::LoadModule(CodeSet module_, VAddr base_addr) { |
| 242 | const auto memory = std::make_shared<std::vector<u8>>(std::move(module_.memory)); | 280 | const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory)); |
| 243 | 281 | ||
| 244 | const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions, | 282 | const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions, |
| 245 | MemoryState memory_state) { | 283 | MemoryState memory_state) { |
| @@ -282,4 +320,16 @@ void Process::ChangeStatus(ProcessStatus new_status) { | |||
| 282 | WakeupAllWaitingThreads(); | 320 | WakeupAllWaitingThreads(); |
| 283 | } | 321 | } |
| 284 | 322 | ||
| 323 | void Process::AllocateMainThreadStack(u64 stack_size) { | ||
| 324 | // The kernel always ensures that the given stack size is page aligned. | ||
| 325 | main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); | ||
| 326 | |||
| 327 | // Allocate and map the main thread stack | ||
| 328 | const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size; | ||
| 329 | vm_manager | ||
| 330 | .MapMemoryBlock(mapping_address, std::make_shared<PhysicalMemory>(main_thread_stack_size), | ||
| 331 | 0, main_thread_stack_size, MemoryState::Stack) | ||
| 332 | .Unwrap(); | ||
| 333 | } | ||
| 334 | |||
| 285 | } // namespace Kernel | 335 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index 248fd3840..c2df451f3 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <bitset> | ||
| 9 | #include <cstddef> | 8 | #include <cstddef> |
| 10 | #include <list> | 9 | #include <list> |
| 11 | #include <string> | 10 | #include <string> |
| @@ -32,6 +31,7 @@ namespace Kernel { | |||
| 32 | class KernelCore; | 31 | class KernelCore; |
| 33 | class ResourceLimit; | 32 | class ResourceLimit; |
| 34 | class Thread; | 33 | class Thread; |
| 34 | class TLSPage; | ||
| 35 | 35 | ||
| 36 | struct CodeSet; | 36 | struct CodeSet; |
| 37 | 37 | ||
| @@ -135,6 +135,11 @@ public: | |||
| 135 | return mutex; | 135 | return mutex; |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | /// Gets the address to the process' dedicated TLS region. | ||
| 139 | VAddr GetTLSRegionAddress() const { | ||
| 140 | return tls_region_address; | ||
| 141 | } | ||
| 142 | |||
| 138 | /// Gets the current status of the process | 143 | /// Gets the current status of the process |
| 139 | ProcessStatus GetStatus() const { | 144 | ProcessStatus GetStatus() const { |
| 140 | return status; | 145 | return status; |
| @@ -168,8 +173,24 @@ public: | |||
| 168 | return capabilities.GetPriorityMask(); | 173 | return capabilities.GetPriorityMask(); |
| 169 | } | 174 | } |
| 170 | 175 | ||
| 171 | u32 IsVirtualMemoryEnabled() const { | 176 | /// Gets the amount of secure memory to allocate for memory management. |
| 172 | return is_virtual_address_memory_enabled; | 177 | u32 GetSystemResourceSize() const { |
| 178 | return system_resource_size; | ||
| 179 | } | ||
| 180 | |||
| 181 | /// Gets the amount of secure memory currently in use for memory management. | ||
| 182 | u32 GetSystemResourceUsage() const { | ||
| 183 | // On hardware, this returns the amount of system resource memory that has | ||
| 184 | // been used by the kernel. This is problematic for Yuzu to emulate, because | ||
| 185 | // system resource memory is used for page tables -- and yuzu doesn't really | ||
| 186 | // have a way to calculate how much memory is required for page tables for | ||
| 187 | // the current process at any given time. | ||
| 188 | // TODO: Is this even worth implementing? Games may retrieve this value via | ||
| 189 | // an SDK function that gets used + available system resource size for debug | ||
| 190 | // or diagnostic purposes. However, it seems unlikely that a game would make | ||
| 191 | // decisions based on how much system memory is dedicated to its page tables. | ||
| 192 | // Is returning a value other than zero wise? | ||
| 193 | return 0; | ||
| 173 | } | 194 | } |
| 174 | 195 | ||
| 175 | /// Whether this process is an AArch64 or AArch32 process. | 196 | /// Whether this process is an AArch64 or AArch32 process. |
| @@ -196,15 +217,15 @@ public: | |||
| 196 | u64 GetTotalPhysicalMemoryAvailable() const; | 217 | u64 GetTotalPhysicalMemoryAvailable() const; |
| 197 | 218 | ||
| 198 | /// Retrieves the total physical memory available to this process in bytes, | 219 | /// Retrieves the total physical memory available to this process in bytes, |
| 199 | /// without the size of the personal heap added to it. | 220 | /// without the size of the personal system resource heap added to it. |
| 200 | u64 GetTotalPhysicalMemoryAvailableWithoutMmHeap() const; | 221 | u64 GetTotalPhysicalMemoryAvailableWithoutSystemResource() const; |
| 201 | 222 | ||
| 202 | /// Retrieves the total physical memory used by this process in bytes. | 223 | /// Retrieves the total physical memory used by this process in bytes. |
| 203 | u64 GetTotalPhysicalMemoryUsed() const; | 224 | u64 GetTotalPhysicalMemoryUsed() const; |
| 204 | 225 | ||
| 205 | /// Retrieves the total physical memory used by this process in bytes, | 226 | /// Retrieves the total physical memory used by this process in bytes, |
| 206 | /// without the size of the personal heap added to it. | 227 | /// without the size of the personal system resource heap added to it. |
| 207 | u64 GetTotalPhysicalMemoryUsedWithoutMmHeap() const; | 228 | u64 GetTotalPhysicalMemoryUsedWithoutSystemResource() const; |
| 208 | 229 | ||
| 209 | /// Gets the list of all threads created with this process as their owner. | 230 | /// Gets the list of all threads created with this process as their owner. |
| 210 | const std::list<const Thread*>& GetThreadList() const { | 231 | const std::list<const Thread*>& GetThreadList() const { |
| @@ -260,10 +281,10 @@ public: | |||
| 260 | // Thread-local storage management | 281 | // Thread-local storage management |
| 261 | 282 | ||
| 262 | // Marks the next available region as used and returns the address of the slot. | 283 | // Marks the next available region as used and returns the address of the slot. |
| 263 | VAddr MarkNextAvailableTLSSlotAsUsed(Thread& thread); | 284 | [[nodiscard]] VAddr CreateTLSRegion(); |
| 264 | 285 | ||
| 265 | // Frees a used TLS slot identified by the given address | 286 | // Frees a used TLS slot identified by the given address |
| 266 | void FreeTLSSlot(VAddr tls_address); | 287 | void FreeTLSRegion(VAddr tls_address); |
| 267 | 288 | ||
| 268 | private: | 289 | private: |
| 269 | explicit Process(Core::System& system); | 290 | explicit Process(Core::System& system); |
| @@ -280,6 +301,9 @@ private: | |||
| 280 | /// a process signal. | 301 | /// a process signal. |
| 281 | void ChangeStatus(ProcessStatus new_status); | 302 | void ChangeStatus(ProcessStatus new_status); |
| 282 | 303 | ||
| 304 | /// Allocates the main thread stack for the process, given the stack size in bytes. | ||
| 305 | void AllocateMainThreadStack(u64 stack_size); | ||
| 306 | |||
| 283 | /// Memory manager for this process. | 307 | /// Memory manager for this process. |
| 284 | Kernel::VMManager vm_manager; | 308 | Kernel::VMManager vm_manager; |
| 285 | 309 | ||
| @@ -290,7 +314,7 @@ private: | |||
| 290 | u64 code_memory_size = 0; | 314 | u64 code_memory_size = 0; |
| 291 | 315 | ||
| 292 | /// Current status of the process | 316 | /// Current status of the process |
| 293 | ProcessStatus status; | 317 | ProcessStatus status{}; |
| 294 | 318 | ||
| 295 | /// The ID of this process | 319 | /// The ID of this process |
| 296 | u64 process_id = 0; | 320 | u64 process_id = 0; |
| @@ -298,19 +322,23 @@ private: | |||
| 298 | /// Title ID corresponding to the process | 322 | /// Title ID corresponding to the process |
| 299 | u64 program_id = 0; | 323 | u64 program_id = 0; |
| 300 | 324 | ||
| 325 | /// Specifies additional memory to be reserved for the process's memory management by the | ||
| 326 | /// system. When this is non-zero, secure memory is allocated and used for page table allocation | ||
| 327 | /// instead of using the normal global page tables/memory block management. | ||
| 328 | u32 system_resource_size = 0; | ||
| 329 | |||
| 301 | /// Resource limit descriptor for this process | 330 | /// Resource limit descriptor for this process |
| 302 | SharedPtr<ResourceLimit> resource_limit; | 331 | SharedPtr<ResourceLimit> resource_limit; |
| 303 | 332 | ||
| 304 | /// The ideal CPU core for this process, threads are scheduled on this core by default. | 333 | /// The ideal CPU core for this process, threads are scheduled on this core by default. |
| 305 | u8 ideal_core = 0; | 334 | u8 ideal_core = 0; |
| 306 | u32 is_virtual_address_memory_enabled = 0; | ||
| 307 | 335 | ||
| 308 | /// The Thread Local Storage area is allocated as processes create threads, | 336 | /// The Thread Local Storage area is allocated as processes create threads, |
| 309 | /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part | 337 | /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part |
| 310 | /// holds the TLS for a specific thread. This vector contains which parts are in use for each | 338 | /// holds the TLS for a specific thread. This vector contains which parts are in use for each |
| 311 | /// page as a bitmask. | 339 | /// page as a bitmask. |
| 312 | /// This vector will grow as more pages are allocated for new threads. | 340 | /// This vector will grow as more pages are allocated for new threads. |
| 313 | std::vector<std::bitset<8>> tls_slots; | 341 | std::vector<TLSPage> tls_pages; |
| 314 | 342 | ||
| 315 | /// Contains the parsed process capability descriptors. | 343 | /// Contains the parsed process capability descriptors. |
| 316 | ProcessCapabilities capabilities; | 344 | ProcessCapabilities capabilities; |
| @@ -338,8 +366,11 @@ private: | |||
| 338 | /// variable related facilities. | 366 | /// variable related facilities. |
| 339 | Mutex mutex; | 367 | Mutex mutex; |
| 340 | 368 | ||
| 369 | /// Address indicating the location of the process' dedicated TLS region. | ||
| 370 | VAddr tls_region_address = 0; | ||
| 371 | |||
| 341 | /// Random values for svcGetInfo RandomEntropy | 372 | /// Random values for svcGetInfo RandomEntropy |
| 342 | std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy; | 373 | std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{}; |
| 343 | 374 | ||
| 344 | /// List of threads that are running with this process as their owner. | 375 | /// List of threads that are running with this process as their owner. |
| 345 | std::list<const Thread*> thread_list; | 376 | std::list<const Thread*> thread_list; |
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp index f15c5ee36..a815c4eea 100644 --- a/src/core/hle/kernel/shared_memory.cpp +++ b/src/core/hle/kernel/shared_memory.cpp | |||
| @@ -28,7 +28,7 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_ | |||
| 28 | shared_memory->other_permissions = other_permissions; | 28 | shared_memory->other_permissions = other_permissions; |
| 29 | 29 | ||
| 30 | if (address == 0) { | 30 | if (address == 0) { |
| 31 | shared_memory->backing_block = std::make_shared<std::vector<u8>>(size); | 31 | shared_memory->backing_block = std::make_shared<Kernel::PhysicalMemory>(size); |
| 32 | shared_memory->backing_block_offset = 0; | 32 | shared_memory->backing_block_offset = 0; |
| 33 | 33 | ||
| 34 | // Refresh the address mappings for the current process. | 34 | // Refresh the address mappings for the current process. |
| @@ -59,8 +59,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_ | |||
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | SharedPtr<SharedMemory> SharedMemory::CreateForApplet( | 61 | SharedPtr<SharedMemory> SharedMemory::CreateForApplet( |
| 62 | KernelCore& kernel, std::shared_ptr<std::vector<u8>> heap_block, std::size_t offset, u64 size, | 62 | KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset, |
| 63 | MemoryPermission permissions, MemoryPermission other_permissions, std::string name) { | 63 | u64 size, MemoryPermission permissions, MemoryPermission other_permissions, std::string name) { |
| 64 | SharedPtr<SharedMemory> shared_memory(new SharedMemory(kernel)); | 64 | SharedPtr<SharedMemory> shared_memory(new SharedMemory(kernel)); |
| 65 | 65 | ||
| 66 | shared_memory->owner_process = nullptr; | 66 | shared_memory->owner_process = nullptr; |
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h index c2b6155e1..01ca6dcd2 100644 --- a/src/core/hle/kernel/shared_memory.h +++ b/src/core/hle/kernel/shared_memory.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "core/hle/kernel/object.h" | 12 | #include "core/hle/kernel/object.h" |
| 13 | #include "core/hle/kernel/physical_memory.h" | ||
| 13 | #include "core/hle/kernel/process.h" | 14 | #include "core/hle/kernel/process.h" |
| 14 | #include "core/hle/result.h" | 15 | #include "core/hle/result.h" |
| 15 | 16 | ||
| @@ -62,12 +63,10 @@ public: | |||
| 62 | * block. | 63 | * block. |
| 63 | * @param name Optional object name, used for debugging purposes. | 64 | * @param name Optional object name, used for debugging purposes. |
| 64 | */ | 65 | */ |
| 65 | static SharedPtr<SharedMemory> CreateForApplet(KernelCore& kernel, | 66 | static SharedPtr<SharedMemory> CreateForApplet( |
| 66 | std::shared_ptr<std::vector<u8>> heap_block, | 67 | KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset, |
| 67 | std::size_t offset, u64 size, | 68 | u64 size, MemoryPermission permissions, MemoryPermission other_permissions, |
| 68 | MemoryPermission permissions, | 69 | std::string name = "Unknown Applet"); |
| 69 | MemoryPermission other_permissions, | ||
| 70 | std::string name = "Unknown Applet"); | ||
| 71 | 70 | ||
| 72 | std::string GetTypeName() const override { | 71 | std::string GetTypeName() const override { |
| 73 | return "SharedMemory"; | 72 | return "SharedMemory"; |
| @@ -135,7 +134,7 @@ private: | |||
| 135 | ~SharedMemory() override; | 134 | ~SharedMemory() override; |
| 136 | 135 | ||
| 137 | /// Backing memory for this shared memory block. | 136 | /// Backing memory for this shared memory block. |
| 138 | std::shared_ptr<std::vector<u8>> backing_block; | 137 | std::shared_ptr<PhysicalMemory> backing_block; |
| 139 | /// Offset into the backing block for this shared memory. | 138 | /// Offset into the backing block for this shared memory. |
| 140 | std::size_t backing_block_offset = 0; | 139 | std::size_t backing_block_offset = 0; |
| 141 | /// Size of the memory block. Page-aligned. | 140 | /// Size of the memory block. Page-aligned. |
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index de6363ff2..1fd1a732a 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -98,9 +98,9 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add | |||
| 98 | return ERR_INVALID_ADDRESS_STATE; | 98 | return ERR_INVALID_ADDRESS_STATE; |
| 99 | } | 99 | } |
| 100 | 100 | ||
| 101 | if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) { | 101 | if (!vm_manager.IsWithinStackRegion(dst_addr, size)) { |
| 102 | LOG_ERROR(Kernel_SVC, | 102 | LOG_ERROR(Kernel_SVC, |
| 103 | "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}", | 103 | "Destination is not within the stack region, addr=0x{:016X}, size=0x{:016X}", |
| 104 | dst_addr, size); | 104 | dst_addr, size); |
| 105 | return ERR_INVALID_MEMORY_RANGE; | 105 | return ERR_INVALID_MEMORY_RANGE; |
| 106 | } | 106 | } |
| @@ -318,7 +318,14 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad | |||
| 318 | return result; | 318 | return result; |
| 319 | } | 319 | } |
| 320 | 320 | ||
| 321 | return vm_manager.UnmapRange(dst_addr, size); | 321 | const auto unmap_res = vm_manager.UnmapRange(dst_addr, size); |
| 322 | |||
| 323 | // Reprotect the source mapping on success | ||
| 324 | if (unmap_res.IsSuccess()) { | ||
| 325 | ASSERT(vm_manager.ReprotectRange(src_addr, size, VMAPermission::ReadWrite).IsSuccess()); | ||
| 326 | } | ||
| 327 | |||
| 328 | return unmap_res; | ||
| 322 | } | 329 | } |
| 323 | 330 | ||
| 324 | /// Connect to an OS service given the port name, returns the handle to the port to out | 331 | /// Connect to an OS service given the port name, returns the handle to the port to out |
| @@ -726,19 +733,19 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 726 | // 2.0.0+ | 733 | // 2.0.0+ |
| 727 | ASLRRegionBaseAddr = 12, | 734 | ASLRRegionBaseAddr = 12, |
| 728 | ASLRRegionSize = 13, | 735 | ASLRRegionSize = 13, |
| 729 | NewMapRegionBaseAddr = 14, | 736 | StackRegionBaseAddr = 14, |
| 730 | NewMapRegionSize = 15, | 737 | StackRegionSize = 15, |
| 731 | // 3.0.0+ | 738 | // 3.0.0+ |
| 732 | IsVirtualAddressMemoryEnabled = 16, | 739 | SystemResourceSize = 16, |
| 733 | PersonalMmHeapUsage = 17, | 740 | SystemResourceUsage = 17, |
| 734 | TitleId = 18, | 741 | TitleId = 18, |
| 735 | // 4.0.0+ | 742 | // 4.0.0+ |
| 736 | PrivilegedProcessId = 19, | 743 | PrivilegedProcessId = 19, |
| 737 | // 5.0.0+ | 744 | // 5.0.0+ |
| 738 | UserExceptionContextAddr = 20, | 745 | UserExceptionContextAddr = 20, |
| 739 | // 6.0.0+ | 746 | // 6.0.0+ |
| 740 | TotalPhysicalMemoryAvailableWithoutMmHeap = 21, | 747 | TotalPhysicalMemoryAvailableWithoutSystemResource = 21, |
| 741 | TotalPhysicalMemoryUsedWithoutMmHeap = 22, | 748 | TotalPhysicalMemoryUsedWithoutSystemResource = 22, |
| 742 | }; | 749 | }; |
| 743 | 750 | ||
| 744 | const auto info_id_type = static_cast<GetInfoType>(info_id); | 751 | const auto info_id_type = static_cast<GetInfoType>(info_id); |
| @@ -752,16 +759,16 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 752 | case GetInfoType::HeapRegionSize: | 759 | case GetInfoType::HeapRegionSize: |
| 753 | case GetInfoType::ASLRRegionBaseAddr: | 760 | case GetInfoType::ASLRRegionBaseAddr: |
| 754 | case GetInfoType::ASLRRegionSize: | 761 | case GetInfoType::ASLRRegionSize: |
| 755 | case GetInfoType::NewMapRegionBaseAddr: | 762 | case GetInfoType::StackRegionBaseAddr: |
| 756 | case GetInfoType::NewMapRegionSize: | 763 | case GetInfoType::StackRegionSize: |
| 757 | case GetInfoType::TotalPhysicalMemoryAvailable: | 764 | case GetInfoType::TotalPhysicalMemoryAvailable: |
| 758 | case GetInfoType::TotalPhysicalMemoryUsed: | 765 | case GetInfoType::TotalPhysicalMemoryUsed: |
| 759 | case GetInfoType::IsVirtualAddressMemoryEnabled: | 766 | case GetInfoType::SystemResourceSize: |
| 760 | case GetInfoType::PersonalMmHeapUsage: | 767 | case GetInfoType::SystemResourceUsage: |
| 761 | case GetInfoType::TitleId: | 768 | case GetInfoType::TitleId: |
| 762 | case GetInfoType::UserExceptionContextAddr: | 769 | case GetInfoType::UserExceptionContextAddr: |
| 763 | case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap: | 770 | case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource: |
| 764 | case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: { | 771 | case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource: { |
| 765 | if (info_sub_id != 0) { | 772 | if (info_sub_id != 0) { |
| 766 | return ERR_INVALID_ENUM_VALUE; | 773 | return ERR_INVALID_ENUM_VALUE; |
| 767 | } | 774 | } |
| @@ -806,12 +813,12 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 806 | *result = process->VMManager().GetASLRRegionSize(); | 813 | *result = process->VMManager().GetASLRRegionSize(); |
| 807 | return RESULT_SUCCESS; | 814 | return RESULT_SUCCESS; |
| 808 | 815 | ||
| 809 | case GetInfoType::NewMapRegionBaseAddr: | 816 | case GetInfoType::StackRegionBaseAddr: |
| 810 | *result = process->VMManager().GetNewMapRegionBaseAddress(); | 817 | *result = process->VMManager().GetStackRegionBaseAddress(); |
| 811 | return RESULT_SUCCESS; | 818 | return RESULT_SUCCESS; |
| 812 | 819 | ||
| 813 | case GetInfoType::NewMapRegionSize: | 820 | case GetInfoType::StackRegionSize: |
| 814 | *result = process->VMManager().GetNewMapRegionSize(); | 821 | *result = process->VMManager().GetStackRegionSize(); |
| 815 | return RESULT_SUCCESS; | 822 | return RESULT_SUCCESS; |
| 816 | 823 | ||
| 817 | case GetInfoType::TotalPhysicalMemoryAvailable: | 824 | case GetInfoType::TotalPhysicalMemoryAvailable: |
| @@ -822,8 +829,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 822 | *result = process->GetTotalPhysicalMemoryUsed(); | 829 | *result = process->GetTotalPhysicalMemoryUsed(); |
| 823 | return RESULT_SUCCESS; | 830 | return RESULT_SUCCESS; |
| 824 | 831 | ||
| 825 | case GetInfoType::IsVirtualAddressMemoryEnabled: | 832 | case GetInfoType::SystemResourceSize: |
| 826 | *result = process->IsVirtualMemoryEnabled(); | 833 | *result = process->GetSystemResourceSize(); |
| 834 | return RESULT_SUCCESS; | ||
| 835 | |||
| 836 | case GetInfoType::SystemResourceUsage: | ||
| 837 | LOG_WARNING(Kernel_SVC, "(STUBBED) Attempted to query system resource usage"); | ||
| 838 | *result = process->GetSystemResourceUsage(); | ||
| 827 | return RESULT_SUCCESS; | 839 | return RESULT_SUCCESS; |
| 828 | 840 | ||
| 829 | case GetInfoType::TitleId: | 841 | case GetInfoType::TitleId: |
| @@ -831,17 +843,15 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 831 | return RESULT_SUCCESS; | 843 | return RESULT_SUCCESS; |
| 832 | 844 | ||
| 833 | case GetInfoType::UserExceptionContextAddr: | 845 | case GetInfoType::UserExceptionContextAddr: |
| 834 | LOG_WARNING(Kernel_SVC, | 846 | *result = process->GetTLSRegionAddress(); |
| 835 | "(STUBBED) Attempted to query user exception context address, returned 0"); | ||
| 836 | *result = 0; | ||
| 837 | return RESULT_SUCCESS; | 847 | return RESULT_SUCCESS; |
| 838 | 848 | ||
| 839 | case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap: | 849 | case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource: |
| 840 | *result = process->GetTotalPhysicalMemoryAvailable(); | 850 | *result = process->GetTotalPhysicalMemoryAvailableWithoutSystemResource(); |
| 841 | return RESULT_SUCCESS; | 851 | return RESULT_SUCCESS; |
| 842 | 852 | ||
| 843 | case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: | 853 | case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource: |
| 844 | *result = process->GetTotalPhysicalMemoryUsedWithoutMmHeap(); | 854 | *result = process->GetTotalPhysicalMemoryUsedWithoutSystemResource(); |
| 845 | return RESULT_SUCCESS; | 855 | return RESULT_SUCCESS; |
| 846 | 856 | ||
| 847 | default: | 857 | default: |
| @@ -946,6 +956,86 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 946 | } | 956 | } |
| 947 | } | 957 | } |
| 948 | 958 | ||
| 959 | /// Maps memory at a desired address | ||
| 960 | static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { | ||
| 961 | LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); | ||
| 962 | |||
| 963 | if (!Common::Is4KBAligned(addr)) { | ||
| 964 | LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr); | ||
| 965 | return ERR_INVALID_ADDRESS; | ||
| 966 | } | ||
| 967 | |||
| 968 | if (!Common::Is4KBAligned(size)) { | ||
| 969 | LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); | ||
| 970 | return ERR_INVALID_SIZE; | ||
| 971 | } | ||
| 972 | |||
| 973 | if (size == 0) { | ||
| 974 | LOG_ERROR(Kernel_SVC, "Size is zero"); | ||
| 975 | return ERR_INVALID_SIZE; | ||
| 976 | } | ||
| 977 | |||
| 978 | if (!(addr < addr + size)) { | ||
| 979 | LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); | ||
| 980 | return ERR_INVALID_MEMORY_RANGE; | ||
| 981 | } | ||
| 982 | |||
| 983 | Process* const current_process = system.Kernel().CurrentProcess(); | ||
| 984 | auto& vm_manager = current_process->VMManager(); | ||
| 985 | |||
| 986 | if (current_process->GetSystemResourceSize() == 0) { | ||
| 987 | LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); | ||
| 988 | return ERR_INVALID_STATE; | ||
| 989 | } | ||
| 990 | |||
| 991 | if (!vm_manager.IsWithinMapRegion(addr, size)) { | ||
| 992 | LOG_ERROR(Kernel_SVC, "Range not within map region"); | ||
| 993 | return ERR_INVALID_MEMORY_RANGE; | ||
| 994 | } | ||
| 995 | |||
| 996 | return vm_manager.MapPhysicalMemory(addr, size); | ||
| 997 | } | ||
| 998 | |||
| 999 | /// Unmaps memory previously mapped via MapPhysicalMemory | ||
| 1000 | static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { | ||
| 1001 | LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); | ||
| 1002 | |||
| 1003 | if (!Common::Is4KBAligned(addr)) { | ||
| 1004 | LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr); | ||
| 1005 | return ERR_INVALID_ADDRESS; | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | if (!Common::Is4KBAligned(size)) { | ||
| 1009 | LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); | ||
| 1010 | return ERR_INVALID_SIZE; | ||
| 1011 | } | ||
| 1012 | |||
| 1013 | if (size == 0) { | ||
| 1014 | LOG_ERROR(Kernel_SVC, "Size is zero"); | ||
| 1015 | return ERR_INVALID_SIZE; | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | if (!(addr < addr + size)) { | ||
| 1019 | LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); | ||
| 1020 | return ERR_INVALID_MEMORY_RANGE; | ||
| 1021 | } | ||
| 1022 | |||
| 1023 | Process* const current_process = system.Kernel().CurrentProcess(); | ||
| 1024 | auto& vm_manager = current_process->VMManager(); | ||
| 1025 | |||
| 1026 | if (current_process->GetSystemResourceSize() == 0) { | ||
| 1027 | LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); | ||
| 1028 | return ERR_INVALID_STATE; | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | if (!vm_manager.IsWithinMapRegion(addr, size)) { | ||
| 1032 | LOG_ERROR(Kernel_SVC, "Range not within map region"); | ||
| 1033 | return ERR_INVALID_MEMORY_RANGE; | ||
| 1034 | } | ||
| 1035 | |||
| 1036 | return vm_manager.UnmapPhysicalMemory(addr, size); | ||
| 1037 | } | ||
| 1038 | |||
| 949 | /// Sets the thread activity | 1039 | /// Sets the thread activity |
| 950 | static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) { | 1040 | static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) { |
| 951 | LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity); | 1041 | LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity); |
| @@ -1647,8 +1737,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var | |||
| 1647 | // Wait for an address (via Address Arbiter) | 1737 | // Wait for an address (via Address Arbiter) |
| 1648 | static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, | 1738 | static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, |
| 1649 | s64 timeout) { | 1739 | s64 timeout) { |
| 1650 | LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", | 1740 | LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address, |
| 1651 | address, type, value, timeout); | 1741 | type, value, timeout); |
| 1652 | 1742 | ||
| 1653 | // If the passed address is a kernel virtual address, return invalid memory state. | 1743 | // If the passed address is a kernel virtual address, return invalid memory state. |
| 1654 | if (Memory::IsKernelVirtualAddress(address)) { | 1744 | if (Memory::IsKernelVirtualAddress(address)) { |
| @@ -1670,8 +1760,8 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, | |||
| 1670 | // Signals to an address (via Address Arbiter) | 1760 | // Signals to an address (via Address Arbiter) |
| 1671 | static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value, | 1761 | static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value, |
| 1672 | s32 num_to_wake) { | 1762 | s32 num_to_wake) { |
| 1673 | LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}", | 1763 | LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}", |
| 1674 | address, type, value, num_to_wake); | 1764 | address, type, value, num_to_wake); |
| 1675 | 1765 | ||
| 1676 | // If the passed address is a kernel virtual address, return invalid memory state. | 1766 | // If the passed address is a kernel virtual address, return invalid memory state. |
| 1677 | if (Memory::IsKernelVirtualAddress(address)) { | 1767 | if (Memory::IsKernelVirtualAddress(address)) { |
| @@ -2303,8 +2393,8 @@ static const FunctionDef SVC_Table[] = { | |||
| 2303 | {0x29, SvcWrap<GetInfo>, "GetInfo"}, | 2393 | {0x29, SvcWrap<GetInfo>, "GetInfo"}, |
| 2304 | {0x2A, nullptr, "FlushEntireDataCache"}, | 2394 | {0x2A, nullptr, "FlushEntireDataCache"}, |
| 2305 | {0x2B, nullptr, "FlushDataCache"}, | 2395 | {0x2B, nullptr, "FlushDataCache"}, |
| 2306 | {0x2C, nullptr, "MapPhysicalMemory"}, | 2396 | {0x2C, SvcWrap<MapPhysicalMemory>, "MapPhysicalMemory"}, |
| 2307 | {0x2D, nullptr, "UnmapPhysicalMemory"}, | 2397 | {0x2D, SvcWrap<UnmapPhysicalMemory>, "UnmapPhysicalMemory"}, |
| 2308 | {0x2E, nullptr, "GetFutureThreadInfo"}, | 2398 | {0x2E, nullptr, "GetFutureThreadInfo"}, |
| 2309 | {0x2F, nullptr, "GetLastThreadInfo"}, | 2399 | {0x2F, nullptr, "GetLastThreadInfo"}, |
| 2310 | {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, | 2400 | {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, |
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index 865473c6f..c2d8d0dc3 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h | |||
| @@ -32,6 +32,11 @@ void SvcWrap(Core::System& system) { | |||
| 32 | FuncReturn(system, func(system, Param(system, 0)).raw); | 32 | FuncReturn(system, func(system, Param(system, 0)).raw); |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | template <ResultCode func(Core::System&, u64, u64)> | ||
| 36 | void SvcWrap(Core::System& system) { | ||
| 37 | FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw); | ||
| 38 | } | ||
| 39 | |||
| 35 | template <ResultCode func(Core::System&, u32)> | 40 | template <ResultCode func(Core::System&, u32)> |
| 36 | void SvcWrap(Core::System& system) { | 41 | void SvcWrap(Core::System& system) { |
| 37 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); | 42 | FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); |
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index a055a5002..ec529e7f2 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp | |||
| @@ -65,7 +65,7 @@ void Thread::Stop() { | |||
| 65 | owner_process->UnregisterThread(this); | 65 | owner_process->UnregisterThread(this); |
| 66 | 66 | ||
| 67 | // Mark the TLS slot in the thread's page as free. | 67 | // Mark the TLS slot in the thread's page as free. |
| 68 | owner_process->FreeTLSSlot(tls_address); | 68 | owner_process->FreeTLSRegion(tls_address); |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | void Thread::WakeAfterDelay(s64 nanoseconds) { | 71 | void Thread::WakeAfterDelay(s64 nanoseconds) { |
| @@ -205,9 +205,9 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name | |||
| 205 | thread->name = std::move(name); | 205 | thread->name = std::move(name); |
| 206 | thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap(); | 206 | thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap(); |
| 207 | thread->owner_process = &owner_process; | 207 | thread->owner_process = &owner_process; |
| 208 | thread->tls_address = thread->owner_process->CreateTLSRegion(); | ||
| 208 | thread->scheduler = &system.Scheduler(processor_id); | 209 | thread->scheduler = &system.Scheduler(processor_id); |
| 209 | thread->scheduler->AddThread(thread); | 210 | thread->scheduler->AddThread(thread); |
| 210 | thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread); | ||
| 211 | 211 | ||
| 212 | thread->owner_process->RegisterThread(thread.get()); | 212 | thread->owner_process->RegisterThread(thread.get()); |
| 213 | 213 | ||
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index b4b9cda7c..07e989637 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <functional> | 7 | #include <functional> |
| 8 | #include <memory> | ||
| 9 | #include <string> | 8 | #include <string> |
| 10 | #include <vector> | 9 | #include <vector> |
| 11 | 10 | ||
| @@ -78,9 +77,6 @@ enum class ThreadActivity : u32 { | |||
| 78 | 77 | ||
| 79 | class Thread final : public WaitObject { | 78 | class Thread final : public WaitObject { |
| 80 | public: | 79 | public: |
| 81 | using TLSMemory = std::vector<u8>; | ||
| 82 | using TLSMemoryPtr = std::shared_ptr<TLSMemory>; | ||
| 83 | |||
| 84 | using MutexWaitingThreads = std::vector<SharedPtr<Thread>>; | 80 | using MutexWaitingThreads = std::vector<SharedPtr<Thread>>; |
| 85 | 81 | ||
| 86 | using ThreadContext = Core::ARM_Interface::ThreadContext; | 82 | using ThreadContext = Core::ARM_Interface::ThreadContext; |
| @@ -169,14 +165,6 @@ public: | |||
| 169 | return thread_id; | 165 | return thread_id; |
| 170 | } | 166 | } |
| 171 | 167 | ||
| 172 | TLSMemoryPtr& GetTLSMemory() { | ||
| 173 | return tls_memory; | ||
| 174 | } | ||
| 175 | |||
| 176 | const TLSMemoryPtr& GetTLSMemory() const { | ||
| 177 | return tls_memory; | ||
| 178 | } | ||
| 179 | |||
| 180 | /// Resumes a thread from waiting | 168 | /// Resumes a thread from waiting |
| 181 | void ResumeFromWait(); | 169 | void ResumeFromWait(); |
| 182 | 170 | ||
| @@ -463,11 +451,9 @@ private: | |||
| 463 | u32 ideal_core{0xFFFFFFFF}; | 451 | u32 ideal_core{0xFFFFFFFF}; |
| 464 | u64 affinity_mask{0x1}; | 452 | u64 affinity_mask{0x1}; |
| 465 | 453 | ||
| 466 | TLSMemoryPtr tls_memory = std::make_shared<TLSMemory>(); | 454 | ThreadActivity activity = ThreadActivity::Normal; |
| 467 | 455 | ||
| 468 | std::string name; | 456 | std::string name; |
| 469 | |||
| 470 | ThreadActivity activity = ThreadActivity::Normal; | ||
| 471 | }; | 457 | }; |
| 472 | 458 | ||
| 473 | /** | 459 | /** |
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp index 26c4e5e67..1113c815e 100644 --- a/src/core/hle/kernel/transfer_memory.cpp +++ b/src/core/hle/kernel/transfer_memory.cpp | |||
| @@ -47,7 +47,7 @@ ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission p | |||
| 47 | return ERR_INVALID_STATE; | 47 | return ERR_INVALID_STATE; |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | backing_block = std::make_shared<std::vector<u8>>(size); | 50 | backing_block = std::make_shared<PhysicalMemory>(size); |
| 51 | 51 | ||
| 52 | const auto map_state = owner_permissions == MemoryPermission::None | 52 | const auto map_state = owner_permissions == MemoryPermission::None |
| 53 | ? MemoryState::TransferMemoryIsolated | 53 | ? MemoryState::TransferMemoryIsolated |
diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h index a140b1e2b..6be9dc094 100644 --- a/src/core/hle/kernel/transfer_memory.h +++ b/src/core/hle/kernel/transfer_memory.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "core/hle/kernel/object.h" | 10 | #include "core/hle/kernel/object.h" |
| 11 | #include "core/hle/kernel/physical_memory.h" | ||
| 11 | 12 | ||
| 12 | union ResultCode; | 13 | union ResultCode; |
| 13 | 14 | ||
| @@ -82,7 +83,7 @@ private: | |||
| 82 | ~TransferMemory() override; | 83 | ~TransferMemory() override; |
| 83 | 84 | ||
| 84 | /// Memory block backing this instance. | 85 | /// Memory block backing this instance. |
| 85 | std::shared_ptr<std::vector<u8>> backing_block; | 86 | std::shared_ptr<PhysicalMemory> backing_block; |
| 86 | 87 | ||
| 87 | /// The base address for the memory managed by this instance. | 88 | /// The base address for the memory managed by this instance. |
| 88 | VAddr base_address = 0; | 89 | VAddr base_address = 0; |
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index c929c2a52..c7af87073 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp | |||
| @@ -5,13 +5,15 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <iterator> | 6 | #include <iterator> |
| 7 | #include <utility> | 7 | #include <utility> |
| 8 | #include "common/alignment.h" | ||
| 8 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 9 | #include "common/logging/log.h" | 10 | #include "common/logging/log.h" |
| 10 | #include "common/memory_hook.h" | 11 | #include "common/memory_hook.h" |
| 11 | #include "core/arm/arm_interface.h" | ||
| 12 | #include "core/core.h" | 12 | #include "core/core.h" |
| 13 | #include "core/file_sys/program_metadata.h" | 13 | #include "core/file_sys/program_metadata.h" |
| 14 | #include "core/hle/kernel/errors.h" | 14 | #include "core/hle/kernel/errors.h" |
| 15 | #include "core/hle/kernel/process.h" | ||
| 16 | #include "core/hle/kernel/resource_limit.h" | ||
| 15 | #include "core/hle/kernel/vm_manager.h" | 17 | #include "core/hle/kernel/vm_manager.h" |
| 16 | #include "core/memory.h" | 18 | #include "core/memory.h" |
| 17 | #include "core/memory_setup.h" | 19 | #include "core/memory_setup.h" |
| @@ -49,10 +51,14 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { | |||
| 49 | type != next.type) { | 51 | type != next.type) { |
| 50 | return false; | 52 | return false; |
| 51 | } | 53 | } |
| 52 | if (type == VMAType::AllocatedMemoryBlock && | 54 | if ((attribute & MemoryAttribute::DeviceMapped) == MemoryAttribute::DeviceMapped) { |
| 53 | (backing_block != next.backing_block || offset + size != next.offset)) { | 55 | // TODO: Can device mapped memory be merged sanely? |
| 56 | // Not merging it may cause inaccuracies versus hardware when memory layout is queried. | ||
| 54 | return false; | 57 | return false; |
| 55 | } | 58 | } |
| 59 | if (type == VMAType::AllocatedMemoryBlock) { | ||
| 60 | return true; | ||
| 61 | } | ||
| 56 | if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) { | 62 | if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) { |
| 57 | return false; | 63 | return false; |
| 58 | } | 64 | } |
| @@ -98,9 +104,9 @@ bool VMManager::IsValidHandle(VMAHandle handle) const { | |||
| 98 | } | 104 | } |
| 99 | 105 | ||
| 100 | ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, | 106 | ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, |
| 101 | std::shared_ptr<std::vector<u8>> block, | 107 | std::shared_ptr<PhysicalMemory> block, |
| 102 | std::size_t offset, u64 size, | 108 | std::size_t offset, u64 size, |
| 103 | MemoryState state) { | 109 | MemoryState state, VMAPermission perm) { |
| 104 | ASSERT(block != nullptr); | 110 | ASSERT(block != nullptr); |
| 105 | ASSERT(offset + size <= block->size()); | 111 | ASSERT(offset + size <= block->size()); |
| 106 | 112 | ||
| @@ -109,17 +115,8 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, | |||
| 109 | VirtualMemoryArea& final_vma = vma_handle->second; | 115 | VirtualMemoryArea& final_vma = vma_handle->second; |
| 110 | ASSERT(final_vma.size == size); | 116 | ASSERT(final_vma.size == size); |
| 111 | 117 | ||
| 112 | system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset, | ||
| 113 | VMAPermission::ReadWriteExecute); | ||
| 114 | system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset, | ||
| 115 | VMAPermission::ReadWriteExecute); | ||
| 116 | system.ArmInterface(2).MapBackingMemory(target, size, block->data() + offset, | ||
| 117 | VMAPermission::ReadWriteExecute); | ||
| 118 | system.ArmInterface(3).MapBackingMemory(target, size, block->data() + offset, | ||
| 119 | VMAPermission::ReadWriteExecute); | ||
| 120 | |||
| 121 | final_vma.type = VMAType::AllocatedMemoryBlock; | 118 | final_vma.type = VMAType::AllocatedMemoryBlock; |
| 122 | final_vma.permissions = VMAPermission::ReadWrite; | 119 | final_vma.permissions = perm; |
| 123 | final_vma.state = state; | 120 | final_vma.state = state; |
| 124 | final_vma.backing_block = std::move(block); | 121 | final_vma.backing_block = std::move(block); |
| 125 | final_vma.offset = offset; | 122 | final_vma.offset = offset; |
| @@ -137,11 +134,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me | |||
| 137 | VirtualMemoryArea& final_vma = vma_handle->second; | 134 | VirtualMemoryArea& final_vma = vma_handle->second; |
| 138 | ASSERT(final_vma.size == size); | 135 | ASSERT(final_vma.size == size); |
| 139 | 136 | ||
| 140 | system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); | ||
| 141 | system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); | ||
| 142 | system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); | ||
| 143 | system.ArmInterface(3).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute); | ||
| 144 | |||
| 145 | final_vma.type = VMAType::BackingMemory; | 137 | final_vma.type = VMAType::BackingMemory; |
| 146 | final_vma.permissions = VMAPermission::ReadWrite; | 138 | final_vma.permissions = VMAPermission::ReadWrite; |
| 147 | final_vma.state = state; | 139 | final_vma.state = state; |
| @@ -152,22 +144,33 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me | |||
| 152 | } | 144 | } |
| 153 | 145 | ||
| 154 | ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const { | 146 | ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const { |
| 155 | // Find the first Free VMA. | 147 | return FindFreeRegion(GetASLRRegionBaseAddress(), GetASLRRegionEndAddress(), size); |
| 156 | const VAddr base = GetASLRRegionBaseAddress(); | 148 | } |
| 157 | const VMAHandle vma_handle = std::find_if(vma_map.begin(), vma_map.end(), [&](const auto& vma) { | ||
| 158 | if (vma.second.type != VMAType::Free) | ||
| 159 | return false; | ||
| 160 | 149 | ||
| 161 | const VAddr vma_end = vma.second.base + vma.second.size; | 150 | ResultVal<VAddr> VMManager::FindFreeRegion(VAddr begin, VAddr end, u64 size) const { |
| 162 | return vma_end > base && vma_end >= base + size; | 151 | ASSERT(begin < end); |
| 163 | }); | 152 | ASSERT(size <= end - begin); |
| 164 | 153 | ||
| 165 | if (vma_handle == vma_map.end()) { | 154 | const VMAHandle vma_handle = |
| 155 | std::find_if(vma_map.begin(), vma_map.end(), [begin, end, size](const auto& vma) { | ||
| 156 | if (vma.second.type != VMAType::Free) { | ||
| 157 | return false; | ||
| 158 | } | ||
| 159 | const VAddr vma_base = vma.second.base; | ||
| 160 | const VAddr vma_end = vma_base + vma.second.size; | ||
| 161 | const VAddr assumed_base = (begin < vma_base) ? vma_base : begin; | ||
| 162 | const VAddr used_range = assumed_base + size; | ||
| 163 | |||
| 164 | return vma_base <= assumed_base && assumed_base < used_range && used_range < end && | ||
| 165 | used_range <= vma_end; | ||
| 166 | }); | ||
| 167 | |||
| 168 | if (vma_handle == vma_map.cend()) { | ||
| 166 | // TODO(Subv): Find the correct error code here. | 169 | // TODO(Subv): Find the correct error code here. |
| 167 | return ResultCode(-1); | 170 | return ResultCode(-1); |
| 168 | } | 171 | } |
| 169 | 172 | ||
| 170 | const VAddr target = std::max(base, vma_handle->second.base); | 173 | const VAddr target = std::max(begin, vma_handle->second.base); |
| 171 | return MakeResult<VAddr>(target); | 174 | return MakeResult<VAddr>(target); |
| 172 | } | 175 | } |
| 173 | 176 | ||
| @@ -219,11 +222,6 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) { | |||
| 219 | 222 | ||
| 220 | ASSERT(FindVMA(target)->second.size >= size); | 223 | ASSERT(FindVMA(target)->second.size >= size); |
| 221 | 224 | ||
| 222 | system.ArmInterface(0).UnmapMemory(target, size); | ||
| 223 | system.ArmInterface(1).UnmapMemory(target, size); | ||
| 224 | system.ArmInterface(2).UnmapMemory(target, size); | ||
| 225 | system.ArmInterface(3).UnmapMemory(target, size); | ||
| 226 | |||
| 227 | return RESULT_SUCCESS; | 225 | return RESULT_SUCCESS; |
| 228 | } | 226 | } |
| 229 | 227 | ||
| @@ -263,7 +261,7 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) { | |||
| 263 | 261 | ||
| 264 | if (heap_memory == nullptr) { | 262 | if (heap_memory == nullptr) { |
| 265 | // Initialize heap | 263 | // Initialize heap |
| 266 | heap_memory = std::make_shared<std::vector<u8>>(size); | 264 | heap_memory = std::make_shared<PhysicalMemory>(size); |
| 267 | heap_end = heap_region_base + size; | 265 | heap_end = heap_region_base + size; |
| 268 | } else { | 266 | } else { |
| 269 | UnmapRange(heap_region_base, GetCurrentHeapSize()); | 267 | UnmapRange(heap_region_base, GetCurrentHeapSize()); |
| @@ -297,6 +295,162 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) { | |||
| 297 | return MakeResult<VAddr>(heap_region_base); | 295 | return MakeResult<VAddr>(heap_region_base); |
| 298 | } | 296 | } |
| 299 | 297 | ||
| 298 | ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) { | ||
| 299 | // Check how much memory we've already mapped. | ||
| 300 | const auto mapped_size_result = SizeOfAllocatedVMAsInRange(target, size); | ||
| 301 | if (mapped_size_result.Failed()) { | ||
| 302 | return mapped_size_result.Code(); | ||
| 303 | } | ||
| 304 | |||
| 305 | // If we've already mapped the desired amount, return early. | ||
| 306 | const std::size_t mapped_size = *mapped_size_result; | ||
| 307 | if (mapped_size == size) { | ||
| 308 | return RESULT_SUCCESS; | ||
| 309 | } | ||
| 310 | |||
| 311 | // Check that we can map the memory we want. | ||
| 312 | const auto res_limit = system.CurrentProcess()->GetResourceLimit(); | ||
| 313 | const u64 physmem_remaining = res_limit->GetMaxResourceValue(ResourceType::PhysicalMemory) - | ||
| 314 | res_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory); | ||
| 315 | if (physmem_remaining < (size - mapped_size)) { | ||
| 316 | return ERR_RESOURCE_LIMIT_EXCEEDED; | ||
| 317 | } | ||
| 318 | |||
| 319 | // Keep track of the memory regions we unmap. | ||
| 320 | std::vector<std::pair<u64, u64>> mapped_regions; | ||
| 321 | ResultCode result = RESULT_SUCCESS; | ||
| 322 | |||
| 323 | // Iterate, trying to map memory. | ||
| 324 | { | ||
| 325 | const auto end_addr = target + size; | ||
| 326 | const auto last_addr = end_addr - 1; | ||
| 327 | VAddr cur_addr = target; | ||
| 328 | |||
| 329 | auto iter = FindVMA(target); | ||
| 330 | ASSERT(iter != vma_map.end()); | ||
| 331 | |||
| 332 | while (true) { | ||
| 333 | const auto& vma = iter->second; | ||
| 334 | const auto vma_start = vma.base; | ||
| 335 | const auto vma_end = vma_start + vma.size; | ||
| 336 | const auto vma_last = vma_end - 1; | ||
| 337 | |||
| 338 | // Map the memory block | ||
| 339 | const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr); | ||
| 340 | if (vma.state == MemoryState::Unmapped) { | ||
| 341 | const auto map_res = | ||
| 342 | MapMemoryBlock(cur_addr, std::make_shared<PhysicalMemory>(map_size), 0, | ||
| 343 | map_size, MemoryState::Heap, VMAPermission::ReadWrite); | ||
| 344 | result = map_res.Code(); | ||
| 345 | if (result.IsError()) { | ||
| 346 | break; | ||
| 347 | } | ||
| 348 | |||
| 349 | mapped_regions.emplace_back(cur_addr, map_size); | ||
| 350 | } | ||
| 351 | |||
| 352 | // Break once we hit the end of the range. | ||
| 353 | if (last_addr <= vma_last) { | ||
| 354 | break; | ||
| 355 | } | ||
| 356 | |||
| 357 | // Advance to the next block. | ||
| 358 | cur_addr = vma_end; | ||
| 359 | iter = FindVMA(cur_addr); | ||
| 360 | ASSERT(iter != vma_map.end()); | ||
| 361 | } | ||
| 362 | } | ||
| 363 | |||
| 364 | // If we failed, unmap memory. | ||
| 365 | if (result.IsError()) { | ||
| 366 | for (const auto [unmap_address, unmap_size] : mapped_regions) { | ||
| 367 | ASSERT_MSG(UnmapRange(unmap_address, unmap_size).IsSuccess(), | ||
| 368 | "Failed to unmap memory range."); | ||
| 369 | } | ||
| 370 | |||
| 371 | return result; | ||
| 372 | } | ||
| 373 | |||
| 374 | // Update amount of mapped physical memory. | ||
| 375 | physical_memory_mapped += size - mapped_size; | ||
| 376 | |||
| 377 | return RESULT_SUCCESS; | ||
| 378 | } | ||
| 379 | |||
| 380 | ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) { | ||
| 381 | // Check how much memory is currently mapped. | ||
| 382 | const auto mapped_size_result = SizeOfUnmappablePhysicalMemoryInRange(target, size); | ||
| 383 | if (mapped_size_result.Failed()) { | ||
| 384 | return mapped_size_result.Code(); | ||
| 385 | } | ||
| 386 | |||
| 387 | // If we've already unmapped all the memory, return early. | ||
| 388 | const std::size_t mapped_size = *mapped_size_result; | ||
| 389 | if (mapped_size == 0) { | ||
| 390 | return RESULT_SUCCESS; | ||
| 391 | } | ||
| 392 | |||
| 393 | // Keep track of the memory regions we unmap. | ||
| 394 | std::vector<std::pair<u64, u64>> unmapped_regions; | ||
| 395 | ResultCode result = RESULT_SUCCESS; | ||
| 396 | |||
| 397 | // Try to unmap regions. | ||
| 398 | { | ||
| 399 | const auto end_addr = target + size; | ||
| 400 | const auto last_addr = end_addr - 1; | ||
| 401 | VAddr cur_addr = target; | ||
| 402 | |||
| 403 | auto iter = FindVMA(target); | ||
| 404 | ASSERT(iter != vma_map.end()); | ||
| 405 | |||
| 406 | while (true) { | ||
| 407 | const auto& vma = iter->second; | ||
| 408 | const auto vma_start = vma.base; | ||
| 409 | const auto vma_end = vma_start + vma.size; | ||
| 410 | const auto vma_last = vma_end - 1; | ||
| 411 | |||
| 412 | // Unmap the memory block | ||
| 413 | const auto unmap_size = std::min(end_addr - cur_addr, vma_end - cur_addr); | ||
| 414 | if (vma.state == MemoryState::Heap) { | ||
| 415 | result = UnmapRange(cur_addr, unmap_size); | ||
| 416 | if (result.IsError()) { | ||
| 417 | break; | ||
| 418 | } | ||
| 419 | |||
| 420 | unmapped_regions.emplace_back(cur_addr, unmap_size); | ||
| 421 | } | ||
| 422 | |||
| 423 | // Break once we hit the end of the range. | ||
| 424 | if (last_addr <= vma_last) { | ||
| 425 | break; | ||
| 426 | } | ||
| 427 | |||
| 428 | // Advance to the next block. | ||
| 429 | cur_addr = vma_end; | ||
| 430 | iter = FindVMA(cur_addr); | ||
| 431 | ASSERT(iter != vma_map.end()); | ||
| 432 | } | ||
| 433 | } | ||
| 434 | |||
| 435 | // If we failed, re-map regions. | ||
| 436 | // TODO: Preserve memory contents? | ||
| 437 | if (result.IsError()) { | ||
| 438 | for (const auto [map_address, map_size] : unmapped_regions) { | ||
| 439 | const auto remap_res = | ||
| 440 | MapMemoryBlock(map_address, std::make_shared<PhysicalMemory>(map_size), 0, map_size, | ||
| 441 | MemoryState::Heap, VMAPermission::None); | ||
| 442 | ASSERT_MSG(remap_res.Succeeded(), "Failed to remap a memory block."); | ||
| 443 | } | ||
| 444 | |||
| 445 | return result; | ||
| 446 | } | ||
| 447 | |||
| 448 | // Update mapped amount | ||
| 449 | physical_memory_mapped -= mapped_size; | ||
| 450 | |||
| 451 | return RESULT_SUCCESS; | ||
| 452 | } | ||
| 453 | |||
| 300 | ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) { | 454 | ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) { |
| 301 | constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped; | 455 | constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped; |
| 302 | const auto src_check_result = CheckRangeState( | 456 | const auto src_check_result = CheckRangeState( |
| @@ -436,7 +590,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem | |||
| 436 | ASSERT_MSG(vma_offset + size <= vma->second.size, | 590 | ASSERT_MSG(vma_offset + size <= vma->second.size, |
| 437 | "Shared memory exceeds bounds of mapped block"); | 591 | "Shared memory exceeds bounds of mapped block"); |
| 438 | 592 | ||
| 439 | const std::shared_ptr<std::vector<u8>>& backing_block = vma->second.backing_block; | 593 | const std::shared_ptr<PhysicalMemory>& backing_block = vma->second.backing_block; |
| 440 | const std::size_t backing_block_offset = vma->second.offset + vma_offset; | 594 | const std::size_t backing_block_offset = vma->second.offset + vma_offset; |
| 441 | 595 | ||
| 442 | CASCADE_RESULT(auto new_vma, | 596 | CASCADE_RESULT(auto new_vma, |
| @@ -444,12 +598,12 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem | |||
| 444 | // Protect mirror with permissions from old region | 598 | // Protect mirror with permissions from old region |
| 445 | Reprotect(new_vma, vma->second.permissions); | 599 | Reprotect(new_vma, vma->second.permissions); |
| 446 | // Remove permissions from old region | 600 | // Remove permissions from old region |
| 447 | Reprotect(vma, VMAPermission::None); | 601 | ReprotectRange(src_addr, size, VMAPermission::None); |
| 448 | 602 | ||
| 449 | return RESULT_SUCCESS; | 603 | return RESULT_SUCCESS; |
| 450 | } | 604 | } |
| 451 | 605 | ||
| 452 | void VMManager::RefreshMemoryBlockMappings(const std::vector<u8>* block) { | 606 | void VMManager::RefreshMemoryBlockMappings(const PhysicalMemory* block) { |
| 453 | // If this ever proves to have a noticeable performance impact, allow users of the function to | 607 | // If this ever proves to have a noticeable performance impact, allow users of the function to |
| 454 | // specify a specific range of addresses to limit the scan to. | 608 | // specify a specific range of addresses to limit the scan to. |
| 455 | for (const auto& p : vma_map) { | 609 | for (const auto& p : vma_map) { |
| @@ -577,14 +731,14 @@ VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) { | |||
| 577 | VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { | 731 | VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { |
| 578 | const VMAIter next_vma = std::next(iter); | 732 | const VMAIter next_vma = std::next(iter); |
| 579 | if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) { | 733 | if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) { |
| 580 | iter->second.size += next_vma->second.size; | 734 | MergeAdjacentVMA(iter->second, next_vma->second); |
| 581 | vma_map.erase(next_vma); | 735 | vma_map.erase(next_vma); |
| 582 | } | 736 | } |
| 583 | 737 | ||
| 584 | if (iter != vma_map.begin()) { | 738 | if (iter != vma_map.begin()) { |
| 585 | VMAIter prev_vma = std::prev(iter); | 739 | VMAIter prev_vma = std::prev(iter); |
| 586 | if (prev_vma->second.CanBeMergedWith(iter->second)) { | 740 | if (prev_vma->second.CanBeMergedWith(iter->second)) { |
| 587 | prev_vma->second.size += iter->second.size; | 741 | MergeAdjacentVMA(prev_vma->second, iter->second); |
| 588 | vma_map.erase(iter); | 742 | vma_map.erase(iter); |
| 589 | iter = prev_vma; | 743 | iter = prev_vma; |
| 590 | } | 744 | } |
| @@ -593,6 +747,44 @@ VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { | |||
| 593 | return iter; | 747 | return iter; |
| 594 | } | 748 | } |
| 595 | 749 | ||
| 750 | void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right) { | ||
| 751 | ASSERT(left.CanBeMergedWith(right)); | ||
| 752 | |||
| 753 | // Always merge allocated memory blocks, even when they don't share the same backing block. | ||
| 754 | if (left.type == VMAType::AllocatedMemoryBlock && | ||
| 755 | (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) { | ||
| 756 | const auto right_begin = right.backing_block->begin() + right.offset; | ||
| 757 | const auto right_end = right_begin + right.size; | ||
| 758 | |||
| 759 | // Check if we can save work. | ||
| 760 | if (left.offset == 0 && left.size == left.backing_block->size()) { | ||
| 761 | // Fast case: left is an entire backing block. | ||
| 762 | left.backing_block->insert(left.backing_block->end(), right_begin, right_end); | ||
| 763 | } else { | ||
| 764 | // Slow case: make a new memory block for left and right. | ||
| 765 | const auto left_begin = left.backing_block->begin() + left.offset; | ||
| 766 | const auto left_end = left_begin + left.size; | ||
| 767 | const auto left_size = static_cast<std::size_t>(std::distance(left_begin, left_end)); | ||
| 768 | const auto right_size = static_cast<std::size_t>(std::distance(right_begin, right_end)); | ||
| 769 | |||
| 770 | auto new_memory = std::make_shared<PhysicalMemory>(); | ||
| 771 | new_memory->reserve(left_size + right_size); | ||
| 772 | new_memory->insert(new_memory->end(), left_begin, left_end); | ||
| 773 | new_memory->insert(new_memory->end(), right_begin, right_end); | ||
| 774 | |||
| 775 | left.backing_block = std::move(new_memory); | ||
| 776 | left.offset = 0; | ||
| 777 | } | ||
| 778 | |||
| 779 | // Page table update is needed, because backing memory changed. | ||
| 780 | left.size += right.size; | ||
| 781 | UpdatePageTableForVMA(left); | ||
| 782 | } else { | ||
| 783 | // Just update the size. | ||
| 784 | left.size += right.size; | ||
| 785 | } | ||
| 786 | } | ||
| 787 | |||
| 596 | void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { | 788 | void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { |
| 597 | switch (vma.type) { | 789 | switch (vma.type) { |
| 598 | case VMAType::Free: | 790 | case VMAType::Free: |
| @@ -614,9 +806,11 @@ void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { | |||
| 614 | void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType type) { | 806 | void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType type) { |
| 615 | u64 map_region_size = 0; | 807 | u64 map_region_size = 0; |
| 616 | u64 heap_region_size = 0; | 808 | u64 heap_region_size = 0; |
| 617 | u64 new_map_region_size = 0; | 809 | u64 stack_region_size = 0; |
| 618 | u64 tls_io_region_size = 0; | 810 | u64 tls_io_region_size = 0; |
| 619 | 811 | ||
| 812 | u64 stack_and_tls_io_end = 0; | ||
| 813 | |||
| 620 | switch (type) { | 814 | switch (type) { |
| 621 | case FileSys::ProgramAddressSpaceType::Is32Bit: | 815 | case FileSys::ProgramAddressSpaceType::Is32Bit: |
| 622 | case FileSys::ProgramAddressSpaceType::Is32BitNoMap: | 816 | case FileSys::ProgramAddressSpaceType::Is32BitNoMap: |
| @@ -632,6 +826,7 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty | |||
| 632 | map_region_size = 0; | 826 | map_region_size = 0; |
| 633 | heap_region_size = 0x80000000; | 827 | heap_region_size = 0x80000000; |
| 634 | } | 828 | } |
| 829 | stack_and_tls_io_end = 0x40000000; | ||
| 635 | break; | 830 | break; |
| 636 | case FileSys::ProgramAddressSpaceType::Is36Bit: | 831 | case FileSys::ProgramAddressSpaceType::Is36Bit: |
| 637 | address_space_width = 36; | 832 | address_space_width = 36; |
| @@ -641,6 +836,7 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty | |||
| 641 | aslr_region_end = aslr_region_base + 0xFF8000000; | 836 | aslr_region_end = aslr_region_base + 0xFF8000000; |
| 642 | map_region_size = 0x180000000; | 837 | map_region_size = 0x180000000; |
| 643 | heap_region_size = 0x180000000; | 838 | heap_region_size = 0x180000000; |
| 839 | stack_and_tls_io_end = 0x80000000; | ||
| 644 | break; | 840 | break; |
| 645 | case FileSys::ProgramAddressSpaceType::Is39Bit: | 841 | case FileSys::ProgramAddressSpaceType::Is39Bit: |
| 646 | address_space_width = 39; | 842 | address_space_width = 39; |
| @@ -650,7 +846,7 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty | |||
| 650 | aslr_region_end = aslr_region_base + 0x7FF8000000; | 846 | aslr_region_end = aslr_region_base + 0x7FF8000000; |
| 651 | map_region_size = 0x1000000000; | 847 | map_region_size = 0x1000000000; |
| 652 | heap_region_size = 0x180000000; | 848 | heap_region_size = 0x180000000; |
| 653 | new_map_region_size = 0x80000000; | 849 | stack_region_size = 0x80000000; |
| 654 | tls_io_region_size = 0x1000000000; | 850 | tls_io_region_size = 0x1000000000; |
| 655 | break; | 851 | break; |
| 656 | default: | 852 | default: |
| @@ -658,6 +854,8 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty | |||
| 658 | return; | 854 | return; |
| 659 | } | 855 | } |
| 660 | 856 | ||
| 857 | const u64 stack_and_tls_io_begin = aslr_region_base; | ||
| 858 | |||
| 661 | address_space_base = 0; | 859 | address_space_base = 0; |
| 662 | address_space_end = 1ULL << address_space_width; | 860 | address_space_end = 1ULL << address_space_width; |
| 663 | 861 | ||
| @@ -668,15 +866,20 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty | |||
| 668 | heap_region_end = heap_region_base + heap_region_size; | 866 | heap_region_end = heap_region_base + heap_region_size; |
| 669 | heap_end = heap_region_base; | 867 | heap_end = heap_region_base; |
| 670 | 868 | ||
| 671 | new_map_region_base = heap_region_end; | 869 | stack_region_base = heap_region_end; |
| 672 | new_map_region_end = new_map_region_base + new_map_region_size; | 870 | stack_region_end = stack_region_base + stack_region_size; |
| 673 | 871 | ||
| 674 | tls_io_region_base = new_map_region_end; | 872 | tls_io_region_base = stack_region_end; |
| 675 | tls_io_region_end = tls_io_region_base + tls_io_region_size; | 873 | tls_io_region_end = tls_io_region_base + tls_io_region_size; |
| 676 | 874 | ||
| 677 | if (new_map_region_size == 0) { | 875 | if (stack_region_size == 0) { |
| 678 | new_map_region_base = address_space_base; | 876 | stack_region_base = stack_and_tls_io_begin; |
| 679 | new_map_region_end = address_space_end; | 877 | stack_region_end = stack_and_tls_io_end; |
| 878 | } | ||
| 879 | |||
| 880 | if (tls_io_region_size == 0) { | ||
| 881 | tls_io_region_base = stack_and_tls_io_begin; | ||
| 882 | tls_io_region_end = stack_and_tls_io_end; | ||
| 680 | } | 883 | } |
| 681 | } | 884 | } |
| 682 | 885 | ||
| @@ -756,6 +959,84 @@ VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, Memo | |||
| 756 | std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask)); | 959 | std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask)); |
| 757 | } | 960 | } |
| 758 | 961 | ||
| 962 | ResultVal<std::size_t> VMManager::SizeOfAllocatedVMAsInRange(VAddr address, | ||
| 963 | std::size_t size) const { | ||
| 964 | const VAddr end_addr = address + size; | ||
| 965 | const VAddr last_addr = end_addr - 1; | ||
| 966 | std::size_t mapped_size = 0; | ||
| 967 | |||
| 968 | VAddr cur_addr = address; | ||
| 969 | auto iter = FindVMA(cur_addr); | ||
| 970 | ASSERT(iter != vma_map.end()); | ||
| 971 | |||
| 972 | while (true) { | ||
| 973 | const auto& vma = iter->second; | ||
| 974 | const VAddr vma_start = vma.base; | ||
| 975 | const VAddr vma_end = vma_start + vma.size; | ||
| 976 | const VAddr vma_last = vma_end - 1; | ||
| 977 | |||
| 978 | // Add size if relevant. | ||
| 979 | if (vma.state != MemoryState::Unmapped) { | ||
| 980 | mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr); | ||
| 981 | } | ||
| 982 | |||
| 983 | // Break once we hit the end of the range. | ||
| 984 | if (last_addr <= vma_last) { | ||
| 985 | break; | ||
| 986 | } | ||
| 987 | |||
| 988 | // Advance to the next block. | ||
| 989 | cur_addr = vma_end; | ||
| 990 | iter = std::next(iter); | ||
| 991 | ASSERT(iter != vma_map.end()); | ||
| 992 | } | ||
| 993 | |||
| 994 | return MakeResult(mapped_size); | ||
| 995 | } | ||
| 996 | |||
| 997 | ResultVal<std::size_t> VMManager::SizeOfUnmappablePhysicalMemoryInRange(VAddr address, | ||
| 998 | std::size_t size) const { | ||
| 999 | const VAddr end_addr = address + size; | ||
| 1000 | const VAddr last_addr = end_addr - 1; | ||
| 1001 | std::size_t mapped_size = 0; | ||
| 1002 | |||
| 1003 | VAddr cur_addr = address; | ||
| 1004 | auto iter = FindVMA(cur_addr); | ||
| 1005 | ASSERT(iter != vma_map.end()); | ||
| 1006 | |||
| 1007 | while (true) { | ||
| 1008 | const auto& vma = iter->second; | ||
| 1009 | const auto vma_start = vma.base; | ||
| 1010 | const auto vma_end = vma_start + vma.size; | ||
| 1011 | const auto vma_last = vma_end - 1; | ||
| 1012 | const auto state = vma.state; | ||
| 1013 | const auto attr = vma.attribute; | ||
| 1014 | |||
| 1015 | // Memory within region must be free or mapped heap. | ||
| 1016 | if (!((state == MemoryState::Heap && attr == MemoryAttribute::None) || | ||
| 1017 | (state == MemoryState::Unmapped))) { | ||
| 1018 | return ERR_INVALID_ADDRESS_STATE; | ||
| 1019 | } | ||
| 1020 | |||
| 1021 | // Add size if relevant. | ||
| 1022 | if (state != MemoryState::Unmapped) { | ||
| 1023 | mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr); | ||
| 1024 | } | ||
| 1025 | |||
| 1026 | // Break once we hit the end of the range. | ||
| 1027 | if (last_addr <= vma_last) { | ||
| 1028 | break; | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | // Advance to the next block. | ||
| 1032 | cur_addr = vma_end; | ||
| 1033 | iter = std::next(iter); | ||
| 1034 | ASSERT(iter != vma_map.end()); | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | return MakeResult(mapped_size); | ||
| 1038 | } | ||
| 1039 | |||
| 759 | u64 VMManager::GetTotalPhysicalMemoryAvailable() const { | 1040 | u64 VMManager::GetTotalPhysicalMemoryAvailable() const { |
| 760 | LOG_WARNING(Kernel, "(STUBBED) called"); | 1041 | LOG_WARNING(Kernel, "(STUBBED) called"); |
| 761 | return 0xF8000000; | 1042 | return 0xF8000000; |
| @@ -868,21 +1149,21 @@ bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const { | |||
| 868 | return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress()); | 1149 | return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress()); |
| 869 | } | 1150 | } |
| 870 | 1151 | ||
| 871 | VAddr VMManager::GetNewMapRegionBaseAddress() const { | 1152 | VAddr VMManager::GetStackRegionBaseAddress() const { |
| 872 | return new_map_region_base; | 1153 | return stack_region_base; |
| 873 | } | 1154 | } |
| 874 | 1155 | ||
| 875 | VAddr VMManager::GetNewMapRegionEndAddress() const { | 1156 | VAddr VMManager::GetStackRegionEndAddress() const { |
| 876 | return new_map_region_end; | 1157 | return stack_region_end; |
| 877 | } | 1158 | } |
| 878 | 1159 | ||
| 879 | u64 VMManager::GetNewMapRegionSize() const { | 1160 | u64 VMManager::GetStackRegionSize() const { |
| 880 | return new_map_region_end - new_map_region_base; | 1161 | return stack_region_end - stack_region_base; |
| 881 | } | 1162 | } |
| 882 | 1163 | ||
| 883 | bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const { | 1164 | bool VMManager::IsWithinStackRegion(VAddr address, u64 size) const { |
| 884 | return IsInsideAddressRange(address, size, GetNewMapRegionBaseAddress(), | 1165 | return IsInsideAddressRange(address, size, GetStackRegionBaseAddress(), |
| 885 | GetNewMapRegionEndAddress()); | 1166 | GetStackRegionEndAddress()); |
| 886 | } | 1167 | } |
| 887 | 1168 | ||
| 888 | VAddr VMManager::GetTLSIORegionBaseAddress() const { | 1169 | VAddr VMManager::GetTLSIORegionBaseAddress() const { |
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index dfbf7a894..850a7ebc3 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/memory_hook.h" | 12 | #include "common/memory_hook.h" |
| 13 | #include "common/page_table.h" | 13 | #include "common/page_table.h" |
| 14 | #include "core/hle/kernel/physical_memory.h" | ||
| 14 | #include "core/hle/result.h" | 15 | #include "core/hle/result.h" |
| 15 | #include "core/memory.h" | 16 | #include "core/memory.h" |
| 16 | 17 | ||
| @@ -290,7 +291,7 @@ struct VirtualMemoryArea { | |||
| 290 | 291 | ||
| 291 | // Settings for type = AllocatedMemoryBlock | 292 | // Settings for type = AllocatedMemoryBlock |
| 292 | /// Memory block backing this VMA. | 293 | /// Memory block backing this VMA. |
| 293 | std::shared_ptr<std::vector<u8>> backing_block = nullptr; | 294 | std::shared_ptr<PhysicalMemory> backing_block = nullptr; |
| 294 | /// Offset into the backing_memory the mapping starts from. | 295 | /// Offset into the backing_memory the mapping starts from. |
| 295 | std::size_t offset = 0; | 296 | std::size_t offset = 0; |
| 296 | 297 | ||
| @@ -348,8 +349,9 @@ public: | |||
| 348 | * @param size Size of the mapping. | 349 | * @param size Size of the mapping. |
| 349 | * @param state MemoryState tag to attach to the VMA. | 350 | * @param state MemoryState tag to attach to the VMA. |
| 350 | */ | 351 | */ |
| 351 | ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, | 352 | ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<PhysicalMemory> block, |
| 352 | std::size_t offset, u64 size, MemoryState state); | 353 | std::size_t offset, u64 size, MemoryState state, |
| 354 | VMAPermission perm = VMAPermission::ReadWrite); | ||
| 353 | 355 | ||
| 354 | /** | 356 | /** |
| 355 | * Maps an unmanaged host memory pointer at a given address. | 357 | * Maps an unmanaged host memory pointer at a given address. |
| @@ -362,14 +364,39 @@ public: | |||
| 362 | ResultVal<VMAHandle> MapBackingMemory(VAddr target, u8* memory, u64 size, MemoryState state); | 364 | ResultVal<VMAHandle> MapBackingMemory(VAddr target, u8* memory, u64 size, MemoryState state); |
| 363 | 365 | ||
| 364 | /** | 366 | /** |
| 365 | * Finds the first free address that can hold a region of the desired size. | 367 | * Finds the first free memory region of the given size within |
| 368 | * the user-addressable ASLR memory region. | ||
| 366 | * | 369 | * |
| 367 | * @param size Size of the desired region. | 370 | * @param size The size of the desired region in bytes. |
| 368 | * @return The found free address. | 371 | * |
| 372 | * @returns If successful, the base address of the free region with | ||
| 373 | * the given size. | ||
| 369 | */ | 374 | */ |
| 370 | ResultVal<VAddr> FindFreeRegion(u64 size) const; | 375 | ResultVal<VAddr> FindFreeRegion(u64 size) const; |
| 371 | 376 | ||
| 372 | /** | 377 | /** |
| 378 | * Finds the first free address range that can hold a region of the desired size | ||
| 379 | * | ||
| 380 | * @param begin The starting address of the range. | ||
| 381 | * This is treated as an inclusive beginning address. | ||
| 382 | * | ||
| 383 | * @param end The ending address of the range. | ||
| 384 | * This is treated as an exclusive ending address. | ||
| 385 | * | ||
| 386 | * @param size The size of the free region to attempt to locate, | ||
| 387 | * in bytes. | ||
| 388 | * | ||
| 389 | * @returns If successful, the base address of the free region with | ||
| 390 | * the given size. | ||
| 391 | * | ||
| 392 | * @returns If unsuccessful, a result containing an error code. | ||
| 393 | * | ||
| 394 | * @pre The starting address must be less than the ending address. | ||
| 395 | * @pre The size must not exceed the address range itself. | ||
| 396 | */ | ||
| 397 | ResultVal<VAddr> FindFreeRegion(VAddr begin, VAddr end, u64 size) const; | ||
| 398 | |||
| 399 | /** | ||
| 373 | * Maps a memory-mapped IO region at a given address. | 400 | * Maps a memory-mapped IO region at a given address. |
| 374 | * | 401 | * |
| 375 | * @param target The guest address to start the mapping at. | 402 | * @param target The guest address to start the mapping at. |
| @@ -425,6 +452,34 @@ public: | |||
| 425 | /// | 452 | /// |
| 426 | ResultVal<VAddr> SetHeapSize(u64 size); | 453 | ResultVal<VAddr> SetHeapSize(u64 size); |
| 427 | 454 | ||
| 455 | /// Maps memory at a given address. | ||
| 456 | /// | ||
| 457 | /// @param target The virtual address to map memory at. | ||
| 458 | /// @param size The amount of memory to map. | ||
| 459 | /// | ||
| 460 | /// @note The destination address must lie within the Map region. | ||
| 461 | /// | ||
| 462 | /// @note This function requires that SystemResourceSize be non-zero, | ||
| 463 | /// however, this is just because if it were not then the | ||
| 464 | /// resulting page tables could be exploited on hardware by | ||
| 465 | /// a malicious program. SystemResource usage does not need | ||
| 466 | /// to be explicitly checked or updated here. | ||
| 467 | ResultCode MapPhysicalMemory(VAddr target, u64 size); | ||
| 468 | |||
| 469 | /// Unmaps memory at a given address. | ||
| 470 | /// | ||
| 471 | /// @param target The virtual address to unmap memory at. | ||
| 472 | /// @param size The amount of memory to unmap. | ||
| 473 | /// | ||
| 474 | /// @note The destination address must lie within the Map region. | ||
| 475 | /// | ||
| 476 | /// @note This function requires that SystemResourceSize be non-zero, | ||
| 477 | /// however, this is just because if it were not then the | ||
| 478 | /// resulting page tables could be exploited on hardware by | ||
| 479 | /// a malicious program. SystemResource usage does not need | ||
| 480 | /// to be explicitly checked or updated here. | ||
| 481 | ResultCode UnmapPhysicalMemory(VAddr target, u64 size); | ||
| 482 | |||
| 428 | /// Maps a region of memory as code memory. | 483 | /// Maps a region of memory as code memory. |
| 429 | /// | 484 | /// |
| 430 | /// @param dst_address The base address of the region to create the aliasing memory region. | 485 | /// @param dst_address The base address of the region to create the aliasing memory region. |
| @@ -493,7 +548,7 @@ public: | |||
| 493 | * Scans all VMAs and updates the page table range of any that use the given vector as backing | 548 | * Scans all VMAs and updates the page table range of any that use the given vector as backing |
| 494 | * memory. This should be called after any operation that causes reallocation of the vector. | 549 | * memory. This should be called after any operation that causes reallocation of the vector. |
| 495 | */ | 550 | */ |
| 496 | void RefreshMemoryBlockMappings(const std::vector<u8>* block); | 551 | void RefreshMemoryBlockMappings(const PhysicalMemory* block); |
| 497 | 552 | ||
| 498 | /// Dumps the address space layout to the log, for debugging | 553 | /// Dumps the address space layout to the log, for debugging |
| 499 | void LogLayout() const; | 554 | void LogLayout() const; |
| @@ -571,17 +626,17 @@ public: | |||
| 571 | /// Determines whether or not the specified range is within the map region. | 626 | /// Determines whether or not the specified range is within the map region. |
| 572 | bool IsWithinMapRegion(VAddr address, u64 size) const; | 627 | bool IsWithinMapRegion(VAddr address, u64 size) const; |
| 573 | 628 | ||
| 574 | /// Gets the base address of the new map region. | 629 | /// Gets the base address of the stack region. |
| 575 | VAddr GetNewMapRegionBaseAddress() const; | 630 | VAddr GetStackRegionBaseAddress() const; |
| 576 | 631 | ||
| 577 | /// Gets the end address of the new map region. | 632 | /// Gets the end address of the stack region. |
| 578 | VAddr GetNewMapRegionEndAddress() const; | 633 | VAddr GetStackRegionEndAddress() const; |
| 579 | 634 | ||
| 580 | /// Gets the total size of the new map region in bytes. | 635 | /// Gets the total size of the stack region in bytes. |
| 581 | u64 GetNewMapRegionSize() const; | 636 | u64 GetStackRegionSize() const; |
| 582 | 637 | ||
| 583 | /// Determines whether or not the given address range is within the new map region | 638 | /// Determines whether or not the given address range is within the stack region |
| 584 | bool IsWithinNewMapRegion(VAddr address, u64 size) const; | 639 | bool IsWithinStackRegion(VAddr address, u64 size) const; |
| 585 | 640 | ||
| 586 | /// Gets the base address of the TLS IO region. | 641 | /// Gets the base address of the TLS IO region. |
| 587 | VAddr GetTLSIORegionBaseAddress() const; | 642 | VAddr GetTLSIORegionBaseAddress() const; |
| @@ -632,6 +687,11 @@ private: | |||
| 632 | */ | 687 | */ |
| 633 | VMAIter MergeAdjacent(VMAIter vma); | 688 | VMAIter MergeAdjacent(VMAIter vma); |
| 634 | 689 | ||
| 690 | /** | ||
| 691 | * Merges two adjacent VMAs. | ||
| 692 | */ | ||
| 693 | void MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right); | ||
| 694 | |||
| 635 | /// Updates the pages corresponding to this VMA so they match the VMA's attributes. | 695 | /// Updates the pages corresponding to this VMA so they match the VMA's attributes. |
| 636 | void UpdatePageTableForVMA(const VirtualMemoryArea& vma); | 696 | void UpdatePageTableForVMA(const VirtualMemoryArea& vma); |
| 637 | 697 | ||
| @@ -676,6 +736,13 @@ private: | |||
| 676 | MemoryAttribute attribute_mask, MemoryAttribute attribute, | 736 | MemoryAttribute attribute_mask, MemoryAttribute attribute, |
| 677 | MemoryAttribute ignore_mask) const; | 737 | MemoryAttribute ignore_mask) const; |
| 678 | 738 | ||
| 739 | /// Gets the amount of memory currently mapped (state != Unmapped) in a range. | ||
| 740 | ResultVal<std::size_t> SizeOfAllocatedVMAsInRange(VAddr address, std::size_t size) const; | ||
| 741 | |||
| 742 | /// Gets the amount of memory unmappable by UnmapPhysicalMemory in a range. | ||
| 743 | ResultVal<std::size_t> SizeOfUnmappablePhysicalMemoryInRange(VAddr address, | ||
| 744 | std::size_t size) const; | ||
| 745 | |||
| 679 | /** | 746 | /** |
| 680 | * A map covering the entirety of the managed address space, keyed by the `base` field of each | 747 | * A map covering the entirety of the managed address space, keyed by the `base` field of each |
| 681 | * VMA. It must always be modified by splitting or merging VMAs, so that the invariant | 748 | * VMA. It must always be modified by splitting or merging VMAs, so that the invariant |
| @@ -701,8 +768,8 @@ private: | |||
| 701 | VAddr map_region_base = 0; | 768 | VAddr map_region_base = 0; |
| 702 | VAddr map_region_end = 0; | 769 | VAddr map_region_end = 0; |
| 703 | 770 | ||
| 704 | VAddr new_map_region_base = 0; | 771 | VAddr stack_region_base = 0; |
| 705 | VAddr new_map_region_end = 0; | 772 | VAddr stack_region_end = 0; |
| 706 | 773 | ||
| 707 | VAddr tls_io_region_base = 0; | 774 | VAddr tls_io_region_base = 0; |
| 708 | VAddr tls_io_region_end = 0; | 775 | VAddr tls_io_region_end = 0; |
| @@ -711,12 +778,17 @@ private: | |||
| 711 | // the entire virtual address space extents that bound the allocations, including any holes. | 778 | // the entire virtual address space extents that bound the allocations, including any holes. |
| 712 | // This makes deallocation and reallocation of holes fast and keeps process memory contiguous | 779 | // This makes deallocation and reallocation of holes fast and keeps process memory contiguous |
| 713 | // in the emulator address space, allowing Memory::GetPointer to be reasonably safe. | 780 | // in the emulator address space, allowing Memory::GetPointer to be reasonably safe. |
| 714 | std::shared_ptr<std::vector<u8>> heap_memory; | 781 | std::shared_ptr<PhysicalMemory> heap_memory; |
| 715 | 782 | ||
| 716 | // The end of the currently allocated heap. This is not an inclusive | 783 | // The end of the currently allocated heap. This is not an inclusive |
| 717 | // end of the range. This is essentially 'base_address + current_size'. | 784 | // end of the range. This is essentially 'base_address + current_size'. |
| 718 | VAddr heap_end = 0; | 785 | VAddr heap_end = 0; |
| 719 | 786 | ||
| 787 | // The current amount of memory mapped via MapPhysicalMemory. | ||
| 788 | // This is used here (and in Nintendo's kernel) only for debugging, and does not impact | ||
| 789 | // any behavior. | ||
| 790 | u64 physical_memory_mapped = 0; | ||
| 791 | |||
| 720 | Core::System& system; | 792 | Core::System& system; |
| 721 | }; | 793 | }; |
| 722 | } // namespace Kernel | 794 | } // namespace Kernel |
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index 002efaa7a..a7c55e116 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp | |||
| @@ -15,13 +15,18 @@ | |||
| 15 | #include "core/file_sys/control_metadata.h" | 15 | #include "core/file_sys/control_metadata.h" |
| 16 | #include "core/file_sys/patch_manager.h" | 16 | #include "core/file_sys/patch_manager.h" |
| 17 | #include "core/hle/ipc_helpers.h" | 17 | #include "core/hle/ipc_helpers.h" |
| 18 | #include "core/hle/kernel/kernel.h" | ||
| 18 | #include "core/hle/kernel/process.h" | 19 | #include "core/hle/kernel/process.h" |
| 19 | #include "core/hle/service/acc/acc.h" | 20 | #include "core/hle/service/acc/acc.h" |
| 20 | #include "core/hle/service/acc/acc_aa.h" | 21 | #include "core/hle/service/acc/acc_aa.h" |
| 21 | #include "core/hle/service/acc/acc_su.h" | 22 | #include "core/hle/service/acc/acc_su.h" |
| 22 | #include "core/hle/service/acc/acc_u0.h" | 23 | #include "core/hle/service/acc/acc_u0.h" |
| 23 | #include "core/hle/service/acc/acc_u1.h" | 24 | #include "core/hle/service/acc/acc_u1.h" |
| 25 | #include "core/hle/service/acc/errors.h" | ||
| 24 | #include "core/hle/service/acc/profile_manager.h" | 26 | #include "core/hle/service/acc/profile_manager.h" |
| 27 | #include "core/hle/service/glue/arp.h" | ||
| 28 | #include "core/hle/service/glue/manager.h" | ||
| 29 | #include "core/hle/service/sm/sm.h" | ||
| 25 | #include "core/loader/loader.h" | 30 | #include "core/loader/loader.h" |
| 26 | 31 | ||
| 27 | namespace Service::Account { | 32 | namespace Service::Account { |
| @@ -312,10 +317,72 @@ void Module::Interface::IsUserRegistrationRequestPermitted(Kernel::HLERequestCon | |||
| 312 | rb.Push(profile_manager->CanSystemRegisterUser()); | 317 | rb.Push(profile_manager->CanSystemRegisterUser()); |
| 313 | } | 318 | } |
| 314 | 319 | ||
| 315 | void Module::Interface::InitializeApplicationInfoOld(Kernel::HLERequestContext& ctx) { | 320 | void Module::Interface::InitializeApplicationInfo(Kernel::HLERequestContext& ctx) { |
| 316 | LOG_WARNING(Service_ACC, "(STUBBED) called"); | 321 | IPC::RequestParser rp{ctx}; |
| 322 | auto pid = rp.Pop<u64>(); | ||
| 323 | |||
| 324 | LOG_DEBUG(Service_ACC, "called, process_id={}", pid); | ||
| 317 | IPC::ResponseBuilder rb{ctx, 2}; | 325 | IPC::ResponseBuilder rb{ctx, 2}; |
| 318 | rb.Push(RESULT_SUCCESS); | 326 | rb.Push(InitializeApplicationInfoBase(pid)); |
| 327 | } | ||
| 328 | |||
| 329 | void Module::Interface::InitializeApplicationInfoRestricted(Kernel::HLERequestContext& ctx) { | ||
| 330 | IPC::RequestParser rp{ctx}; | ||
| 331 | auto pid = rp.Pop<u64>(); | ||
| 332 | |||
| 333 | LOG_WARNING(Service_ACC, "(Partial implementation) called, process_id={}", pid); | ||
| 334 | |||
| 335 | // TODO(ogniK): We require checking if the user actually owns the title and what not. As of | ||
| 336 | // currently, we assume the user owns the title. InitializeApplicationInfoBase SHOULD be called | ||
| 337 | // first then we do extra checks if the game is a digital copy. | ||
| 338 | |||
| 339 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 340 | rb.Push(InitializeApplicationInfoBase(pid)); | ||
| 341 | } | ||
| 342 | |||
| 343 | ResultCode Module::Interface::InitializeApplicationInfoBase(u64 process_id) { | ||
| 344 | if (application_info) { | ||
| 345 | LOG_ERROR(Service_ACC, "Application already initialized"); | ||
| 346 | return ERR_ACCOUNTINFO_ALREADY_INITIALIZED; | ||
| 347 | } | ||
| 348 | |||
| 349 | const auto& list = system.Kernel().GetProcessList(); | ||
| 350 | const auto iter = std::find_if(list.begin(), list.end(), [&process_id](const auto& process) { | ||
| 351 | return process->GetProcessID() == process_id; | ||
| 352 | }); | ||
| 353 | |||
| 354 | if (iter == list.end()) { | ||
| 355 | LOG_ERROR(Service_ACC, "Failed to find process ID"); | ||
| 356 | application_info.application_type = ApplicationType::Unknown; | ||
| 357 | |||
| 358 | return ERR_ACCOUNTINFO_BAD_APPLICATION; | ||
| 359 | } | ||
| 360 | |||
| 361 | const auto launch_property = system.GetARPManager().GetLaunchProperty((*iter)->GetTitleID()); | ||
| 362 | |||
| 363 | if (launch_property.Failed()) { | ||
| 364 | LOG_ERROR(Service_ACC, "Failed to get launch property"); | ||
| 365 | return ERR_ACCOUNTINFO_BAD_APPLICATION; | ||
| 366 | } | ||
| 367 | |||
| 368 | switch (launch_property->base_game_storage_id) { | ||
| 369 | case FileSys::StorageId::GameCard: | ||
| 370 | application_info.application_type = ApplicationType::GameCard; | ||
| 371 | break; | ||
| 372 | case FileSys::StorageId::Host: | ||
| 373 | case FileSys::StorageId::NandUser: | ||
| 374 | case FileSys::StorageId::SdCard: | ||
| 375 | application_info.application_type = ApplicationType::Digital; | ||
| 376 | break; | ||
| 377 | default: | ||
| 378 | LOG_ERROR(Service_ACC, "Invalid game storage ID"); | ||
| 379 | return ERR_ACCOUNTINFO_BAD_APPLICATION; | ||
| 380 | } | ||
| 381 | |||
| 382 | LOG_WARNING(Service_ACC, "ApplicationInfo init required"); | ||
| 383 | // TODO(ogniK): Actual initalization here | ||
| 384 | |||
| 385 | return RESULT_SUCCESS; | ||
| 319 | } | 386 | } |
| 320 | 387 | ||
| 321 | void Module::Interface::GetBaasAccountManagerForApplication(Kernel::HLERequestContext& ctx) { | 388 | void Module::Interface::GetBaasAccountManagerForApplication(Kernel::HLERequestContext& ctx) { |
diff --git a/src/core/hle/service/acc/acc.h b/src/core/hle/service/acc/acc.h index 69e4f34fc..7a7dc9ec6 100644 --- a/src/core/hle/service/acc/acc.h +++ b/src/core/hle/service/acc/acc.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "core/hle/service/glue/manager.h" | ||
| 7 | #include "core/hle/service/service.h" | 8 | #include "core/hle/service/service.h" |
| 8 | 9 | ||
| 9 | namespace Service::Account { | 10 | namespace Service::Account { |
| @@ -25,13 +26,34 @@ public: | |||
| 25 | void ListOpenUsers(Kernel::HLERequestContext& ctx); | 26 | void ListOpenUsers(Kernel::HLERequestContext& ctx); |
| 26 | void GetLastOpenedUser(Kernel::HLERequestContext& ctx); | 27 | void GetLastOpenedUser(Kernel::HLERequestContext& ctx); |
| 27 | void GetProfile(Kernel::HLERequestContext& ctx); | 28 | void GetProfile(Kernel::HLERequestContext& ctx); |
| 28 | void InitializeApplicationInfoOld(Kernel::HLERequestContext& ctx); | 29 | void InitializeApplicationInfo(Kernel::HLERequestContext& ctx); |
| 30 | void InitializeApplicationInfoRestricted(Kernel::HLERequestContext& ctx); | ||
| 29 | void GetBaasAccountManagerForApplication(Kernel::HLERequestContext& ctx); | 31 | void GetBaasAccountManagerForApplication(Kernel::HLERequestContext& ctx); |
| 30 | void IsUserRegistrationRequestPermitted(Kernel::HLERequestContext& ctx); | 32 | void IsUserRegistrationRequestPermitted(Kernel::HLERequestContext& ctx); |
| 31 | void TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx); | 33 | void TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx); |
| 32 | void IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx); | 34 | void IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx); |
| 33 | void GetProfileEditor(Kernel::HLERequestContext& ctx); | 35 | void GetProfileEditor(Kernel::HLERequestContext& ctx); |
| 34 | 36 | ||
| 37 | private: | ||
| 38 | ResultCode InitializeApplicationInfoBase(u64 process_id); | ||
| 39 | |||
| 40 | enum class ApplicationType : u32_le { | ||
| 41 | GameCard = 0, | ||
| 42 | Digital = 1, | ||
| 43 | Unknown = 3, | ||
| 44 | }; | ||
| 45 | |||
| 46 | struct ApplicationInfo { | ||
| 47 | Service::Glue::ApplicationLaunchProperty launch_property; | ||
| 48 | ApplicationType application_type; | ||
| 49 | |||
| 50 | constexpr explicit operator bool() const { | ||
| 51 | return launch_property.title_id != 0x0; | ||
| 52 | } | ||
| 53 | }; | ||
| 54 | |||
| 55 | ApplicationInfo application_info{}; | ||
| 56 | |||
| 35 | protected: | 57 | protected: |
| 36 | std::shared_ptr<Module> module; | 58 | std::shared_ptr<Module> module; |
| 37 | std::shared_ptr<ProfileManager> profile_manager; | 59 | std::shared_ptr<ProfileManager> profile_manager; |
diff --git a/src/core/hle/service/acc/acc_u0.cpp b/src/core/hle/service/acc/acc_u0.cpp index 2f239e8c0..0ac19f4ff 100644 --- a/src/core/hle/service/acc/acc_u0.cpp +++ b/src/core/hle/service/acc/acc_u0.cpp | |||
| @@ -22,7 +22,7 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p | |||
| 22 | {51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, | 22 | {51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, |
| 23 | {60, nullptr, "ListOpenContextStoredUsers"}, | 23 | {60, nullptr, "ListOpenContextStoredUsers"}, |
| 24 | {99, nullptr, "DebugActivateOpenContextRetention"}, | 24 | {99, nullptr, "DebugActivateOpenContextRetention"}, |
| 25 | {100, &ACC_U0::InitializeApplicationInfoOld, "InitializeApplicationInfoOld"}, | 25 | {100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"}, |
| 26 | {101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"}, | 26 | {101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"}, |
| 27 | {102, nullptr, "AuthenticateApplicationAsync"}, | 27 | {102, nullptr, "AuthenticateApplicationAsync"}, |
| 28 | {103, nullptr, "CheckNetworkServiceAvailabilityAsync"}, | 28 | {103, nullptr, "CheckNetworkServiceAvailabilityAsync"}, |
| @@ -31,7 +31,7 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p | |||
| 31 | {120, nullptr, "CreateGuestLoginRequest"}, | 31 | {120, nullptr, "CreateGuestLoginRequest"}, |
| 32 | {130, nullptr, "LoadOpenContext"}, | 32 | {130, nullptr, "LoadOpenContext"}, |
| 33 | {131, nullptr, "ListOpenContextStoredUsers"}, | 33 | {131, nullptr, "ListOpenContextStoredUsers"}, |
| 34 | {140, nullptr, "InitializeApplicationInfo"}, | 34 | {140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"}, |
| 35 | {141, nullptr, "ListQualifiedUsers"}, | 35 | {141, nullptr, "ListQualifiedUsers"}, |
| 36 | {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"}, | 36 | {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"}, |
| 37 | }; | 37 | }; |
diff --git a/src/core/hle/service/acc/errors.h b/src/core/hle/service/acc/errors.h new file mode 100644 index 000000000..1f0577239 --- /dev/null +++ b/src/core/hle/service/acc/errors.h | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | // Copyright 2019 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "core/hle/result.h" | ||
| 8 | |||
| 9 | namespace Service::Account { | ||
| 10 | |||
| 11 | constexpr ResultCode ERR_ACCOUNTINFO_BAD_APPLICATION{ErrorModule::Account, 22}; | ||
| 12 | constexpr ResultCode ERR_ACCOUNTINFO_ALREADY_INITIALIZED{ErrorModule::Account, 41}; | ||
| 13 | |||
| 14 | } // namespace Service::Account | ||
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 33cebb48b..aa2c83937 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp | |||
| @@ -29,7 +29,8 @@ | |||
| 29 | #include "core/hle/service/am/omm.h" | 29 | #include "core/hle/service/am/omm.h" |
| 30 | #include "core/hle/service/am/spsm.h" | 30 | #include "core/hle/service/am/spsm.h" |
| 31 | #include "core/hle/service/am/tcap.h" | 31 | #include "core/hle/service/am/tcap.h" |
| 32 | #include "core/hle/service/apm/apm.h" | 32 | #include "core/hle/service/apm/controller.h" |
| 33 | #include "core/hle/service/apm/interface.h" | ||
| 33 | #include "core/hle/service/filesystem/filesystem.h" | 34 | #include "core/hle/service/filesystem/filesystem.h" |
| 34 | #include "core/hle/service/ns/ns.h" | 35 | #include "core/hle/service/ns/ns.h" |
| 35 | #include "core/hle/service/nvflinger/nvflinger.h" | 36 | #include "core/hle/service/nvflinger/nvflinger.h" |
| @@ -55,7 +56,8 @@ struct LaunchParameters { | |||
| 55 | }; | 56 | }; |
| 56 | static_assert(sizeof(LaunchParameters) == 0x88); | 57 | static_assert(sizeof(LaunchParameters) == 0x88); |
| 57 | 58 | ||
| 58 | IWindowController::IWindowController() : ServiceFramework("IWindowController") { | 59 | IWindowController::IWindowController(Core::System& system_) |
| 60 | : ServiceFramework("IWindowController"), system{system_} { | ||
| 59 | // clang-format off | 61 | // clang-format off |
| 60 | static const FunctionInfo functions[] = { | 62 | static const FunctionInfo functions[] = { |
| 61 | {0, nullptr, "CreateWindow"}, | 63 | {0, nullptr, "CreateWindow"}, |
| @@ -74,7 +76,7 @@ IWindowController::IWindowController() : ServiceFramework("IWindowController") { | |||
| 74 | IWindowController::~IWindowController() = default; | 76 | IWindowController::~IWindowController() = default; |
| 75 | 77 | ||
| 76 | void IWindowController::GetAppletResourceUserId(Kernel::HLERequestContext& ctx) { | 78 | void IWindowController::GetAppletResourceUserId(Kernel::HLERequestContext& ctx) { |
| 77 | const u64 process_id = Core::System::GetInstance().Kernel().CurrentProcess()->GetProcessID(); | 79 | const u64 process_id = system.CurrentProcess()->GetProcessID(); |
| 78 | 80 | ||
| 79 | LOG_DEBUG(Service_AM, "called. Process ID=0x{:016X}", process_id); | 81 | LOG_DEBUG(Service_AM, "called. Process ID=0x{:016X}", process_id); |
| 80 | 82 | ||
| @@ -230,8 +232,9 @@ IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} { | |||
| 230 | 232 | ||
| 231 | IDebugFunctions::~IDebugFunctions() = default; | 233 | IDebugFunctions::~IDebugFunctions() = default; |
| 232 | 234 | ||
| 233 | ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger) | 235 | ISelfController::ISelfController(Core::System& system_, |
| 234 | : ServiceFramework("ISelfController"), nvflinger(std::move(nvflinger)) { | 236 | std::shared_ptr<NVFlinger::NVFlinger> nvflinger_) |
| 237 | : ServiceFramework("ISelfController"), nvflinger(std::move(nvflinger_)) { | ||
| 235 | // clang-format off | 238 | // clang-format off |
| 236 | static const FunctionInfo functions[] = { | 239 | static const FunctionInfo functions[] = { |
| 237 | {0, nullptr, "Exit"}, | 240 | {0, nullptr, "Exit"}, |
| @@ -265,12 +268,12 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger | |||
| 265 | {65, nullptr, "ReportUserIsActive"}, | 268 | {65, nullptr, "ReportUserIsActive"}, |
| 266 | {66, nullptr, "GetCurrentIlluminance"}, | 269 | {66, nullptr, "GetCurrentIlluminance"}, |
| 267 | {67, nullptr, "IsIlluminanceAvailable"}, | 270 | {67, nullptr, "IsIlluminanceAvailable"}, |
| 268 | {68, nullptr, "SetAutoSleepDisabled"}, | 271 | {68, &ISelfController::SetAutoSleepDisabled, "SetAutoSleepDisabled"}, |
| 269 | {69, nullptr, "IsAutoSleepDisabled"}, | 272 | {69, &ISelfController::IsAutoSleepDisabled, "IsAutoSleepDisabled"}, |
| 270 | {70, nullptr, "ReportMultimediaError"}, | 273 | {70, nullptr, "ReportMultimediaError"}, |
| 271 | {71, nullptr, "GetCurrentIlluminanceEx"}, | 274 | {71, nullptr, "GetCurrentIlluminanceEx"}, |
| 272 | {80, nullptr, "SetWirelessPriorityMode"}, | 275 | {80, nullptr, "SetWirelessPriorityMode"}, |
| 273 | {90, nullptr, "GetAccumulatedSuspendedTickValue"}, | 276 | {90, &ISelfController::GetAccumulatedSuspendedTickValue, "GetAccumulatedSuspendedTickValue"}, |
| 274 | {91, &ISelfController::GetAccumulatedSuspendedTickChangedEvent, "GetAccumulatedSuspendedTickChangedEvent"}, | 277 | {91, &ISelfController::GetAccumulatedSuspendedTickChangedEvent, "GetAccumulatedSuspendedTickChangedEvent"}, |
| 275 | {100, nullptr, "SetAlbumImageTakenNotificationEnabled"}, | 278 | {100, nullptr, "SetAlbumImageTakenNotificationEnabled"}, |
| 276 | {1000, nullptr, "GetDebugStorageChannel"}, | 279 | {1000, nullptr, "GetDebugStorageChannel"}, |
| @@ -279,14 +282,18 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger | |||
| 279 | 282 | ||
| 280 | RegisterHandlers(functions); | 283 | RegisterHandlers(functions); |
| 281 | 284 | ||
| 282 | auto& kernel = Core::System::GetInstance().Kernel(); | 285 | auto& kernel = system_.Kernel(); |
| 283 | launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, | 286 | launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, |
| 284 | "ISelfController:LaunchableEvent"); | 287 | "ISelfController:LaunchableEvent"); |
| 285 | 288 | ||
| 286 | // TODO(ogniK): Figure out where, when and why this event gets signalled | 289 | // This event is created by AM on the first time GetAccumulatedSuspendedTickChangedEvent() is |
| 290 | // called. Yuzu can just create it unconditionally, since it doesn't need to support multiple | ||
| 291 | // ISelfControllers. The event is signaled on creation, and on transition from suspended -> not | ||
| 292 | // suspended if the event has previously been created by a call to | ||
| 293 | // GetAccumulatedSuspendedTickChangedEvent. | ||
| 287 | accumulated_suspended_tick_changed_event = Kernel::WritableEvent::CreateEventPair( | 294 | accumulated_suspended_tick_changed_event = Kernel::WritableEvent::CreateEventPair( |
| 288 | kernel, Kernel::ResetType::Manual, "ISelfController:AccumulatedSuspendedTickChangedEvent"); | 295 | kernel, Kernel::ResetType::Manual, "ISelfController:AccumulatedSuspendedTickChangedEvent"); |
| 289 | accumulated_suspended_tick_changed_event.writable->Signal(); // Is signalled on creation | 296 | accumulated_suspended_tick_changed_event.writable->Signal(); |
| 290 | } | 297 | } |
| 291 | 298 | ||
| 292 | ISelfController::~ISelfController() = default; | 299 | ISelfController::~ISelfController() = default; |
| @@ -449,19 +456,54 @@ void ISelfController::GetIdleTimeDetectionExtension(Kernel::HLERequestContext& c | |||
| 449 | rb.Push<u32>(idle_time_detection_extension); | 456 | rb.Push<u32>(idle_time_detection_extension); |
| 450 | } | 457 | } |
| 451 | 458 | ||
| 459 | void ISelfController::SetAutoSleepDisabled(Kernel::HLERequestContext& ctx) { | ||
| 460 | IPC::RequestParser rp{ctx}; | ||
| 461 | is_auto_sleep_disabled = rp.Pop<bool>(); | ||
| 462 | |||
| 463 | // On the system itself, if the previous state of is_auto_sleep_disabled | ||
| 464 | // differed from the current value passed in, it'd signify the internal | ||
| 465 | // window manager to update (and also increment some statistics like update counts) | ||
| 466 | // | ||
| 467 | // It'd also indicate this change to an idle handling context. | ||
| 468 | // | ||
| 469 | // However, given we're emulating this behavior, most of this can be ignored | ||
| 470 | // and it's sufficient to simply set the member variable for querying via | ||
| 471 | // IsAutoSleepDisabled(). | ||
| 472 | |||
| 473 | LOG_DEBUG(Service_AM, "called. is_auto_sleep_disabled={}", is_auto_sleep_disabled); | ||
| 474 | |||
| 475 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 476 | rb.Push(RESULT_SUCCESS); | ||
| 477 | } | ||
| 478 | |||
| 479 | void ISelfController::IsAutoSleepDisabled(Kernel::HLERequestContext& ctx) { | ||
| 480 | LOG_DEBUG(Service_AM, "called."); | ||
| 481 | |||
| 482 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 483 | rb.Push(RESULT_SUCCESS); | ||
| 484 | rb.Push(is_auto_sleep_disabled); | ||
| 485 | } | ||
| 486 | |||
| 487 | void ISelfController::GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx) { | ||
| 488 | LOG_DEBUG(Service_AM, "called."); | ||
| 489 | |||
| 490 | // This command returns the total number of system ticks since ISelfController creation | ||
| 491 | // where the game was suspended. Since Yuzu doesn't implement game suspension, this command | ||
| 492 | // can just always return 0 ticks. | ||
| 493 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 494 | rb.Push(RESULT_SUCCESS); | ||
| 495 | rb.Push<u64>(0); | ||
| 496 | } | ||
| 497 | |||
| 452 | void ISelfController::GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx) { | 498 | void ISelfController::GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx) { |
| 453 | // The implementation of this function is fine as is, the reason we're labelling it as stubbed | 499 | LOG_DEBUG(Service_AM, "called."); |
| 454 | // is because we're currently unsure when and where accumulated_suspended_tick_changed_event is | ||
| 455 | // actually signalled for the time being. | ||
| 456 | LOG_WARNING(Service_AM, "(STUBBED) called"); | ||
| 457 | 500 | ||
| 458 | IPC::ResponseBuilder rb{ctx, 2, 1}; | 501 | IPC::ResponseBuilder rb{ctx, 2, 1}; |
| 459 | rb.Push(RESULT_SUCCESS); | 502 | rb.Push(RESULT_SUCCESS); |
| 460 | rb.PushCopyObjects(accumulated_suspended_tick_changed_event.readable); | 503 | rb.PushCopyObjects(accumulated_suspended_tick_changed_event.readable); |
| 461 | } | 504 | } |
| 462 | 505 | ||
| 463 | AppletMessageQueue::AppletMessageQueue() { | 506 | AppletMessageQueue::AppletMessageQueue(Kernel::KernelCore& kernel) { |
| 464 | auto& kernel = Core::System::GetInstance().Kernel(); | ||
| 465 | on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, | 507 | on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, |
| 466 | "AMMessageQueue:OnMessageRecieved"); | 508 | "AMMessageQueue:OnMessageRecieved"); |
| 467 | on_operation_mode_changed = Kernel::WritableEvent::CreateEventPair( | 509 | on_operation_mode_changed = Kernel::WritableEvent::CreateEventPair( |
| @@ -508,8 +550,9 @@ void AppletMessageQueue::OperationModeChanged() { | |||
| 508 | on_operation_mode_changed.writable->Signal(); | 550 | on_operation_mode_changed.writable->Signal(); |
| 509 | } | 551 | } |
| 510 | 552 | ||
| 511 | ICommonStateGetter::ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_queue) | 553 | ICommonStateGetter::ICommonStateGetter(Core::System& system, |
| 512 | : ServiceFramework("ICommonStateGetter"), msg_queue(std::move(msg_queue)) { | 554 | std::shared_ptr<AppletMessageQueue> msg_queue) |
| 555 | : ServiceFramework("ICommonStateGetter"), system(system), msg_queue(std::move(msg_queue)) { | ||
| 513 | // clang-format off | 556 | // clang-format off |
| 514 | static const FunctionInfo functions[] = { | 557 | static const FunctionInfo functions[] = { |
| 515 | {0, &ICommonStateGetter::GetEventHandle, "GetEventHandle"}, | 558 | {0, &ICommonStateGetter::GetEventHandle, "GetEventHandle"}, |
| @@ -542,7 +585,7 @@ ICommonStateGetter::ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_q | |||
| 542 | {63, nullptr, "GetHdcpAuthenticationStateChangeEvent"}, | 585 | {63, nullptr, "GetHdcpAuthenticationStateChangeEvent"}, |
| 543 | {64, nullptr, "SetTvPowerStateMatchingMode"}, | 586 | {64, nullptr, "SetTvPowerStateMatchingMode"}, |
| 544 | {65, nullptr, "GetApplicationIdByContentActionName"}, | 587 | {65, nullptr, "GetApplicationIdByContentActionName"}, |
| 545 | {66, nullptr, "SetCpuBoostMode"}, | 588 | {66, &ICommonStateGetter::SetCpuBoostMode, "SetCpuBoostMode"}, |
| 546 | {80, nullptr, "PerformSystemButtonPressingIfInFocus"}, | 589 | {80, nullptr, "PerformSystemButtonPressingIfInFocus"}, |
| 547 | {90, nullptr, "SetPerformanceConfigurationChangedNotification"}, | 590 | {90, nullptr, "SetPerformanceConfigurationChangedNotification"}, |
| 548 | {91, nullptr, "GetCurrentPerformanceConfiguration"}, | 591 | {91, nullptr, "GetCurrentPerformanceConfiguration"}, |
| @@ -623,6 +666,16 @@ void ICommonStateGetter::GetDefaultDisplayResolution(Kernel::HLERequestContext& | |||
| 623 | } | 666 | } |
| 624 | } | 667 | } |
| 625 | 668 | ||
| 669 | void ICommonStateGetter::SetCpuBoostMode(Kernel::HLERequestContext& ctx) { | ||
| 670 | LOG_DEBUG(Service_AM, "called, forwarding to APM:SYS"); | ||
| 671 | |||
| 672 | const auto& sm = system.ServiceManager(); | ||
| 673 | const auto apm_sys = sm.GetService<APM::APM_Sys>("apm:sys"); | ||
| 674 | ASSERT(apm_sys != nullptr); | ||
| 675 | |||
| 676 | apm_sys->SetCpuBoostMode(ctx); | ||
| 677 | } | ||
| 678 | |||
| 626 | IStorage::IStorage(std::vector<u8> buffer) | 679 | IStorage::IStorage(std::vector<u8> buffer) |
| 627 | : ServiceFramework("IStorage"), buffer(std::move(buffer)) { | 680 | : ServiceFramework("IStorage"), buffer(std::move(buffer)) { |
| 628 | // clang-format off | 681 | // clang-format off |
| @@ -651,13 +704,11 @@ void ICommonStateGetter::GetOperationMode(Kernel::HLERequestContext& ctx) { | |||
| 651 | } | 704 | } |
| 652 | 705 | ||
| 653 | void ICommonStateGetter::GetPerformanceMode(Kernel::HLERequestContext& ctx) { | 706 | void ICommonStateGetter::GetPerformanceMode(Kernel::HLERequestContext& ctx) { |
| 654 | const bool use_docked_mode{Settings::values.use_docked_mode}; | 707 | LOG_DEBUG(Service_AM, "called"); |
| 655 | LOG_DEBUG(Service_AM, "called, use_docked_mode={}", use_docked_mode); | ||
| 656 | 708 | ||
| 657 | IPC::ResponseBuilder rb{ctx, 3}; | 709 | IPC::ResponseBuilder rb{ctx, 3}; |
| 658 | rb.Push(RESULT_SUCCESS); | 710 | rb.Push(RESULT_SUCCESS); |
| 659 | rb.Push(static_cast<u32>(use_docked_mode ? APM::PerformanceMode::Docked | 711 | rb.PushEnum(system.GetAPMController().GetCurrentPerformanceMode()); |
| 660 | : APM::PerformanceMode::Handheld)); | ||
| 661 | } | 712 | } |
| 662 | 713 | ||
| 663 | class ILibraryAppletAccessor final : public ServiceFramework<ILibraryAppletAccessor> { | 714 | class ILibraryAppletAccessor final : public ServiceFramework<ILibraryAppletAccessor> { |
| @@ -887,9 +938,8 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) { | |||
| 887 | rb.Push(RESULT_SUCCESS); | 938 | rb.Push(RESULT_SUCCESS); |
| 888 | } | 939 | } |
| 889 | 940 | ||
| 890 | ILibraryAppletCreator::ILibraryAppletCreator(u64 current_process_title_id) | 941 | ILibraryAppletCreator::ILibraryAppletCreator(Core::System& system_) |
| 891 | : ServiceFramework("ILibraryAppletCreator"), | 942 | : ServiceFramework("ILibraryAppletCreator"), system{system_} { |
| 892 | current_process_title_id(current_process_title_id) { | ||
| 893 | static const FunctionInfo functions[] = { | 943 | static const FunctionInfo functions[] = { |
| 894 | {0, &ILibraryAppletCreator::CreateLibraryApplet, "CreateLibraryApplet"}, | 944 | {0, &ILibraryAppletCreator::CreateLibraryApplet, "CreateLibraryApplet"}, |
| 895 | {1, nullptr, "TerminateAllLibraryApplets"}, | 945 | {1, nullptr, "TerminateAllLibraryApplets"}, |
| @@ -911,8 +961,8 @@ void ILibraryAppletCreator::CreateLibraryApplet(Kernel::HLERequestContext& ctx) | |||
| 911 | LOG_DEBUG(Service_AM, "called with applet_id={:08X}, applet_mode={:08X}", | 961 | LOG_DEBUG(Service_AM, "called with applet_id={:08X}, applet_mode={:08X}", |
| 912 | static_cast<u32>(applet_id), applet_mode); | 962 | static_cast<u32>(applet_id), applet_mode); |
| 913 | 963 | ||
| 914 | const auto& applet_manager{Core::System::GetInstance().GetAppletManager()}; | 964 | const auto& applet_manager{system.GetAppletManager()}; |
| 915 | const auto applet = applet_manager.GetApplet(applet_id, current_process_title_id); | 965 | const auto applet = applet_manager.GetApplet(applet_id); |
| 916 | 966 | ||
| 917 | if (applet == nullptr) { | 967 | if (applet == nullptr) { |
| 918 | LOG_ERROR(Service_AM, "Applet doesn't exist! applet_id={}", static_cast<u32>(applet_id)); | 968 | LOG_ERROR(Service_AM, "Applet doesn't exist! applet_id={}", static_cast<u32>(applet_id)); |
| @@ -949,8 +999,7 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex | |||
| 949 | const auto handle{rp.Pop<Kernel::Handle>()}; | 999 | const auto handle{rp.Pop<Kernel::Handle>()}; |
| 950 | 1000 | ||
| 951 | const auto transfer_mem = | 1001 | const auto transfer_mem = |
| 952 | Core::System::GetInstance().CurrentProcess()->GetHandleTable().Get<Kernel::TransferMemory>( | 1002 | system.CurrentProcess()->GetHandleTable().Get<Kernel::TransferMemory>(handle); |
| 953 | handle); | ||
| 954 | 1003 | ||
| 955 | if (transfer_mem == nullptr) { | 1004 | if (transfer_mem == nullptr) { |
| 956 | LOG_ERROR(Service_AM, "shared_mem is a nullpr for handle={:08X}", handle); | 1005 | LOG_ERROR(Service_AM, "shared_mem is a nullpr for handle={:08X}", handle); |
| @@ -968,7 +1017,8 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex | |||
| 968 | rb.PushIpcInterface(std::make_shared<IStorage>(std::move(memory))); | 1017 | rb.PushIpcInterface(std::make_shared<IStorage>(std::move(memory))); |
| 969 | } | 1018 | } |
| 970 | 1019 | ||
| 971 | IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationFunctions") { | 1020 | IApplicationFunctions::IApplicationFunctions(Core::System& system_) |
| 1021 | : ServiceFramework("IApplicationFunctions"), system{system_} { | ||
| 972 | // clang-format off | 1022 | // clang-format off |
| 973 | static const FunctionInfo functions[] = { | 1023 | static const FunctionInfo functions[] = { |
| 974 | {1, &IApplicationFunctions::PopLaunchParameter, "PopLaunchParameter"}, | 1024 | {1, &IApplicationFunctions::PopLaunchParameter, "PopLaunchParameter"}, |
| @@ -1007,6 +1057,7 @@ IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationF | |||
| 1007 | {120, nullptr, "ExecuteProgram"}, | 1057 | {120, nullptr, "ExecuteProgram"}, |
| 1008 | {121, nullptr, "ClearUserChannel"}, | 1058 | {121, nullptr, "ClearUserChannel"}, |
| 1009 | {122, nullptr, "UnpopToUserChannel"}, | 1059 | {122, nullptr, "UnpopToUserChannel"}, |
| 1060 | {130, &IApplicationFunctions::GetGpuErrorDetectedSystemEvent, "GetGpuErrorDetectedSystemEvent"}, | ||
| 1010 | {500, nullptr, "StartContinuousRecordingFlushForDebug"}, | 1061 | {500, nullptr, "StartContinuousRecordingFlushForDebug"}, |
| 1011 | {1000, nullptr, "CreateMovieMaker"}, | 1062 | {1000, nullptr, "CreateMovieMaker"}, |
| 1012 | {1001, nullptr, "PrepareForJit"}, | 1063 | {1001, nullptr, "PrepareForJit"}, |
| @@ -1014,6 +1065,10 @@ IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationF | |||
| 1014 | // clang-format on | 1065 | // clang-format on |
| 1015 | 1066 | ||
| 1016 | RegisterHandlers(functions); | 1067 | RegisterHandlers(functions); |
| 1068 | |||
| 1069 | auto& kernel = Core::System::GetInstance().Kernel(); | ||
| 1070 | gpu_error_detected_event = Kernel::WritableEvent::CreateEventPair( | ||
| 1071 | kernel, Kernel::ResetType::Manual, "IApplicationFunctions:GpuErrorDetectedSystemEvent"); | ||
| 1017 | } | 1072 | } |
| 1018 | 1073 | ||
| 1019 | IApplicationFunctions::~IApplicationFunctions() = default; | 1074 | IApplicationFunctions::~IApplicationFunctions() = default; |
| @@ -1125,7 +1180,7 @@ void IApplicationFunctions::GetDesiredLanguage(Kernel::HLERequestContext& ctx) { | |||
| 1125 | // Get supported languages from NACP, if possible | 1180 | // Get supported languages from NACP, if possible |
| 1126 | // Default to 0 (all languages supported) | 1181 | // Default to 0 (all languages supported) |
| 1127 | u32 supported_languages = 0; | 1182 | u32 supported_languages = 0; |
| 1128 | FileSys::PatchManager pm{Core::System::GetInstance().CurrentProcess()->GetTitleID()}; | 1183 | FileSys::PatchManager pm{system.CurrentProcess()->GetTitleID()}; |
| 1129 | 1184 | ||
| 1130 | const auto res = pm.GetControlMetadata(); | 1185 | const auto res = pm.GetControlMetadata(); |
| 1131 | if (res.first != nullptr) { | 1186 | if (res.first != nullptr) { |
| @@ -1133,8 +1188,8 @@ void IApplicationFunctions::GetDesiredLanguage(Kernel::HLERequestContext& ctx) { | |||
| 1133 | } | 1188 | } |
| 1134 | 1189 | ||
| 1135 | // Call IApplicationManagerInterface implementation. | 1190 | // Call IApplicationManagerInterface implementation. |
| 1136 | auto& service_manager = Core::System::GetInstance().ServiceManager(); | 1191 | auto& service_manager = system.ServiceManager(); |
| 1137 | auto ns_am2 = service_manager.GetService<Service::NS::NS>("ns:am2"); | 1192 | auto ns_am2 = service_manager.GetService<NS::NS>("ns:am2"); |
| 1138 | auto app_man = ns_am2->GetApplicationManagerInterface(); | 1193 | auto app_man = ns_am2->GetApplicationManagerInterface(); |
| 1139 | 1194 | ||
| 1140 | // Get desired application language | 1195 | // Get desired application language |
| @@ -1206,8 +1261,8 @@ void IApplicationFunctions::ExtendSaveData(Kernel::HLERequestContext& ctx) { | |||
| 1206 | "new_journal={:016X}", | 1261 | "new_journal={:016X}", |
| 1207 | static_cast<u8>(type), user_id[1], user_id[0], new_normal_size, new_journal_size); | 1262 | static_cast<u8>(type), user_id[1], user_id[0], new_normal_size, new_journal_size); |
| 1208 | 1263 | ||
| 1209 | FileSystem::WriteSaveDataSize(type, Core::CurrentProcess()->GetTitleID(), user_id, | 1264 | const auto title_id = system.CurrentProcess()->GetTitleID(); |
| 1210 | {new_normal_size, new_journal_size}); | 1265 | FileSystem::WriteSaveDataSize(type, title_id, user_id, {new_normal_size, new_journal_size}); |
| 1211 | 1266 | ||
| 1212 | IPC::ResponseBuilder rb{ctx, 4}; | 1267 | IPC::ResponseBuilder rb{ctx, 4}; |
| 1213 | rb.Push(RESULT_SUCCESS); | 1268 | rb.Push(RESULT_SUCCESS); |
| @@ -1226,8 +1281,8 @@ void IApplicationFunctions::GetSaveDataSize(Kernel::HLERequestContext& ctx) { | |||
| 1226 | LOG_DEBUG(Service_AM, "called with type={:02X}, user_id={:016X}{:016X}", static_cast<u8>(type), | 1281 | LOG_DEBUG(Service_AM, "called with type={:02X}, user_id={:016X}{:016X}", static_cast<u8>(type), |
| 1227 | user_id[1], user_id[0]); | 1282 | user_id[1], user_id[0]); |
| 1228 | 1283 | ||
| 1229 | const auto size = | 1284 | const auto title_id = system.CurrentProcess()->GetTitleID(); |
| 1230 | FileSystem::ReadSaveDataSize(type, Core::CurrentProcess()->GetTitleID(), user_id); | 1285 | const auto size = FileSystem::ReadSaveDataSize(type, title_id, user_id); |
| 1231 | 1286 | ||
| 1232 | IPC::ResponseBuilder rb{ctx, 6}; | 1287 | IPC::ResponseBuilder rb{ctx, 6}; |
| 1233 | rb.Push(RESULT_SUCCESS); | 1288 | rb.Push(RESULT_SUCCESS); |
| @@ -1235,11 +1290,19 @@ void IApplicationFunctions::GetSaveDataSize(Kernel::HLERequestContext& ctx) { | |||
| 1235 | rb.Push(size.journal); | 1290 | rb.Push(size.journal); |
| 1236 | } | 1291 | } |
| 1237 | 1292 | ||
| 1293 | void IApplicationFunctions::GetGpuErrorDetectedSystemEvent(Kernel::HLERequestContext& ctx) { | ||
| 1294 | LOG_WARNING(Service_AM, "(STUBBED) called"); | ||
| 1295 | |||
| 1296 | IPC::ResponseBuilder rb{ctx, 2, 1}; | ||
| 1297 | rb.Push(RESULT_SUCCESS); | ||
| 1298 | rb.PushCopyObjects(gpu_error_detected_event.readable); | ||
| 1299 | } | ||
| 1300 | |||
| 1238 | void InstallInterfaces(SM::ServiceManager& service_manager, | 1301 | void InstallInterfaces(SM::ServiceManager& service_manager, |
| 1239 | std::shared_ptr<NVFlinger::NVFlinger> nvflinger, Core::System& system) { | 1302 | std::shared_ptr<NVFlinger::NVFlinger> nvflinger, Core::System& system) { |
| 1240 | auto message_queue = std::make_shared<AppletMessageQueue>(); | 1303 | auto message_queue = std::make_shared<AppletMessageQueue>(system.Kernel()); |
| 1241 | message_queue->PushMessage(AppletMessageQueue::AppletMessage::FocusStateChanged); // Needed on | 1304 | // Needed on game boot |
| 1242 | // game boot | 1305 | message_queue->PushMessage(AppletMessageQueue::AppletMessage::FocusStateChanged); |
| 1243 | 1306 | ||
| 1244 | std::make_shared<AppletAE>(nvflinger, message_queue, system)->InstallAsService(service_manager); | 1307 | std::make_shared<AppletAE>(nvflinger, message_queue, system)->InstallAsService(service_manager); |
| 1245 | std::make_shared<AppletOE>(nvflinger, message_queue, system)->InstallAsService(service_manager); | 1308 | std::make_shared<AppletOE>(nvflinger, message_queue, system)->InstallAsService(service_manager); |
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h index 4ea609d23..28f870302 100644 --- a/src/core/hle/service/am/am.h +++ b/src/core/hle/service/am/am.h | |||
| @@ -10,12 +10,15 @@ | |||
| 10 | #include "core/hle/kernel/writable_event.h" | 10 | #include "core/hle/kernel/writable_event.h" |
| 11 | #include "core/hle/service/service.h" | 11 | #include "core/hle/service/service.h" |
| 12 | 12 | ||
| 13 | namespace Service { | 13 | namespace Kernel { |
| 14 | namespace NVFlinger { | 14 | class KernelCore; |
| 15 | } | ||
| 16 | |||
| 17 | namespace Service::NVFlinger { | ||
| 15 | class NVFlinger; | 18 | class NVFlinger; |
| 16 | } | 19 | } |
| 17 | 20 | ||
| 18 | namespace AM { | 21 | namespace Service::AM { |
| 19 | 22 | ||
| 20 | enum SystemLanguage { | 23 | enum SystemLanguage { |
| 21 | Japanese = 0, | 24 | Japanese = 0, |
| @@ -47,7 +50,7 @@ public: | |||
| 47 | PerformanceModeChanged = 31, | 50 | PerformanceModeChanged = 31, |
| 48 | }; | 51 | }; |
| 49 | 52 | ||
| 50 | AppletMessageQueue(); | 53 | explicit AppletMessageQueue(Kernel::KernelCore& kernel); |
| 51 | ~AppletMessageQueue(); | 54 | ~AppletMessageQueue(); |
| 52 | 55 | ||
| 53 | const Kernel::SharedPtr<Kernel::ReadableEvent>& GetMesssageRecieveEvent() const; | 56 | const Kernel::SharedPtr<Kernel::ReadableEvent>& GetMesssageRecieveEvent() const; |
| @@ -65,12 +68,14 @@ private: | |||
| 65 | 68 | ||
| 66 | class IWindowController final : public ServiceFramework<IWindowController> { | 69 | class IWindowController final : public ServiceFramework<IWindowController> { |
| 67 | public: | 70 | public: |
| 68 | IWindowController(); | 71 | explicit IWindowController(Core::System& system_); |
| 69 | ~IWindowController() override; | 72 | ~IWindowController() override; |
| 70 | 73 | ||
| 71 | private: | 74 | private: |
| 72 | void GetAppletResourceUserId(Kernel::HLERequestContext& ctx); | 75 | void GetAppletResourceUserId(Kernel::HLERequestContext& ctx); |
| 73 | void AcquireForegroundRights(Kernel::HLERequestContext& ctx); | 76 | void AcquireForegroundRights(Kernel::HLERequestContext& ctx); |
| 77 | |||
| 78 | Core::System& system; | ||
| 74 | }; | 79 | }; |
| 75 | 80 | ||
| 76 | class IAudioController final : public ServiceFramework<IAudioController> { | 81 | class IAudioController final : public ServiceFramework<IAudioController> { |
| @@ -113,7 +118,8 @@ public: | |||
| 113 | 118 | ||
| 114 | class ISelfController final : public ServiceFramework<ISelfController> { | 119 | class ISelfController final : public ServiceFramework<ISelfController> { |
| 115 | public: | 120 | public: |
| 116 | explicit ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger); | 121 | explicit ISelfController(Core::System& system_, |
| 122 | std::shared_ptr<NVFlinger::NVFlinger> nvflinger_); | ||
| 117 | ~ISelfController() override; | 123 | ~ISelfController() override; |
| 118 | 124 | ||
| 119 | private: | 125 | private: |
| @@ -133,6 +139,9 @@ private: | |||
| 133 | void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx); | 139 | void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx); |
| 134 | void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); | 140 | void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); |
| 135 | void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); | 141 | void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); |
| 142 | void SetAutoSleepDisabled(Kernel::HLERequestContext& ctx); | ||
| 143 | void IsAutoSleepDisabled(Kernel::HLERequestContext& ctx); | ||
| 144 | void GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx); | ||
| 136 | void GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx); | 145 | void GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx); |
| 137 | 146 | ||
| 138 | std::shared_ptr<NVFlinger::NVFlinger> nvflinger; | 147 | std::shared_ptr<NVFlinger::NVFlinger> nvflinger; |
| @@ -141,11 +150,13 @@ private: | |||
| 141 | 150 | ||
| 142 | u32 idle_time_detection_extension = 0; | 151 | u32 idle_time_detection_extension = 0; |
| 143 | u64 num_fatal_sections_entered = 0; | 152 | u64 num_fatal_sections_entered = 0; |
| 153 | bool is_auto_sleep_disabled = false; | ||
| 144 | }; | 154 | }; |
| 145 | 155 | ||
| 146 | class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> { | 156 | class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> { |
| 147 | public: | 157 | public: |
| 148 | explicit ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_queue); | 158 | explicit ICommonStateGetter(Core::System& system, |
| 159 | std::shared_ptr<AppletMessageQueue> msg_queue); | ||
| 149 | ~ICommonStateGetter() override; | 160 | ~ICommonStateGetter() override; |
| 150 | 161 | ||
| 151 | private: | 162 | private: |
| @@ -167,7 +178,9 @@ private: | |||
| 167 | void GetPerformanceMode(Kernel::HLERequestContext& ctx); | 178 | void GetPerformanceMode(Kernel::HLERequestContext& ctx); |
| 168 | void GetBootMode(Kernel::HLERequestContext& ctx); | 179 | void GetBootMode(Kernel::HLERequestContext& ctx); |
| 169 | void GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx); | 180 | void GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx); |
| 181 | void SetCpuBoostMode(Kernel::HLERequestContext& ctx); | ||
| 170 | 182 | ||
| 183 | Core::System& system; | ||
| 171 | std::shared_ptr<AppletMessageQueue> msg_queue; | 184 | std::shared_ptr<AppletMessageQueue> msg_queue; |
| 172 | }; | 185 | }; |
| 173 | 186 | ||
| @@ -201,7 +214,7 @@ private: | |||
| 201 | 214 | ||
| 202 | class ILibraryAppletCreator final : public ServiceFramework<ILibraryAppletCreator> { | 215 | class ILibraryAppletCreator final : public ServiceFramework<ILibraryAppletCreator> { |
| 203 | public: | 216 | public: |
| 204 | ILibraryAppletCreator(u64 current_process_title_id); | 217 | explicit ILibraryAppletCreator(Core::System& system_); |
| 205 | ~ILibraryAppletCreator() override; | 218 | ~ILibraryAppletCreator() override; |
| 206 | 219 | ||
| 207 | private: | 220 | private: |
| @@ -209,12 +222,12 @@ private: | |||
| 209 | void CreateStorage(Kernel::HLERequestContext& ctx); | 222 | void CreateStorage(Kernel::HLERequestContext& ctx); |
| 210 | void CreateTransferMemoryStorage(Kernel::HLERequestContext& ctx); | 223 | void CreateTransferMemoryStorage(Kernel::HLERequestContext& ctx); |
| 211 | 224 | ||
| 212 | u64 current_process_title_id; | 225 | Core::System& system; |
| 213 | }; | 226 | }; |
| 214 | 227 | ||
| 215 | class IApplicationFunctions final : public ServiceFramework<IApplicationFunctions> { | 228 | class IApplicationFunctions final : public ServiceFramework<IApplicationFunctions> { |
| 216 | public: | 229 | public: |
| 217 | IApplicationFunctions(); | 230 | explicit IApplicationFunctions(Core::System& system_); |
| 218 | ~IApplicationFunctions() override; | 231 | ~IApplicationFunctions() override; |
| 219 | 232 | ||
| 220 | private: | 233 | private: |
| @@ -235,6 +248,10 @@ private: | |||
| 235 | void BeginBlockingHomeButton(Kernel::HLERequestContext& ctx); | 248 | void BeginBlockingHomeButton(Kernel::HLERequestContext& ctx); |
| 236 | void EndBlockingHomeButton(Kernel::HLERequestContext& ctx); | 249 | void EndBlockingHomeButton(Kernel::HLERequestContext& ctx); |
| 237 | void EnableApplicationCrashReport(Kernel::HLERequestContext& ctx); | 250 | void EnableApplicationCrashReport(Kernel::HLERequestContext& ctx); |
| 251 | void GetGpuErrorDetectedSystemEvent(Kernel::HLERequestContext& ctx); | ||
| 252 | |||
| 253 | Kernel::EventPair gpu_error_detected_event; | ||
| 254 | Core::System& system; | ||
| 238 | }; | 255 | }; |
| 239 | 256 | ||
| 240 | class IHomeMenuFunctions final : public ServiceFramework<IHomeMenuFunctions> { | 257 | class IHomeMenuFunctions final : public ServiceFramework<IHomeMenuFunctions> { |
| @@ -268,5 +285,4 @@ public: | |||
| 268 | void InstallInterfaces(SM::ServiceManager& service_manager, | 285 | void InstallInterfaces(SM::ServiceManager& service_manager, |
| 269 | std::shared_ptr<NVFlinger::NVFlinger> nvflinger, Core::System& system); | 286 | std::shared_ptr<NVFlinger::NVFlinger> nvflinger, Core::System& system); |
| 270 | 287 | ||
| 271 | } // namespace AM | 288 | } // namespace Service::AM |
| 272 | } // namespace Service | ||
diff --git a/src/core/hle/service/am/applet_ae.cpp b/src/core/hle/service/am/applet_ae.cpp index fe5beb8f9..e454b77d8 100644 --- a/src/core/hle/service/am/applet_ae.cpp +++ b/src/core/hle/service/am/applet_ae.cpp | |||
| @@ -42,7 +42,7 @@ private: | |||
| 42 | 42 | ||
| 43 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 43 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 44 | rb.Push(RESULT_SUCCESS); | 44 | rb.Push(RESULT_SUCCESS); |
| 45 | rb.PushIpcInterface<ICommonStateGetter>(msg_queue); | 45 | rb.PushIpcInterface<ICommonStateGetter>(system, msg_queue); |
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | void GetSelfController(Kernel::HLERequestContext& ctx) { | 48 | void GetSelfController(Kernel::HLERequestContext& ctx) { |
| @@ -50,7 +50,7 @@ private: | |||
| 50 | 50 | ||
| 51 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 51 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 52 | rb.Push(RESULT_SUCCESS); | 52 | rb.Push(RESULT_SUCCESS); |
| 53 | rb.PushIpcInterface<ISelfController>(nvflinger); | 53 | rb.PushIpcInterface<ISelfController>(system, nvflinger); |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | void GetWindowController(Kernel::HLERequestContext& ctx) { | 56 | void GetWindowController(Kernel::HLERequestContext& ctx) { |
| @@ -58,7 +58,7 @@ private: | |||
| 58 | 58 | ||
| 59 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 59 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 60 | rb.Push(RESULT_SUCCESS); | 60 | rb.Push(RESULT_SUCCESS); |
| 61 | rb.PushIpcInterface<IWindowController>(); | 61 | rb.PushIpcInterface<IWindowController>(system); |
| 62 | } | 62 | } |
| 63 | 63 | ||
| 64 | void GetAudioController(Kernel::HLERequestContext& ctx) { | 64 | void GetAudioController(Kernel::HLERequestContext& ctx) { |
| @@ -98,7 +98,7 @@ private: | |||
| 98 | 98 | ||
| 99 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 99 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 100 | rb.Push(RESULT_SUCCESS); | 100 | rb.Push(RESULT_SUCCESS); |
| 101 | rb.PushIpcInterface<ILibraryAppletCreator>(system.CurrentProcess()->GetTitleID()); | 101 | rb.PushIpcInterface<ILibraryAppletCreator>(system); |
| 102 | } | 102 | } |
| 103 | 103 | ||
| 104 | void GetApplicationFunctions(Kernel::HLERequestContext& ctx) { | 104 | void GetApplicationFunctions(Kernel::HLERequestContext& ctx) { |
| @@ -106,7 +106,7 @@ private: | |||
| 106 | 106 | ||
| 107 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 107 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 108 | rb.Push(RESULT_SUCCESS); | 108 | rb.Push(RESULT_SUCCESS); |
| 109 | rb.PushIpcInterface<IApplicationFunctions>(); | 109 | rb.PushIpcInterface<IApplicationFunctions>(system); |
| 110 | } | 110 | } |
| 111 | 111 | ||
| 112 | std::shared_ptr<NVFlinger::NVFlinger> nvflinger; | 112 | std::shared_ptr<NVFlinger::NVFlinger> nvflinger; |
| @@ -146,7 +146,7 @@ private: | |||
| 146 | 146 | ||
| 147 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 147 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 148 | rb.Push(RESULT_SUCCESS); | 148 | rb.Push(RESULT_SUCCESS); |
| 149 | rb.PushIpcInterface<ICommonStateGetter>(msg_queue); | 149 | rb.PushIpcInterface<ICommonStateGetter>(system, msg_queue); |
| 150 | } | 150 | } |
| 151 | 151 | ||
| 152 | void GetSelfController(Kernel::HLERequestContext& ctx) { | 152 | void GetSelfController(Kernel::HLERequestContext& ctx) { |
| @@ -154,7 +154,7 @@ private: | |||
| 154 | 154 | ||
| 155 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 155 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 156 | rb.Push(RESULT_SUCCESS); | 156 | rb.Push(RESULT_SUCCESS); |
| 157 | rb.PushIpcInterface<ISelfController>(nvflinger); | 157 | rb.PushIpcInterface<ISelfController>(system, nvflinger); |
| 158 | } | 158 | } |
| 159 | 159 | ||
| 160 | void GetWindowController(Kernel::HLERequestContext& ctx) { | 160 | void GetWindowController(Kernel::HLERequestContext& ctx) { |
| @@ -162,7 +162,7 @@ private: | |||
| 162 | 162 | ||
| 163 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 163 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 164 | rb.Push(RESULT_SUCCESS); | 164 | rb.Push(RESULT_SUCCESS); |
| 165 | rb.PushIpcInterface<IWindowController>(); | 165 | rb.PushIpcInterface<IWindowController>(system); |
| 166 | } | 166 | } |
| 167 | 167 | ||
| 168 | void GetAudioController(Kernel::HLERequestContext& ctx) { | 168 | void GetAudioController(Kernel::HLERequestContext& ctx) { |
| @@ -194,7 +194,7 @@ private: | |||
| 194 | 194 | ||
| 195 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 195 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 196 | rb.Push(RESULT_SUCCESS); | 196 | rb.Push(RESULT_SUCCESS); |
| 197 | rb.PushIpcInterface<ILibraryAppletCreator>(system.CurrentProcess()->GetTitleID()); | 197 | rb.PushIpcInterface<ILibraryAppletCreator>(system); |
| 198 | } | 198 | } |
| 199 | 199 | ||
| 200 | void GetHomeMenuFunctions(Kernel::HLERequestContext& ctx) { | 200 | void GetHomeMenuFunctions(Kernel::HLERequestContext& ctx) { |
diff --git a/src/core/hle/service/am/applet_oe.cpp b/src/core/hle/service/am/applet_oe.cpp index 6e255fe95..a2ffaa440 100644 --- a/src/core/hle/service/am/applet_oe.cpp +++ b/src/core/hle/service/am/applet_oe.cpp | |||
| @@ -4,7 +4,6 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/logging/log.h" | 5 | #include "common/logging/log.h" |
| 6 | #include "core/hle/ipc_helpers.h" | 6 | #include "core/hle/ipc_helpers.h" |
| 7 | #include "core/hle/kernel/process.h" | ||
| 8 | #include "core/hle/service/am/am.h" | 7 | #include "core/hle/service/am/am.h" |
| 9 | #include "core/hle/service/am/applet_oe.h" | 8 | #include "core/hle/service/am/applet_oe.h" |
| 10 | #include "core/hle/service/nvflinger/nvflinger.h" | 9 | #include "core/hle/service/nvflinger/nvflinger.h" |
| @@ -64,7 +63,7 @@ private: | |||
| 64 | 63 | ||
| 65 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 64 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 66 | rb.Push(RESULT_SUCCESS); | 65 | rb.Push(RESULT_SUCCESS); |
| 67 | rb.PushIpcInterface<IWindowController>(); | 66 | rb.PushIpcInterface<IWindowController>(system); |
| 68 | } | 67 | } |
| 69 | 68 | ||
| 70 | void GetSelfController(Kernel::HLERequestContext& ctx) { | 69 | void GetSelfController(Kernel::HLERequestContext& ctx) { |
| @@ -72,7 +71,7 @@ private: | |||
| 72 | 71 | ||
| 73 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 72 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 74 | rb.Push(RESULT_SUCCESS); | 73 | rb.Push(RESULT_SUCCESS); |
| 75 | rb.PushIpcInterface<ISelfController>(nvflinger); | 74 | rb.PushIpcInterface<ISelfController>(system, nvflinger); |
| 76 | } | 75 | } |
| 77 | 76 | ||
| 78 | void GetCommonStateGetter(Kernel::HLERequestContext& ctx) { | 77 | void GetCommonStateGetter(Kernel::HLERequestContext& ctx) { |
| @@ -80,7 +79,7 @@ private: | |||
| 80 | 79 | ||
| 81 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 80 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 82 | rb.Push(RESULT_SUCCESS); | 81 | rb.Push(RESULT_SUCCESS); |
| 83 | rb.PushIpcInterface<ICommonStateGetter>(msg_queue); | 82 | rb.PushIpcInterface<ICommonStateGetter>(system, msg_queue); |
| 84 | } | 83 | } |
| 85 | 84 | ||
| 86 | void GetLibraryAppletCreator(Kernel::HLERequestContext& ctx) { | 85 | void GetLibraryAppletCreator(Kernel::HLERequestContext& ctx) { |
| @@ -88,7 +87,7 @@ private: | |||
| 88 | 87 | ||
| 89 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 88 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 90 | rb.Push(RESULT_SUCCESS); | 89 | rb.Push(RESULT_SUCCESS); |
| 91 | rb.PushIpcInterface<ILibraryAppletCreator>(system.CurrentProcess()->GetTitleID()); | 90 | rb.PushIpcInterface<ILibraryAppletCreator>(system); |
| 92 | } | 91 | } |
| 93 | 92 | ||
| 94 | void GetApplicationFunctions(Kernel::HLERequestContext& ctx) { | 93 | void GetApplicationFunctions(Kernel::HLERequestContext& ctx) { |
| @@ -96,7 +95,7 @@ private: | |||
| 96 | 95 | ||
| 97 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 96 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 98 | rb.Push(RESULT_SUCCESS); | 97 | rb.Push(RESULT_SUCCESS); |
| 99 | rb.PushIpcInterface<IApplicationFunctions>(); | 98 | rb.PushIpcInterface<IApplicationFunctions>(system); |
| 100 | } | 99 | } |
| 101 | 100 | ||
| 102 | std::shared_ptr<NVFlinger::NVFlinger> nvflinger; | 101 | std::shared_ptr<NVFlinger::NVFlinger> nvflinger; |
diff --git a/src/core/hle/service/am/applets/applets.cpp b/src/core/hle/service/am/applets/applets.cpp index 6bdba2468..d2e35362f 100644 --- a/src/core/hle/service/am/applets/applets.cpp +++ b/src/core/hle/service/am/applets/applets.cpp | |||
| @@ -23,8 +23,7 @@ | |||
| 23 | 23 | ||
| 24 | namespace Service::AM::Applets { | 24 | namespace Service::AM::Applets { |
| 25 | 25 | ||
| 26 | AppletDataBroker::AppletDataBroker() { | 26 | AppletDataBroker::AppletDataBroker(Kernel::KernelCore& kernel) { |
| 27 | auto& kernel = Core::System::GetInstance().Kernel(); | ||
| 28 | state_changed_event = Kernel::WritableEvent::CreateEventPair( | 27 | state_changed_event = Kernel::WritableEvent::CreateEventPair( |
| 29 | kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:StateChangedEvent"); | 28 | kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:StateChangedEvent"); |
| 30 | pop_out_data_event = Kernel::WritableEvent::CreateEventPair( | 29 | pop_out_data_event = Kernel::WritableEvent::CreateEventPair( |
| @@ -121,7 +120,7 @@ Kernel::SharedPtr<Kernel::ReadableEvent> AppletDataBroker::GetStateChangedEvent( | |||
| 121 | return state_changed_event.readable; | 120 | return state_changed_event.readable; |
| 122 | } | 121 | } |
| 123 | 122 | ||
| 124 | Applet::Applet() = default; | 123 | Applet::Applet(Kernel::KernelCore& kernel_) : broker{kernel_} {} |
| 125 | 124 | ||
| 126 | Applet::~Applet() = default; | 125 | Applet::~Applet() = default; |
| 127 | 126 | ||
| @@ -154,7 +153,7 @@ AppletFrontendSet::AppletFrontendSet(AppletFrontendSet&&) noexcept = default; | |||
| 154 | 153 | ||
| 155 | AppletFrontendSet& AppletFrontendSet::operator=(AppletFrontendSet&&) noexcept = default; | 154 | AppletFrontendSet& AppletFrontendSet::operator=(AppletFrontendSet&&) noexcept = default; |
| 156 | 155 | ||
| 157 | AppletManager::AppletManager() = default; | 156 | AppletManager::AppletManager(Core::System& system_) : system{system_} {} |
| 158 | 157 | ||
| 159 | AppletManager::~AppletManager() = default; | 158 | AppletManager::~AppletManager() = default; |
| 160 | 159 | ||
| @@ -216,28 +215,28 @@ void AppletManager::ClearAll() { | |||
| 216 | frontend = {}; | 215 | frontend = {}; |
| 217 | } | 216 | } |
| 218 | 217 | ||
| 219 | std::shared_ptr<Applet> AppletManager::GetApplet(AppletId id, u64 current_process_title_id) const { | 218 | std::shared_ptr<Applet> AppletManager::GetApplet(AppletId id) const { |
| 220 | switch (id) { | 219 | switch (id) { |
| 221 | case AppletId::Auth: | 220 | case AppletId::Auth: |
| 222 | return std::make_shared<Auth>(*frontend.parental_controls); | 221 | return std::make_shared<Auth>(system, *frontend.parental_controls); |
| 223 | case AppletId::Error: | 222 | case AppletId::Error: |
| 224 | return std::make_shared<Error>(*frontend.error); | 223 | return std::make_shared<Error>(system, *frontend.error); |
| 225 | case AppletId::ProfileSelect: | 224 | case AppletId::ProfileSelect: |
| 226 | return std::make_shared<ProfileSelect>(*frontend.profile_select); | 225 | return std::make_shared<ProfileSelect>(system, *frontend.profile_select); |
| 227 | case AppletId::SoftwareKeyboard: | 226 | case AppletId::SoftwareKeyboard: |
| 228 | return std::make_shared<SoftwareKeyboard>(*frontend.software_keyboard); | 227 | return std::make_shared<SoftwareKeyboard>(system, *frontend.software_keyboard); |
| 229 | case AppletId::PhotoViewer: | 228 | case AppletId::PhotoViewer: |
| 230 | return std::make_shared<PhotoViewer>(*frontend.photo_viewer); | 229 | return std::make_shared<PhotoViewer>(system, *frontend.photo_viewer); |
| 231 | case AppletId::LibAppletShop: | 230 | case AppletId::LibAppletShop: |
| 232 | return std::make_shared<WebBrowser>(*frontend.web_browser, current_process_title_id, | 231 | return std::make_shared<WebBrowser>(system, *frontend.web_browser, |
| 233 | frontend.e_commerce.get()); | 232 | frontend.e_commerce.get()); |
| 234 | case AppletId::LibAppletOff: | 233 | case AppletId::LibAppletOff: |
| 235 | return std::make_shared<WebBrowser>(*frontend.web_browser, current_process_title_id); | 234 | return std::make_shared<WebBrowser>(system, *frontend.web_browser); |
| 236 | default: | 235 | default: |
| 237 | UNIMPLEMENTED_MSG( | 236 | UNIMPLEMENTED_MSG( |
| 238 | "No backend implementation exists for applet_id={:02X}! Falling back to stub applet.", | 237 | "No backend implementation exists for applet_id={:02X}! Falling back to stub applet.", |
| 239 | static_cast<u8>(id)); | 238 | static_cast<u8>(id)); |
| 240 | return std::make_shared<StubApplet>(id); | 239 | return std::make_shared<StubApplet>(system, id); |
| 241 | } | 240 | } |
| 242 | } | 241 | } |
| 243 | 242 | ||
diff --git a/src/core/hle/service/am/applets/applets.h b/src/core/hle/service/am/applets/applets.h index adc973dad..764c3418c 100644 --- a/src/core/hle/service/am/applets/applets.h +++ b/src/core/hle/service/am/applets/applets.h | |||
| @@ -12,6 +12,10 @@ | |||
| 12 | 12 | ||
| 13 | union ResultCode; | 13 | union ResultCode; |
| 14 | 14 | ||
| 15 | namespace Core { | ||
| 16 | class System; | ||
| 17 | } | ||
| 18 | |||
| 15 | namespace Core::Frontend { | 19 | namespace Core::Frontend { |
| 16 | class ECommerceApplet; | 20 | class ECommerceApplet; |
| 17 | class ErrorApplet; | 21 | class ErrorApplet; |
| @@ -22,6 +26,10 @@ class SoftwareKeyboardApplet; | |||
| 22 | class WebBrowserApplet; | 26 | class WebBrowserApplet; |
| 23 | } // namespace Core::Frontend | 27 | } // namespace Core::Frontend |
| 24 | 28 | ||
| 29 | namespace Kernel { | ||
| 30 | class KernelCore; | ||
| 31 | } | ||
| 32 | |||
| 25 | namespace Service::AM { | 33 | namespace Service::AM { |
| 26 | 34 | ||
| 27 | class IStorage; | 35 | class IStorage; |
| @@ -53,7 +61,7 @@ enum class AppletId : u32 { | |||
| 53 | 61 | ||
| 54 | class AppletDataBroker final { | 62 | class AppletDataBroker final { |
| 55 | public: | 63 | public: |
| 56 | AppletDataBroker(); | 64 | explicit AppletDataBroker(Kernel::KernelCore& kernel_); |
| 57 | ~AppletDataBroker(); | 65 | ~AppletDataBroker(); |
| 58 | 66 | ||
| 59 | struct RawChannelData { | 67 | struct RawChannelData { |
| @@ -108,7 +116,7 @@ private: | |||
| 108 | 116 | ||
| 109 | class Applet { | 117 | class Applet { |
| 110 | public: | 118 | public: |
| 111 | Applet(); | 119 | explicit Applet(Kernel::KernelCore& kernel_); |
| 112 | virtual ~Applet(); | 120 | virtual ~Applet(); |
| 113 | 121 | ||
| 114 | virtual void Initialize(); | 122 | virtual void Initialize(); |
| @@ -179,7 +187,7 @@ struct AppletFrontendSet { | |||
| 179 | 187 | ||
| 180 | class AppletManager { | 188 | class AppletManager { |
| 181 | public: | 189 | public: |
| 182 | AppletManager(); | 190 | explicit AppletManager(Core::System& system_); |
| 183 | ~AppletManager(); | 191 | ~AppletManager(); |
| 184 | 192 | ||
| 185 | void SetAppletFrontendSet(AppletFrontendSet set); | 193 | void SetAppletFrontendSet(AppletFrontendSet set); |
| @@ -187,10 +195,11 @@ public: | |||
| 187 | void SetDefaultAppletsIfMissing(); | 195 | void SetDefaultAppletsIfMissing(); |
| 188 | void ClearAll(); | 196 | void ClearAll(); |
| 189 | 197 | ||
| 190 | std::shared_ptr<Applet> GetApplet(AppletId id, u64 current_process_title_id) const; | 198 | std::shared_ptr<Applet> GetApplet(AppletId id) const; |
| 191 | 199 | ||
| 192 | private: | 200 | private: |
| 193 | AppletFrontendSet frontend; | 201 | AppletFrontendSet frontend; |
| 202 | Core::System& system; | ||
| 194 | }; | 203 | }; |
| 195 | 204 | ||
| 196 | } // namespace Applets | 205 | } // namespace Applets |
diff --git a/src/core/hle/service/am/applets/error.cpp b/src/core/hle/service/am/applets/error.cpp index af3a900f8..a7db26725 100644 --- a/src/core/hle/service/am/applets/error.cpp +++ b/src/core/hle/service/am/applets/error.cpp | |||
| @@ -85,7 +85,8 @@ ResultCode Decode64BitError(u64 error) { | |||
| 85 | 85 | ||
| 86 | } // Anonymous namespace | 86 | } // Anonymous namespace |
| 87 | 87 | ||
| 88 | Error::Error(const Core::Frontend::ErrorApplet& frontend) : frontend(frontend) {} | 88 | Error::Error(Core::System& system_, const Core::Frontend::ErrorApplet& frontend_) |
| 89 | : Applet{system_.Kernel()}, frontend(frontend_), system{system_} {} | ||
| 89 | 90 | ||
| 90 | Error::~Error() = default; | 91 | Error::~Error() = default; |
| 91 | 92 | ||
| @@ -145,8 +146,8 @@ void Error::Execute() { | |||
| 145 | } | 146 | } |
| 146 | 147 | ||
| 147 | const auto callback = [this] { DisplayCompleted(); }; | 148 | const auto callback = [this] { DisplayCompleted(); }; |
| 148 | const auto title_id = Core::CurrentProcess()->GetTitleID(); | 149 | const auto title_id = system.CurrentProcess()->GetTitleID(); |
| 149 | const auto& reporter{Core::System::GetInstance().GetReporter()}; | 150 | const auto& reporter{system.GetReporter()}; |
| 150 | 151 | ||
| 151 | switch (mode) { | 152 | switch (mode) { |
| 152 | case ErrorAppletMode::ShowError: | 153 | case ErrorAppletMode::ShowError: |
diff --git a/src/core/hle/service/am/applets/error.h b/src/core/hle/service/am/applets/error.h index a3590d181..a105cdb0c 100644 --- a/src/core/hle/service/am/applets/error.h +++ b/src/core/hle/service/am/applets/error.h | |||
| @@ -7,6 +7,10 @@ | |||
| 7 | #include "core/hle/result.h" | 7 | #include "core/hle/result.h" |
| 8 | #include "core/hle/service/am/applets/applets.h" | 8 | #include "core/hle/service/am/applets/applets.h" |
| 9 | 9 | ||
| 10 | namespace Core { | ||
| 11 | class System; | ||
| 12 | } | ||
| 13 | |||
| 10 | namespace Service::AM::Applets { | 14 | namespace Service::AM::Applets { |
| 11 | 15 | ||
| 12 | enum class ErrorAppletMode : u8 { | 16 | enum class ErrorAppletMode : u8 { |
| @@ -21,7 +25,7 @@ enum class ErrorAppletMode : u8 { | |||
| 21 | 25 | ||
| 22 | class Error final : public Applet { | 26 | class Error final : public Applet { |
| 23 | public: | 27 | public: |
| 24 | explicit Error(const Core::Frontend::ErrorApplet& frontend); | 28 | explicit Error(Core::System& system_, const Core::Frontend::ErrorApplet& frontend_); |
| 25 | ~Error() override; | 29 | ~Error() override; |
| 26 | 30 | ||
| 27 | void Initialize() override; | 31 | void Initialize() override; |
| @@ -42,6 +46,7 @@ private: | |||
| 42 | std::unique_ptr<ErrorArguments> args; | 46 | std::unique_ptr<ErrorArguments> args; |
| 43 | 47 | ||
| 44 | bool complete = false; | 48 | bool complete = false; |
| 49 | Core::System& system; | ||
| 45 | }; | 50 | }; |
| 46 | 51 | ||
| 47 | } // namespace Service::AM::Applets | 52 | } // namespace Service::AM::Applets |
diff --git a/src/core/hle/service/am/applets/general_backend.cpp b/src/core/hle/service/am/applets/general_backend.cpp index e0def8dff..328438a1d 100644 --- a/src/core/hle/service/am/applets/general_backend.cpp +++ b/src/core/hle/service/am/applets/general_backend.cpp | |||
| @@ -37,7 +37,8 @@ static void LogCurrentStorage(AppletDataBroker& broker, std::string_view prefix) | |||
| 37 | } | 37 | } |
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | Auth::Auth(Core::Frontend::ParentalControlsApplet& frontend) : frontend(frontend) {} | 40 | Auth::Auth(Core::System& system_, Core::Frontend::ParentalControlsApplet& frontend_) |
| 41 | : Applet{system_.Kernel()}, frontend(frontend_) {} | ||
| 41 | 42 | ||
| 42 | Auth::~Auth() = default; | 43 | Auth::~Auth() = default; |
| 43 | 44 | ||
| @@ -151,7 +152,8 @@ void Auth::AuthFinished(bool successful) { | |||
| 151 | broker.SignalStateChanged(); | 152 | broker.SignalStateChanged(); |
| 152 | } | 153 | } |
| 153 | 154 | ||
| 154 | PhotoViewer::PhotoViewer(const Core::Frontend::PhotoViewerApplet& frontend) : frontend(frontend) {} | 155 | PhotoViewer::PhotoViewer(Core::System& system_, const Core::Frontend::PhotoViewerApplet& frontend_) |
| 156 | : Applet{system_.Kernel()}, frontend(frontend_), system{system_} {} | ||
| 155 | 157 | ||
| 156 | PhotoViewer::~PhotoViewer() = default; | 158 | PhotoViewer::~PhotoViewer() = default; |
| 157 | 159 | ||
| @@ -185,7 +187,7 @@ void PhotoViewer::Execute() { | |||
| 185 | const auto callback = [this] { ViewFinished(); }; | 187 | const auto callback = [this] { ViewFinished(); }; |
| 186 | switch (mode) { | 188 | switch (mode) { |
| 187 | case PhotoViewerAppletMode::CurrentApp: | 189 | case PhotoViewerAppletMode::CurrentApp: |
| 188 | frontend.ShowPhotosForApplication(Core::CurrentProcess()->GetTitleID(), callback); | 190 | frontend.ShowPhotosForApplication(system.CurrentProcess()->GetTitleID(), callback); |
| 189 | break; | 191 | break; |
| 190 | case PhotoViewerAppletMode::AllApps: | 192 | case PhotoViewerAppletMode::AllApps: |
| 191 | frontend.ShowAllPhotos(callback); | 193 | frontend.ShowAllPhotos(callback); |
| @@ -200,7 +202,8 @@ void PhotoViewer::ViewFinished() { | |||
| 200 | broker.SignalStateChanged(); | 202 | broker.SignalStateChanged(); |
| 201 | } | 203 | } |
| 202 | 204 | ||
| 203 | StubApplet::StubApplet(AppletId id) : id(id) {} | 205 | StubApplet::StubApplet(Core::System& system_, AppletId id_) |
| 206 | : Applet{system_.Kernel()}, id(id_), system{system_} {} | ||
| 204 | 207 | ||
| 205 | StubApplet::~StubApplet() = default; | 208 | StubApplet::~StubApplet() = default; |
| 206 | 209 | ||
| @@ -209,7 +212,7 @@ void StubApplet::Initialize() { | |||
| 209 | Applet::Initialize(); | 212 | Applet::Initialize(); |
| 210 | 213 | ||
| 211 | const auto data = broker.PeekDataToAppletForDebug(); | 214 | const auto data = broker.PeekDataToAppletForDebug(); |
| 212 | Core::System::GetInstance().GetReporter().SaveUnimplementedAppletReport( | 215 | system.GetReporter().SaveUnimplementedAppletReport( |
| 213 | static_cast<u32>(id), common_args.arguments_version, common_args.library_version, | 216 | static_cast<u32>(id), common_args.arguments_version, common_args.library_version, |
| 214 | common_args.theme_color, common_args.play_startup_sound, common_args.system_tick, | 217 | common_args.theme_color, common_args.play_startup_sound, common_args.system_tick, |
| 215 | data.normal, data.interactive); | 218 | data.normal, data.interactive); |
diff --git a/src/core/hle/service/am/applets/general_backend.h b/src/core/hle/service/am/applets/general_backend.h index 0da252044..cfa2df369 100644 --- a/src/core/hle/service/am/applets/general_backend.h +++ b/src/core/hle/service/am/applets/general_backend.h | |||
| @@ -6,6 +6,10 @@ | |||
| 6 | 6 | ||
| 7 | #include "core/hle/service/am/applets/applets.h" | 7 | #include "core/hle/service/am/applets/applets.h" |
| 8 | 8 | ||
| 9 | namespace Core { | ||
| 10 | class System; | ||
| 11 | } | ||
| 12 | |||
| 9 | namespace Service::AM::Applets { | 13 | namespace Service::AM::Applets { |
| 10 | 14 | ||
| 11 | enum class AuthAppletType : u32 { | 15 | enum class AuthAppletType : u32 { |
| @@ -16,7 +20,7 @@ enum class AuthAppletType : u32 { | |||
| 16 | 20 | ||
| 17 | class Auth final : public Applet { | 21 | class Auth final : public Applet { |
| 18 | public: | 22 | public: |
| 19 | explicit Auth(Core::Frontend::ParentalControlsApplet& frontend); | 23 | explicit Auth(Core::System& system_, Core::Frontend::ParentalControlsApplet& frontend_); |
| 20 | ~Auth() override; | 24 | ~Auth() override; |
| 21 | 25 | ||
| 22 | void Initialize() override; | 26 | void Initialize() override; |
| @@ -45,7 +49,7 @@ enum class PhotoViewerAppletMode : u8 { | |||
| 45 | 49 | ||
| 46 | class PhotoViewer final : public Applet { | 50 | class PhotoViewer final : public Applet { |
| 47 | public: | 51 | public: |
| 48 | explicit PhotoViewer(const Core::Frontend::PhotoViewerApplet& frontend); | 52 | explicit PhotoViewer(Core::System& system_, const Core::Frontend::PhotoViewerApplet& frontend_); |
| 49 | ~PhotoViewer() override; | 53 | ~PhotoViewer() override; |
| 50 | 54 | ||
| 51 | void Initialize() override; | 55 | void Initialize() override; |
| @@ -60,11 +64,12 @@ private: | |||
| 60 | const Core::Frontend::PhotoViewerApplet& frontend; | 64 | const Core::Frontend::PhotoViewerApplet& frontend; |
| 61 | bool complete = false; | 65 | bool complete = false; |
| 62 | PhotoViewerAppletMode mode = PhotoViewerAppletMode::CurrentApp; | 66 | PhotoViewerAppletMode mode = PhotoViewerAppletMode::CurrentApp; |
| 67 | Core::System& system; | ||
| 63 | }; | 68 | }; |
| 64 | 69 | ||
| 65 | class StubApplet final : public Applet { | 70 | class StubApplet final : public Applet { |
| 66 | public: | 71 | public: |
| 67 | explicit StubApplet(AppletId id); | 72 | explicit StubApplet(Core::System& system_, AppletId id_); |
| 68 | ~StubApplet() override; | 73 | ~StubApplet() override; |
| 69 | 74 | ||
| 70 | void Initialize() override; | 75 | void Initialize() override; |
| @@ -76,6 +81,7 @@ public: | |||
| 76 | 81 | ||
| 77 | private: | 82 | private: |
| 78 | AppletId id; | 83 | AppletId id; |
| 84 | Core::System& system; | ||
| 79 | }; | 85 | }; |
| 80 | 86 | ||
| 81 | } // namespace Service::AM::Applets | 87 | } // namespace Service::AM::Applets |
diff --git a/src/core/hle/service/am/applets/profile_select.cpp b/src/core/hle/service/am/applets/profile_select.cpp index 57b5419e8..3eba696ca 100644 --- a/src/core/hle/service/am/applets/profile_select.cpp +++ b/src/core/hle/service/am/applets/profile_select.cpp | |||
| @@ -15,8 +15,9 @@ namespace Service::AM::Applets { | |||
| 15 | 15 | ||
| 16 | constexpr ResultCode ERR_USER_CANCELLED_SELECTION{ErrorModule::Account, 1}; | 16 | constexpr ResultCode ERR_USER_CANCELLED_SELECTION{ErrorModule::Account, 1}; |
| 17 | 17 | ||
| 18 | ProfileSelect::ProfileSelect(const Core::Frontend::ProfileSelectApplet& frontend) | 18 | ProfileSelect::ProfileSelect(Core::System& system_, |
| 19 | : frontend(frontend) {} | 19 | const Core::Frontend::ProfileSelectApplet& frontend_) |
| 20 | : Applet{system_.Kernel()}, frontend(frontend_) {} | ||
| 20 | 21 | ||
| 21 | ProfileSelect::~ProfileSelect() = default; | 22 | ProfileSelect::~ProfileSelect() = default; |
| 22 | 23 | ||
diff --git a/src/core/hle/service/am/applets/profile_select.h b/src/core/hle/service/am/applets/profile_select.h index 563cd744a..16364ead7 100644 --- a/src/core/hle/service/am/applets/profile_select.h +++ b/src/core/hle/service/am/applets/profile_select.h | |||
| @@ -11,6 +11,10 @@ | |||
| 11 | #include "core/hle/result.h" | 11 | #include "core/hle/result.h" |
| 12 | #include "core/hle/service/am/applets/applets.h" | 12 | #include "core/hle/service/am/applets/applets.h" |
| 13 | 13 | ||
| 14 | namespace Core { | ||
| 15 | class System; | ||
| 16 | } | ||
| 17 | |||
| 14 | namespace Service::AM::Applets { | 18 | namespace Service::AM::Applets { |
| 15 | 19 | ||
| 16 | struct UserSelectionConfig { | 20 | struct UserSelectionConfig { |
| @@ -29,7 +33,8 @@ static_assert(sizeof(UserSelectionOutput) == 0x18, "UserSelectionOutput has inco | |||
| 29 | 33 | ||
| 30 | class ProfileSelect final : public Applet { | 34 | class ProfileSelect final : public Applet { |
| 31 | public: | 35 | public: |
| 32 | explicit ProfileSelect(const Core::Frontend::ProfileSelectApplet& frontend); | 36 | explicit ProfileSelect(Core::System& system_, |
| 37 | const Core::Frontend::ProfileSelectApplet& frontend_); | ||
| 33 | ~ProfileSelect() override; | 38 | ~ProfileSelect() override; |
| 34 | 39 | ||
| 35 | void Initialize() override; | 40 | void Initialize() override; |
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp index e197990f7..748559cd0 100644 --- a/src/core/hle/service/am/applets/software_keyboard.cpp +++ b/src/core/hle/service/am/applets/software_keyboard.cpp | |||
| @@ -39,8 +39,9 @@ static Core::Frontend::SoftwareKeyboardParameters ConvertToFrontendParameters( | |||
| 39 | return params; | 39 | return params; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | SoftwareKeyboard::SoftwareKeyboard(const Core::Frontend::SoftwareKeyboardApplet& frontend) | 42 | SoftwareKeyboard::SoftwareKeyboard(Core::System& system_, |
| 43 | : frontend(frontend) {} | 43 | const Core::Frontend::SoftwareKeyboardApplet& frontend_) |
| 44 | : Applet{system_.Kernel()}, frontend(frontend_) {} | ||
| 44 | 45 | ||
| 45 | SoftwareKeyboard::~SoftwareKeyboard() = default; | 46 | SoftwareKeyboard::~SoftwareKeyboard() = default; |
| 46 | 47 | ||
diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h index 0fbc43e51..ef4801fc6 100644 --- a/src/core/hle/service/am/applets/software_keyboard.h +++ b/src/core/hle/service/am/applets/software_keyboard.h | |||
| @@ -16,6 +16,10 @@ | |||
| 16 | 16 | ||
| 17 | union ResultCode; | 17 | union ResultCode; |
| 18 | 18 | ||
| 19 | namespace Core { | ||
| 20 | class System; | ||
| 21 | } | ||
| 22 | |||
| 19 | namespace Service::AM::Applets { | 23 | namespace Service::AM::Applets { |
| 20 | 24 | ||
| 21 | enum class KeysetDisable : u32 { | 25 | enum class KeysetDisable : u32 { |
| @@ -55,7 +59,8 @@ static_assert(sizeof(KeyboardConfig) == 0x3E0, "KeyboardConfig has incorrect siz | |||
| 55 | 59 | ||
| 56 | class SoftwareKeyboard final : public Applet { | 60 | class SoftwareKeyboard final : public Applet { |
| 57 | public: | 61 | public: |
| 58 | explicit SoftwareKeyboard(const Core::Frontend::SoftwareKeyboardApplet& frontend); | 62 | explicit SoftwareKeyboard(Core::System& system_, |
| 63 | const Core::Frontend::SoftwareKeyboardApplet& frontend_); | ||
| 59 | ~SoftwareKeyboard() override; | 64 | ~SoftwareKeyboard() override; |
| 60 | 65 | ||
| 61 | void Initialize() override; | 66 | void Initialize() override; |
diff --git a/src/core/hle/service/am/applets/web_browser.cpp b/src/core/hle/service/am/applets/web_browser.cpp index f3c9fef0e..32283e819 100644 --- a/src/core/hle/service/am/applets/web_browser.cpp +++ b/src/core/hle/service/am/applets/web_browser.cpp | |||
| @@ -190,8 +190,9 @@ std::map<WebArgTLVType, std::vector<u8>> GetWebArguments(const std::vector<u8>& | |||
| 190 | return out; | 190 | return out; |
| 191 | } | 191 | } |
| 192 | 192 | ||
| 193 | FileSys::VirtualFile GetApplicationRomFS(u64 title_id, FileSys::ContentRecordType type) { | 193 | FileSys::VirtualFile GetApplicationRomFS(const Core::System& system, u64 title_id, |
| 194 | const auto& installed{Core::System::GetInstance().GetContentProvider()}; | 194 | FileSys::ContentRecordType type) { |
| 195 | const auto& installed{system.GetContentProvider()}; | ||
| 195 | const auto res = installed.GetEntry(title_id, type); | 196 | const auto res = installed.GetEntry(title_id, type); |
| 196 | 197 | ||
| 197 | if (res != nullptr) { | 198 | if (res != nullptr) { |
| @@ -207,10 +208,10 @@ FileSys::VirtualFile GetApplicationRomFS(u64 title_id, FileSys::ContentRecordTyp | |||
| 207 | 208 | ||
| 208 | } // Anonymous namespace | 209 | } // Anonymous namespace |
| 209 | 210 | ||
| 210 | WebBrowser::WebBrowser(Core::Frontend::WebBrowserApplet& frontend, u64 current_process_title_id, | 211 | WebBrowser::WebBrowser(Core::System& system_, Core::Frontend::WebBrowserApplet& frontend_, |
| 211 | Core::Frontend::ECommerceApplet* frontend_e_commerce) | 212 | Core::Frontend::ECommerceApplet* frontend_e_commerce_) |
| 212 | : frontend(frontend), frontend_e_commerce(frontend_e_commerce), | 213 | : Applet{system_.Kernel()}, frontend(frontend_), |
| 213 | current_process_title_id(current_process_title_id) {} | 214 | frontend_e_commerce(frontend_e_commerce_), system{system_} {} |
| 214 | 215 | ||
| 215 | WebBrowser::~WebBrowser() = default; | 216 | WebBrowser::~WebBrowser() = default; |
| 216 | 217 | ||
| @@ -266,7 +267,7 @@ void WebBrowser::UnpackRomFS() { | |||
| 266 | ASSERT(offline_romfs != nullptr); | 267 | ASSERT(offline_romfs != nullptr); |
| 267 | const auto dir = | 268 | const auto dir = |
| 268 | FileSys::ExtractRomFS(offline_romfs, FileSys::RomFSExtractionType::SingleDiscard); | 269 | FileSys::ExtractRomFS(offline_romfs, FileSys::RomFSExtractionType::SingleDiscard); |
| 269 | const auto& vfs{Core::System::GetInstance().GetFilesystem()}; | 270 | const auto& vfs{system.GetFilesystem()}; |
| 270 | const auto temp_dir = vfs->CreateDirectory(temporary_dir, FileSys::Mode::ReadWrite); | 271 | const auto temp_dir = vfs->CreateDirectory(temporary_dir, FileSys::Mode::ReadWrite); |
| 271 | FileSys::VfsRawCopyD(dir, temp_dir); | 272 | FileSys::VfsRawCopyD(dir, temp_dir); |
| 272 | 273 | ||
| @@ -470,10 +471,10 @@ void WebBrowser::InitializeOffline() { | |||
| 470 | } | 471 | } |
| 471 | 472 | ||
| 472 | if (title_id == 0) { | 473 | if (title_id == 0) { |
| 473 | title_id = current_process_title_id; | 474 | title_id = system.CurrentProcess()->GetTitleID(); |
| 474 | } | 475 | } |
| 475 | 476 | ||
| 476 | offline_romfs = GetApplicationRomFS(title_id, type); | 477 | offline_romfs = GetApplicationRomFS(system, title_id, type); |
| 477 | if (offline_romfs == nullptr) { | 478 | if (offline_romfs == nullptr) { |
| 478 | status = ResultCode(-1); | 479 | status = ResultCode(-1); |
| 479 | LOG_ERROR(Service_AM, "Failed to find offline data for request!"); | 480 | LOG_ERROR(Service_AM, "Failed to find offline data for request!"); |
diff --git a/src/core/hle/service/am/applets/web_browser.h b/src/core/hle/service/am/applets/web_browser.h index 870f57b64..8d4027411 100644 --- a/src/core/hle/service/am/applets/web_browser.h +++ b/src/core/hle/service/am/applets/web_browser.h | |||
| @@ -9,6 +9,10 @@ | |||
| 9 | #include "core/hle/service/am/am.h" | 9 | #include "core/hle/service/am/am.h" |
| 10 | #include "core/hle/service/am/applets/applets.h" | 10 | #include "core/hle/service/am/applets/applets.h" |
| 11 | 11 | ||
| 12 | namespace Core { | ||
| 13 | class System; | ||
| 14 | } | ||
| 15 | |||
| 12 | namespace Service::AM::Applets { | 16 | namespace Service::AM::Applets { |
| 13 | 17 | ||
| 14 | enum class ShimKind : u32; | 18 | enum class ShimKind : u32; |
| @@ -17,8 +21,8 @@ enum class WebArgTLVType : u16; | |||
| 17 | 21 | ||
| 18 | class WebBrowser final : public Applet { | 22 | class WebBrowser final : public Applet { |
| 19 | public: | 23 | public: |
| 20 | WebBrowser(Core::Frontend::WebBrowserApplet& frontend, u64 current_process_title_id, | 24 | WebBrowser(Core::System& system_, Core::Frontend::WebBrowserApplet& frontend_, |
| 21 | Core::Frontend::ECommerceApplet* frontend_e_commerce = nullptr); | 25 | Core::Frontend::ECommerceApplet* frontend_e_commerce_ = nullptr); |
| 22 | 26 | ||
| 23 | ~WebBrowser() override; | 27 | ~WebBrowser() override; |
| 24 | 28 | ||
| @@ -59,8 +63,6 @@ private: | |||
| 59 | bool unpacked = false; | 63 | bool unpacked = false; |
| 60 | ResultCode status = RESULT_SUCCESS; | 64 | ResultCode status = RESULT_SUCCESS; |
| 61 | 65 | ||
| 62 | u64 current_process_title_id; | ||
| 63 | |||
| 64 | ShimKind kind; | 66 | ShimKind kind; |
| 65 | std::map<WebArgTLVType, std::vector<u8>> args; | 67 | std::map<WebArgTLVType, std::vector<u8>> args; |
| 66 | 68 | ||
| @@ -74,6 +76,8 @@ private: | |||
| 74 | std::optional<u128> user_id; | 76 | std::optional<u128> user_id; |
| 75 | std::optional<bool> shop_full_display; | 77 | std::optional<bool> shop_full_display; |
| 76 | std::string shop_extra_parameter; | 78 | std::string shop_extra_parameter; |
| 79 | |||
| 80 | Core::System& system; | ||
| 77 | }; | 81 | }; |
| 78 | 82 | ||
| 79 | } // namespace Service::AM::Applets | 83 | } // namespace Service::AM::Applets |
diff --git a/src/core/hle/service/apm/apm.cpp b/src/core/hle/service/apm/apm.cpp index f3c09bbb1..85bbf5988 100644 --- a/src/core/hle/service/apm/apm.cpp +++ b/src/core/hle/service/apm/apm.cpp | |||
| @@ -2,7 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/logging/log.h" | ||
| 6 | #include "core/hle/ipc_helpers.h" | 5 | #include "core/hle/ipc_helpers.h" |
| 7 | #include "core/hle/service/apm/apm.h" | 6 | #include "core/hle/service/apm/apm.h" |
| 8 | #include "core/hle/service/apm/interface.h" | 7 | #include "core/hle/service/apm/interface.h" |
| @@ -12,11 +11,15 @@ namespace Service::APM { | |||
| 12 | Module::Module() = default; | 11 | Module::Module() = default; |
| 13 | Module::~Module() = default; | 12 | Module::~Module() = default; |
| 14 | 13 | ||
| 15 | void InstallInterfaces(SM::ServiceManager& service_manager) { | 14 | void InstallInterfaces(Core::System& system) { |
| 16 | auto module_ = std::make_shared<Module>(); | 15 | auto module_ = std::make_shared<Module>(); |
| 17 | std::make_shared<APM>(module_, "apm")->InstallAsService(service_manager); | 16 | std::make_shared<APM>(module_, system.GetAPMController(), "apm") |
| 18 | std::make_shared<APM>(module_, "apm:p")->InstallAsService(service_manager); | 17 | ->InstallAsService(system.ServiceManager()); |
| 19 | std::make_shared<APM_Sys>()->InstallAsService(service_manager); | 18 | std::make_shared<APM>(module_, system.GetAPMController(), "apm:p") |
| 19 | ->InstallAsService(system.ServiceManager()); | ||
| 20 | std::make_shared<APM>(module_, system.GetAPMController(), "apm:am") | ||
| 21 | ->InstallAsService(system.ServiceManager()); | ||
| 22 | std::make_shared<APM_Sys>(system.GetAPMController())->InstallAsService(system.ServiceManager()); | ||
| 20 | } | 23 | } |
| 21 | 24 | ||
| 22 | } // namespace Service::APM | 25 | } // namespace Service::APM |
diff --git a/src/core/hle/service/apm/apm.h b/src/core/hle/service/apm/apm.h index 4d7d5bb7c..cf4c2bb11 100644 --- a/src/core/hle/service/apm/apm.h +++ b/src/core/hle/service/apm/apm.h | |||
| @@ -8,11 +8,6 @@ | |||
| 8 | 8 | ||
| 9 | namespace Service::APM { | 9 | namespace Service::APM { |
| 10 | 10 | ||
| 11 | enum class PerformanceMode : u8 { | ||
| 12 | Handheld = 0, | ||
| 13 | Docked = 1, | ||
| 14 | }; | ||
| 15 | |||
| 16 | class Module final { | 11 | class Module final { |
| 17 | public: | 12 | public: |
| 18 | Module(); | 13 | Module(); |
| @@ -20,6 +15,6 @@ public: | |||
| 20 | }; | 15 | }; |
| 21 | 16 | ||
| 22 | /// Registers all AM services with the specified service manager. | 17 | /// Registers all AM services with the specified service manager. |
| 23 | void InstallInterfaces(SM::ServiceManager& service_manager); | 18 | void InstallInterfaces(Core::System& system); |
| 24 | 19 | ||
| 25 | } // namespace Service::APM | 20 | } // namespace Service::APM |
diff --git a/src/core/hle/service/apm/controller.cpp b/src/core/hle/service/apm/controller.cpp new file mode 100644 index 000000000..4376612eb --- /dev/null +++ b/src/core/hle/service/apm/controller.cpp | |||
| @@ -0,0 +1,68 @@ | |||
| 1 | // Copyright 2019 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/logging/log.h" | ||
| 6 | #include "core/core_timing.h" | ||
| 7 | #include "core/hle/service/apm/controller.h" | ||
| 8 | #include "core/settings.h" | ||
| 9 | |||
| 10 | namespace Service::APM { | ||
| 11 | |||
| 12 | constexpr PerformanceConfiguration DEFAULT_PERFORMANCE_CONFIGURATION = | ||
| 13 | PerformanceConfiguration::Config7; | ||
| 14 | |||
| 15 | Controller::Controller(Core::Timing::CoreTiming& core_timing) | ||
| 16 | : core_timing(core_timing), configs{ | ||
| 17 | {PerformanceMode::Handheld, DEFAULT_PERFORMANCE_CONFIGURATION}, | ||
| 18 | {PerformanceMode::Docked, DEFAULT_PERFORMANCE_CONFIGURATION}, | ||
| 19 | } {} | ||
| 20 | |||
| 21 | Controller::~Controller() = default; | ||
| 22 | |||
| 23 | void Controller::SetPerformanceConfiguration(PerformanceMode mode, | ||
| 24 | PerformanceConfiguration config) { | ||
| 25 | static const std::map<PerformanceConfiguration, u32> PCONFIG_TO_SPEED_MAP{ | ||
| 26 | {PerformanceConfiguration::Config1, 1020}, {PerformanceConfiguration::Config2, 1020}, | ||
| 27 | {PerformanceConfiguration::Config3, 1224}, {PerformanceConfiguration::Config4, 1020}, | ||
| 28 | {PerformanceConfiguration::Config5, 1020}, {PerformanceConfiguration::Config6, 1224}, | ||
| 29 | {PerformanceConfiguration::Config7, 1020}, {PerformanceConfiguration::Config8, 1020}, | ||
| 30 | {PerformanceConfiguration::Config9, 1020}, {PerformanceConfiguration::Config10, 1020}, | ||
| 31 | {PerformanceConfiguration::Config11, 1020}, {PerformanceConfiguration::Config12, 1020}, | ||
| 32 | {PerformanceConfiguration::Config13, 1785}, {PerformanceConfiguration::Config14, 1785}, | ||
| 33 | {PerformanceConfiguration::Config15, 1020}, {PerformanceConfiguration::Config16, 1020}, | ||
| 34 | }; | ||
| 35 | |||
| 36 | SetClockSpeed(PCONFIG_TO_SPEED_MAP.find(config)->second); | ||
| 37 | configs.insert_or_assign(mode, config); | ||
| 38 | } | ||
| 39 | |||
| 40 | void Controller::SetFromCpuBoostMode(CpuBoostMode mode) { | ||
| 41 | constexpr std::array<PerformanceConfiguration, 3> BOOST_MODE_TO_CONFIG_MAP{{ | ||
| 42 | PerformanceConfiguration::Config7, | ||
| 43 | PerformanceConfiguration::Config13, | ||
| 44 | PerformanceConfiguration::Config15, | ||
| 45 | }}; | ||
| 46 | |||
| 47 | SetPerformanceConfiguration(PerformanceMode::Docked, | ||
| 48 | BOOST_MODE_TO_CONFIG_MAP.at(static_cast<u32>(mode))); | ||
| 49 | } | ||
| 50 | |||
| 51 | PerformanceMode Controller::GetCurrentPerformanceMode() { | ||
| 52 | return Settings::values.use_docked_mode ? PerformanceMode::Docked : PerformanceMode::Handheld; | ||
| 53 | } | ||
| 54 | |||
| 55 | PerformanceConfiguration Controller::GetCurrentPerformanceConfiguration(PerformanceMode mode) { | ||
| 56 | if (configs.find(mode) == configs.end()) { | ||
| 57 | configs.insert_or_assign(mode, DEFAULT_PERFORMANCE_CONFIGURATION); | ||
| 58 | } | ||
| 59 | |||
| 60 | return configs[mode]; | ||
| 61 | } | ||
| 62 | |||
| 63 | void Controller::SetClockSpeed(u32 mhz) { | ||
| 64 | LOG_INFO(Service_APM, "called, mhz={:08X}", mhz); | ||
| 65 | // TODO(DarkLordZach): Actually signal core_timing to change clock speed. | ||
| 66 | } | ||
| 67 | |||
| 68 | } // namespace Service::APM | ||
diff --git a/src/core/hle/service/apm/controller.h b/src/core/hle/service/apm/controller.h new file mode 100644 index 000000000..8ac80eaea --- /dev/null +++ b/src/core/hle/service/apm/controller.h | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | // Copyright 2019 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <map> | ||
| 8 | #include "common/common_types.h" | ||
| 9 | |||
| 10 | namespace Core::Timing { | ||
| 11 | class CoreTiming; | ||
| 12 | } | ||
| 13 | |||
| 14 | namespace Service::APM { | ||
| 15 | |||
| 16 | enum class PerformanceConfiguration : u32 { | ||
| 17 | Config1 = 0x00010000, | ||
| 18 | Config2 = 0x00010001, | ||
| 19 | Config3 = 0x00010002, | ||
| 20 | Config4 = 0x00020000, | ||
| 21 | Config5 = 0x00020001, | ||
| 22 | Config6 = 0x00020002, | ||
| 23 | Config7 = 0x00020003, | ||
| 24 | Config8 = 0x00020004, | ||
| 25 | Config9 = 0x00020005, | ||
| 26 | Config10 = 0x00020006, | ||
| 27 | Config11 = 0x92220007, | ||
| 28 | Config12 = 0x92220008, | ||
| 29 | Config13 = 0x92220009, | ||
| 30 | Config14 = 0x9222000A, | ||
| 31 | Config15 = 0x9222000B, | ||
| 32 | Config16 = 0x9222000C, | ||
| 33 | }; | ||
| 34 | |||
| 35 | enum class CpuBoostMode : u32 { | ||
| 36 | Disabled = 0, | ||
| 37 | Full = 1, // CPU + GPU -> Config 13, 14, 15, or 16 | ||
| 38 | Partial = 2, // GPU Only -> Config 15 or 16 | ||
| 39 | }; | ||
| 40 | |||
| 41 | enum class PerformanceMode : u8 { | ||
| 42 | Handheld = 0, | ||
| 43 | Docked = 1, | ||
| 44 | }; | ||
| 45 | |||
| 46 | // Class to manage the state and change of the emulated system performance. | ||
| 47 | // Specifically, this deals with PerformanceMode, which corresponds to the system being docked or | ||
| 48 | // undocked, and PerformanceConfig which specifies the exact CPU, GPU, and Memory clocks to operate | ||
| 49 | // at. Additionally, this manages 'Boost Mode', which allows games to temporarily overclock the | ||
| 50 | // system during times of high load -- this simply maps to different PerformanceConfigs to use. | ||
| 51 | class Controller { | ||
| 52 | public: | ||
| 53 | Controller(Core::Timing::CoreTiming& core_timing); | ||
| 54 | ~Controller(); | ||
| 55 | |||
| 56 | void SetPerformanceConfiguration(PerformanceMode mode, PerformanceConfiguration config); | ||
| 57 | void SetFromCpuBoostMode(CpuBoostMode mode); | ||
| 58 | |||
| 59 | PerformanceMode GetCurrentPerformanceMode(); | ||
| 60 | PerformanceConfiguration GetCurrentPerformanceConfiguration(PerformanceMode mode); | ||
| 61 | |||
| 62 | private: | ||
| 63 | void SetClockSpeed(u32 mhz); | ||
| 64 | |||
| 65 | std::map<PerformanceMode, PerformanceConfiguration> configs; | ||
| 66 | |||
| 67 | Core::Timing::CoreTiming& core_timing; | ||
| 68 | }; | ||
| 69 | |||
| 70 | } // namespace Service::APM | ||
diff --git a/src/core/hle/service/apm/interface.cpp b/src/core/hle/service/apm/interface.cpp index d058c0245..06f0f8edd 100644 --- a/src/core/hle/service/apm/interface.cpp +++ b/src/core/hle/service/apm/interface.cpp | |||
| @@ -5,43 +5,32 @@ | |||
| 5 | #include "common/logging/log.h" | 5 | #include "common/logging/log.h" |
| 6 | #include "core/hle/ipc_helpers.h" | 6 | #include "core/hle/ipc_helpers.h" |
| 7 | #include "core/hle/service/apm/apm.h" | 7 | #include "core/hle/service/apm/apm.h" |
| 8 | #include "core/hle/service/apm/controller.h" | ||
| 8 | #include "core/hle/service/apm/interface.h" | 9 | #include "core/hle/service/apm/interface.h" |
| 9 | 10 | ||
| 10 | namespace Service::APM { | 11 | namespace Service::APM { |
| 11 | 12 | ||
| 12 | class ISession final : public ServiceFramework<ISession> { | 13 | class ISession final : public ServiceFramework<ISession> { |
| 13 | public: | 14 | public: |
| 14 | ISession() : ServiceFramework("ISession") { | 15 | ISession(Controller& controller) : ServiceFramework("ISession"), controller(controller) { |
| 15 | static const FunctionInfo functions[] = { | 16 | static const FunctionInfo functions[] = { |
| 16 | {0, &ISession::SetPerformanceConfiguration, "SetPerformanceConfiguration"}, | 17 | {0, &ISession::SetPerformanceConfiguration, "SetPerformanceConfiguration"}, |
| 17 | {1, &ISession::GetPerformanceConfiguration, "GetPerformanceConfiguration"}, | 18 | {1, &ISession::GetPerformanceConfiguration, "GetPerformanceConfiguration"}, |
| 19 | {2, nullptr, "SetCpuOverclockEnabled"}, | ||
| 18 | }; | 20 | }; |
| 19 | RegisterHandlers(functions); | 21 | RegisterHandlers(functions); |
| 20 | } | 22 | } |
| 21 | 23 | ||
| 22 | private: | 24 | private: |
| 23 | enum class PerformanceConfiguration : u32 { | ||
| 24 | Config1 = 0x00010000, | ||
| 25 | Config2 = 0x00010001, | ||
| 26 | Config3 = 0x00010002, | ||
| 27 | Config4 = 0x00020000, | ||
| 28 | Config5 = 0x00020001, | ||
| 29 | Config6 = 0x00020002, | ||
| 30 | Config7 = 0x00020003, | ||
| 31 | Config8 = 0x00020004, | ||
| 32 | Config9 = 0x00020005, | ||
| 33 | Config10 = 0x00020006, | ||
| 34 | Config11 = 0x92220007, | ||
| 35 | Config12 = 0x92220008, | ||
| 36 | }; | ||
| 37 | |||
| 38 | void SetPerformanceConfiguration(Kernel::HLERequestContext& ctx) { | 25 | void SetPerformanceConfiguration(Kernel::HLERequestContext& ctx) { |
| 39 | IPC::RequestParser rp{ctx}; | 26 | IPC::RequestParser rp{ctx}; |
| 40 | 27 | ||
| 41 | auto mode = static_cast<PerformanceMode>(rp.Pop<u32>()); | 28 | const auto mode = rp.PopEnum<PerformanceMode>(); |
| 42 | u32 config = rp.Pop<u32>(); | 29 | const auto config = rp.PopEnum<PerformanceConfiguration>(); |
| 43 | LOG_WARNING(Service_APM, "(STUBBED) called mode={} config={}", static_cast<u32>(mode), | 30 | LOG_DEBUG(Service_APM, "called mode={} config={}", static_cast<u32>(mode), |
| 44 | config); | 31 | static_cast<u32>(config)); |
| 32 | |||
| 33 | controller.SetPerformanceConfiguration(mode, config); | ||
| 45 | 34 | ||
| 46 | IPC::ResponseBuilder rb{ctx, 2}; | 35 | IPC::ResponseBuilder rb{ctx, 2}; |
| 47 | rb.Push(RESULT_SUCCESS); | 36 | rb.Push(RESULT_SUCCESS); |
| @@ -50,20 +39,23 @@ private: | |||
| 50 | void GetPerformanceConfiguration(Kernel::HLERequestContext& ctx) { | 39 | void GetPerformanceConfiguration(Kernel::HLERequestContext& ctx) { |
| 51 | IPC::RequestParser rp{ctx}; | 40 | IPC::RequestParser rp{ctx}; |
| 52 | 41 | ||
| 53 | auto mode = static_cast<PerformanceMode>(rp.Pop<u32>()); | 42 | const auto mode = rp.PopEnum<PerformanceMode>(); |
| 54 | LOG_WARNING(Service_APM, "(STUBBED) called mode={}", static_cast<u32>(mode)); | 43 | LOG_DEBUG(Service_APM, "called mode={}", static_cast<u32>(mode)); |
| 55 | 44 | ||
| 56 | IPC::ResponseBuilder rb{ctx, 3}; | 45 | IPC::ResponseBuilder rb{ctx, 3}; |
| 57 | rb.Push(RESULT_SUCCESS); | 46 | rb.Push(RESULT_SUCCESS); |
| 58 | rb.Push<u32>(static_cast<u32>(PerformanceConfiguration::Config1)); | 47 | rb.PushEnum(controller.GetCurrentPerformanceConfiguration(mode)); |
| 59 | } | 48 | } |
| 49 | |||
| 50 | Controller& controller; | ||
| 60 | }; | 51 | }; |
| 61 | 52 | ||
| 62 | APM::APM(std::shared_ptr<Module> apm, const char* name) | 53 | APM::APM(std::shared_ptr<Module> apm, Controller& controller, const char* name) |
| 63 | : ServiceFramework(name), apm(std::move(apm)) { | 54 | : ServiceFramework(name), apm(std::move(apm)), controller(controller) { |
| 64 | static const FunctionInfo functions[] = { | 55 | static const FunctionInfo functions[] = { |
| 65 | {0, &APM::OpenSession, "OpenSession"}, | 56 | {0, &APM::OpenSession, "OpenSession"}, |
| 66 | {1, nullptr, "GetPerformanceMode"}, | 57 | {1, &APM::GetPerformanceMode, "GetPerformanceMode"}, |
| 58 | {6, nullptr, "IsCpuOverclockEnabled"}, | ||
| 67 | }; | 59 | }; |
| 68 | RegisterHandlers(functions); | 60 | RegisterHandlers(functions); |
| 69 | } | 61 | } |
| @@ -75,10 +67,17 @@ void APM::OpenSession(Kernel::HLERequestContext& ctx) { | |||
| 75 | 67 | ||
| 76 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 68 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 77 | rb.Push(RESULT_SUCCESS); | 69 | rb.Push(RESULT_SUCCESS); |
| 78 | rb.PushIpcInterface<ISession>(); | 70 | rb.PushIpcInterface<ISession>(controller); |
| 71 | } | ||
| 72 | |||
| 73 | void APM::GetPerformanceMode(Kernel::HLERequestContext& ctx) { | ||
| 74 | LOG_DEBUG(Service_APM, "called"); | ||
| 75 | |||
| 76 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 77 | rb.PushEnum(controller.GetCurrentPerformanceMode()); | ||
| 79 | } | 78 | } |
| 80 | 79 | ||
| 81 | APM_Sys::APM_Sys() : ServiceFramework{"apm:sys"} { | 80 | APM_Sys::APM_Sys(Controller& controller) : ServiceFramework{"apm:sys"}, controller(controller) { |
| 82 | // clang-format off | 81 | // clang-format off |
| 83 | static const FunctionInfo functions[] = { | 82 | static const FunctionInfo functions[] = { |
| 84 | {0, nullptr, "RequestPerformanceMode"}, | 83 | {0, nullptr, "RequestPerformanceMode"}, |
| @@ -87,8 +86,8 @@ APM_Sys::APM_Sys() : ServiceFramework{"apm:sys"} { | |||
| 87 | {3, nullptr, "GetLastThrottlingState"}, | 86 | {3, nullptr, "GetLastThrottlingState"}, |
| 88 | {4, nullptr, "ClearLastThrottlingState"}, | 87 | {4, nullptr, "ClearLastThrottlingState"}, |
| 89 | {5, nullptr, "LoadAndApplySettings"}, | 88 | {5, nullptr, "LoadAndApplySettings"}, |
| 90 | {6, nullptr, "SetCpuBoostMode"}, | 89 | {6, &APM_Sys::SetCpuBoostMode, "SetCpuBoostMode"}, |
| 91 | {7, nullptr, "GetCurrentPerformanceConfiguration"}, | 90 | {7, &APM_Sys::GetCurrentPerformanceConfiguration, "GetCurrentPerformanceConfiguration"}, |
| 92 | }; | 91 | }; |
| 93 | // clang-format on | 92 | // clang-format on |
| 94 | 93 | ||
| @@ -102,7 +101,28 @@ void APM_Sys::GetPerformanceEvent(Kernel::HLERequestContext& ctx) { | |||
| 102 | 101 | ||
| 103 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 102 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 104 | rb.Push(RESULT_SUCCESS); | 103 | rb.Push(RESULT_SUCCESS); |
| 105 | rb.PushIpcInterface<ISession>(); | 104 | rb.PushIpcInterface<ISession>(controller); |
| 105 | } | ||
| 106 | |||
| 107 | void APM_Sys::SetCpuBoostMode(Kernel::HLERequestContext& ctx) { | ||
| 108 | IPC::RequestParser rp{ctx}; | ||
| 109 | const auto mode = rp.PopEnum<CpuBoostMode>(); | ||
| 110 | |||
| 111 | LOG_DEBUG(Service_APM, "called, mode={:08X}", static_cast<u32>(mode)); | ||
| 112 | |||
| 113 | controller.SetFromCpuBoostMode(mode); | ||
| 114 | |||
| 115 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 116 | rb.Push(RESULT_SUCCESS); | ||
| 117 | } | ||
| 118 | |||
| 119 | void APM_Sys::GetCurrentPerformanceConfiguration(Kernel::HLERequestContext& ctx) { | ||
| 120 | LOG_DEBUG(Service_APM, "called"); | ||
| 121 | |||
| 122 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 123 | rb.Push(RESULT_SUCCESS); | ||
| 124 | rb.PushEnum( | ||
| 125 | controller.GetCurrentPerformanceConfiguration(controller.GetCurrentPerformanceMode())); | ||
| 106 | } | 126 | } |
| 107 | 127 | ||
| 108 | } // namespace Service::APM | 128 | } // namespace Service::APM |
diff --git a/src/core/hle/service/apm/interface.h b/src/core/hle/service/apm/interface.h index 773541aa4..de1b89437 100644 --- a/src/core/hle/service/apm/interface.h +++ b/src/core/hle/service/apm/interface.h | |||
| @@ -8,24 +8,34 @@ | |||
| 8 | 8 | ||
| 9 | namespace Service::APM { | 9 | namespace Service::APM { |
| 10 | 10 | ||
| 11 | class Controller; | ||
| 12 | class Module; | ||
| 13 | |||
| 11 | class APM final : public ServiceFramework<APM> { | 14 | class APM final : public ServiceFramework<APM> { |
| 12 | public: | 15 | public: |
| 13 | explicit APM(std::shared_ptr<Module> apm, const char* name); | 16 | explicit APM(std::shared_ptr<Module> apm, Controller& controller, const char* name); |
| 14 | ~APM() override; | 17 | ~APM() override; |
| 15 | 18 | ||
| 16 | private: | 19 | private: |
| 17 | void OpenSession(Kernel::HLERequestContext& ctx); | 20 | void OpenSession(Kernel::HLERequestContext& ctx); |
| 21 | void GetPerformanceMode(Kernel::HLERequestContext& ctx); | ||
| 18 | 22 | ||
| 19 | std::shared_ptr<Module> apm; | 23 | std::shared_ptr<Module> apm; |
| 24 | Controller& controller; | ||
| 20 | }; | 25 | }; |
| 21 | 26 | ||
| 22 | class APM_Sys final : public ServiceFramework<APM_Sys> { | 27 | class APM_Sys final : public ServiceFramework<APM_Sys> { |
| 23 | public: | 28 | public: |
| 24 | explicit APM_Sys(); | 29 | explicit APM_Sys(Controller& controller); |
| 25 | ~APM_Sys() override; | 30 | ~APM_Sys() override; |
| 26 | 31 | ||
| 32 | void SetCpuBoostMode(Kernel::HLERequestContext& ctx); | ||
| 33 | |||
| 27 | private: | 34 | private: |
| 28 | void GetPerformanceEvent(Kernel::HLERequestContext& ctx); | 35 | void GetPerformanceEvent(Kernel::HLERequestContext& ctx); |
| 36 | void GetCurrentPerformanceConfiguration(Kernel::HLERequestContext& ctx); | ||
| 37 | |||
| 38 | Controller& controller; | ||
| 29 | }; | 39 | }; |
| 30 | 40 | ||
| 31 | } // namespace Service::APM | 41 | } // namespace Service::APM |
diff --git a/src/core/hle/service/audio/audio.cpp b/src/core/hle/service/audio/audio.cpp index 128df7db5..1781bec83 100644 --- a/src/core/hle/service/audio/audio.cpp +++ b/src/core/hle/service/audio/audio.cpp | |||
| @@ -19,16 +19,16 @@ | |||
| 19 | 19 | ||
| 20 | namespace Service::Audio { | 20 | namespace Service::Audio { |
| 21 | 21 | ||
| 22 | void InstallInterfaces(SM::ServiceManager& service_manager) { | 22 | void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) { |
| 23 | std::make_shared<AudCtl>()->InstallAsService(service_manager); | 23 | std::make_shared<AudCtl>()->InstallAsService(service_manager); |
| 24 | std::make_shared<AudOutA>()->InstallAsService(service_manager); | 24 | std::make_shared<AudOutA>()->InstallAsService(service_manager); |
| 25 | std::make_shared<AudOutU>()->InstallAsService(service_manager); | 25 | std::make_shared<AudOutU>(system)->InstallAsService(service_manager); |
| 26 | std::make_shared<AudInA>()->InstallAsService(service_manager); | 26 | std::make_shared<AudInA>()->InstallAsService(service_manager); |
| 27 | std::make_shared<AudInU>()->InstallAsService(service_manager); | 27 | std::make_shared<AudInU>()->InstallAsService(service_manager); |
| 28 | std::make_shared<AudRecA>()->InstallAsService(service_manager); | 28 | std::make_shared<AudRecA>()->InstallAsService(service_manager); |
| 29 | std::make_shared<AudRecU>()->InstallAsService(service_manager); | 29 | std::make_shared<AudRecU>()->InstallAsService(service_manager); |
| 30 | std::make_shared<AudRenA>()->InstallAsService(service_manager); | 30 | std::make_shared<AudRenA>()->InstallAsService(service_manager); |
| 31 | std::make_shared<AudRenU>()->InstallAsService(service_manager); | 31 | std::make_shared<AudRenU>(system)->InstallAsService(service_manager); |
| 32 | std::make_shared<CodecCtl>()->InstallAsService(service_manager); | 32 | std::make_shared<CodecCtl>()->InstallAsService(service_manager); |
| 33 | std::make_shared<HwOpus>()->InstallAsService(service_manager); | 33 | std::make_shared<HwOpus>()->InstallAsService(service_manager); |
| 34 | 34 | ||
diff --git a/src/core/hle/service/audio/audio.h b/src/core/hle/service/audio/audio.h index f5bd3bf5f..b6d13912e 100644 --- a/src/core/hle/service/audio/audio.h +++ b/src/core/hle/service/audio/audio.h | |||
| @@ -4,6 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | namespace Core { | ||
| 8 | class System; | ||
| 9 | } | ||
| 10 | |||
| 7 | namespace Service::SM { | 11 | namespace Service::SM { |
| 8 | class ServiceManager; | 12 | class ServiceManager; |
| 9 | } | 13 | } |
| @@ -11,6 +15,6 @@ class ServiceManager; | |||
| 11 | namespace Service::Audio { | 15 | namespace Service::Audio { |
| 12 | 16 | ||
| 13 | /// Registers all Audio services with the specified service manager. | 17 | /// Registers all Audio services with the specified service manager. |
| 14 | void InstallInterfaces(SM::ServiceManager& service_manager); | 18 | void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system); |
| 15 | 19 | ||
| 16 | } // namespace Service::Audio | 20 | } // namespace Service::Audio |
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 7db6eb08d..fb84a8f13 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp | |||
| @@ -40,8 +40,8 @@ enum class AudioState : u32 { | |||
| 40 | 40 | ||
| 41 | class IAudioOut final : public ServiceFramework<IAudioOut> { | 41 | class IAudioOut final : public ServiceFramework<IAudioOut> { |
| 42 | public: | 42 | public: |
| 43 | IAudioOut(AudoutParams audio_params, AudioCore::AudioOut& audio_core, std::string&& device_name, | 43 | IAudioOut(Core::System& system, AudoutParams audio_params, AudioCore::AudioOut& audio_core, |
| 44 | std::string&& unique_name) | 44 | std::string&& device_name, std::string&& unique_name) |
| 45 | : ServiceFramework("IAudioOut"), audio_core(audio_core), | 45 | : ServiceFramework("IAudioOut"), audio_core(audio_core), |
| 46 | device_name(std::move(device_name)), audio_params(audio_params) { | 46 | device_name(std::move(device_name)), audio_params(audio_params) { |
| 47 | // clang-format off | 47 | // clang-format off |
| @@ -65,7 +65,6 @@ public: | |||
| 65 | RegisterHandlers(functions); | 65 | RegisterHandlers(functions); |
| 66 | 66 | ||
| 67 | // This is the event handle used to check if the audio buffer was released | 67 | // This is the event handle used to check if the audio buffer was released |
| 68 | auto& system = Core::System::GetInstance(); | ||
| 69 | buffer_event = Kernel::WritableEvent::CreateEventPair( | 68 | buffer_event = Kernel::WritableEvent::CreateEventPair( |
| 70 | system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased"); | 69 | system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased"); |
| 71 | 70 | ||
| @@ -212,6 +211,22 @@ private: | |||
| 212 | Kernel::EventPair buffer_event; | 211 | Kernel::EventPair buffer_event; |
| 213 | }; | 212 | }; |
| 214 | 213 | ||
| 214 | AudOutU::AudOutU(Core::System& system_) : ServiceFramework("audout:u"), system{system_} { | ||
| 215 | // clang-format off | ||
| 216 | static const FunctionInfo functions[] = { | ||
| 217 | {0, &AudOutU::ListAudioOutsImpl, "ListAudioOuts"}, | ||
| 218 | {1, &AudOutU::OpenAudioOutImpl, "OpenAudioOut"}, | ||
| 219 | {2, &AudOutU::ListAudioOutsImpl, "ListAudioOutsAuto"}, | ||
| 220 | {3, &AudOutU::OpenAudioOutImpl, "OpenAudioOutAuto"}, | ||
| 221 | }; | ||
| 222 | // clang-format on | ||
| 223 | |||
| 224 | RegisterHandlers(functions); | ||
| 225 | audio_core = std::make_unique<AudioCore::AudioOut>(); | ||
| 226 | } | ||
| 227 | |||
| 228 | AudOutU::~AudOutU() = default; | ||
| 229 | |||
| 215 | void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) { | 230 | void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) { |
| 216 | LOG_DEBUG(Service_Audio, "called"); | 231 | LOG_DEBUG(Service_Audio, "called"); |
| 217 | 232 | ||
| @@ -248,7 +263,7 @@ void AudOutU::OpenAudioOutImpl(Kernel::HLERequestContext& ctx) { | |||
| 248 | 263 | ||
| 249 | std::string unique_name{fmt::format("{}-{}", device_name, audio_out_interfaces.size())}; | 264 | std::string unique_name{fmt::format("{}-{}", device_name, audio_out_interfaces.size())}; |
| 250 | auto audio_out_interface = std::make_shared<IAudioOut>( | 265 | auto audio_out_interface = std::make_shared<IAudioOut>( |
| 251 | params, *audio_core, std::move(device_name), std::move(unique_name)); | 266 | system, params, *audio_core, std::move(device_name), std::move(unique_name)); |
| 252 | 267 | ||
| 253 | IPC::ResponseBuilder rb{ctx, 6, 0, 1}; | 268 | IPC::ResponseBuilder rb{ctx, 6, 0, 1}; |
| 254 | rb.Push(RESULT_SUCCESS); | 269 | rb.Push(RESULT_SUCCESS); |
| @@ -256,20 +271,9 @@ void AudOutU::OpenAudioOutImpl(Kernel::HLERequestContext& ctx) { | |||
| 256 | rb.Push<u32>(params.channel_count); | 271 | rb.Push<u32>(params.channel_count); |
| 257 | rb.Push<u32>(static_cast<u32>(AudioCore::Codec::PcmFormat::Int16)); | 272 | rb.Push<u32>(static_cast<u32>(AudioCore::Codec::PcmFormat::Int16)); |
| 258 | rb.Push<u32>(static_cast<u32>(AudioState::Stopped)); | 273 | rb.Push<u32>(static_cast<u32>(AudioState::Stopped)); |
| 259 | rb.PushIpcInterface<Audio::IAudioOut>(audio_out_interface); | 274 | rb.PushIpcInterface<IAudioOut>(audio_out_interface); |
| 260 | 275 | ||
| 261 | audio_out_interfaces.push_back(std::move(audio_out_interface)); | 276 | audio_out_interfaces.push_back(std::move(audio_out_interface)); |
| 262 | } | 277 | } |
| 263 | 278 | ||
| 264 | AudOutU::AudOutU() : ServiceFramework("audout:u") { | ||
| 265 | static const FunctionInfo functions[] = {{0, &AudOutU::ListAudioOutsImpl, "ListAudioOuts"}, | ||
| 266 | {1, &AudOutU::OpenAudioOutImpl, "OpenAudioOut"}, | ||
| 267 | {2, &AudOutU::ListAudioOutsImpl, "ListAudioOutsAuto"}, | ||
| 268 | {3, &AudOutU::OpenAudioOutImpl, "OpenAudioOutAuto"}}; | ||
| 269 | RegisterHandlers(functions); | ||
| 270 | audio_core = std::make_unique<AudioCore::AudioOut>(); | ||
| 271 | } | ||
| 272 | |||
| 273 | AudOutU::~AudOutU() = default; | ||
| 274 | |||
| 275 | } // namespace Service::Audio | 279 | } // namespace Service::Audio |
diff --git a/src/core/hle/service/audio/audout_u.h b/src/core/hle/service/audio/audout_u.h index aed4c43b2..c9f532ccd 100644 --- a/src/core/hle/service/audio/audout_u.h +++ b/src/core/hle/service/audio/audout_u.h | |||
| @@ -11,6 +11,10 @@ namespace AudioCore { | |||
| 11 | class AudioOut; | 11 | class AudioOut; |
| 12 | } | 12 | } |
| 13 | 13 | ||
| 14 | namespace Core { | ||
| 15 | class System; | ||
| 16 | } | ||
| 17 | |||
| 14 | namespace Kernel { | 18 | namespace Kernel { |
| 15 | class HLERequestContext; | 19 | class HLERequestContext; |
| 16 | } | 20 | } |
| @@ -21,15 +25,17 @@ class IAudioOut; | |||
| 21 | 25 | ||
| 22 | class AudOutU final : public ServiceFramework<AudOutU> { | 26 | class AudOutU final : public ServiceFramework<AudOutU> { |
| 23 | public: | 27 | public: |
| 24 | AudOutU(); | 28 | explicit AudOutU(Core::System& system_); |
| 25 | ~AudOutU() override; | 29 | ~AudOutU() override; |
| 26 | 30 | ||
| 27 | private: | 31 | private: |
| 32 | void ListAudioOutsImpl(Kernel::HLERequestContext& ctx); | ||
| 33 | void OpenAudioOutImpl(Kernel::HLERequestContext& ctx); | ||
| 34 | |||
| 28 | std::vector<std::shared_ptr<IAudioOut>> audio_out_interfaces; | 35 | std::vector<std::shared_ptr<IAudioOut>> audio_out_interfaces; |
| 29 | std::unique_ptr<AudioCore::AudioOut> audio_core; | 36 | std::unique_ptr<AudioCore::AudioOut> audio_core; |
| 30 | 37 | ||
| 31 | void ListAudioOutsImpl(Kernel::HLERequestContext& ctx); | 38 | Core::System& system; |
| 32 | void OpenAudioOutImpl(Kernel::HLERequestContext& ctx); | ||
| 33 | }; | 39 | }; |
| 34 | 40 | ||
| 35 | } // namespace Service::Audio | 41 | } // namespace Service::Audio |
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 75db0c2dc..f162249ed 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <string_view> | ||
| 8 | 9 | ||
| 9 | #include "audio_core/audio_renderer.h" | 10 | #include "audio_core/audio_renderer.h" |
| 10 | #include "common/alignment.h" | 11 | #include "common/alignment.h" |
| @@ -25,7 +26,8 @@ namespace Service::Audio { | |||
| 25 | 26 | ||
| 26 | class IAudioRenderer final : public ServiceFramework<IAudioRenderer> { | 27 | class IAudioRenderer final : public ServiceFramework<IAudioRenderer> { |
| 27 | public: | 28 | public: |
| 28 | explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params) | 29 | explicit IAudioRenderer(Core::System& system, AudioCore::AudioRendererParameter audren_params, |
| 30 | const std::size_t instance_number) | ||
| 29 | : ServiceFramework("IAudioRenderer") { | 31 | : ServiceFramework("IAudioRenderer") { |
| 30 | // clang-format off | 32 | // clang-format off |
| 31 | static const FunctionInfo functions[] = { | 33 | static const FunctionInfo functions[] = { |
| @@ -45,11 +47,10 @@ public: | |||
| 45 | // clang-format on | 47 | // clang-format on |
| 46 | RegisterHandlers(functions); | 48 | RegisterHandlers(functions); |
| 47 | 49 | ||
| 48 | auto& system = Core::System::GetInstance(); | ||
| 49 | system_event = Kernel::WritableEvent::CreateEventPair( | 50 | system_event = Kernel::WritableEvent::CreateEventPair( |
| 50 | system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent"); | 51 | system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent"); |
| 51 | renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params, | 52 | renderer = std::make_unique<AudioCore::AudioRenderer>( |
| 52 | system_event.writable); | 53 | system.CoreTiming(), audren_params, system_event.writable, instance_number); |
| 53 | } | 54 | } |
| 54 | 55 | ||
| 55 | private: | 56 | private: |
| @@ -159,40 +160,81 @@ private: | |||
| 159 | 160 | ||
| 160 | class IAudioDevice final : public ServiceFramework<IAudioDevice> { | 161 | class IAudioDevice final : public ServiceFramework<IAudioDevice> { |
| 161 | public: | 162 | public: |
| 162 | IAudioDevice() : ServiceFramework("IAudioDevice") { | 163 | explicit IAudioDevice(Core::System& system, u32_le revision_num) |
| 164 | : ServiceFramework("IAudioDevice"), revision{revision_num} { | ||
| 163 | static const FunctionInfo functions[] = { | 165 | static const FunctionInfo functions[] = { |
| 164 | {0, &IAudioDevice::ListAudioDeviceName, "ListAudioDeviceName"}, | 166 | {0, &IAudioDevice::ListAudioDeviceName, "ListAudioDeviceName"}, |
| 165 | {1, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolume"}, | 167 | {1, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolume"}, |
| 166 | {2, nullptr, "GetAudioDeviceOutputVolume"}, | 168 | {2, &IAudioDevice::GetAudioDeviceOutputVolume, "GetAudioDeviceOutputVolume"}, |
| 167 | {3, &IAudioDevice::GetActiveAudioDeviceName, "GetActiveAudioDeviceName"}, | 169 | {3, &IAudioDevice::GetActiveAudioDeviceName, "GetActiveAudioDeviceName"}, |
| 168 | {4, &IAudioDevice::QueryAudioDeviceSystemEvent, "QueryAudioDeviceSystemEvent"}, | 170 | {4, &IAudioDevice::QueryAudioDeviceSystemEvent, "QueryAudioDeviceSystemEvent"}, |
| 169 | {5, &IAudioDevice::GetActiveChannelCount, "GetActiveChannelCount"}, | 171 | {5, &IAudioDevice::GetActiveChannelCount, "GetActiveChannelCount"}, |
| 170 | {6, &IAudioDevice::ListAudioDeviceName, | 172 | {6, &IAudioDevice::ListAudioDeviceName, "ListAudioDeviceNameAuto"}, |
| 171 | "ListAudioDeviceNameAuto"}, // TODO(ogniK): Confirm if autos are identical to non auto | ||
| 172 | {7, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolumeAuto"}, | 173 | {7, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolumeAuto"}, |
| 173 | {8, nullptr, "GetAudioDeviceOutputVolumeAuto"}, | 174 | {8, &IAudioDevice::GetAudioDeviceOutputVolume, "GetAudioDeviceOutputVolumeAuto"}, |
| 174 | {10, &IAudioDevice::GetActiveAudioDeviceName, "GetActiveAudioDeviceNameAuto"}, | 175 | {10, &IAudioDevice::GetActiveAudioDeviceName, "GetActiveAudioDeviceNameAuto"}, |
| 175 | {11, nullptr, "QueryAudioDeviceInputEvent"}, | 176 | {11, &IAudioDevice::QueryAudioDeviceInputEvent, "QueryAudioDeviceInputEvent"}, |
| 176 | {12, nullptr, "QueryAudioDeviceOutputEvent"}, | 177 | {12, &IAudioDevice::QueryAudioDeviceOutputEvent, "QueryAudioDeviceOutputEvent"}, |
| 177 | {13, nullptr, "GetAudioSystemMasterVolumeSetting"}, | 178 | {13, nullptr, "GetAudioSystemMasterVolumeSetting"}, |
| 178 | }; | 179 | }; |
| 179 | RegisterHandlers(functions); | 180 | RegisterHandlers(functions); |
| 180 | 181 | ||
| 181 | auto& kernel = Core::System::GetInstance().Kernel(); | 182 | auto& kernel = system.Kernel(); |
| 182 | buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, | 183 | buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, |
| 183 | "IAudioOutBufferReleasedEvent"); | 184 | "IAudioOutBufferReleasedEvent"); |
| 185 | |||
| 186 | // Should be similar to audio_output_device_switch_event | ||
| 187 | audio_input_device_switch_event = Kernel::WritableEvent::CreateEventPair( | ||
| 188 | kernel, Kernel::ResetType::Automatic, "IAudioDevice:AudioInputDeviceSwitchedEvent"); | ||
| 189 | |||
| 190 | // Should only be signalled when an audio output device has been changed, example: speaker | ||
| 191 | // to headset | ||
| 192 | audio_output_device_switch_event = Kernel::WritableEvent::CreateEventPair( | ||
| 193 | kernel, Kernel::ResetType::Automatic, "IAudioDevice:AudioOutputDeviceSwitchedEvent"); | ||
| 184 | } | 194 | } |
| 185 | 195 | ||
| 186 | private: | 196 | private: |
| 197 | using AudioDeviceName = std::array<char, 256>; | ||
| 198 | static constexpr std::array<std::string_view, 4> audio_device_names{{ | ||
| 199 | "AudioStereoJackOutput", | ||
| 200 | "AudioBuiltInSpeakerOutput", | ||
| 201 | "AudioTvOutput", | ||
| 202 | "AudioUsbDeviceOutput", | ||
| 203 | }}; | ||
| 204 | enum class DeviceType { | ||
| 205 | AHUBHeadphones, | ||
| 206 | AHUBSpeakers, | ||
| 207 | HDA, | ||
| 208 | USBOutput, | ||
| 209 | }; | ||
| 210 | |||
| 187 | void ListAudioDeviceName(Kernel::HLERequestContext& ctx) { | 211 | void ListAudioDeviceName(Kernel::HLERequestContext& ctx) { |
| 188 | LOG_WARNING(Service_Audio, "(STUBBED) called"); | 212 | LOG_DEBUG(Service_Audio, "called"); |
| 213 | |||
| 214 | const bool usb_output_supported = | ||
| 215 | IsFeatureSupported(AudioFeatures::AudioUSBDeviceOutput, revision); | ||
| 216 | const std::size_t count = ctx.GetWriteBufferSize() / sizeof(AudioDeviceName); | ||
| 189 | 217 | ||
| 190 | constexpr std::array<char, 15> audio_interface{{"AudioInterface"}}; | 218 | std::vector<AudioDeviceName> name_buffer; |
| 191 | ctx.WriteBuffer(audio_interface); | 219 | name_buffer.reserve(audio_device_names.size()); |
| 220 | |||
| 221 | for (std::size_t i = 0; i < count && i < audio_device_names.size(); i++) { | ||
| 222 | const auto type = static_cast<DeviceType>(i); | ||
| 223 | |||
| 224 | if (!usb_output_supported && type == DeviceType::USBOutput) { | ||
| 225 | continue; | ||
| 226 | } | ||
| 227 | |||
| 228 | const auto& device_name = audio_device_names[i]; | ||
| 229 | auto& entry = name_buffer.emplace_back(); | ||
| 230 | device_name.copy(entry.data(), device_name.size()); | ||
| 231 | } | ||
| 232 | |||
| 233 | ctx.WriteBuffer(name_buffer); | ||
| 192 | 234 | ||
| 193 | IPC::ResponseBuilder rb{ctx, 3}; | 235 | IPC::ResponseBuilder rb{ctx, 3}; |
| 194 | rb.Push(RESULT_SUCCESS); | 236 | rb.Push(RESULT_SUCCESS); |
| 195 | rb.Push<u32>(1); | 237 | rb.Push(static_cast<u32>(name_buffer.size())); |
| 196 | } | 238 | } |
| 197 | 239 | ||
| 198 | void SetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) { | 240 | void SetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) { |
| @@ -208,15 +250,32 @@ private: | |||
| 208 | rb.Push(RESULT_SUCCESS); | 250 | rb.Push(RESULT_SUCCESS); |
| 209 | } | 251 | } |
| 210 | 252 | ||
| 253 | void GetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) { | ||
| 254 | IPC::RequestParser rp{ctx}; | ||
| 255 | |||
| 256 | const auto device_name_buffer = ctx.ReadBuffer(); | ||
| 257 | const std::string name = Common::StringFromBuffer(device_name_buffer); | ||
| 258 | |||
| 259 | LOG_WARNING(Service_Audio, "(STUBBED) called. name={}", name); | ||
| 260 | |||
| 261 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 262 | rb.Push(RESULT_SUCCESS); | ||
| 263 | rb.Push(1.0f); | ||
| 264 | } | ||
| 265 | |||
| 211 | void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) { | 266 | void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) { |
| 212 | LOG_WARNING(Service_Audio, "(STUBBED) called"); | 267 | LOG_WARNING(Service_Audio, "(STUBBED) called"); |
| 213 | 268 | ||
| 214 | constexpr std::array<char, 12> audio_interface{{"AudioDevice"}}; | 269 | // Currently set to always be TV audio output. |
| 215 | ctx.WriteBuffer(audio_interface); | 270 | const auto& device_name = audio_device_names[2]; |
| 216 | 271 | ||
| 217 | IPC::ResponseBuilder rb{ctx, 3}; | 272 | AudioDeviceName out_device_name{}; |
| 273 | device_name.copy(out_device_name.data(), device_name.size()); | ||
| 274 | |||
| 275 | ctx.WriteBuffer(out_device_name); | ||
| 276 | |||
| 277 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 218 | rb.Push(RESULT_SUCCESS); | 278 | rb.Push(RESULT_SUCCESS); |
| 219 | rb.Push<u32>(1); | ||
| 220 | } | 279 | } |
| 221 | 280 | ||
| 222 | void QueryAudioDeviceSystemEvent(Kernel::HLERequestContext& ctx) { | 281 | void QueryAudioDeviceSystemEvent(Kernel::HLERequestContext& ctx) { |
| @@ -237,11 +296,31 @@ private: | |||
| 237 | rb.Push<u32>(1); | 296 | rb.Push<u32>(1); |
| 238 | } | 297 | } |
| 239 | 298 | ||
| 299 | // Should be similar to QueryAudioDeviceOutputEvent | ||
| 300 | void QueryAudioDeviceInputEvent(Kernel::HLERequestContext& ctx) { | ||
| 301 | LOG_WARNING(Service_Audio, "(STUBBED) called"); | ||
| 302 | |||
| 303 | IPC::ResponseBuilder rb{ctx, 2, 1}; | ||
| 304 | rb.Push(RESULT_SUCCESS); | ||
| 305 | rb.PushCopyObjects(audio_input_device_switch_event.readable); | ||
| 306 | } | ||
| 307 | |||
| 308 | void QueryAudioDeviceOutputEvent(Kernel::HLERequestContext& ctx) { | ||
| 309 | LOG_DEBUG(Service_Audio, "called"); | ||
| 310 | |||
| 311 | IPC::ResponseBuilder rb{ctx, 2, 1}; | ||
| 312 | rb.Push(RESULT_SUCCESS); | ||
| 313 | rb.PushCopyObjects(audio_output_device_switch_event.readable); | ||
| 314 | } | ||
| 315 | |||
| 316 | u32_le revision = 0; | ||
| 240 | Kernel::EventPair buffer_event; | 317 | Kernel::EventPair buffer_event; |
| 318 | Kernel::EventPair audio_input_device_switch_event; | ||
| 319 | Kernel::EventPair audio_output_device_switch_event; | ||
| 241 | 320 | ||
| 242 | }; // namespace Audio | 321 | }; // namespace Audio |
| 243 | 322 | ||
| 244 | AudRenU::AudRenU() : ServiceFramework("audren:u") { | 323 | AudRenU::AudRenU(Core::System& system_) : ServiceFramework("audren:u"), system{system_} { |
| 245 | // clang-format off | 324 | // clang-format off |
| 246 | static const FunctionInfo functions[] = { | 325 | static const FunctionInfo functions[] = { |
| 247 | {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"}, | 326 | {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"}, |
| @@ -314,7 +393,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { | |||
| 314 | }; | 393 | }; |
| 315 | 394 | ||
| 316 | // Calculates the portion of the size related to the mix data (and the sorting thereof). | 395 | // Calculates the portion of the size related to the mix data (and the sorting thereof). |
| 317 | const auto calculate_mix_info_size = [this](const AudioCore::AudioRendererParameter& params) { | 396 | const auto calculate_mix_info_size = [](const AudioCore::AudioRendererParameter& params) { |
| 318 | // The size of the mixing info data structure. | 397 | // The size of the mixing info data structure. |
| 319 | constexpr u64 mix_info_size = 0x940; | 398 | constexpr u64 mix_info_size = 0x940; |
| 320 | 399 | ||
| @@ -386,7 +465,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { | |||
| 386 | 465 | ||
| 387 | // Calculates the part of the size related to the splitter context. | 466 | // Calculates the part of the size related to the splitter context. |
| 388 | const auto calculate_splitter_context_size = | 467 | const auto calculate_splitter_context_size = |
| 389 | [this](const AudioCore::AudioRendererParameter& params) -> u64 { | 468 | [](const AudioCore::AudioRendererParameter& params) -> u64 { |
| 390 | if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { | 469 | if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { |
| 391 | return 0; | 470 | return 0; |
| 392 | } | 471 | } |
| @@ -433,7 +512,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { | |||
| 433 | }; | 512 | }; |
| 434 | 513 | ||
| 435 | // Calculates the part of the size related to performance statistics. | 514 | // Calculates the part of the size related to performance statistics. |
| 436 | const auto calculate_perf_size = [this](const AudioCore::AudioRendererParameter& params) { | 515 | const auto calculate_perf_size = [](const AudioCore::AudioRendererParameter& params) { |
| 437 | // Extra size value appended to the end of the calculation. | 516 | // Extra size value appended to the end of the calculation. |
| 438 | constexpr u64 appended = 128; | 517 | constexpr u64 appended = 128; |
| 439 | 518 | ||
| @@ -460,78 +539,76 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { | |||
| 460 | }; | 539 | }; |
| 461 | 540 | ||
| 462 | // Calculates the part of the size that relates to the audio command buffer. | 541 | // Calculates the part of the size that relates to the audio command buffer. |
| 463 | const auto calculate_command_buffer_size = | 542 | const auto calculate_command_buffer_size = [](const AudioCore::AudioRendererParameter& params) { |
| 464 | [this](const AudioCore::AudioRendererParameter& params) { | 543 | constexpr u64 alignment = (buffer_alignment_size - 1) * 2; |
| 465 | constexpr u64 alignment = (buffer_alignment_size - 1) * 2; | ||
| 466 | 544 | ||
| 467 | if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) { | 545 | if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) { |
| 468 | constexpr u64 command_buffer_size = 0x18000; | 546 | constexpr u64 command_buffer_size = 0x18000; |
| 469 | 547 | ||
| 470 | return command_buffer_size + alignment; | 548 | return command_buffer_size + alignment; |
| 471 | } | 549 | } |
| 472 | 550 | ||
| 473 | // When the variadic command buffer is supported, this means | 551 | // When the variadic command buffer is supported, this means |
| 474 | // the command generator for the audio renderer can issue commands | 552 | // the command generator for the audio renderer can issue commands |
| 475 | // that are (as one would expect), variable in size. So what we need to do | 553 | // that are (as one would expect), variable in size. So what we need to do |
| 476 | // is determine the maximum possible size for a few command data structures | 554 | // is determine the maximum possible size for a few command data structures |
| 477 | // then multiply them by the amount of present commands indicated by the given | 555 | // then multiply them by the amount of present commands indicated by the given |
| 478 | // respective audio parameters. | 556 | // respective audio parameters. |
| 479 | 557 | ||
| 480 | constexpr u64 max_biquad_filters = 2; | 558 | constexpr u64 max_biquad_filters = 2; |
| 481 | constexpr u64 max_mix_buffers = 24; | 559 | constexpr u64 max_mix_buffers = 24; |
| 482 | 560 | ||
| 483 | constexpr u64 biquad_filter_command_size = 0x2C; | 561 | constexpr u64 biquad_filter_command_size = 0x2C; |
| 484 | 562 | ||
| 485 | constexpr u64 depop_mix_command_size = 0x24; | 563 | constexpr u64 depop_mix_command_size = 0x24; |
| 486 | constexpr u64 depop_setup_command_size = 0x50; | 564 | constexpr u64 depop_setup_command_size = 0x50; |
| 487 | 565 | ||
| 488 | constexpr u64 effect_command_max_size = 0x540; | 566 | constexpr u64 effect_command_max_size = 0x540; |
| 489 | 567 | ||
| 490 | constexpr u64 mix_command_size = 0x1C; | 568 | constexpr u64 mix_command_size = 0x1C; |
| 491 | constexpr u64 mix_ramp_command_size = 0x24; | 569 | constexpr u64 mix_ramp_command_size = 0x24; |
| 492 | constexpr u64 mix_ramp_grouped_command_size = 0x13C; | 570 | constexpr u64 mix_ramp_grouped_command_size = 0x13C; |
| 493 | 571 | ||
| 494 | constexpr u64 perf_command_size = 0x28; | 572 | constexpr u64 perf_command_size = 0x28; |
| 495 | 573 | ||
| 496 | constexpr u64 sink_command_size = 0x130; | 574 | constexpr u64 sink_command_size = 0x130; |
| 497 | 575 | ||
| 498 | constexpr u64 submix_command_max_size = | 576 | constexpr u64 submix_command_max_size = |
| 499 | depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers; | 577 | depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers; |
| 500 | 578 | ||
| 501 | constexpr u64 volume_command_size = 0x1C; | 579 | constexpr u64 volume_command_size = 0x1C; |
| 502 | constexpr u64 volume_ramp_command_size = 0x20; | 580 | constexpr u64 volume_ramp_command_size = 0x20; |
| 503 | 581 | ||
| 504 | constexpr u64 voice_biquad_filter_command_size = | 582 | constexpr u64 voice_biquad_filter_command_size = |
| 505 | biquad_filter_command_size * max_biquad_filters; | 583 | biquad_filter_command_size * max_biquad_filters; |
| 506 | constexpr u64 voice_data_command_size = 0x9C; | 584 | constexpr u64 voice_data_command_size = 0x9C; |
| 507 | const u64 voice_command_max_size = | 585 | const u64 voice_command_max_size = |
| 508 | (params.splitter_count * depop_setup_command_size) + | 586 | (params.splitter_count * depop_setup_command_size) + |
| 509 | (voice_data_command_size + voice_biquad_filter_command_size + | 587 | (voice_data_command_size + voice_biquad_filter_command_size + volume_ramp_command_size + |
| 510 | volume_ramp_command_size + mix_ramp_grouped_command_size); | 588 | mix_ramp_grouped_command_size); |
| 511 | 589 | ||
| 512 | // Now calculate the individual elements that comprise the size and add them together. | 590 | // Now calculate the individual elements that comprise the size and add them together. |
| 513 | const u64 effect_commands_size = params.effect_count * effect_command_max_size; | 591 | const u64 effect_commands_size = params.effect_count * effect_command_max_size; |
| 514 | 592 | ||
| 515 | const u64 final_mix_commands_size = | 593 | const u64 final_mix_commands_size = |
| 516 | depop_mix_command_size + volume_command_size * max_mix_buffers; | 594 | depop_mix_command_size + volume_command_size * max_mix_buffers; |
| 517 | 595 | ||
| 518 | const u64 perf_commands_size = | 596 | const u64 perf_commands_size = |
| 519 | perf_command_size * | 597 | perf_command_size * (CalculateNumPerformanceEntries(params) + max_perf_detail_entries); |
| 520 | (CalculateNumPerformanceEntries(params) + max_perf_detail_entries); | ||
| 521 | 598 | ||
| 522 | const u64 sink_commands_size = params.sink_count * sink_command_size; | 599 | const u64 sink_commands_size = params.sink_count * sink_command_size; |
| 523 | 600 | ||
| 524 | const u64 splitter_commands_size = | 601 | const u64 splitter_commands_size = |
| 525 | params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size; | 602 | params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size; |
| 526 | 603 | ||
| 527 | const u64 submix_commands_size = params.submix_count * submix_command_max_size; | 604 | const u64 submix_commands_size = params.submix_count * submix_command_max_size; |
| 528 | 605 | ||
| 529 | const u64 voice_commands_size = params.voice_count * voice_command_max_size; | 606 | const u64 voice_commands_size = params.voice_count * voice_command_max_size; |
| 530 | 607 | ||
| 531 | return effect_commands_size + final_mix_commands_size + perf_commands_size + | 608 | return effect_commands_size + final_mix_commands_size + perf_commands_size + |
| 532 | sink_commands_size + splitter_commands_size + submix_commands_size + | 609 | sink_commands_size + splitter_commands_size + submix_commands_size + |
| 533 | voice_commands_size + alignment; | 610 | voice_commands_size + alignment; |
| 534 | }; | 611 | }; |
| 535 | 612 | ||
| 536 | IPC::RequestParser rp{ctx}; | 613 | IPC::RequestParser rp{ctx}; |
| 537 | const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); | 614 | const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); |
| @@ -564,12 +641,16 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { | |||
| 564 | } | 641 | } |
| 565 | 642 | ||
| 566 | void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) { | 643 | void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) { |
| 567 | LOG_DEBUG(Service_Audio, "called"); | 644 | IPC::RequestParser rp{ctx}; |
| 645 | const u64 aruid = rp.Pop<u64>(); | ||
| 568 | 646 | ||
| 569 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 647 | LOG_DEBUG(Service_Audio, "called. aruid={:016X}", aruid); |
| 570 | 648 | ||
| 649 | // Revisionless variant of GetAudioDeviceServiceWithRevisionInfo that | ||
| 650 | // always assumes the initial release revision (REV1). | ||
| 651 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | ||
| 571 | rb.Push(RESULT_SUCCESS); | 652 | rb.Push(RESULT_SUCCESS); |
| 572 | rb.PushIpcInterface<Audio::IAudioDevice>(); | 653 | rb.PushIpcInterface<IAudioDevice>(system, Common::MakeMagic('R', 'E', 'V', '1')); |
| 573 | } | 654 | } |
| 574 | 655 | ||
| 575 | void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) { | 656 | void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) { |
| @@ -579,13 +660,19 @@ void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) { | |||
| 579 | } | 660 | } |
| 580 | 661 | ||
| 581 | void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) { | 662 | void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) { |
| 582 | LOG_WARNING(Service_Audio, "(STUBBED) called"); | 663 | struct Parameters { |
| 664 | u32 revision; | ||
| 665 | u64 aruid; | ||
| 666 | }; | ||
| 583 | 667 | ||
| 584 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 668 | IPC::RequestParser rp{ctx}; |
| 669 | const auto [revision, aruid] = rp.PopRaw<Parameters>(); | ||
| 670 | |||
| 671 | LOG_DEBUG(Service_Audio, "called. revision={:08X}, aruid={:016X}", revision, aruid); | ||
| 585 | 672 | ||
| 673 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | ||
| 586 | rb.Push(RESULT_SUCCESS); | 674 | rb.Push(RESULT_SUCCESS); |
| 587 | rb.PushIpcInterface<Audio::IAudioDevice>(); // TODO(ogniK): Figure out what is different | 675 | rb.PushIpcInterface<IAudioDevice>(system, revision); |
| 588 | // based on the current revision | ||
| 589 | } | 676 | } |
| 590 | 677 | ||
| 591 | void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) { | 678 | void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) { |
| @@ -594,14 +681,16 @@ void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) { | |||
| 594 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 681 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 595 | 682 | ||
| 596 | rb.Push(RESULT_SUCCESS); | 683 | rb.Push(RESULT_SUCCESS); |
| 597 | rb.PushIpcInterface<IAudioRenderer>(params); | 684 | rb.PushIpcInterface<IAudioRenderer>(system, params, audren_instance_count++); |
| 598 | } | 685 | } |
| 599 | 686 | ||
| 600 | bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { | 687 | bool IsFeatureSupported(AudioFeatures feature, u32_le revision) { |
| 601 | // Byte swap | 688 | // Byte swap |
| 602 | const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0'); | 689 | const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0'); |
| 603 | 690 | ||
| 604 | switch (feature) { | 691 | switch (feature) { |
| 692 | case AudioFeatures::AudioUSBDeviceOutput: | ||
| 693 | return version_num >= 4U; | ||
| 605 | case AudioFeatures::Splitter: | 694 | case AudioFeatures::Splitter: |
| 606 | return version_num >= 2U; | 695 | return version_num >= 2U; |
| 607 | case AudioFeatures::PerformanceMetricsVersion2: | 696 | case AudioFeatures::PerformanceMetricsVersion2: |
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h index 1d3c8df61..4e0ccc792 100644 --- a/src/core/hle/service/audio/audren_u.h +++ b/src/core/hle/service/audio/audren_u.h | |||
| @@ -6,6 +6,10 @@ | |||
| 6 | 6 | ||
| 7 | #include "core/hle/service/service.h" | 7 | #include "core/hle/service/service.h" |
| 8 | 8 | ||
| 9 | namespace Core { | ||
| 10 | class System; | ||
| 11 | } | ||
| 12 | |||
| 9 | namespace Kernel { | 13 | namespace Kernel { |
| 10 | class HLERequestContext; | 14 | class HLERequestContext; |
| 11 | } | 15 | } |
| @@ -14,7 +18,7 @@ namespace Service::Audio { | |||
| 14 | 18 | ||
| 15 | class AudRenU final : public ServiceFramework<AudRenU> { | 19 | class AudRenU final : public ServiceFramework<AudRenU> { |
| 16 | public: | 20 | public: |
| 17 | explicit AudRenU(); | 21 | explicit AudRenU(Core::System& system_); |
| 18 | ~AudRenU() override; | 22 | ~AudRenU() override; |
| 19 | 23 | ||
| 20 | private: | 24 | private: |
| @@ -26,13 +30,19 @@ private: | |||
| 26 | 30 | ||
| 27 | void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx); | 31 | void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx); |
| 28 | 32 | ||
| 29 | enum class AudioFeatures : u32 { | 33 | std::size_t audren_instance_count = 0; |
| 30 | Splitter, | 34 | Core::System& system; |
| 31 | PerformanceMetricsVersion2, | 35 | }; |
| 32 | VariadicCommandBuffer, | ||
| 33 | }; | ||
| 34 | 36 | ||
| 35 | bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const; | 37 | // Describes a particular audio feature that may be supported in a particular revision. |
| 38 | enum class AudioFeatures : u32 { | ||
| 39 | AudioUSBDeviceOutput, | ||
| 40 | Splitter, | ||
| 41 | PerformanceMetricsVersion2, | ||
| 42 | VariadicCommandBuffer, | ||
| 36 | }; | 43 | }; |
| 37 | 44 | ||
| 45 | // Tests if a particular audio feature is supported with a given audio revision. | ||
| 46 | bool IsFeatureSupported(AudioFeatures feature, u32_le revision); | ||
| 47 | |||
| 38 | } // namespace Service::Audio | 48 | } // namespace Service::Audio |
diff --git a/src/core/hle/service/es/es.cpp b/src/core/hle/service/es/es.cpp index 6701cb913..af70d174d 100644 --- a/src/core/hle/service/es/es.cpp +++ b/src/core/hle/service/es/es.cpp | |||
| @@ -2,32 +2,37 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/crypto/key_manager.h" | ||
| 6 | #include "core/hle/ipc_helpers.h" | ||
| 5 | #include "core/hle/service/service.h" | 7 | #include "core/hle/service/service.h" |
| 6 | 8 | ||
| 7 | namespace Service::ES { | 9 | namespace Service::ES { |
| 8 | 10 | ||
| 11 | constexpr ResultCode ERROR_INVALID_ARGUMENT{ErrorModule::ETicket, 2}; | ||
| 12 | constexpr ResultCode ERROR_INVALID_RIGHTS_ID{ErrorModule::ETicket, 3}; | ||
| 13 | |||
| 9 | class ETicket final : public ServiceFramework<ETicket> { | 14 | class ETicket final : public ServiceFramework<ETicket> { |
| 10 | public: | 15 | public: |
| 11 | explicit ETicket() : ServiceFramework{"es"} { | 16 | explicit ETicket() : ServiceFramework{"es"} { |
| 12 | // clang-format off | 17 | // clang-format off |
| 13 | static const FunctionInfo functions[] = { | 18 | static const FunctionInfo functions[] = { |
| 14 | {1, nullptr, "ImportTicket"}, | 19 | {1, &ETicket::ImportTicket, "ImportTicket"}, |
| 15 | {2, nullptr, "ImportTicketCertificateSet"}, | 20 | {2, nullptr, "ImportTicketCertificateSet"}, |
| 16 | {3, nullptr, "DeleteTicket"}, | 21 | {3, nullptr, "DeleteTicket"}, |
| 17 | {4, nullptr, "DeletePersonalizedTicket"}, | 22 | {4, nullptr, "DeletePersonalizedTicket"}, |
| 18 | {5, nullptr, "DeleteAllCommonTicket"}, | 23 | {5, nullptr, "DeleteAllCommonTicket"}, |
| 19 | {6, nullptr, "DeleteAllPersonalizedTicket"}, | 24 | {6, nullptr, "DeleteAllPersonalizedTicket"}, |
| 20 | {7, nullptr, "DeleteAllPersonalizedTicketEx"}, | 25 | {7, nullptr, "DeleteAllPersonalizedTicketEx"}, |
| 21 | {8, nullptr, "GetTitleKey"}, | 26 | {8, &ETicket::GetTitleKey, "GetTitleKey"}, |
| 22 | {9, nullptr, "CountCommonTicket"}, | 27 | {9, &ETicket::CountCommonTicket, "CountCommonTicket"}, |
| 23 | {10, nullptr, "CountPersonalizedTicket"}, | 28 | {10, &ETicket::CountPersonalizedTicket, "CountPersonalizedTicket"}, |
| 24 | {11, nullptr, "ListCommonTicket"}, | 29 | {11, &ETicket::ListCommonTicket, "ListCommonTicket"}, |
| 25 | {12, nullptr, "ListPersonalizedTicket"}, | 30 | {12, &ETicket::ListPersonalizedTicket, "ListPersonalizedTicket"}, |
| 26 | {13, nullptr, "ListMissingPersonalizedTicket"}, | 31 | {13, nullptr, "ListMissingPersonalizedTicket"}, |
| 27 | {14, nullptr, "GetCommonTicketSize"}, | 32 | {14, &ETicket::GetCommonTicketSize, "GetCommonTicketSize"}, |
| 28 | {15, nullptr, "GetPersonalizedTicketSize"}, | 33 | {15, &ETicket::GetPersonalizedTicketSize, "GetPersonalizedTicketSize"}, |
| 29 | {16, nullptr, "GetCommonTicketData"}, | 34 | {16, &ETicket::GetCommonTicketData, "GetCommonTicketData"}, |
| 30 | {17, nullptr, "GetPersonalizedTicketData"}, | 35 | {17, &ETicket::GetPersonalizedTicketData, "GetPersonalizedTicketData"}, |
| 31 | {18, nullptr, "OwnTicket"}, | 36 | {18, nullptr, "OwnTicket"}, |
| 32 | {19, nullptr, "GetTicketInfo"}, | 37 | {19, nullptr, "GetTicketInfo"}, |
| 33 | {20, nullptr, "ListLightTicketInfo"}, | 38 | {20, nullptr, "ListLightTicketInfo"}, |
| @@ -51,7 +56,212 @@ public: | |||
| 51 | }; | 56 | }; |
| 52 | // clang-format on | 57 | // clang-format on |
| 53 | RegisterHandlers(functions); | 58 | RegisterHandlers(functions); |
| 59 | |||
| 60 | keys.PopulateTickets(); | ||
| 61 | keys.SynthesizeTickets(); | ||
| 62 | } | ||
| 63 | |||
| 64 | private: | ||
| 65 | bool CheckRightsId(Kernel::HLERequestContext& ctx, const u128& rights_id) { | ||
| 66 | if (rights_id == u128{}) { | ||
| 67 | LOG_ERROR(Service_ETicket, "The rights ID was invalid!"); | ||
| 68 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 69 | rb.Push(ERROR_INVALID_RIGHTS_ID); | ||
| 70 | return false; | ||
| 71 | } | ||
| 72 | |||
| 73 | return true; | ||
| 74 | } | ||
| 75 | |||
| 76 | void ImportTicket(Kernel::HLERequestContext& ctx) { | ||
| 77 | IPC::RequestParser rp{ctx}; | ||
| 78 | const auto ticket = ctx.ReadBuffer(); | ||
| 79 | const auto cert = ctx.ReadBuffer(1); | ||
| 80 | |||
| 81 | if (ticket.size() < sizeof(Core::Crypto::Ticket)) { | ||
| 82 | LOG_ERROR(Service_ETicket, "The input buffer is not large enough!"); | ||
| 83 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 84 | rb.Push(ERROR_INVALID_ARGUMENT); | ||
| 85 | return; | ||
| 86 | } | ||
| 87 | |||
| 88 | Core::Crypto::Ticket raw{}; | ||
| 89 | std::memcpy(&raw, ticket.data(), sizeof(Core::Crypto::Ticket)); | ||
| 90 | |||
| 91 | if (!keys.AddTicketPersonalized(raw)) { | ||
| 92 | LOG_ERROR(Service_ETicket, "The ticket could not be imported!"); | ||
| 93 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 94 | rb.Push(ERROR_INVALID_ARGUMENT); | ||
| 95 | return; | ||
| 96 | } | ||
| 97 | |||
| 98 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 99 | rb.Push(RESULT_SUCCESS); | ||
| 100 | } | ||
| 101 | |||
| 102 | void GetTitleKey(Kernel::HLERequestContext& ctx) { | ||
| 103 | IPC::RequestParser rp{ctx}; | ||
| 104 | const auto rights_id = rp.PopRaw<u128>(); | ||
| 105 | |||
| 106 | LOG_DEBUG(Service_ETicket, "called, rights_id={:016X}{:016X}", rights_id[1], rights_id[0]); | ||
| 107 | |||
| 108 | if (!CheckRightsId(ctx, rights_id)) | ||
| 109 | return; | ||
| 110 | |||
| 111 | const auto key = | ||
| 112 | keys.GetKey(Core::Crypto::S128KeyType::Titlekey, rights_id[1], rights_id[0]); | ||
| 113 | |||
| 114 | if (key == Core::Crypto::Key128{}) { | ||
| 115 | LOG_ERROR(Service_ETicket, | ||
| 116 | "The titlekey doesn't exist in the KeyManager or the rights ID was invalid!"); | ||
| 117 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 118 | rb.Push(ERROR_INVALID_RIGHTS_ID); | ||
| 119 | return; | ||
| 120 | } | ||
| 121 | |||
| 122 | ctx.WriteBuffer(key.data(), key.size()); | ||
| 123 | |||
| 124 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 125 | rb.Push(RESULT_SUCCESS); | ||
| 126 | } | ||
| 127 | |||
| 128 | void CountCommonTicket(Kernel::HLERequestContext& ctx) { | ||
| 129 | LOG_DEBUG(Service_ETicket, "called"); | ||
| 130 | |||
| 131 | const auto count = keys.GetCommonTickets().size(); | ||
| 132 | |||
| 133 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 134 | rb.Push(RESULT_SUCCESS); | ||
| 135 | rb.Push<u32>(count); | ||
| 136 | } | ||
| 137 | |||
| 138 | void CountPersonalizedTicket(Kernel::HLERequestContext& ctx) { | ||
| 139 | LOG_DEBUG(Service_ETicket, "called"); | ||
| 140 | |||
| 141 | const auto count = keys.GetPersonalizedTickets().size(); | ||
| 142 | |||
| 143 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 144 | rb.Push(RESULT_SUCCESS); | ||
| 145 | rb.Push<u32>(count); | ||
| 146 | } | ||
| 147 | |||
| 148 | void ListCommonTicket(Kernel::HLERequestContext& ctx) { | ||
| 149 | u32 out_entries; | ||
| 150 | if (keys.GetCommonTickets().empty()) | ||
| 151 | out_entries = 0; | ||
| 152 | else | ||
| 153 | out_entries = ctx.GetWriteBufferSize() / sizeof(u128); | ||
| 154 | |||
| 155 | LOG_DEBUG(Service_ETicket, "called, entries={:016X}", out_entries); | ||
| 156 | |||
| 157 | keys.PopulateTickets(); | ||
| 158 | const auto tickets = keys.GetCommonTickets(); | ||
| 159 | std::vector<u128> ids; | ||
| 160 | std::transform(tickets.begin(), tickets.end(), std::back_inserter(ids), | ||
| 161 | [](const auto& pair) { return pair.first; }); | ||
| 162 | |||
| 163 | out_entries = std::min<u32>(ids.size(), out_entries); | ||
| 164 | ctx.WriteBuffer(ids.data(), out_entries * sizeof(u128)); | ||
| 165 | |||
| 166 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 167 | rb.Push(RESULT_SUCCESS); | ||
| 168 | rb.Push<u32>(out_entries); | ||
| 54 | } | 169 | } |
| 170 | |||
| 171 | void ListPersonalizedTicket(Kernel::HLERequestContext& ctx) { | ||
| 172 | u32 out_entries; | ||
| 173 | if (keys.GetPersonalizedTickets().empty()) | ||
| 174 | out_entries = 0; | ||
| 175 | else | ||
| 176 | out_entries = ctx.GetWriteBufferSize() / sizeof(u128); | ||
| 177 | |||
| 178 | LOG_DEBUG(Service_ETicket, "called, entries={:016X}", out_entries); | ||
| 179 | |||
| 180 | keys.PopulateTickets(); | ||
| 181 | const auto tickets = keys.GetPersonalizedTickets(); | ||
| 182 | std::vector<u128> ids; | ||
| 183 | std::transform(tickets.begin(), tickets.end(), std::back_inserter(ids), | ||
| 184 | [](const auto& pair) { return pair.first; }); | ||
| 185 | |||
| 186 | out_entries = std::min<u32>(ids.size(), out_entries); | ||
| 187 | ctx.WriteBuffer(ids.data(), out_entries * sizeof(u128)); | ||
| 188 | |||
| 189 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 190 | rb.Push(RESULT_SUCCESS); | ||
| 191 | rb.Push<u32>(out_entries); | ||
| 192 | } | ||
| 193 | |||
| 194 | void GetCommonTicketSize(Kernel::HLERequestContext& ctx) { | ||
| 195 | IPC::RequestParser rp{ctx}; | ||
| 196 | const auto rights_id = rp.PopRaw<u128>(); | ||
| 197 | |||
| 198 | LOG_DEBUG(Service_ETicket, "called, rights_id={:016X}{:016X}", rights_id[1], rights_id[0]); | ||
| 199 | |||
| 200 | if (!CheckRightsId(ctx, rights_id)) | ||
| 201 | return; | ||
| 202 | |||
| 203 | const auto ticket = keys.GetCommonTickets().at(rights_id); | ||
| 204 | |||
| 205 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 206 | rb.Push(RESULT_SUCCESS); | ||
| 207 | rb.Push<u64>(ticket.GetSize()); | ||
| 208 | } | ||
| 209 | |||
| 210 | void GetPersonalizedTicketSize(Kernel::HLERequestContext& ctx) { | ||
| 211 | IPC::RequestParser rp{ctx}; | ||
| 212 | const auto rights_id = rp.PopRaw<u128>(); | ||
| 213 | |||
| 214 | LOG_DEBUG(Service_ETicket, "called, rights_id={:016X}{:016X}", rights_id[1], rights_id[0]); | ||
| 215 | |||
| 216 | if (!CheckRightsId(ctx, rights_id)) | ||
| 217 | return; | ||
| 218 | |||
| 219 | const auto ticket = keys.GetPersonalizedTickets().at(rights_id); | ||
| 220 | |||
| 221 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 222 | rb.Push(RESULT_SUCCESS); | ||
| 223 | rb.Push<u64>(ticket.GetSize()); | ||
| 224 | } | ||
| 225 | |||
| 226 | void GetCommonTicketData(Kernel::HLERequestContext& ctx) { | ||
| 227 | IPC::RequestParser rp{ctx}; | ||
| 228 | const auto rights_id = rp.PopRaw<u128>(); | ||
| 229 | |||
| 230 | LOG_DEBUG(Service_ETicket, "called, rights_id={:016X}{:016X}", rights_id[1], rights_id[0]); | ||
| 231 | |||
| 232 | if (!CheckRightsId(ctx, rights_id)) | ||
| 233 | return; | ||
| 234 | |||
| 235 | const auto ticket = keys.GetCommonTickets().at(rights_id); | ||
| 236 | |||
| 237 | const auto write_size = std::min<u64>(ticket.GetSize(), ctx.GetWriteBufferSize()); | ||
| 238 | ctx.WriteBuffer(&ticket, write_size); | ||
| 239 | |||
| 240 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 241 | rb.Push(RESULT_SUCCESS); | ||
| 242 | rb.Push<u64>(write_size); | ||
| 243 | } | ||
| 244 | |||
| 245 | void GetPersonalizedTicketData(Kernel::HLERequestContext& ctx) { | ||
| 246 | IPC::RequestParser rp{ctx}; | ||
| 247 | const auto rights_id = rp.PopRaw<u128>(); | ||
| 248 | |||
| 249 | LOG_DEBUG(Service_ETicket, "called, rights_id={:016X}{:016X}", rights_id[1], rights_id[0]); | ||
| 250 | |||
| 251 | if (!CheckRightsId(ctx, rights_id)) | ||
| 252 | return; | ||
| 253 | |||
| 254 | const auto ticket = keys.GetPersonalizedTickets().at(rights_id); | ||
| 255 | |||
| 256 | const auto write_size = std::min<u64>(ticket.GetSize(), ctx.GetWriteBufferSize()); | ||
| 257 | ctx.WriteBuffer(&ticket, write_size); | ||
| 258 | |||
| 259 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 260 | rb.Push(RESULT_SUCCESS); | ||
| 261 | rb.Push<u64>(write_size); | ||
| 262 | } | ||
| 263 | |||
| 264 | Core::Crypto::KeyManager keys; | ||
| 55 | }; | 265 | }; |
| 56 | 266 | ||
| 57 | void InstallInterfaces(SM::ServiceManager& service_manager) { | 267 | void InstallInterfaces(SM::ServiceManager& service_manager) { |
diff --git a/src/core/hle/service/fatal/fatal.cpp b/src/core/hle/service/fatal/fatal.cpp index fe49c2161..01fa06ad3 100644 --- a/src/core/hle/service/fatal/fatal.cpp +++ b/src/core/hle/service/fatal/fatal.cpp | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | #include <array> | 5 | #include <array> |
| 6 | #include <cstring> | 6 | #include <cstring> |
| 7 | #include <ctime> | 7 | #include <ctime> |
| 8 | #include <fmt/time.h> | 8 | #include <fmt/chrono.h> |
| 9 | #include "common/file_util.h" | 9 | #include "common/file_util.h" |
| 10 | #include "common/logging/log.h" | 10 | #include "common/logging/log.h" |
| 11 | #include "common/scm_rev.h" | 11 | #include "common/scm_rev.h" |
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp index 1ebfeb4bf..8ce110dd1 100644 --- a/src/core/hle/service/filesystem/filesystem.cpp +++ b/src/core/hle/service/filesystem/filesystem.cpp | |||
| @@ -472,12 +472,12 @@ void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite) { | |||
| 472 | } | 472 | } |
| 473 | } | 473 | } |
| 474 | 474 | ||
| 475 | void InstallInterfaces(SM::ServiceManager& service_manager, FileSys::VfsFilesystem& vfs) { | 475 | void InstallInterfaces(Core::System& system) { |
| 476 | romfs_factory = nullptr; | 476 | romfs_factory = nullptr; |
| 477 | CreateFactories(vfs, false); | 477 | CreateFactories(*system.GetFilesystem(), false); |
| 478 | std::make_shared<FSP_LDR>()->InstallAsService(service_manager); | 478 | std::make_shared<FSP_LDR>()->InstallAsService(system.ServiceManager()); |
| 479 | std::make_shared<FSP_PR>()->InstallAsService(service_manager); | 479 | std::make_shared<FSP_PR>()->InstallAsService(system.ServiceManager()); |
| 480 | std::make_shared<FSP_SRV>()->InstallAsService(service_manager); | 480 | std::make_shared<FSP_SRV>(system.GetReporter())->InstallAsService(system.ServiceManager()); |
| 481 | } | 481 | } |
| 482 | 482 | ||
| 483 | } // namespace Service::FileSystem | 483 | } // namespace Service::FileSystem |
diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h index 6481f237c..3849dd89e 100644 --- a/src/core/hle/service/filesystem/filesystem.h +++ b/src/core/hle/service/filesystem/filesystem.h | |||
| @@ -65,7 +65,7 @@ FileSys::VirtualDir GetModificationDumpRoot(u64 title_id); | |||
| 65 | // above is called. | 65 | // above is called. |
| 66 | void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite = true); | 66 | void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite = true); |
| 67 | 67 | ||
| 68 | void InstallInterfaces(SM::ServiceManager& service_manager, FileSys::VfsFilesystem& vfs); | 68 | void InstallInterfaces(Core::System& system); |
| 69 | 69 | ||
| 70 | // A class that wraps a VfsDirectory with methods that return ResultVal and ResultCode instead of | 70 | // A class that wraps a VfsDirectory with methods that return ResultVal and ResultCode instead of |
| 71 | // pointers and booleans. This makes using a VfsDirectory with switch services much easier and | 71 | // pointers and booleans. This makes using a VfsDirectory with switch services much easier and |
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp index e7df8fd98..d3cd46a9b 100644 --- a/src/core/hle/service/filesystem/fsp_srv.cpp +++ b/src/core/hle/service/filesystem/fsp_srv.cpp | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include "core/hle/kernel/process.h" | 26 | #include "core/hle/kernel/process.h" |
| 27 | #include "core/hle/service/filesystem/filesystem.h" | 27 | #include "core/hle/service/filesystem/filesystem.h" |
| 28 | #include "core/hle/service/filesystem/fsp_srv.h" | 28 | #include "core/hle/service/filesystem/fsp_srv.h" |
| 29 | #include "core/reporter.h" | ||
| 29 | 30 | ||
| 30 | namespace Service::FileSystem { | 31 | namespace Service::FileSystem { |
| 31 | 32 | ||
| @@ -613,7 +614,7 @@ private: | |||
| 613 | u64 next_entry_index = 0; | 614 | u64 next_entry_index = 0; |
| 614 | }; | 615 | }; |
| 615 | 616 | ||
| 616 | FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { | 617 | FSP_SRV::FSP_SRV(const Core::Reporter& reporter) : ServiceFramework("fsp-srv"), reporter(reporter) { |
| 617 | // clang-format off | 618 | // clang-format off |
| 618 | static const FunctionInfo functions[] = { | 619 | static const FunctionInfo functions[] = { |
| 619 | {0, nullptr, "OpenFileSystem"}, | 620 | {0, nullptr, "OpenFileSystem"}, |
| @@ -710,14 +711,14 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { | |||
| 710 | {1001, nullptr, "SetSaveDataSize"}, | 711 | {1001, nullptr, "SetSaveDataSize"}, |
| 711 | {1002, nullptr, "SetSaveDataRootPath"}, | 712 | {1002, nullptr, "SetSaveDataRootPath"}, |
| 712 | {1003, nullptr, "DisableAutoSaveDataCreation"}, | 713 | {1003, nullptr, "DisableAutoSaveDataCreation"}, |
| 713 | {1004, nullptr, "SetGlobalAccessLogMode"}, | 714 | {1004, &FSP_SRV::SetGlobalAccessLogMode, "SetGlobalAccessLogMode"}, |
| 714 | {1005, &FSP_SRV::GetGlobalAccessLogMode, "GetGlobalAccessLogMode"}, | 715 | {1005, &FSP_SRV::GetGlobalAccessLogMode, "GetGlobalAccessLogMode"}, |
| 715 | {1006, nullptr, "OutputAccessLogToSdCard"}, | 716 | {1006, &FSP_SRV::OutputAccessLogToSdCard, "OutputAccessLogToSdCard"}, |
| 716 | {1007, nullptr, "RegisterUpdatePartition"}, | 717 | {1007, nullptr, "RegisterUpdatePartition"}, |
| 717 | {1008, nullptr, "OpenRegisteredUpdatePartition"}, | 718 | {1008, nullptr, "OpenRegisteredUpdatePartition"}, |
| 718 | {1009, nullptr, "GetAndClearMemoryReportInfo"}, | 719 | {1009, nullptr, "GetAndClearMemoryReportInfo"}, |
| 719 | {1010, nullptr, "SetDataStorageRedirectTarget"}, | 720 | {1010, nullptr, "SetDataStorageRedirectTarget"}, |
| 720 | {1011, nullptr, "OutputAccessLogToSdCard2"}, | 721 | {1011, &FSP_SRV::GetAccessLogVersionInfo, "GetAccessLogVersionInfo"}, |
| 721 | {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"}, | 722 | {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"}, |
| 722 | {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"}, | 723 | {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"}, |
| 723 | {1200, nullptr, "OpenMultiCommitManager"}, | 724 | {1200, nullptr, "OpenMultiCommitManager"}, |
| @@ -814,21 +815,22 @@ void FSP_SRV::OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& | |||
| 814 | rb.PushIpcInterface<ISaveDataInfoReader>(std::make_shared<ISaveDataInfoReader>(space)); | 815 | rb.PushIpcInterface<ISaveDataInfoReader>(std::make_shared<ISaveDataInfoReader>(space)); |
| 815 | } | 816 | } |
| 816 | 817 | ||
| 817 | void FSP_SRV::GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) { | 818 | void FSP_SRV::SetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) { |
| 818 | LOG_WARNING(Service_FS, "(STUBBED) called"); | 819 | IPC::RequestParser rp{ctx}; |
| 820 | log_mode = rp.PopEnum<LogMode>(); | ||
| 819 | 821 | ||
| 820 | enum class LogMode : u32 { | 822 | LOG_DEBUG(Service_FS, "called, log_mode={:08X}", static_cast<u32>(log_mode)); |
| 821 | Off, | 823 | |
| 822 | Log, | 824 | IPC::ResponseBuilder rb{ctx, 2}; |
| 823 | RedirectToSdCard, | 825 | rb.Push(RESULT_SUCCESS); |
| 824 | LogToSdCard = Log | RedirectToSdCard, | 826 | } |
| 825 | }; | 827 | |
| 828 | void FSP_SRV::GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) { | ||
| 829 | LOG_DEBUG(Service_FS, "called"); | ||
| 826 | 830 | ||
| 827 | // Given we always want to receive logging information, | ||
| 828 | // we always specify logging as enabled. | ||
| 829 | IPC::ResponseBuilder rb{ctx, 3}; | 831 | IPC::ResponseBuilder rb{ctx, 3}; |
| 830 | rb.Push(RESULT_SUCCESS); | 832 | rb.Push(RESULT_SUCCESS); |
| 831 | rb.PushEnum(LogMode::Log); | 833 | rb.PushEnum(log_mode); |
| 832 | } | 834 | } |
| 833 | 835 | ||
| 834 | void FSP_SRV::OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) { | 836 | void FSP_SRV::OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) { |
| @@ -902,4 +904,26 @@ void FSP_SRV::OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ct | |||
| 902 | rb.Push(FileSys::ERROR_ENTITY_NOT_FOUND); | 904 | rb.Push(FileSys::ERROR_ENTITY_NOT_FOUND); |
| 903 | } | 905 | } |
| 904 | 906 | ||
| 907 | void FSP_SRV::OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx) { | ||
| 908 | const auto raw = ctx.ReadBuffer(); | ||
| 909 | auto log = Common::StringFromFixedZeroTerminatedBuffer( | ||
| 910 | reinterpret_cast<const char*>(raw.data()), raw.size()); | ||
| 911 | |||
| 912 | LOG_DEBUG(Service_FS, "called, log='{}'", log); | ||
| 913 | |||
| 914 | reporter.SaveFilesystemAccessReport(log_mode, std::move(log)); | ||
| 915 | |||
| 916 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 917 | rb.Push(RESULT_SUCCESS); | ||
| 918 | } | ||
| 919 | |||
| 920 | void FSP_SRV::GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx) { | ||
| 921 | LOG_DEBUG(Service_FS, "called"); | ||
| 922 | |||
| 923 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 924 | rb.Push(RESULT_SUCCESS); | ||
| 925 | rb.PushEnum(AccessLogVersion::Latest); | ||
| 926 | rb.Push(access_log_program_index); | ||
| 927 | } | ||
| 928 | |||
| 905 | } // namespace Service::FileSystem | 929 | } // namespace Service::FileSystem |
diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h index d7572ba7a..b5486a193 100644 --- a/src/core/hle/service/filesystem/fsp_srv.h +++ b/src/core/hle/service/filesystem/fsp_srv.h | |||
| @@ -7,15 +7,32 @@ | |||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include "core/hle/service/service.h" | 8 | #include "core/hle/service/service.h" |
| 9 | 9 | ||
| 10 | namespace Core { | ||
| 11 | class Reporter; | ||
| 12 | } | ||
| 13 | |||
| 10 | namespace FileSys { | 14 | namespace FileSys { |
| 11 | class FileSystemBackend; | 15 | class FileSystemBackend; |
| 12 | } | 16 | } |
| 13 | 17 | ||
| 14 | namespace Service::FileSystem { | 18 | namespace Service::FileSystem { |
| 15 | 19 | ||
| 20 | enum class AccessLogVersion : u32 { | ||
| 21 | V7_0_0 = 2, | ||
| 22 | |||
| 23 | Latest = V7_0_0, | ||
| 24 | }; | ||
| 25 | |||
| 26 | enum class LogMode : u32 { | ||
| 27 | Off, | ||
| 28 | Log, | ||
| 29 | RedirectToSdCard, | ||
| 30 | LogToSdCard = Log | RedirectToSdCard, | ||
| 31 | }; | ||
| 32 | |||
| 16 | class FSP_SRV final : public ServiceFramework<FSP_SRV> { | 33 | class FSP_SRV final : public ServiceFramework<FSP_SRV> { |
| 17 | public: | 34 | public: |
| 18 | explicit FSP_SRV(); | 35 | explicit FSP_SRV(const Core::Reporter& reporter); |
| 19 | ~FSP_SRV() override; | 36 | ~FSP_SRV() override; |
| 20 | 37 | ||
| 21 | private: | 38 | private: |
| @@ -26,13 +43,20 @@ private: | |||
| 26 | void OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx); | 43 | void OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx); |
| 27 | void OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx); | 44 | void OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx); |
| 28 | void OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx); | 45 | void OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx); |
| 46 | void SetGlobalAccessLogMode(Kernel::HLERequestContext& ctx); | ||
| 29 | void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx); | 47 | void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx); |
| 30 | void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); | 48 | void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); |
| 31 | void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx); | 49 | void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx); |
| 32 | void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); | 50 | void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); |
| 51 | void OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx); | ||
| 52 | void GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx); | ||
| 33 | 53 | ||
| 34 | FileSys::VirtualFile romfs; | 54 | FileSys::VirtualFile romfs; |
| 35 | u64 current_process_id = 0; | 55 | u64 current_process_id = 0; |
| 56 | u32 access_log_program_index = 0; | ||
| 57 | LogMode log_mode = LogMode::LogToSdCard; | ||
| 58 | |||
| 59 | const Core::Reporter& reporter; | ||
| 36 | }; | 60 | }; |
| 37 | 61 | ||
| 38 | } // namespace Service::FileSystem | 62 | } // namespace Service::FileSystem |
diff --git a/src/core/hle/service/friend/errors.h b/src/core/hle/service/friend/errors.h new file mode 100644 index 000000000..b3996e275 --- /dev/null +++ b/src/core/hle/service/friend/errors.h | |||
| @@ -0,0 +1,12 @@ | |||
| 1 | // Copyright 2019 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "core/hle/result.h" | ||
| 8 | |||
| 9 | namespace Service::Friend { | ||
| 10 | |||
| 11 | constexpr ResultCode ERR_NO_NOTIFICATIONS{ErrorModule::Account, 15}; | ||
| 12 | } | ||
diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp index 5100e376c..d1ec12ef9 100644 --- a/src/core/hle/service/friend/friend.cpp +++ b/src/core/hle/service/friend/friend.cpp | |||
| @@ -2,8 +2,13 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <queue> | ||
| 5 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "common/uuid.h" | ||
| 6 | #include "core/hle/ipc_helpers.h" | 8 | #include "core/hle/ipc_helpers.h" |
| 9 | #include "core/hle/kernel/readable_event.h" | ||
| 10 | #include "core/hle/kernel/writable_event.h" | ||
| 11 | #include "core/hle/service/friend/errors.h" | ||
| 7 | #include "core/hle/service/friend/friend.h" | 12 | #include "core/hle/service/friend/friend.h" |
| 8 | #include "core/hle/service/friend/interface.h" | 13 | #include "core/hle/service/friend/interface.h" |
| 9 | 14 | ||
| @@ -17,7 +22,7 @@ public: | |||
| 17 | {0, nullptr, "GetCompletionEvent"}, | 22 | {0, nullptr, "GetCompletionEvent"}, |
| 18 | {1, nullptr, "Cancel"}, | 23 | {1, nullptr, "Cancel"}, |
| 19 | {10100, nullptr, "GetFriendListIds"}, | 24 | {10100, nullptr, "GetFriendListIds"}, |
| 20 | {10101, nullptr, "GetFriendList"}, | 25 | {10101, &IFriendService::GetFriendList, "GetFriendList"}, |
| 21 | {10102, nullptr, "UpdateFriendInfo"}, | 26 | {10102, nullptr, "UpdateFriendInfo"}, |
| 22 | {10110, nullptr, "GetFriendProfileImage"}, | 27 | {10110, nullptr, "GetFriendProfileImage"}, |
| 23 | {10200, nullptr, "SendFriendRequestForApplication"}, | 28 | {10200, nullptr, "SendFriendRequestForApplication"}, |
| @@ -94,6 +99,23 @@ public: | |||
| 94 | } | 99 | } |
| 95 | 100 | ||
| 96 | private: | 101 | private: |
| 102 | enum class PresenceFilter : u32 { | ||
| 103 | None = 0, | ||
| 104 | Online = 1, | ||
| 105 | OnlinePlay = 2, | ||
| 106 | OnlineOrOnlinePlay = 3, | ||
| 107 | }; | ||
| 108 | |||
| 109 | struct SizedFriendFilter { | ||
| 110 | PresenceFilter presence; | ||
| 111 | u8 is_favorite; | ||
| 112 | u8 same_app; | ||
| 113 | u8 same_app_played; | ||
| 114 | u8 arbitary_app_played; | ||
| 115 | u64 group_id; | ||
| 116 | }; | ||
| 117 | static_assert(sizeof(SizedFriendFilter) == 0x10, "SizedFriendFilter is an invalid size"); | ||
| 118 | |||
| 97 | void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) { | 119 | void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) { |
| 98 | // Stub used by Splatoon 2 | 120 | // Stub used by Splatoon 2 |
| 99 | LOG_WARNING(Service_ACC, "(STUBBED) called"); | 121 | LOG_WARNING(Service_ACC, "(STUBBED) called"); |
| @@ -107,6 +129,121 @@ private: | |||
| 107 | IPC::ResponseBuilder rb{ctx, 2}; | 129 | IPC::ResponseBuilder rb{ctx, 2}; |
| 108 | rb.Push(RESULT_SUCCESS); | 130 | rb.Push(RESULT_SUCCESS); |
| 109 | } | 131 | } |
| 132 | |||
| 133 | void GetFriendList(Kernel::HLERequestContext& ctx) { | ||
| 134 | IPC::RequestParser rp{ctx}; | ||
| 135 | const auto friend_offset = rp.Pop<u32>(); | ||
| 136 | const auto uuid = rp.PopRaw<Common::UUID>(); | ||
| 137 | [[maybe_unused]] const auto filter = rp.PopRaw<SizedFriendFilter>(); | ||
| 138 | const auto pid = rp.Pop<u64>(); | ||
| 139 | LOG_WARNING(Service_ACC, "(STUBBED) called, offset={}, uuid={}, pid={}", friend_offset, | ||
| 140 | uuid.Format(), pid); | ||
| 141 | |||
| 142 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 143 | rb.Push(RESULT_SUCCESS); | ||
| 144 | |||
| 145 | rb.Push<u32>(0); // Friend count | ||
| 146 | // TODO(ogniK): Return a buffer of u64s which are the "NetworkServiceAccountId" | ||
| 147 | } | ||
| 148 | }; | ||
| 149 | |||
| 150 | class INotificationService final : public ServiceFramework<INotificationService> { | ||
| 151 | public: | ||
| 152 | INotificationService(Common::UUID uuid) : ServiceFramework("INotificationService"), uuid(uuid) { | ||
| 153 | // clang-format off | ||
| 154 | static const FunctionInfo functions[] = { | ||
| 155 | {0, &INotificationService::GetEvent, "GetEvent"}, | ||
| 156 | {1, &INotificationService::Clear, "Clear"}, | ||
| 157 | {2, &INotificationService::Pop, "Pop"} | ||
| 158 | }; | ||
| 159 | // clang-format on | ||
| 160 | |||
| 161 | RegisterHandlers(functions); | ||
| 162 | } | ||
| 163 | |||
| 164 | private: | ||
| 165 | void GetEvent(Kernel::HLERequestContext& ctx) { | ||
| 166 | LOG_DEBUG(Service_ACC, "called"); | ||
| 167 | |||
| 168 | IPC::ResponseBuilder rb{ctx, 2, 1}; | ||
| 169 | rb.Push(RESULT_SUCCESS); | ||
| 170 | |||
| 171 | if (!is_event_created) { | ||
| 172 | auto& kernel = Core::System::GetInstance().Kernel(); | ||
| 173 | notification_event = Kernel::WritableEvent::CreateEventPair( | ||
| 174 | kernel, Kernel::ResetType::Manual, "INotificationService:NotifyEvent"); | ||
| 175 | is_event_created = true; | ||
| 176 | } | ||
| 177 | rb.PushCopyObjects(notification_event.readable); | ||
| 178 | } | ||
| 179 | |||
| 180 | void Clear(Kernel::HLERequestContext& ctx) { | ||
| 181 | LOG_DEBUG(Service_ACC, "called"); | ||
| 182 | while (!notifications.empty()) { | ||
| 183 | notifications.pop(); | ||
| 184 | } | ||
| 185 | std::memset(&states, 0, sizeof(States)); | ||
| 186 | |||
| 187 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 188 | rb.Push(RESULT_SUCCESS); | ||
| 189 | } | ||
| 190 | |||
| 191 | void Pop(Kernel::HLERequestContext& ctx) { | ||
| 192 | LOG_DEBUG(Service_ACC, "called"); | ||
| 193 | |||
| 194 | if (notifications.empty()) { | ||
| 195 | LOG_ERROR(Service_ACC, "No notifications in queue!"); | ||
| 196 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 197 | rb.Push(ERR_NO_NOTIFICATIONS); | ||
| 198 | return; | ||
| 199 | } | ||
| 200 | |||
| 201 | const auto notification = notifications.front(); | ||
| 202 | notifications.pop(); | ||
| 203 | |||
| 204 | switch (notification.notification_type) { | ||
| 205 | case NotificationTypes::HasUpdatedFriendsList: | ||
| 206 | states.has_updated_friends = false; | ||
| 207 | break; | ||
| 208 | case NotificationTypes::HasReceivedFriendRequest: | ||
| 209 | states.has_received_friend_request = false; | ||
| 210 | break; | ||
| 211 | default: | ||
| 212 | // HOS seems not have an error case for an unknown notification | ||
| 213 | LOG_WARNING(Service_ACC, "Unknown notification {:08X}", | ||
| 214 | static_cast<u32>(notification.notification_type)); | ||
| 215 | break; | ||
| 216 | } | ||
| 217 | |||
| 218 | IPC::ResponseBuilder rb{ctx, 6}; | ||
| 219 | rb.Push(RESULT_SUCCESS); | ||
| 220 | rb.PushRaw<SizedNotificationInfo>(notification); | ||
| 221 | } | ||
| 222 | |||
| 223 | enum class NotificationTypes : u32 { | ||
| 224 | HasUpdatedFriendsList = 0x65, | ||
| 225 | HasReceivedFriendRequest = 0x1 | ||
| 226 | }; | ||
| 227 | |||
| 228 | struct SizedNotificationInfo { | ||
| 229 | NotificationTypes notification_type; | ||
| 230 | INSERT_PADDING_WORDS( | ||
| 231 | 1); // TODO(ogniK): This doesn't seem to be used within any IPC returns as of now | ||
| 232 | u64_le account_id; | ||
| 233 | }; | ||
| 234 | static_assert(sizeof(SizedNotificationInfo) == 0x10, | ||
| 235 | "SizedNotificationInfo is an incorrect size"); | ||
| 236 | |||
| 237 | struct States { | ||
| 238 | bool has_updated_friends; | ||
| 239 | bool has_received_friend_request; | ||
| 240 | }; | ||
| 241 | |||
| 242 | Common::UUID uuid; | ||
| 243 | bool is_event_created = false; | ||
| 244 | Kernel::EventPair notification_event; | ||
| 245 | std::queue<SizedNotificationInfo> notifications; | ||
| 246 | States states{}; | ||
| 110 | }; | 247 | }; |
| 111 | 248 | ||
| 112 | void Module::Interface::CreateFriendService(Kernel::HLERequestContext& ctx) { | 249 | void Module::Interface::CreateFriendService(Kernel::HLERequestContext& ctx) { |
| @@ -116,6 +253,17 @@ void Module::Interface::CreateFriendService(Kernel::HLERequestContext& ctx) { | |||
| 116 | LOG_DEBUG(Service_ACC, "called"); | 253 | LOG_DEBUG(Service_ACC, "called"); |
| 117 | } | 254 | } |
| 118 | 255 | ||
| 256 | void Module::Interface::CreateNotificationService(Kernel::HLERequestContext& ctx) { | ||
| 257 | IPC::RequestParser rp{ctx}; | ||
| 258 | auto uuid = rp.PopRaw<Common::UUID>(); | ||
| 259 | |||
| 260 | LOG_DEBUG(Service_ACC, "called, uuid={}", uuid.Format()); | ||
| 261 | |||
| 262 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | ||
| 263 | rb.Push(RESULT_SUCCESS); | ||
| 264 | rb.PushIpcInterface<INotificationService>(uuid); | ||
| 265 | } | ||
| 266 | |||
| 119 | Module::Interface::Interface(std::shared_ptr<Module> module, const char* name) | 267 | Module::Interface::Interface(std::shared_ptr<Module> module, const char* name) |
| 120 | : ServiceFramework(name), module(std::move(module)) {} | 268 | : ServiceFramework(name), module(std::move(module)) {} |
| 121 | 269 | ||
diff --git a/src/core/hle/service/friend/friend.h b/src/core/hle/service/friend/friend.h index e762840cb..38d05fa8e 100644 --- a/src/core/hle/service/friend/friend.h +++ b/src/core/hle/service/friend/friend.h | |||
| @@ -16,6 +16,7 @@ public: | |||
| 16 | ~Interface() override; | 16 | ~Interface() override; |
| 17 | 17 | ||
| 18 | void CreateFriendService(Kernel::HLERequestContext& ctx); | 18 | void CreateFriendService(Kernel::HLERequestContext& ctx); |
| 19 | void CreateNotificationService(Kernel::HLERequestContext& ctx); | ||
| 19 | 20 | ||
| 20 | protected: | 21 | protected: |
| 21 | std::shared_ptr<Module> module; | 22 | std::shared_ptr<Module> module; |
diff --git a/src/core/hle/service/friend/interface.cpp b/src/core/hle/service/friend/interface.cpp index 5a6840af5..5b384f733 100644 --- a/src/core/hle/service/friend/interface.cpp +++ b/src/core/hle/service/friend/interface.cpp | |||
| @@ -10,7 +10,7 @@ Friend::Friend(std::shared_ptr<Module> module, const char* name) | |||
| 10 | : Interface(std::move(module), name) { | 10 | : Interface(std::move(module), name) { |
| 11 | static const FunctionInfo functions[] = { | 11 | static const FunctionInfo functions[] = { |
| 12 | {0, &Friend::CreateFriendService, "CreateFriendService"}, | 12 | {0, &Friend::CreateFriendService, "CreateFriendService"}, |
| 13 | {1, nullptr, "CreateNotificationService"}, | 13 | {1, &Friend::CreateNotificationService, "CreateNotificationService"}, |
| 14 | {2, nullptr, "CreateDaemonSuspendSessionService"}, | 14 | {2, nullptr, "CreateDaemonSuspendSessionService"}, |
| 15 | }; | 15 | }; |
| 16 | RegisterHandlers(functions); | 16 | RegisterHandlers(functions); |
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index fdd6d79a2..e47fe8188 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp | |||
| @@ -548,6 +548,37 @@ void Controller_NPad::DisconnectNPad(u32 npad_id) { | |||
| 548 | connected_controllers[NPadIdToIndex(npad_id)].is_connected = false; | 548 | connected_controllers[NPadIdToIndex(npad_id)].is_connected = false; |
| 549 | } | 549 | } |
| 550 | 550 | ||
| 551 | void Controller_NPad::StartLRAssignmentMode() { | ||
| 552 | // Nothing internally is used for lr assignment mode. Since we have the ability to set the | ||
| 553 | // controller types from boot, it doesn't really matter about showing a selection screen | ||
| 554 | is_in_lr_assignment_mode = true; | ||
| 555 | } | ||
| 556 | |||
| 557 | void Controller_NPad::StopLRAssignmentMode() { | ||
| 558 | is_in_lr_assignment_mode = false; | ||
| 559 | } | ||
| 560 | |||
| 561 | bool Controller_NPad::SwapNpadAssignment(u32 npad_id_1, u32 npad_id_2) { | ||
| 562 | if (npad_id_1 == NPAD_HANDHELD || npad_id_2 == NPAD_HANDHELD || npad_id_1 == NPAD_UNKNOWN || | ||
| 563 | npad_id_2 == NPAD_UNKNOWN) { | ||
| 564 | return true; | ||
| 565 | } | ||
| 566 | const auto npad_index_1 = NPadIdToIndex(npad_id_1); | ||
| 567 | const auto npad_index_2 = NPadIdToIndex(npad_id_2); | ||
| 568 | |||
| 569 | if (!IsControllerSupported(connected_controllers[npad_index_1].type) || | ||
| 570 | !IsControllerSupported(connected_controllers[npad_index_2].type)) { | ||
| 571 | return false; | ||
| 572 | } | ||
| 573 | |||
| 574 | std::swap(connected_controllers[npad_index_1].type, connected_controllers[npad_index_2].type); | ||
| 575 | |||
| 576 | InitNewlyAddedControler(npad_index_1); | ||
| 577 | InitNewlyAddedControler(npad_index_2); | ||
| 578 | |||
| 579 | return true; | ||
| 580 | } | ||
| 581 | |||
| 551 | bool Controller_NPad::IsControllerSupported(NPadControllerType controller) { | 582 | bool Controller_NPad::IsControllerSupported(NPadControllerType controller) { |
| 552 | if (controller == NPadControllerType::Handheld) { | 583 | if (controller == NPadControllerType::Handheld) { |
| 553 | // Handheld is not even a supported type, lets stop here | 584 | // Handheld is not even a supported type, lets stop here |
| @@ -605,10 +636,15 @@ Controller_NPad::LedPattern Controller_NPad::GetLedPattern(u32 npad_id) { | |||
| 605 | return LedPattern{0, 0, 0, 0}; | 636 | return LedPattern{0, 0, 0, 0}; |
| 606 | }; | 637 | }; |
| 607 | } | 638 | } |
| 639 | |||
| 608 | void Controller_NPad::SetVibrationEnabled(bool can_vibrate) { | 640 | void Controller_NPad::SetVibrationEnabled(bool can_vibrate) { |
| 609 | can_controllers_vibrate = can_vibrate; | 641 | can_controllers_vibrate = can_vibrate; |
| 610 | } | 642 | } |
| 611 | 643 | ||
| 644 | bool Controller_NPad::IsVibrationEnabled() const { | ||
| 645 | return can_controllers_vibrate; | ||
| 646 | } | ||
| 647 | |||
| 612 | void Controller_NPad::ClearAllConnectedControllers() { | 648 | void Controller_NPad::ClearAllConnectedControllers() { |
| 613 | for (auto& controller : connected_controllers) { | 649 | for (auto& controller : connected_controllers) { |
| 614 | if (controller.is_connected && controller.type != NPadControllerType::None) { | 650 | if (controller.is_connected && controller.type != NPadControllerType::None) { |
| @@ -617,6 +653,7 @@ void Controller_NPad::ClearAllConnectedControllers() { | |||
| 617 | } | 653 | } |
| 618 | } | 654 | } |
| 619 | } | 655 | } |
| 656 | |||
| 620 | void Controller_NPad::DisconnectAllConnectedControllers() { | 657 | void Controller_NPad::DisconnectAllConnectedControllers() { |
| 621 | std::for_each(connected_controllers.begin(), connected_controllers.end(), | 658 | std::for_each(connected_controllers.begin(), connected_controllers.end(), |
| 622 | [](ControllerHolder& controller) { controller.is_connected = false; }); | 659 | [](ControllerHolder& controller) { controller.is_connected = false; }); |
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h index 4ff50b3cd..f28b36806 100644 --- a/src/core/hle/service/hid/controllers/npad.h +++ b/src/core/hle/service/hid/controllers/npad.h | |||
| @@ -119,11 +119,16 @@ public: | |||
| 119 | void DisconnectNPad(u32 npad_id); | 119 | void DisconnectNPad(u32 npad_id); |
| 120 | LedPattern GetLedPattern(u32 npad_id); | 120 | LedPattern GetLedPattern(u32 npad_id); |
| 121 | void SetVibrationEnabled(bool can_vibrate); | 121 | void SetVibrationEnabled(bool can_vibrate); |
| 122 | bool IsVibrationEnabled() const; | ||
| 122 | void ClearAllConnectedControllers(); | 123 | void ClearAllConnectedControllers(); |
| 123 | void DisconnectAllConnectedControllers(); | 124 | void DisconnectAllConnectedControllers(); |
| 124 | void ConnectAllDisconnectedControllers(); | 125 | void ConnectAllDisconnectedControllers(); |
| 125 | void ClearAllControllers(); | 126 | void ClearAllControllers(); |
| 126 | 127 | ||
| 128 | void StartLRAssignmentMode(); | ||
| 129 | void StopLRAssignmentMode(); | ||
| 130 | bool SwapNpadAssignment(u32 npad_id_1, u32 npad_id_2); | ||
| 131 | |||
| 127 | // Logical OR for all buttons presses on all controllers | 132 | // Logical OR for all buttons presses on all controllers |
| 128 | // Specifically for cheat engine and other features. | 133 | // Specifically for cheat engine and other features. |
| 129 | u32 GetAndResetPressState(); | 134 | u32 GetAndResetPressState(); |
| @@ -321,5 +326,6 @@ private: | |||
| 321 | void RequestPadStateUpdate(u32 npad_id); | 326 | void RequestPadStateUpdate(u32 npad_id); |
| 322 | std::array<ControllerPad, 10> npad_pad_states{}; | 327 | std::array<ControllerPad, 10> npad_pad_states{}; |
| 323 | bool IsControllerSupported(NPadControllerType controller); | 328 | bool IsControllerSupported(NPadControllerType controller); |
| 329 | bool is_in_lr_assignment_mode{false}; | ||
| 324 | }; | 330 | }; |
| 325 | } // namespace Service::HID | 331 | } // namespace Service::HID |
diff --git a/src/core/hle/service/hid/errors.h b/src/core/hle/service/hid/errors.h new file mode 100644 index 000000000..3583642e7 --- /dev/null +++ b/src/core/hle/service/hid/errors.h | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | // Copyright 2019 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "core/hle/result.h" | ||
| 8 | |||
| 9 | namespace Service::HID { | ||
| 10 | |||
| 11 | constexpr ResultCode ERR_NPAD_NOT_CONNECTED{ErrorModule::HID, 710}; | ||
| 12 | |||
| 13 | } // namespace Service::HID | ||
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index a4ad95d96..f8b1ca816 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include "core/hle/kernel/readable_event.h" | 16 | #include "core/hle/kernel/readable_event.h" |
| 17 | #include "core/hle/kernel/shared_memory.h" | 17 | #include "core/hle/kernel/shared_memory.h" |
| 18 | #include "core/hle/kernel/writable_event.h" | 18 | #include "core/hle/kernel/writable_event.h" |
| 19 | #include "core/hle/service/hid/errors.h" | ||
| 19 | #include "core/hle/service/hid/hid.h" | 20 | #include "core/hle/service/hid/hid.h" |
| 20 | #include "core/hle/service/hid/irs.h" | 21 | #include "core/hle/service/hid/irs.h" |
| 21 | #include "core/hle/service/hid/xcd.h" | 22 | #include "core/hle/service/hid/xcd.h" |
| @@ -202,11 +203,11 @@ Hid::Hid() : ServiceFramework("hid") { | |||
| 202 | {123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"}, | 203 | {123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"}, |
| 203 | {124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"}, | 204 | {124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"}, |
| 204 | {125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"}, | 205 | {125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"}, |
| 205 | {126, nullptr, "StartLrAssignmentMode"}, | 206 | {126, &Hid::StartLrAssignmentMode, "StartLrAssignmentMode"}, |
| 206 | {127, nullptr, "StopLrAssignmentMode"}, | 207 | {127, &Hid::StopLrAssignmentMode, "StopLrAssignmentMode"}, |
| 207 | {128, &Hid::SetNpadHandheldActivationMode, "SetNpadHandheldActivationMode"}, | 208 | {128, &Hid::SetNpadHandheldActivationMode, "SetNpadHandheldActivationMode"}, |
| 208 | {129, nullptr, "GetNpadHandheldActivationMode"}, | 209 | {129, nullptr, "GetNpadHandheldActivationMode"}, |
| 209 | {130, nullptr, "SwapNpadAssignment"}, | 210 | {130, &Hid::SwapNpadAssignment, "SwapNpadAssignment"}, |
| 210 | {131, nullptr, "IsUnintendedHomeButtonInputProtectionEnabled"}, | 211 | {131, nullptr, "IsUnintendedHomeButtonInputProtectionEnabled"}, |
| 211 | {132, nullptr, "EnableUnintendedHomeButtonInputProtection"}, | 212 | {132, nullptr, "EnableUnintendedHomeButtonInputProtection"}, |
| 212 | {133, nullptr, "SetNpadJoyAssignmentModeSingleWithDestination"}, | 213 | {133, nullptr, "SetNpadJoyAssignmentModeSingleWithDestination"}, |
| @@ -215,8 +216,8 @@ Hid::Hid() : ServiceFramework("hid") { | |||
| 215 | {201, &Hid::SendVibrationValue, "SendVibrationValue"}, | 216 | {201, &Hid::SendVibrationValue, "SendVibrationValue"}, |
| 216 | {202, &Hid::GetActualVibrationValue, "GetActualVibrationValue"}, | 217 | {202, &Hid::GetActualVibrationValue, "GetActualVibrationValue"}, |
| 217 | {203, &Hid::CreateActiveVibrationDeviceList, "CreateActiveVibrationDeviceList"}, | 218 | {203, &Hid::CreateActiveVibrationDeviceList, "CreateActiveVibrationDeviceList"}, |
| 218 | {204, nullptr, "PermitVibration"}, | 219 | {204, &Hid::PermitVibration, "PermitVibration"}, |
| 219 | {205, nullptr, "IsVibrationPermitted"}, | 220 | {205, &Hid::IsVibrationPermitted, "IsVibrationPermitted"}, |
| 220 | {206, &Hid::SendVibrationValues, "SendVibrationValues"}, | 221 | {206, &Hid::SendVibrationValues, "SendVibrationValues"}, |
| 221 | {207, nullptr, "SendVibrationGcErmCommand"}, | 222 | {207, nullptr, "SendVibrationGcErmCommand"}, |
| 222 | {208, nullptr, "GetActualVibrationGcErmCommand"}, | 223 | {208, nullptr, "GetActualVibrationGcErmCommand"}, |
| @@ -678,6 +679,27 @@ void Hid::CreateActiveVibrationDeviceList(Kernel::HLERequestContext& ctx) { | |||
| 678 | rb.PushIpcInterface<IActiveVibrationDeviceList>(); | 679 | rb.PushIpcInterface<IActiveVibrationDeviceList>(); |
| 679 | } | 680 | } |
| 680 | 681 | ||
| 682 | void Hid::PermitVibration(Kernel::HLERequestContext& ctx) { | ||
| 683 | IPC::RequestParser rp{ctx}; | ||
| 684 | const auto can_vibrate{rp.Pop<bool>()}; | ||
| 685 | applet_resource->GetController<Controller_NPad>(HidController::NPad) | ||
| 686 | .SetVibrationEnabled(can_vibrate); | ||
| 687 | |||
| 688 | LOG_DEBUG(Service_HID, "called, can_vibrate={}", can_vibrate); | ||
| 689 | |||
| 690 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 691 | rb.Push(RESULT_SUCCESS); | ||
| 692 | } | ||
| 693 | |||
| 694 | void Hid::IsVibrationPermitted(Kernel::HLERequestContext& ctx) { | ||
| 695 | LOG_DEBUG(Service_HID, "called"); | ||
| 696 | |||
| 697 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 698 | rb.Push(RESULT_SUCCESS); | ||
| 699 | rb.Push( | ||
| 700 | applet_resource->GetController<Controller_NPad>(HidController::NPad).IsVibrationEnabled()); | ||
| 701 | } | ||
| 702 | |||
| 681 | void Hid::ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) { | 703 | void Hid::ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) { |
| 682 | IPC::RequestParser rp{ctx}; | 704 | IPC::RequestParser rp{ctx}; |
| 683 | const auto applet_resource_user_id{rp.Pop<u64>()}; | 705 | const auto applet_resource_user_id{rp.Pop<u64>()}; |
| @@ -733,6 +755,49 @@ void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) { | |||
| 733 | rb.Push(RESULT_SUCCESS); | 755 | rb.Push(RESULT_SUCCESS); |
| 734 | } | 756 | } |
| 735 | 757 | ||
| 758 | void Hid::StartLrAssignmentMode(Kernel::HLERequestContext& ctx) { | ||
| 759 | IPC::RequestParser rp{ctx}; | ||
| 760 | const auto applet_resource_user_id{rp.Pop<u64>()}; | ||
| 761 | |||
| 762 | LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id); | ||
| 763 | auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad); | ||
| 764 | controller.StartLRAssignmentMode(); | ||
| 765 | |||
| 766 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 767 | rb.Push(RESULT_SUCCESS); | ||
| 768 | } | ||
| 769 | |||
| 770 | void Hid::StopLrAssignmentMode(Kernel::HLERequestContext& ctx) { | ||
| 771 | IPC::RequestParser rp{ctx}; | ||
| 772 | const auto applet_resource_user_id{rp.Pop<u64>()}; | ||
| 773 | |||
| 774 | LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id); | ||
| 775 | auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad); | ||
| 776 | controller.StopLRAssignmentMode(); | ||
| 777 | |||
| 778 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 779 | rb.Push(RESULT_SUCCESS); | ||
| 780 | } | ||
| 781 | |||
| 782 | void Hid::SwapNpadAssignment(Kernel::HLERequestContext& ctx) { | ||
| 783 | IPC::RequestParser rp{ctx}; | ||
| 784 | const auto npad_1{rp.Pop<u32>()}; | ||
| 785 | const auto npad_2{rp.Pop<u32>()}; | ||
| 786 | const auto applet_resource_user_id{rp.Pop<u64>()}; | ||
| 787 | |||
| 788 | LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}, npad_1={}, npad_2={}", | ||
| 789 | applet_resource_user_id, npad_1, npad_2); | ||
| 790 | |||
| 791 | auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad); | ||
| 792 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 793 | if (controller.SwapNpadAssignment(npad_1, npad_2)) { | ||
| 794 | rb.Push(RESULT_SUCCESS); | ||
| 795 | } else { | ||
| 796 | LOG_ERROR(Service_HID, "Npads are not connected!"); | ||
| 797 | rb.Push(ERR_NPAD_NOT_CONNECTED); | ||
| 798 | } | ||
| 799 | } | ||
| 800 | |||
| 736 | class HidDbg final : public ServiceFramework<HidDbg> { | 801 | class HidDbg final : public ServiceFramework<HidDbg> { |
| 737 | public: | 802 | public: |
| 738 | explicit HidDbg() : ServiceFramework{"hid:dbg"} { | 803 | explicit HidDbg() : ServiceFramework{"hid:dbg"} { |
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index d3660cad2..2fd6d9fc7 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h | |||
| @@ -114,11 +114,16 @@ private: | |||
| 114 | void SetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx); | 114 | void SetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx); |
| 115 | void GetVibrationDeviceInfo(Kernel::HLERequestContext& ctx); | 115 | void GetVibrationDeviceInfo(Kernel::HLERequestContext& ctx); |
| 116 | void CreateActiveVibrationDeviceList(Kernel::HLERequestContext& ctx); | 116 | void CreateActiveVibrationDeviceList(Kernel::HLERequestContext& ctx); |
| 117 | void PermitVibration(Kernel::HLERequestContext& ctx); | ||
| 118 | void IsVibrationPermitted(Kernel::HLERequestContext& ctx); | ||
| 117 | void ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); | 119 | void ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); |
| 118 | void StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); | 120 | void StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); |
| 119 | void StopSixAxisSensor(Kernel::HLERequestContext& ctx); | 121 | void StopSixAxisSensor(Kernel::HLERequestContext& ctx); |
| 120 | void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx); | 122 | void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx); |
| 121 | void SetPalmaBoostMode(Kernel::HLERequestContext& ctx); | 123 | void SetPalmaBoostMode(Kernel::HLERequestContext& ctx); |
| 124 | void StartLrAssignmentMode(Kernel::HLERequestContext& ctx); | ||
| 125 | void StopLrAssignmentMode(Kernel::HLERequestContext& ctx); | ||
| 126 | void SwapNpadAssignment(Kernel::HLERequestContext& ctx); | ||
| 122 | 127 | ||
| 123 | std::shared_ptr<IAppletResource> applet_resource; | 128 | std::shared_ptr<IAppletResource> applet_resource; |
| 124 | }; | 129 | }; |
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp index b839303ac..8ddad8682 100644 --- a/src/core/hle/service/ldr/ldr.cpp +++ b/src/core/hle/service/ldr/ldr.cpp | |||
| @@ -345,14 +345,16 @@ public: | |||
| 345 | vm_manager | 345 | vm_manager |
| 346 | .MirrorMemory(*map_address, nro_address, nro_size, Kernel::MemoryState::ModuleCode) | 346 | .MirrorMemory(*map_address, nro_address, nro_size, Kernel::MemoryState::ModuleCode) |
| 347 | .IsSuccess()); | 347 | .IsSuccess()); |
| 348 | ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess()); | 348 | ASSERT(vm_manager.ReprotectRange(nro_address, nro_size, Kernel::VMAPermission::None) |
| 349 | .IsSuccess()); | ||
| 349 | 350 | ||
| 350 | if (bss_size > 0) { | 351 | if (bss_size > 0) { |
| 351 | ASSERT(vm_manager | 352 | ASSERT(vm_manager |
| 352 | .MirrorMemory(*map_address + nro_size, bss_address, bss_size, | 353 | .MirrorMemory(*map_address + nro_size, bss_address, bss_size, |
| 353 | Kernel::MemoryState::ModuleCode) | 354 | Kernel::MemoryState::ModuleCode) |
| 354 | .IsSuccess()); | 355 | .IsSuccess()); |
| 355 | ASSERT(vm_manager.UnmapRange(bss_address, bss_size).IsSuccess()); | 356 | ASSERT(vm_manager.ReprotectRange(bss_address, bss_size, Kernel::VMAPermission::None) |
| 357 | .IsSuccess()); | ||
| 356 | } | 358 | } |
| 357 | 359 | ||
| 358 | vm_manager.ReprotectRange(*map_address, header.text_size, | 360 | vm_manager.ReprotectRange(*map_address, header.text_size, |
| @@ -364,7 +366,8 @@ public: | |||
| 364 | 366 | ||
| 365 | Core::System::GetInstance().InvalidateCpuInstructionCaches(); | 367 | Core::System::GetInstance().InvalidateCpuInstructionCaches(); |
| 366 | 368 | ||
| 367 | nro.insert_or_assign(*map_address, NROInfo{hash, nro_size + bss_size}); | 369 | nro.insert_or_assign(*map_address, |
| 370 | NROInfo{hash, nro_address, nro_size, bss_address, bss_size}); | ||
| 368 | 371 | ||
| 369 | IPC::ResponseBuilder rb{ctx, 4}; | 372 | IPC::ResponseBuilder rb{ctx, 4}; |
| 370 | rb.Push(RESULT_SUCCESS); | 373 | rb.Push(RESULT_SUCCESS); |
| @@ -409,9 +412,23 @@ public: | |||
| 409 | } | 412 | } |
| 410 | 413 | ||
| 411 | auto& vm_manager = Core::CurrentProcess()->VMManager(); | 414 | auto& vm_manager = Core::CurrentProcess()->VMManager(); |
| 412 | const auto& nro_size = iter->second.size; | 415 | const auto& nro_info = iter->second; |
| 413 | 416 | ||
| 414 | ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess()); | 417 | // Unmap the mirrored memory |
| 418 | ASSERT( | ||
| 419 | vm_manager.UnmapRange(nro_address, nro_info.nro_size + nro_info.bss_size).IsSuccess()); | ||
| 420 | |||
| 421 | // Reprotect the source memory | ||
| 422 | ASSERT(vm_manager | ||
| 423 | .ReprotectRange(nro_info.nro_address, nro_info.nro_size, | ||
| 424 | Kernel::VMAPermission::ReadWrite) | ||
| 425 | .IsSuccess()); | ||
| 426 | if (nro_info.bss_size > 0) { | ||
| 427 | ASSERT(vm_manager | ||
| 428 | .ReprotectRange(nro_info.bss_address, nro_info.bss_size, | ||
| 429 | Kernel::VMAPermission::ReadWrite) | ||
| 430 | .IsSuccess()); | ||
| 431 | } | ||
| 415 | 432 | ||
| 416 | Core::System::GetInstance().InvalidateCpuInstructionCaches(); | 433 | Core::System::GetInstance().InvalidateCpuInstructionCaches(); |
| 417 | 434 | ||
| @@ -473,7 +490,10 @@ private: | |||
| 473 | 490 | ||
| 474 | struct NROInfo { | 491 | struct NROInfo { |
| 475 | SHA256Hash hash; | 492 | SHA256Hash hash; |
| 476 | u64 size; | 493 | VAddr nro_address; |
| 494 | u64 nro_size; | ||
| 495 | VAddr bss_address; | ||
| 496 | u64 bss_size; | ||
| 477 | }; | 497 | }; |
| 478 | 498 | ||
| 479 | bool initialized = false; | 499 | bool initialized = false; |
diff --git a/src/core/hle/service/mii/mii.cpp b/src/core/hle/service/mii/mii.cpp index ce84e25ed..0b3923ad9 100644 --- a/src/core/hle/service/mii/mii.cpp +++ b/src/core/hle/service/mii/mii.cpp | |||
| @@ -48,7 +48,7 @@ public: | |||
| 48 | {19, nullptr, "Export"}, | 48 | {19, nullptr, "Export"}, |
| 49 | {20, nullptr, "IsBrokenDatabaseWithClearFlag"}, | 49 | {20, nullptr, "IsBrokenDatabaseWithClearFlag"}, |
| 50 | {21, &IDatabaseService::GetIndex, "GetIndex"}, | 50 | {21, &IDatabaseService::GetIndex, "GetIndex"}, |
| 51 | {22, nullptr, "SetInterfaceVersion"}, | 51 | {22, &IDatabaseService::SetInterfaceVersion, "SetInterfaceVersion"}, |
| 52 | {23, nullptr, "Convert"}, | 52 | {23, nullptr, "Convert"}, |
| 53 | }; | 53 | }; |
| 54 | // clang-format on | 54 | // clang-format on |
| @@ -350,8 +350,22 @@ private: | |||
| 350 | rb.Push(index); | 350 | rb.Push(index); |
| 351 | } | 351 | } |
| 352 | 352 | ||
| 353 | void SetInterfaceVersion(Kernel::HLERequestContext& ctx) { | ||
| 354 | IPC::RequestParser rp{ctx}; | ||
| 355 | current_interface_version = rp.PopRaw<u32>(); | ||
| 356 | |||
| 357 | LOG_DEBUG(Service_Mii, "called, interface_version={:08X}", current_interface_version); | ||
| 358 | |||
| 359 | UNIMPLEMENTED_IF(current_interface_version != 1); | ||
| 360 | |||
| 361 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 362 | rb.Push(RESULT_SUCCESS); | ||
| 363 | } | ||
| 364 | |||
| 353 | MiiManager db; | 365 | MiiManager db; |
| 354 | 366 | ||
| 367 | u32 current_interface_version = 0; | ||
| 368 | |||
| 355 | // Last read offsets of Get functions | 369 | // Last read offsets of Get functions |
| 356 | std::array<u32, 4> offsets{}; | 370 | std::array<u32, 4> offsets{}; |
| 357 | }; | 371 | }; |
diff --git a/src/core/hle/service/mii/mii_manager.cpp b/src/core/hle/service/mii/mii_manager.cpp index 131b01d62..8d0353075 100644 --- a/src/core/hle/service/mii/mii_manager.cpp +++ b/src/core/hle/service/mii/mii_manager.cpp | |||
| @@ -175,6 +175,10 @@ MiiStoreData ConvertInfoToStoreData(const MiiInfo& info) { | |||
| 175 | } // namespace | 175 | } // namespace |
| 176 | 176 | ||
| 177 | std::ostream& operator<<(std::ostream& os, Source source) { | 177 | std::ostream& operator<<(std::ostream& os, Source source) { |
| 178 | if (static_cast<std::size_t>(source) >= SOURCE_NAMES.size()) { | ||
| 179 | return os << "[UNKNOWN SOURCE]"; | ||
| 180 | } | ||
| 181 | |||
| 178 | os << SOURCE_NAMES.at(static_cast<std::size_t>(source)); | 182 | os << SOURCE_NAMES.at(static_cast<std::size_t>(source)); |
| 179 | return os; | 183 | return os; |
| 180 | } | 184 | } |
diff --git a/src/core/hle/service/ns/pl_u.cpp b/src/core/hle/service/ns/pl_u.cpp index ad176f89d..2a522136d 100644 --- a/src/core/hle/service/ns/pl_u.cpp +++ b/src/core/hle/service/ns/pl_u.cpp | |||
| @@ -77,7 +77,7 @@ enum class LoadState : u32 { | |||
| 77 | Done = 1, | 77 | Done = 1, |
| 78 | }; | 78 | }; |
| 79 | 79 | ||
| 80 | static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output, | 80 | static void DecryptSharedFont(const std::vector<u32>& input, Kernel::PhysicalMemory& output, |
| 81 | std::size_t& offset) { | 81 | std::size_t& offset) { |
| 82 | ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE, | 82 | ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE, |
| 83 | "Shared fonts exceeds 17mb!"); | 83 | "Shared fonts exceeds 17mb!"); |
| @@ -94,7 +94,7 @@ static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& ou | |||
| 94 | offset += transformed_font.size() * sizeof(u32); | 94 | offset += transformed_font.size() * sizeof(u32); |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | static void EncryptSharedFont(const std::vector<u8>& input, std::vector<u8>& output, | 97 | static void EncryptSharedFont(const std::vector<u8>& input, Kernel::PhysicalMemory& output, |
| 98 | std::size_t& offset) { | 98 | std::size_t& offset) { |
| 99 | ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!"); | 99 | ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!"); |
| 100 | const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT; | 100 | const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT; |
| @@ -121,7 +121,7 @@ struct PL_U::Impl { | |||
| 121 | return shared_font_regions.at(index); | 121 | return shared_font_regions.at(index); |
| 122 | } | 122 | } |
| 123 | 123 | ||
| 124 | void BuildSharedFontsRawRegions(const std::vector<u8>& input) { | 124 | void BuildSharedFontsRawRegions(const Kernel::PhysicalMemory& input) { |
| 125 | // As we can derive the xor key we can just populate the offsets | 125 | // As we can derive the xor key we can just populate the offsets |
| 126 | // based on the shared memory dump | 126 | // based on the shared memory dump |
| 127 | unsigned cur_offset = 0; | 127 | unsigned cur_offset = 0; |
| @@ -144,7 +144,7 @@ struct PL_U::Impl { | |||
| 144 | Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem; | 144 | Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem; |
| 145 | 145 | ||
| 146 | /// Backing memory for the shared font data | 146 | /// Backing memory for the shared font data |
| 147 | std::shared_ptr<std::vector<u8>> shared_font; | 147 | std::shared_ptr<Kernel::PhysicalMemory> shared_font; |
| 148 | 148 | ||
| 149 | // Automatically populated based on shared_fonts dump or system archives. | 149 | // Automatically populated based on shared_fonts dump or system archives. |
| 150 | std::vector<FontRegion> shared_font_regions; | 150 | std::vector<FontRegion> shared_font_regions; |
| @@ -166,7 +166,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} { | |||
| 166 | // Rebuild shared fonts from data ncas | 166 | // Rebuild shared fonts from data ncas |
| 167 | if (nand->HasEntry(static_cast<u64>(FontArchives::Standard), | 167 | if (nand->HasEntry(static_cast<u64>(FontArchives::Standard), |
| 168 | FileSys::ContentRecordType::Data)) { | 168 | FileSys::ContentRecordType::Data)) { |
| 169 | impl->shared_font = std::make_shared<std::vector<u8>>(SHARED_FONT_MEM_SIZE); | 169 | impl->shared_font = std::make_shared<Kernel::PhysicalMemory>(SHARED_FONT_MEM_SIZE); |
| 170 | for (auto font : SHARED_FONTS) { | 170 | for (auto font : SHARED_FONTS) { |
| 171 | const auto nca = | 171 | const auto nca = |
| 172 | nand->GetEntry(static_cast<u64>(font.first), FileSys::ContentRecordType::Data); | 172 | nand->GetEntry(static_cast<u64>(font.first), FileSys::ContentRecordType::Data); |
| @@ -207,7 +207,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} { | |||
| 207 | } | 207 | } |
| 208 | 208 | ||
| 209 | } else { | 209 | } else { |
| 210 | impl->shared_font = std::make_shared<std::vector<u8>>( | 210 | impl->shared_font = std::make_shared<Kernel::PhysicalMemory>( |
| 211 | SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size | 211 | SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size |
| 212 | 212 | ||
| 213 | const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir); | 213 | const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir); |
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h index 4f6042b00..5b8248433 100644 --- a/src/core/hle/service/nvdrv/devices/nvdevice.h +++ b/src/core/hle/service/nvdrv/devices/nvdevice.h | |||
| @@ -8,6 +8,11 @@ | |||
| 8 | #include "common/bit_field.h" | 8 | #include "common/bit_field.h" |
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "common/swap.h" | 10 | #include "common/swap.h" |
| 11 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 12 | |||
| 13 | namespace Core { | ||
| 14 | class System; | ||
| 15 | } | ||
| 11 | 16 | ||
| 12 | namespace Service::Nvidia::Devices { | 17 | namespace Service::Nvidia::Devices { |
| 13 | 18 | ||
| @@ -15,7 +20,7 @@ namespace Service::Nvidia::Devices { | |||
| 15 | /// implement the ioctl interface. | 20 | /// implement the ioctl interface. |
| 16 | class nvdevice { | 21 | class nvdevice { |
| 17 | public: | 22 | public: |
| 18 | nvdevice() = default; | 23 | explicit nvdevice(Core::System& system) : system{system} {}; |
| 19 | virtual ~nvdevice() = default; | 24 | virtual ~nvdevice() = default; |
| 20 | union Ioctl { | 25 | union Ioctl { |
| 21 | u32_le raw; | 26 | u32_le raw; |
| @@ -33,7 +38,11 @@ public: | |||
| 33 | * @param output A buffer where the output data will be written to. | 38 | * @param output A buffer where the output data will be written to. |
| 34 | * @returns The result code of the ioctl. | 39 | * @returns The result code of the ioctl. |
| 35 | */ | 40 | */ |
| 36 | virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) = 0; | 41 | virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 42 | IoctlCtrl& ctrl) = 0; | ||
| 43 | |||
| 44 | protected: | ||
| 45 | Core::System& system; | ||
| 37 | }; | 46 | }; |
| 38 | 47 | ||
| 39 | } // namespace Service::Nvidia::Devices | 48 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 20c7c39aa..926a1285d 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | |||
| @@ -13,10 +13,12 @@ | |||
| 13 | 13 | ||
| 14 | namespace Service::Nvidia::Devices { | 14 | namespace Service::Nvidia::Devices { |
| 15 | 15 | ||
| 16 | nvdisp_disp0::nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} | 16 | nvdisp_disp0::nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) |
| 17 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} | ||
| 17 | nvdisp_disp0 ::~nvdisp_disp0() = default; | 18 | nvdisp_disp0 ::~nvdisp_disp0() = default; |
| 18 | 19 | ||
| 19 | u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 20 | u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 21 | IoctlCtrl& ctrl) { | ||
| 20 | UNIMPLEMENTED_MSG("Unimplemented ioctl"); | 22 | UNIMPLEMENTED_MSG("Unimplemented ioctl"); |
| 21 | return 0; | 23 | return 0; |
| 22 | } | 24 | } |
| @@ -34,9 +36,8 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 | |||
| 34 | addr, offset, width, height, stride, static_cast<PixelFormat>(format), | 36 | addr, offset, width, height, stride, static_cast<PixelFormat>(format), |
| 35 | transform, crop_rect}; | 37 | transform, crop_rect}; |
| 36 | 38 | ||
| 37 | auto& instance = Core::System::GetInstance(); | 39 | system.GetPerfStats().EndGameFrame(); |
| 38 | instance.GetPerfStats().EndGameFrame(); | 40 | system.GPU().SwapBuffers(&framebuffer); |
| 39 | instance.GPU().SwapBuffers(framebuffer); | ||
| 40 | } | 41 | } |
| 41 | 42 | ||
| 42 | } // namespace Service::Nvidia::Devices | 43 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index 12f3ef825..e79e490ff 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h | |||
| @@ -17,10 +17,11 @@ class nvmap; | |||
| 17 | 17 | ||
| 18 | class nvdisp_disp0 final : public nvdevice { | 18 | class nvdisp_disp0 final : public nvdevice { |
| 19 | public: | 19 | public: |
| 20 | explicit nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev); | 20 | explicit nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); |
| 21 | ~nvdisp_disp0() override; | 21 | ~nvdisp_disp0() override; |
| 22 | 22 | ||
| 23 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 23 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 24 | IoctlCtrl& ctrl) override; | ||
| 24 | 25 | ||
| 25 | /// Performs a screen flip, drawing the buffer pointed to by the handle. | 26 | /// Performs a screen flip, drawing the buffer pointed to by the handle. |
| 26 | void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, | 27 | void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index af62d33d2..24ab3f2e9 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | |||
| @@ -22,10 +22,12 @@ enum { | |||
| 22 | }; | 22 | }; |
| 23 | } | 23 | } |
| 24 | 24 | ||
| 25 | nvhost_as_gpu::nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} | 25 | nvhost_as_gpu::nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) |
| 26 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} | ||
| 26 | nvhost_as_gpu::~nvhost_as_gpu() = default; | 27 | nvhost_as_gpu::~nvhost_as_gpu() = default; |
| 27 | 28 | ||
| 28 | u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 29 | u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 30 | IoctlCtrl& ctrl) { | ||
| 29 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 31 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 30 | command.raw, input.size(), output.size()); | 32 | command.raw, input.size(), output.size()); |
| 31 | 33 | ||
| @@ -65,7 +67,7 @@ u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>& | |||
| 65 | LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, | 67 | LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, |
| 66 | params.page_size, params.flags); | 68 | params.page_size, params.flags); |
| 67 | 69 | ||
| 68 | auto& gpu = Core::System::GetInstance().GPU(); | 70 | auto& gpu = system.GPU(); |
| 69 | const u64 size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)}; | 71 | const u64 size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)}; |
| 70 | if (params.flags & 1) { | 72 | if (params.flags & 1) { |
| 71 | params.offset = gpu.MemoryManager().AllocateSpace(params.offset, size, 1); | 73 | params.offset = gpu.MemoryManager().AllocateSpace(params.offset, size, 1); |
| @@ -85,7 +87,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output) | |||
| 85 | std::vector<IoctlRemapEntry> entries(num_entries); | 87 | std::vector<IoctlRemapEntry> entries(num_entries); |
| 86 | std::memcpy(entries.data(), input.data(), input.size()); | 88 | std::memcpy(entries.data(), input.data(), input.size()); |
| 87 | 89 | ||
| 88 | auto& gpu = Core::System::GetInstance().GPU(); | 90 | auto& gpu = system.GPU(); |
| 89 | for (const auto& entry : entries) { | 91 | for (const auto& entry : entries) { |
| 90 | LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", | 92 | LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", |
| 91 | entry.offset, entry.nvmap_handle, entry.pages); | 93 | entry.offset, entry.nvmap_handle, entry.pages); |
| @@ -136,7 +138,7 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 136 | // case to prevent unexpected behavior. | 138 | // case to prevent unexpected behavior. |
| 137 | ASSERT(object->id == params.nvmap_handle); | 139 | ASSERT(object->id == params.nvmap_handle); |
| 138 | 140 | ||
| 139 | auto& gpu = Core::System::GetInstance().GPU(); | 141 | auto& gpu = system.GPU(); |
| 140 | 142 | ||
| 141 | if (params.flags & 1) { | 143 | if (params.flags & 1) { |
| 142 | params.offset = gpu.MemoryManager().MapBufferEx(object->addr, params.offset, object->size); | 144 | params.offset = gpu.MemoryManager().MapBufferEx(object->addr, params.offset, object->size); |
| @@ -173,8 +175,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 173 | return 0; | 175 | return 0; |
| 174 | } | 176 | } |
| 175 | 177 | ||
| 176 | params.offset = Core::System::GetInstance().GPU().MemoryManager().UnmapBuffer(params.offset, | 178 | params.offset = system.GPU().MemoryManager().UnmapBuffer(params.offset, itr->second.size); |
| 177 | itr->second.size); | ||
| 178 | buffer_mappings.erase(itr->second.offset); | 179 | buffer_mappings.erase(itr->second.offset); |
| 179 | 180 | ||
| 180 | std::memcpy(output.data(), ¶ms, output.size()); | 181 | std::memcpy(output.data(), ¶ms, output.size()); |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index eb14b1da8..30ca5f4c3 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | |||
| @@ -17,10 +17,11 @@ class nvmap; | |||
| 17 | 17 | ||
| 18 | class nvhost_as_gpu final : public nvdevice { | 18 | class nvhost_as_gpu final : public nvdevice { |
| 19 | public: | 19 | public: |
| 20 | explicit nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev); | 20 | explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); |
| 21 | ~nvhost_as_gpu() override; | 21 | ~nvhost_as_gpu() override; |
| 22 | 22 | ||
| 23 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 23 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 24 | IoctlCtrl& ctrl) override; | ||
| 24 | 25 | ||
| 25 | private: | 26 | private: |
| 26 | enum class IoctlCommand : u32_le { | 27 | enum class IoctlCommand : u32_le { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index b39fb9ef9..9a66a5f88 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | |||
| @@ -7,14 +7,20 @@ | |||
| 7 | 7 | ||
| 8 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 9 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 10 | #include "core/core.h" | ||
| 11 | #include "core/hle/kernel/readable_event.h" | ||
| 12 | #include "core/hle/kernel/writable_event.h" | ||
| 10 | #include "core/hle/service/nvdrv/devices/nvhost_ctrl.h" | 13 | #include "core/hle/service/nvdrv/devices/nvhost_ctrl.h" |
| 14 | #include "video_core/gpu.h" | ||
| 11 | 15 | ||
| 12 | namespace Service::Nvidia::Devices { | 16 | namespace Service::Nvidia::Devices { |
| 13 | 17 | ||
| 14 | nvhost_ctrl::nvhost_ctrl() = default; | 18 | nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface) |
| 19 | : nvdevice(system), events_interface{events_interface} {} | ||
| 15 | nvhost_ctrl::~nvhost_ctrl() = default; | 20 | nvhost_ctrl::~nvhost_ctrl() = default; |
| 16 | 21 | ||
| 17 | u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 22 | u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 23 | IoctlCtrl& ctrl) { | ||
| 18 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 24 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 19 | command.raw, input.size(), output.size()); | 25 | command.raw, input.size(), output.size()); |
| 20 | 26 | ||
| @@ -22,11 +28,15 @@ u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector< | |||
| 22 | case IoctlCommand::IocGetConfigCommand: | 28 | case IoctlCommand::IocGetConfigCommand: |
| 23 | return NvOsGetConfigU32(input, output); | 29 | return NvOsGetConfigU32(input, output); |
| 24 | case IoctlCommand::IocCtrlEventWaitCommand: | 30 | case IoctlCommand::IocCtrlEventWaitCommand: |
| 25 | return IocCtrlEventWait(input, output, false); | 31 | return IocCtrlEventWait(input, output, false, ctrl); |
| 26 | case IoctlCommand::IocCtrlEventWaitAsyncCommand: | 32 | case IoctlCommand::IocCtrlEventWaitAsyncCommand: |
| 27 | return IocCtrlEventWait(input, output, true); | 33 | return IocCtrlEventWait(input, output, true, ctrl); |
| 28 | case IoctlCommand::IocCtrlEventRegisterCommand: | 34 | case IoctlCommand::IocCtrlEventRegisterCommand: |
| 29 | return IocCtrlEventRegister(input, output); | 35 | return IocCtrlEventRegister(input, output); |
| 36 | case IoctlCommand::IocCtrlEventUnregisterCommand: | ||
| 37 | return IocCtrlEventUnregister(input, output); | ||
| 38 | case IoctlCommand::IocCtrlEventSignalCommand: | ||
| 39 | return IocCtrlEventSignal(input, output); | ||
| 30 | } | 40 | } |
| 31 | UNIMPLEMENTED_MSG("Unimplemented ioctl"); | 41 | UNIMPLEMENTED_MSG("Unimplemented ioctl"); |
| 32 | return 0; | 42 | return 0; |
| @@ -41,23 +51,137 @@ u32 nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& | |||
| 41 | } | 51 | } |
| 42 | 52 | ||
| 43 | u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, | 53 | u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, |
| 44 | bool is_async) { | 54 | bool is_async, IoctlCtrl& ctrl) { |
| 45 | IocCtrlEventWaitParams params{}; | 55 | IocCtrlEventWaitParams params{}; |
| 46 | std::memcpy(¶ms, input.data(), sizeof(params)); | 56 | std::memcpy(¶ms, input.data(), sizeof(params)); |
| 47 | LOG_WARNING(Service_NVDRV, | 57 | LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}", |
| 48 | "(STUBBED) called, syncpt_id={}, threshold={}, timeout={}, is_async={}", | 58 | params.syncpt_id, params.threshold, params.timeout, is_async); |
| 49 | params.syncpt_id, params.threshold, params.timeout, is_async); | ||
| 50 | 59 | ||
| 51 | // TODO(Subv): Implement actual syncpt waiting. | 60 | if (params.syncpt_id >= MaxSyncPoints) { |
| 52 | params.value = 0; | 61 | return NvResult::BadParameter; |
| 62 | } | ||
| 63 | |||
| 64 | auto& gpu = system.GPU(); | ||
| 65 | // This is mostly to take into account unimplemented features. As synced | ||
| 66 | // gpu is always synced. | ||
| 67 | if (!gpu.IsAsync()) { | ||
| 68 | return NvResult::Success; | ||
| 69 | } | ||
| 70 | auto lock = gpu.LockSync(); | ||
| 71 | const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); | ||
| 72 | const s32 diff = current_syncpoint_value - params.threshold; | ||
| 73 | if (diff >= 0) { | ||
| 74 | params.value = current_syncpoint_value; | ||
| 75 | std::memcpy(output.data(), ¶ms, sizeof(params)); | ||
| 76 | return NvResult::Success; | ||
| 77 | } | ||
| 78 | const u32 target_value = current_syncpoint_value - diff; | ||
| 79 | |||
| 80 | if (!is_async) { | ||
| 81 | params.value = 0; | ||
| 82 | } | ||
| 83 | |||
| 84 | if (params.timeout == 0) { | ||
| 85 | std::memcpy(output.data(), ¶ms, sizeof(params)); | ||
| 86 | return NvResult::Timeout; | ||
| 87 | } | ||
| 88 | |||
| 89 | u32 event_id; | ||
| 90 | if (is_async) { | ||
| 91 | event_id = params.value & 0x00FF; | ||
| 92 | if (event_id >= MaxNvEvents) { | ||
| 93 | std::memcpy(output.data(), ¶ms, sizeof(params)); | ||
| 94 | return NvResult::BadParameter; | ||
| 95 | } | ||
| 96 | } else { | ||
| 97 | if (ctrl.fresh_call) { | ||
| 98 | const auto result = events_interface.GetFreeEvent(); | ||
| 99 | if (result) { | ||
| 100 | event_id = *result; | ||
| 101 | } else { | ||
| 102 | LOG_CRITICAL(Service_NVDRV, "No Free Events available!"); | ||
| 103 | event_id = params.value & 0x00FF; | ||
| 104 | } | ||
| 105 | } else { | ||
| 106 | event_id = ctrl.event_id; | ||
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 | EventState status = events_interface.status[event_id]; | ||
| 111 | if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) { | ||
| 112 | events_interface.SetEventStatus(event_id, EventState::Waiting); | ||
| 113 | events_interface.assigned_syncpt[event_id] = params.syncpt_id; | ||
| 114 | events_interface.assigned_value[event_id] = target_value; | ||
| 115 | if (is_async) { | ||
| 116 | params.value = params.syncpt_id << 4; | ||
| 117 | } else { | ||
| 118 | params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; | ||
| 119 | } | ||
| 120 | params.value |= event_id; | ||
| 121 | events_interface.events[event_id].writable->Clear(); | ||
| 122 | gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); | ||
| 123 | if (!is_async && ctrl.fresh_call) { | ||
| 124 | ctrl.must_delay = true; | ||
| 125 | ctrl.timeout = params.timeout; | ||
| 126 | ctrl.event_id = event_id; | ||
| 127 | return NvResult::Timeout; | ||
| 128 | } | ||
| 129 | std::memcpy(output.data(), ¶ms, sizeof(params)); | ||
| 130 | return NvResult::Timeout; | ||
| 131 | } | ||
| 53 | std::memcpy(output.data(), ¶ms, sizeof(params)); | 132 | std::memcpy(output.data(), ¶ms, sizeof(params)); |
| 54 | return 0; | 133 | return NvResult::BadParameter; |
| 55 | } | 134 | } |
| 56 | 135 | ||
| 57 | u32 nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) { | 136 | u32 nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) { |
| 58 | LOG_WARNING(Service_NVDRV, "(STUBBED) called"); | 137 | IocCtrlEventRegisterParams params{}; |
| 59 | // TODO(bunnei): Implement this. | 138 | std::memcpy(¶ms, input.data(), sizeof(params)); |
| 60 | return 0; | 139 | const u32 event_id = params.user_event_id & 0x00FF; |
| 140 | LOG_DEBUG(Service_NVDRV, " called, user_event_id: {:X}", event_id); | ||
| 141 | if (event_id >= MaxNvEvents) { | ||
| 142 | return NvResult::BadParameter; | ||
| 143 | } | ||
| 144 | if (events_interface.registered[event_id]) { | ||
| 145 | return NvResult::BadParameter; | ||
| 146 | } | ||
| 147 | events_interface.RegisterEvent(event_id); | ||
| 148 | return NvResult::Success; | ||
| 149 | } | ||
| 150 | |||
| 151 | u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output) { | ||
| 152 | IocCtrlEventUnregisterParams params{}; | ||
| 153 | std::memcpy(¶ms, input.data(), sizeof(params)); | ||
| 154 | const u32 event_id = params.user_event_id & 0x00FF; | ||
| 155 | LOG_DEBUG(Service_NVDRV, " called, user_event_id: {:X}", event_id); | ||
| 156 | if (event_id >= MaxNvEvents) { | ||
| 157 | return NvResult::BadParameter; | ||
| 158 | } | ||
| 159 | if (!events_interface.registered[event_id]) { | ||
| 160 | return NvResult::BadParameter; | ||
| 161 | } | ||
| 162 | events_interface.UnregisterEvent(event_id); | ||
| 163 | return NvResult::Success; | ||
| 164 | } | ||
| 165 | |||
| 166 | u32 nvhost_ctrl::IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output) { | ||
| 167 | IocCtrlEventSignalParams params{}; | ||
| 168 | std::memcpy(¶ms, input.data(), sizeof(params)); | ||
| 169 | // TODO(Blinkhawk): This is normally called when an NvEvents timeout on WaitSynchronization | ||
| 170 | // It is believed from RE to cancel the GPU Event. However, better research is required | ||
| 171 | u32 event_id = params.user_event_id & 0x00FF; | ||
| 172 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, user_event_id: {:X}", event_id); | ||
| 173 | if (event_id >= MaxNvEvents) { | ||
| 174 | return NvResult::BadParameter; | ||
| 175 | } | ||
| 176 | if (events_interface.status[event_id] == EventState::Waiting) { | ||
| 177 | auto& gpu = system.GPU(); | ||
| 178 | if (gpu.CancelSyncptInterrupt(events_interface.assigned_syncpt[event_id], | ||
| 179 | events_interface.assigned_value[event_id])) { | ||
| 180 | events_interface.LiberateEvent(event_id); | ||
| 181 | events_interface.events[event_id].writable->Signal(); | ||
| 182 | } | ||
| 183 | } | ||
| 184 | return NvResult::Success; | ||
| 61 | } | 185 | } |
| 62 | 186 | ||
| 63 | } // namespace Service::Nvidia::Devices | 187 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h index 6d0de2212..14e6e7e57 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | |||
| @@ -8,15 +8,17 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 10 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 11 | #include "core/hle/service/nvdrv/nvdrv.h" | ||
| 11 | 12 | ||
| 12 | namespace Service::Nvidia::Devices { | 13 | namespace Service::Nvidia::Devices { |
| 13 | 14 | ||
| 14 | class nvhost_ctrl final : public nvdevice { | 15 | class nvhost_ctrl final : public nvdevice { |
| 15 | public: | 16 | public: |
| 16 | nvhost_ctrl(); | 17 | explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface); |
| 17 | ~nvhost_ctrl() override; | 18 | ~nvhost_ctrl() override; |
| 18 | 19 | ||
| 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 20 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 21 | IoctlCtrl& ctrl) override; | ||
| 20 | 22 | ||
| 21 | private: | 23 | private: |
| 22 | enum class IoctlCommand : u32_le { | 24 | enum class IoctlCommand : u32_le { |
| @@ -132,9 +134,16 @@ private: | |||
| 132 | 134 | ||
| 133 | u32 NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output); | 135 | u32 NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output); |
| 134 | 136 | ||
| 135 | u32 IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async); | 137 | u32 IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async, |
| 138 | IoctlCtrl& ctrl); | ||
| 136 | 139 | ||
| 137 | u32 IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output); | 140 | u32 IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output); |
| 141 | |||
| 142 | u32 IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output); | ||
| 143 | |||
| 144 | u32 IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output); | ||
| 145 | |||
| 146 | EventInterface& events_interface; | ||
| 138 | }; | 147 | }; |
| 139 | 148 | ||
| 140 | } // namespace Service::Nvidia::Devices | 149 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp index 0e28755bd..988effd90 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp | |||
| @@ -12,10 +12,11 @@ | |||
| 12 | 12 | ||
| 13 | namespace Service::Nvidia::Devices { | 13 | namespace Service::Nvidia::Devices { |
| 14 | 14 | ||
| 15 | nvhost_ctrl_gpu::nvhost_ctrl_gpu() = default; | 15 | nvhost_ctrl_gpu::nvhost_ctrl_gpu(Core::System& system) : nvdevice(system) {} |
| 16 | nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default; | 16 | nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default; |
| 17 | 17 | ||
| 18 | u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 18 | u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 19 | IoctlCtrl& ctrl) { | ||
| 19 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 20 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 20 | command.raw, input.size(), output.size()); | 21 | command.raw, input.size(), output.size()); |
| 21 | 22 | ||
| @@ -185,7 +186,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o | |||
| 185 | 186 | ||
| 186 | IoctlGetGpuTime params{}; | 187 | IoctlGetGpuTime params{}; |
| 187 | std::memcpy(¶ms, input.data(), input.size()); | 188 | std::memcpy(¶ms, input.data(), input.size()); |
| 188 | const auto ns = Core::Timing::CyclesToNs(Core::System::GetInstance().CoreTiming().GetTicks()); | 189 | const auto ns = Core::Timing::CyclesToNs(system.CoreTiming().GetTicks()); |
| 189 | params.gpu_time = static_cast<u64_le>(ns.count()); | 190 | params.gpu_time = static_cast<u64_le>(ns.count()); |
| 190 | std::memcpy(output.data(), ¶ms, output.size()); | 191 | std::memcpy(output.data(), ¶ms, output.size()); |
| 191 | return 0; | 192 | return 0; |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h index 240435eea..2b035ae3f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h | |||
| @@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices { | |||
| 13 | 13 | ||
| 14 | class nvhost_ctrl_gpu final : public nvdevice { | 14 | class nvhost_ctrl_gpu final : public nvdevice { |
| 15 | public: | 15 | public: |
| 16 | nvhost_ctrl_gpu(); | 16 | explicit nvhost_ctrl_gpu(Core::System& system); |
| 17 | ~nvhost_ctrl_gpu() override; | 17 | ~nvhost_ctrl_gpu() override; |
| 18 | 18 | ||
| 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 20 | IoctlCtrl& ctrl) override; | ||
| 20 | 21 | ||
| 21 | private: | 22 | private: |
| 22 | enum class IoctlCommand : u32_le { | 23 | enum class IoctlCommand : u32_le { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 8ce7bc7a5..241dac881 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | |||
| @@ -13,10 +13,12 @@ | |||
| 13 | 13 | ||
| 14 | namespace Service::Nvidia::Devices { | 14 | namespace Service::Nvidia::Devices { |
| 15 | 15 | ||
| 16 | nvhost_gpu::nvhost_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} | 16 | nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) |
| 17 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} | ||
| 17 | nvhost_gpu::~nvhost_gpu() = default; | 18 | nvhost_gpu::~nvhost_gpu() = default; |
| 18 | 19 | ||
| 19 | u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 20 | u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 21 | IoctlCtrl& ctrl) { | ||
| 20 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 22 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 21 | command.raw, input.size(), output.size()); | 23 | command.raw, input.size(), output.size()); |
| 22 | 24 | ||
| @@ -119,8 +121,10 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 119 | params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, | 121 | params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, |
| 120 | params.unk3); | 122 | params.unk3); |
| 121 | 123 | ||
| 122 | params.fence_out.id = 0; | 124 | auto& gpu = system.GPU(); |
| 123 | params.fence_out.value = 0; | 125 | params.fence_out.id = assigned_syncpoints; |
| 126 | params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints); | ||
| 127 | assigned_syncpoints++; | ||
| 124 | std::memcpy(output.data(), ¶ms, output.size()); | 128 | std::memcpy(output.data(), ¶ms, output.size()); |
| 125 | return 0; | 129 | return 0; |
| 126 | } | 130 | } |
| @@ -143,7 +147,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp | |||
| 143 | IoctlSubmitGpfifo params{}; | 147 | IoctlSubmitGpfifo params{}; |
| 144 | std::memcpy(¶ms, input.data(), sizeof(IoctlSubmitGpfifo)); | 148 | std::memcpy(¶ms, input.data(), sizeof(IoctlSubmitGpfifo)); |
| 145 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", | 149 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", |
| 146 | params.address, params.num_entries, params.flags); | 150 | params.address, params.num_entries, params.flags.raw); |
| 147 | 151 | ||
| 148 | ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + | 152 | ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + |
| 149 | params.num_entries * sizeof(Tegra::CommandListHeader), | 153 | params.num_entries * sizeof(Tegra::CommandListHeader), |
| @@ -153,10 +157,18 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp | |||
| 153 | std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], | 157 | std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], |
| 154 | params.num_entries * sizeof(Tegra::CommandListHeader)); | 158 | params.num_entries * sizeof(Tegra::CommandListHeader)); |
| 155 | 159 | ||
| 156 | Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); | 160 | UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); |
| 161 | UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); | ||
| 162 | |||
| 163 | auto& gpu = system.GPU(); | ||
| 164 | u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); | ||
| 165 | if (params.flags.increment.Value()) { | ||
| 166 | params.fence_out.value += current_syncpoint_value; | ||
| 167 | } else { | ||
| 168 | params.fence_out.value = current_syncpoint_value; | ||
| 169 | } | ||
| 170 | gpu.PushGPUEntries(std::move(entries)); | ||
| 157 | 171 | ||
| 158 | params.fence_out.id = 0; | ||
| 159 | params.fence_out.value = 0; | ||
| 160 | std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmitGpfifo)); | 172 | std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmitGpfifo)); |
| 161 | return 0; | 173 | return 0; |
| 162 | } | 174 | } |
| @@ -168,16 +180,24 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output) | |||
| 168 | IoctlSubmitGpfifo params{}; | 180 | IoctlSubmitGpfifo params{}; |
| 169 | std::memcpy(¶ms, input.data(), sizeof(IoctlSubmitGpfifo)); | 181 | std::memcpy(¶ms, input.data(), sizeof(IoctlSubmitGpfifo)); |
| 170 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", | 182 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", |
| 171 | params.address, params.num_entries, params.flags); | 183 | params.address, params.num_entries, params.flags.raw); |
| 172 | 184 | ||
| 173 | Tegra::CommandList entries(params.num_entries); | 185 | Tegra::CommandList entries(params.num_entries); |
| 174 | Memory::ReadBlock(params.address, entries.data(), | 186 | Memory::ReadBlock(params.address, entries.data(), |
| 175 | params.num_entries * sizeof(Tegra::CommandListHeader)); | 187 | params.num_entries * sizeof(Tegra::CommandListHeader)); |
| 176 | 188 | ||
| 177 | Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); | 189 | UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); |
| 190 | UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); | ||
| 191 | |||
| 192 | auto& gpu = system.GPU(); | ||
| 193 | u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); | ||
| 194 | if (params.flags.increment.Value()) { | ||
| 195 | params.fence_out.value += current_syncpoint_value; | ||
| 196 | } else { | ||
| 197 | params.fence_out.value = current_syncpoint_value; | ||
| 198 | } | ||
| 199 | gpu.PushGPUEntries(std::move(entries)); | ||
| 178 | 200 | ||
| 179 | params.fence_out.id = 0; | ||
| 180 | params.fence_out.value = 0; | ||
| 181 | std::memcpy(output.data(), ¶ms, output.size()); | 201 | std::memcpy(output.data(), ¶ms, output.size()); |
| 182 | return 0; | 202 | return 0; |
| 183 | } | 203 | } |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 62beb5c0c..d2e8fbae9 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "common/swap.h" | 11 | #include "common/swap.h" |
| 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 13 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 13 | 14 | ||
| 14 | namespace Service::Nvidia::Devices { | 15 | namespace Service::Nvidia::Devices { |
| 15 | 16 | ||
| @@ -20,10 +21,11 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b); | |||
| 20 | 21 | ||
| 21 | class nvhost_gpu final : public nvdevice { | 22 | class nvhost_gpu final : public nvdevice { |
| 22 | public: | 23 | public: |
| 23 | explicit nvhost_gpu(std::shared_ptr<nvmap> nvmap_dev); | 24 | explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); |
| 24 | ~nvhost_gpu() override; | 25 | ~nvhost_gpu() override; |
| 25 | 26 | ||
| 26 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 27 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 28 | IoctlCtrl& ctrl) override; | ||
| 27 | 29 | ||
| 28 | private: | 30 | private: |
| 29 | enum class IoctlCommand : u32_le { | 31 | enum class IoctlCommand : u32_le { |
| @@ -113,11 +115,7 @@ private: | |||
| 113 | static_assert(sizeof(IoctlGetErrorNotification) == 16, | 115 | static_assert(sizeof(IoctlGetErrorNotification) == 16, |
| 114 | "IoctlGetErrorNotification is incorrect size"); | 116 | "IoctlGetErrorNotification is incorrect size"); |
| 115 | 117 | ||
| 116 | struct IoctlFence { | 118 | static_assert(sizeof(Fence) == 8, "Fence is incorrect size"); |
| 117 | u32_le id; | ||
| 118 | u32_le value; | ||
| 119 | }; | ||
| 120 | static_assert(sizeof(IoctlFence) == 8, "IoctlFence is incorrect size"); | ||
| 121 | 119 | ||
| 122 | struct IoctlAllocGpfifoEx { | 120 | struct IoctlAllocGpfifoEx { |
| 123 | u32_le num_entries; | 121 | u32_le num_entries; |
| @@ -132,13 +130,13 @@ private: | |||
| 132 | static_assert(sizeof(IoctlAllocGpfifoEx) == 32, "IoctlAllocGpfifoEx is incorrect size"); | 130 | static_assert(sizeof(IoctlAllocGpfifoEx) == 32, "IoctlAllocGpfifoEx is incorrect size"); |
| 133 | 131 | ||
| 134 | struct IoctlAllocGpfifoEx2 { | 132 | struct IoctlAllocGpfifoEx2 { |
| 135 | u32_le num_entries; // in | 133 | u32_le num_entries; // in |
| 136 | u32_le flags; // in | 134 | u32_le flags; // in |
| 137 | u32_le unk0; // in (1 works) | 135 | u32_le unk0; // in (1 works) |
| 138 | IoctlFence fence_out; // out | 136 | Fence fence_out; // out |
| 139 | u32_le unk1; // in | 137 | u32_le unk1; // in |
| 140 | u32_le unk2; // in | 138 | u32_le unk2; // in |
| 141 | u32_le unk3; // in | 139 | u32_le unk3; // in |
| 142 | }; | 140 | }; |
| 143 | static_assert(sizeof(IoctlAllocGpfifoEx2) == 32, "IoctlAllocGpfifoEx2 is incorrect size"); | 141 | static_assert(sizeof(IoctlAllocGpfifoEx2) == 32, "IoctlAllocGpfifoEx2 is incorrect size"); |
| 144 | 142 | ||
| @@ -153,10 +151,16 @@ private: | |||
| 153 | struct IoctlSubmitGpfifo { | 151 | struct IoctlSubmitGpfifo { |
| 154 | u64_le address; // pointer to gpfifo entry structs | 152 | u64_le address; // pointer to gpfifo entry structs |
| 155 | u32_le num_entries; // number of fence objects being submitted | 153 | u32_le num_entries; // number of fence objects being submitted |
| 156 | u32_le flags; | 154 | union { |
| 157 | IoctlFence fence_out; // returned new fence object for others to wait on | 155 | u32_le raw; |
| 158 | }; | 156 | BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list |
| 159 | static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(IoctlFence), | 157 | BitField<1, 1, u32_le> add_increment; // append an increment to the list |
| 158 | BitField<2, 1, u32_le> new_hw_format; // Mostly ignored | ||
| 159 | BitField<8, 1, u32_le> increment; // increment the returned fence | ||
| 160 | } flags; | ||
| 161 | Fence fence_out; // returned new fence object for others to wait on | ||
| 162 | }; | ||
| 163 | static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence), | ||
| 160 | "IoctlSubmitGpfifo is incorrect size"); | 164 | "IoctlSubmitGpfifo is incorrect size"); |
| 161 | 165 | ||
| 162 | struct IoctlGetWaitbase { | 166 | struct IoctlGetWaitbase { |
| @@ -184,6 +188,7 @@ private: | |||
| 184 | u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output); | 188 | u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output); |
| 185 | 189 | ||
| 186 | std::shared_ptr<nvmap> nvmap_dev; | 190 | std::shared_ptr<nvmap> nvmap_dev; |
| 191 | u32 assigned_syncpoints{}; | ||
| 187 | }; | 192 | }; |
| 188 | 193 | ||
| 189 | } // namespace Service::Nvidia::Devices | 194 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index f5e8ea7c3..f572ad30f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | |||
| @@ -10,10 +10,11 @@ | |||
| 10 | 10 | ||
| 11 | namespace Service::Nvidia::Devices { | 11 | namespace Service::Nvidia::Devices { |
| 12 | 12 | ||
| 13 | nvhost_nvdec::nvhost_nvdec() = default; | 13 | nvhost_nvdec::nvhost_nvdec(Core::System& system) : nvdevice(system) {} |
| 14 | nvhost_nvdec::~nvhost_nvdec() = default; | 14 | nvhost_nvdec::~nvhost_nvdec() = default; |
| 15 | 15 | ||
| 16 | u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 16 | u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 17 | IoctlCtrl& ctrl) { | ||
| 17 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 18 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 18 | command.raw, input.size(), output.size()); | 19 | command.raw, input.size(), output.size()); |
| 19 | 20 | ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index 0e7b284f8..2710f0511 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | |||
| @@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices { | |||
| 13 | 13 | ||
| 14 | class nvhost_nvdec final : public nvdevice { | 14 | class nvhost_nvdec final : public nvdevice { |
| 15 | public: | 15 | public: |
| 16 | nvhost_nvdec(); | 16 | explicit nvhost_nvdec(Core::System& system); |
| 17 | ~nvhost_nvdec() override; | 17 | ~nvhost_nvdec() override; |
| 18 | 18 | ||
| 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 20 | IoctlCtrl& ctrl) override; | ||
| 20 | 21 | ||
| 21 | private: | 22 | private: |
| 22 | enum class IoctlCommand : u32_le { | 23 | enum class IoctlCommand : u32_le { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp index 3e0951ab0..38282956f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp | |||
| @@ -10,10 +10,11 @@ | |||
| 10 | 10 | ||
| 11 | namespace Service::Nvidia::Devices { | 11 | namespace Service::Nvidia::Devices { |
| 12 | 12 | ||
| 13 | nvhost_nvjpg::nvhost_nvjpg() = default; | 13 | nvhost_nvjpg::nvhost_nvjpg(Core::System& system) : nvdevice(system) {} |
| 14 | nvhost_nvjpg::~nvhost_nvjpg() = default; | 14 | nvhost_nvjpg::~nvhost_nvjpg() = default; |
| 15 | 15 | ||
| 16 | u32 nvhost_nvjpg::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 16 | u32 nvhost_nvjpg::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 17 | IoctlCtrl& ctrl) { | ||
| 17 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 18 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 18 | command.raw, input.size(), output.size()); | 19 | command.raw, input.size(), output.size()); |
| 19 | 20 | ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h index 89fd5e95e..379766693 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h | |||
| @@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices { | |||
| 13 | 13 | ||
| 14 | class nvhost_nvjpg final : public nvdevice { | 14 | class nvhost_nvjpg final : public nvdevice { |
| 15 | public: | 15 | public: |
| 16 | nvhost_nvjpg(); | 16 | explicit nvhost_nvjpg(Core::System& system); |
| 17 | ~nvhost_nvjpg() override; | 17 | ~nvhost_nvjpg() override; |
| 18 | 18 | ||
| 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 20 | IoctlCtrl& ctrl) override; | ||
| 20 | 21 | ||
| 21 | private: | 22 | private: |
| 22 | enum class IoctlCommand : u32_le { | 23 | enum class IoctlCommand : u32_le { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index d544f0f31..70e8091db 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | |||
| @@ -10,10 +10,11 @@ | |||
| 10 | 10 | ||
| 11 | namespace Service::Nvidia::Devices { | 11 | namespace Service::Nvidia::Devices { |
| 12 | 12 | ||
| 13 | nvhost_vic::nvhost_vic() = default; | 13 | nvhost_vic::nvhost_vic(Core::System& system) : nvdevice(system) {} |
| 14 | nvhost_vic::~nvhost_vic() = default; | 14 | nvhost_vic::~nvhost_vic() = default; |
| 15 | 15 | ||
| 16 | u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 16 | u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 17 | IoctlCtrl& ctrl) { | ||
| 17 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 18 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 18 | command.raw, input.size(), output.size()); | 19 | command.raw, input.size(), output.size()); |
| 19 | 20 | ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index fc24c3f9c..7d111977e 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h | |||
| @@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices { | |||
| 13 | 13 | ||
| 14 | class nvhost_vic final : public nvdevice { | 14 | class nvhost_vic final : public nvdevice { |
| 15 | public: | 15 | public: |
| 16 | nvhost_vic(); | 16 | explicit nvhost_vic(Core::System& system); |
| 17 | ~nvhost_vic() override; | 17 | ~nvhost_vic() override; |
| 18 | 18 | ||
| 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 20 | IoctlCtrl& ctrl) override; | ||
| 20 | 21 | ||
| 21 | private: | 22 | private: |
| 22 | enum class IoctlCommand : u32_le { | 23 | enum class IoctlCommand : u32_le { |
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp index 1ec796fc6..223b496b7 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.cpp +++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp | |||
| @@ -18,7 +18,7 @@ enum { | |||
| 18 | }; | 18 | }; |
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | nvmap::nvmap() = default; | 21 | nvmap::nvmap(Core::System& system) : nvdevice(system) {} |
| 22 | nvmap::~nvmap() = default; | 22 | nvmap::~nvmap() = default; |
| 23 | 23 | ||
| 24 | VAddr nvmap::GetObjectAddress(u32 handle) const { | 24 | VAddr nvmap::GetObjectAddress(u32 handle) const { |
| @@ -28,7 +28,8 @@ VAddr nvmap::GetObjectAddress(u32 handle) const { | |||
| 28 | return object->addr; | 28 | return object->addr; |
| 29 | } | 29 | } |
| 30 | 30 | ||
| 31 | u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 31 | u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 32 | IoctlCtrl& ctrl) { | ||
| 32 | switch (static_cast<IoctlCommand>(command.raw)) { | 33 | switch (static_cast<IoctlCommand>(command.raw)) { |
| 33 | case IoctlCommand::Create: | 34 | case IoctlCommand::Create: |
| 34 | return IocCreate(input, output); | 35 | return IocCreate(input, output); |
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h index 396230c19..bf4a101c2 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.h +++ b/src/core/hle/service/nvdrv/devices/nvmap.h | |||
| @@ -16,13 +16,14 @@ namespace Service::Nvidia::Devices { | |||
| 16 | 16 | ||
| 17 | class nvmap final : public nvdevice { | 17 | class nvmap final : public nvdevice { |
| 18 | public: | 18 | public: |
| 19 | nvmap(); | 19 | explicit nvmap(Core::System& system); |
| 20 | ~nvmap() override; | 20 | ~nvmap() override; |
| 21 | 21 | ||
| 22 | /// Returns the allocated address of an nvmap object given its handle. | 22 | /// Returns the allocated address of an nvmap object given its handle. |
| 23 | VAddr GetObjectAddress(u32 handle) const; | 23 | VAddr GetObjectAddress(u32 handle) const; |
| 24 | 24 | ||
| 25 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 25 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 26 | IoctlCtrl& ctrl) override; | ||
| 26 | 27 | ||
| 27 | /// Represents an nvmap object. | 28 | /// Represents an nvmap object. |
| 28 | struct Object { | 29 | struct Object { |
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp index b60fc748b..d5be64ed2 100644 --- a/src/core/hle/service/nvdrv/interface.cpp +++ b/src/core/hle/service/nvdrv/interface.cpp | |||
| @@ -8,12 +8,18 @@ | |||
| 8 | #include "core/hle/ipc_helpers.h" | 8 | #include "core/hle/ipc_helpers.h" |
| 9 | #include "core/hle/kernel/kernel.h" | 9 | #include "core/hle/kernel/kernel.h" |
| 10 | #include "core/hle/kernel/readable_event.h" | 10 | #include "core/hle/kernel/readable_event.h" |
| 11 | #include "core/hle/kernel/thread.h" | ||
| 11 | #include "core/hle/kernel/writable_event.h" | 12 | #include "core/hle/kernel/writable_event.h" |
| 12 | #include "core/hle/service/nvdrv/interface.h" | 13 | #include "core/hle/service/nvdrv/interface.h" |
| 14 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 13 | #include "core/hle/service/nvdrv/nvdrv.h" | 15 | #include "core/hle/service/nvdrv/nvdrv.h" |
| 14 | 16 | ||
| 15 | namespace Service::Nvidia { | 17 | namespace Service::Nvidia { |
| 16 | 18 | ||
| 19 | void NVDRV::SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) { | ||
| 20 | nvdrv->SignalSyncpt(syncpoint_id, value); | ||
| 21 | } | ||
| 22 | |||
| 17 | void NVDRV::Open(Kernel::HLERequestContext& ctx) { | 23 | void NVDRV::Open(Kernel::HLERequestContext& ctx) { |
| 18 | LOG_DEBUG(Service_NVDRV, "called"); | 24 | LOG_DEBUG(Service_NVDRV, "called"); |
| 19 | 25 | ||
| @@ -36,11 +42,31 @@ void NVDRV::Ioctl(Kernel::HLERequestContext& ctx) { | |||
| 36 | 42 | ||
| 37 | std::vector<u8> output(ctx.GetWriteBufferSize()); | 43 | std::vector<u8> output(ctx.GetWriteBufferSize()); |
| 38 | 44 | ||
| 45 | IoctlCtrl ctrl{}; | ||
| 46 | |||
| 47 | u32 result = nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output, ctrl); | ||
| 48 | |||
| 49 | if (ctrl.must_delay) { | ||
| 50 | ctrl.fresh_call = false; | ||
| 51 | ctx.SleepClientThread( | ||
| 52 | "NVServices::DelayedResponse", ctrl.timeout, | ||
| 53 | [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx, | ||
| 54 | Kernel::ThreadWakeupReason reason) { | ||
| 55 | IoctlCtrl ctrl2{ctrl}; | ||
| 56 | std::vector<u8> output2 = output; | ||
| 57 | u32 result = nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output2, ctrl2); | ||
| 58 | ctx.WriteBuffer(output2); | ||
| 59 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 60 | rb.Push(RESULT_SUCCESS); | ||
| 61 | rb.Push(result); | ||
| 62 | }, | ||
| 63 | nvdrv->GetEventWriteable(ctrl.event_id)); | ||
| 64 | } else { | ||
| 65 | ctx.WriteBuffer(output); | ||
| 66 | } | ||
| 39 | IPC::ResponseBuilder rb{ctx, 3}; | 67 | IPC::ResponseBuilder rb{ctx, 3}; |
| 40 | rb.Push(RESULT_SUCCESS); | 68 | rb.Push(RESULT_SUCCESS); |
| 41 | rb.Push(nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output)); | 69 | rb.Push(result); |
| 42 | |||
| 43 | ctx.WriteBuffer(output); | ||
| 44 | } | 70 | } |
| 45 | 71 | ||
| 46 | void NVDRV::Close(Kernel::HLERequestContext& ctx) { | 72 | void NVDRV::Close(Kernel::HLERequestContext& ctx) { |
| @@ -66,13 +92,19 @@ void NVDRV::Initialize(Kernel::HLERequestContext& ctx) { | |||
| 66 | void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) { | 92 | void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) { |
| 67 | IPC::RequestParser rp{ctx}; | 93 | IPC::RequestParser rp{ctx}; |
| 68 | u32 fd = rp.Pop<u32>(); | 94 | u32 fd = rp.Pop<u32>(); |
| 69 | u32 event_id = rp.Pop<u32>(); | 95 | // TODO(Blinkhawk): Figure the meaning of the flag at bit 16 |
| 96 | u32 event_id = rp.Pop<u32>() & 0x000000FF; | ||
| 70 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id); | 97 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id); |
| 71 | 98 | ||
| 72 | IPC::ResponseBuilder rb{ctx, 3, 1}; | 99 | IPC::ResponseBuilder rb{ctx, 3, 1}; |
| 73 | rb.Push(RESULT_SUCCESS); | 100 | rb.Push(RESULT_SUCCESS); |
| 74 | rb.PushCopyObjects(query_event.readable); | 101 | if (event_id < MaxNvEvents) { |
| 75 | rb.Push<u32>(0); | 102 | rb.PushCopyObjects(nvdrv->GetEvent(event_id)); |
| 103 | rb.Push<u32>(NvResult::Success); | ||
| 104 | } else { | ||
| 105 | rb.Push<u32>(0); | ||
| 106 | rb.Push<u32>(NvResult::BadParameter); | ||
| 107 | } | ||
| 76 | } | 108 | } |
| 77 | 109 | ||
| 78 | void NVDRV::SetClientPID(Kernel::HLERequestContext& ctx) { | 110 | void NVDRV::SetClientPID(Kernel::HLERequestContext& ctx) { |
| @@ -127,10 +159,6 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name) | |||
| 127 | {13, &NVDRV::FinishInitialize, "FinishInitialize"}, | 159 | {13, &NVDRV::FinishInitialize, "FinishInitialize"}, |
| 128 | }; | 160 | }; |
| 129 | RegisterHandlers(functions); | 161 | RegisterHandlers(functions); |
| 130 | |||
| 131 | auto& kernel = Core::System::GetInstance().Kernel(); | ||
| 132 | query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, | ||
| 133 | "NVDRV::query_event"); | ||
| 134 | } | 162 | } |
| 135 | 163 | ||
| 136 | NVDRV::~NVDRV() = default; | 164 | NVDRV::~NVDRV() = default; |
diff --git a/src/core/hle/service/nvdrv/interface.h b/src/core/hle/service/nvdrv/interface.h index 5b4889910..10a0ecd52 100644 --- a/src/core/hle/service/nvdrv/interface.h +++ b/src/core/hle/service/nvdrv/interface.h | |||
| @@ -19,6 +19,8 @@ public: | |||
| 19 | NVDRV(std::shared_ptr<Module> nvdrv, const char* name); | 19 | NVDRV(std::shared_ptr<Module> nvdrv, const char* name); |
| 20 | ~NVDRV() override; | 20 | ~NVDRV() override; |
| 21 | 21 | ||
| 22 | void SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value); | ||
| 23 | |||
| 22 | private: | 24 | private: |
| 23 | void Open(Kernel::HLERequestContext& ctx); | 25 | void Open(Kernel::HLERequestContext& ctx); |
| 24 | void Ioctl(Kernel::HLERequestContext& ctx); | 26 | void Ioctl(Kernel::HLERequestContext& ctx); |
| @@ -33,8 +35,6 @@ private: | |||
| 33 | std::shared_ptr<Module> nvdrv; | 35 | std::shared_ptr<Module> nvdrv; |
| 34 | 36 | ||
| 35 | u64 pid{}; | 37 | u64 pid{}; |
| 36 | |||
| 37 | Kernel::EventPair query_event; | ||
| 38 | }; | 38 | }; |
| 39 | 39 | ||
| 40 | } // namespace Service::Nvidia | 40 | } // namespace Service::Nvidia |
diff --git a/src/core/hle/service/nvdrv/nvdata.h b/src/core/hle/service/nvdrv/nvdata.h new file mode 100644 index 000000000..ac03cbc23 --- /dev/null +++ b/src/core/hle/service/nvdrv/nvdata.h | |||
| @@ -0,0 +1,48 @@ | |||
| 1 | #pragma once | ||
| 2 | |||
| 3 | #include <array> | ||
| 4 | #include "common/common_types.h" | ||
| 5 | |||
| 6 | namespace Service::Nvidia { | ||
| 7 | |||
| 8 | constexpr u32 MaxSyncPoints = 192; | ||
| 9 | constexpr u32 MaxNvEvents = 64; | ||
| 10 | |||
| 11 | struct Fence { | ||
| 12 | s32 id; | ||
| 13 | u32 value; | ||
| 14 | }; | ||
| 15 | |||
| 16 | static_assert(sizeof(Fence) == 8, "Fence has wrong size"); | ||
| 17 | |||
| 18 | struct MultiFence { | ||
| 19 | u32 num_fences; | ||
| 20 | std::array<Fence, 4> fences; | ||
| 21 | }; | ||
| 22 | |||
| 23 | enum NvResult : u32 { | ||
| 24 | Success = 0, | ||
| 25 | BadParameter = 4, | ||
| 26 | Timeout = 5, | ||
| 27 | ResourceError = 15, | ||
| 28 | }; | ||
| 29 | |||
| 30 | enum class EventState { | ||
| 31 | Free = 0, | ||
| 32 | Registered = 1, | ||
| 33 | Waiting = 2, | ||
| 34 | Busy = 3, | ||
| 35 | }; | ||
| 36 | |||
| 37 | struct IoctlCtrl { | ||
| 38 | // First call done to the servioce for services that call itself again after a call. | ||
| 39 | bool fresh_call{true}; | ||
| 40 | // Tells the Ioctl Wrapper that it must delay the IPC response and send the thread to sleep | ||
| 41 | bool must_delay{}; | ||
| 42 | // Timeout for the delay | ||
| 43 | s64 timeout{}; | ||
| 44 | // NV Event Id | ||
| 45 | s32 event_id{-1}; | ||
| 46 | }; | ||
| 47 | |||
| 48 | } // namespace Service::Nvidia | ||
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 6e4b8f2c6..2011a226a 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp | |||
| @@ -4,7 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #include <utility> | 5 | #include <utility> |
| 6 | 6 | ||
| 7 | #include <fmt/format.h> | ||
| 7 | #include "core/hle/ipc_helpers.h" | 8 | #include "core/hle/ipc_helpers.h" |
| 9 | #include "core/hle/kernel/readable_event.h" | ||
| 10 | #include "core/hle/kernel/writable_event.h" | ||
| 8 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 11 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 9 | #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" | 12 | #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" |
| 10 | #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" | 13 | #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" |
| @@ -22,8 +25,9 @@ | |||
| 22 | 25 | ||
| 23 | namespace Service::Nvidia { | 26 | namespace Service::Nvidia { |
| 24 | 27 | ||
| 25 | void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger) { | 28 | void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger, |
| 26 | auto module_ = std::make_shared<Module>(); | 29 | Core::System& system) { |
| 30 | auto module_ = std::make_shared<Module>(system); | ||
| 27 | std::make_shared<NVDRV>(module_, "nvdrv")->InstallAsService(service_manager); | 31 | std::make_shared<NVDRV>(module_, "nvdrv")->InstallAsService(service_manager); |
| 28 | std::make_shared<NVDRV>(module_, "nvdrv:a")->InstallAsService(service_manager); | 32 | std::make_shared<NVDRV>(module_, "nvdrv:a")->InstallAsService(service_manager); |
| 29 | std::make_shared<NVDRV>(module_, "nvdrv:s")->InstallAsService(service_manager); | 33 | std::make_shared<NVDRV>(module_, "nvdrv:s")->InstallAsService(service_manager); |
| @@ -32,17 +36,25 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger | |||
| 32 | nvflinger.SetNVDrvInstance(module_); | 36 | nvflinger.SetNVDrvInstance(module_); |
| 33 | } | 37 | } |
| 34 | 38 | ||
| 35 | Module::Module() { | 39 | Module::Module(Core::System& system) { |
| 36 | auto nvmap_dev = std::make_shared<Devices::nvmap>(); | 40 | auto& kernel = system.Kernel(); |
| 37 | devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(nvmap_dev); | 41 | for (u32 i = 0; i < MaxNvEvents; i++) { |
| 38 | devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(nvmap_dev); | 42 | std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); |
| 39 | devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(); | 43 | events_interface.events[i] = Kernel::WritableEvent::CreateEventPair( |
| 44 | kernel, Kernel::ResetType::Automatic, event_label); | ||
| 45 | events_interface.status[i] = EventState::Free; | ||
| 46 | events_interface.registered[i] = false; | ||
| 47 | } | ||
| 48 | auto nvmap_dev = std::make_shared<Devices::nvmap>(system); | ||
| 49 | devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev); | ||
| 50 | devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev); | ||
| 51 | devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system); | ||
| 40 | devices["/dev/nvmap"] = nvmap_dev; | 52 | devices["/dev/nvmap"] = nvmap_dev; |
| 41 | devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(nvmap_dev); | 53 | devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); |
| 42 | devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(); | 54 | devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface); |
| 43 | devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(); | 55 | devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system); |
| 44 | devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(); | 56 | devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); |
| 45 | devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(); | 57 | devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system); |
| 46 | } | 58 | } |
| 47 | 59 | ||
| 48 | Module::~Module() = default; | 60 | Module::~Module() = default; |
| @@ -59,12 +71,13 @@ u32 Module::Open(const std::string& device_name) { | |||
| 59 | return fd; | 71 | return fd; |
| 60 | } | 72 | } |
| 61 | 73 | ||
| 62 | u32 Module::Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output) { | 74 | u32 Module::Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output, |
| 75 | IoctlCtrl& ctrl) { | ||
| 63 | auto itr = open_files.find(fd); | 76 | auto itr = open_files.find(fd); |
| 64 | ASSERT_MSG(itr != open_files.end(), "Tried to talk to an invalid device"); | 77 | ASSERT_MSG(itr != open_files.end(), "Tried to talk to an invalid device"); |
| 65 | 78 | ||
| 66 | auto& device = itr->second; | 79 | auto& device = itr->second; |
| 67 | return device->ioctl({command}, input, output); | 80 | return device->ioctl({command}, input, output, ctrl); |
| 68 | } | 81 | } |
| 69 | 82 | ||
| 70 | ResultCode Module::Close(u32 fd) { | 83 | ResultCode Module::Close(u32 fd) { |
| @@ -77,4 +90,22 @@ ResultCode Module::Close(u32 fd) { | |||
| 77 | return RESULT_SUCCESS; | 90 | return RESULT_SUCCESS; |
| 78 | } | 91 | } |
| 79 | 92 | ||
| 93 | void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) { | ||
| 94 | for (u32 i = 0; i < MaxNvEvents; i++) { | ||
| 95 | if (events_interface.assigned_syncpt[i] == syncpoint_id && | ||
| 96 | events_interface.assigned_value[i] == value) { | ||
| 97 | events_interface.LiberateEvent(i); | ||
| 98 | events_interface.events[i].writable->Signal(); | ||
| 99 | } | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | Kernel::SharedPtr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const { | ||
| 104 | return events_interface.events[event_id].readable; | ||
| 105 | } | ||
| 106 | |||
| 107 | Kernel::SharedPtr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const { | ||
| 108 | return events_interface.events[event_id].writable; | ||
| 109 | } | ||
| 110 | |||
| 80 | } // namespace Service::Nvidia | 111 | } // namespace Service::Nvidia |
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index 53564f696..a339ab672 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h | |||
| @@ -8,8 +8,14 @@ | |||
| 8 | #include <unordered_map> | 8 | #include <unordered_map> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "core/hle/kernel/writable_event.h" | ||
| 12 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 11 | #include "core/hle/service/service.h" | 13 | #include "core/hle/service/service.h" |
| 12 | 14 | ||
| 15 | namespace Core { | ||
| 16 | class System; | ||
| 17 | } | ||
| 18 | |||
| 13 | namespace Service::NVFlinger { | 19 | namespace Service::NVFlinger { |
| 14 | class NVFlinger; | 20 | class NVFlinger; |
| 15 | } | 21 | } |
| @@ -20,16 +26,72 @@ namespace Devices { | |||
| 20 | class nvdevice; | 26 | class nvdevice; |
| 21 | } | 27 | } |
| 22 | 28 | ||
| 23 | struct IoctlFence { | 29 | struct EventInterface { |
| 24 | u32 id; | 30 | // Mask representing currently busy events |
| 25 | u32 value; | 31 | u64 events_mask{}; |
| 32 | // Each kernel event associated to an NV event | ||
| 33 | std::array<Kernel::EventPair, MaxNvEvents> events; | ||
| 34 | // The status of the current NVEvent | ||
| 35 | std::array<EventState, MaxNvEvents> status{}; | ||
| 36 | // Tells if an NVEvent is registered or not | ||
| 37 | std::array<bool, MaxNvEvents> registered{}; | ||
| 38 | // When an NVEvent is waiting on GPU interrupt, this is the sync_point | ||
| 39 | // associated with it. | ||
| 40 | std::array<u32, MaxNvEvents> assigned_syncpt{}; | ||
| 41 | // This is the value of the GPU interrupt for which the NVEvent is waiting | ||
| 42 | // for. | ||
| 43 | std::array<u32, MaxNvEvents> assigned_value{}; | ||
| 44 | // Constant to denote an unasigned syncpoint. | ||
| 45 | static constexpr u32 unassigned_syncpt = 0xFFFFFFFF; | ||
| 46 | std::optional<u32> GetFreeEvent() const { | ||
| 47 | u64 mask = events_mask; | ||
| 48 | for (u32 i = 0; i < MaxNvEvents; i++) { | ||
| 49 | const bool is_free = (mask & 0x1) == 0; | ||
| 50 | if (is_free) { | ||
| 51 | if (status[i] == EventState::Registered || status[i] == EventState::Free) { | ||
| 52 | return {i}; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | mask = mask >> 1; | ||
| 56 | } | ||
| 57 | return {}; | ||
| 58 | } | ||
| 59 | void SetEventStatus(const u32 event_id, EventState new_status) { | ||
| 60 | EventState old_status = status[event_id]; | ||
| 61 | if (old_status == new_status) { | ||
| 62 | return; | ||
| 63 | } | ||
| 64 | status[event_id] = new_status; | ||
| 65 | if (new_status == EventState::Registered) { | ||
| 66 | registered[event_id] = true; | ||
| 67 | } | ||
| 68 | if (new_status == EventState::Waiting || new_status == EventState::Busy) { | ||
| 69 | events_mask |= (1ULL << event_id); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | void RegisterEvent(const u32 event_id) { | ||
| 73 | registered[event_id] = true; | ||
| 74 | if (status[event_id] == EventState::Free) { | ||
| 75 | status[event_id] = EventState::Registered; | ||
| 76 | } | ||
| 77 | } | ||
| 78 | void UnregisterEvent(const u32 event_id) { | ||
| 79 | registered[event_id] = false; | ||
| 80 | if (status[event_id] == EventState::Registered) { | ||
| 81 | status[event_id] = EventState::Free; | ||
| 82 | } | ||
| 83 | } | ||
| 84 | void LiberateEvent(const u32 event_id) { | ||
| 85 | status[event_id] = registered[event_id] ? EventState::Registered : EventState::Free; | ||
| 86 | events_mask &= ~(1ULL << event_id); | ||
| 87 | assigned_syncpt[event_id] = unassigned_syncpt; | ||
| 88 | assigned_value[event_id] = 0; | ||
| 89 | } | ||
| 26 | }; | 90 | }; |
| 27 | 91 | ||
| 28 | static_assert(sizeof(IoctlFence) == 8, "IoctlFence has wrong size"); | ||
| 29 | |||
| 30 | class Module final { | 92 | class Module final { |
| 31 | public: | 93 | public: |
| 32 | Module(); | 94 | Module(Core::System& system); |
| 33 | ~Module(); | 95 | ~Module(); |
| 34 | 96 | ||
| 35 | /// Returns a pointer to one of the available devices, identified by its name. | 97 | /// Returns a pointer to one of the available devices, identified by its name. |
| @@ -44,10 +106,17 @@ public: | |||
| 44 | /// Opens a device node and returns a file descriptor to it. | 106 | /// Opens a device node and returns a file descriptor to it. |
| 45 | u32 Open(const std::string& device_name); | 107 | u32 Open(const std::string& device_name); |
| 46 | /// Sends an ioctl command to the specified file descriptor. | 108 | /// Sends an ioctl command to the specified file descriptor. |
| 47 | u32 Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output); | 109 | u32 Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output, |
| 110 | IoctlCtrl& ctrl); | ||
| 48 | /// Closes a device file descriptor and returns operation success. | 111 | /// Closes a device file descriptor and returns operation success. |
| 49 | ResultCode Close(u32 fd); | 112 | ResultCode Close(u32 fd); |
| 50 | 113 | ||
| 114 | void SignalSyncpt(const u32 syncpoint_id, const u32 value); | ||
| 115 | |||
| 116 | Kernel::SharedPtr<Kernel::ReadableEvent> GetEvent(u32 event_id) const; | ||
| 117 | |||
| 118 | Kernel::SharedPtr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const; | ||
| 119 | |||
| 51 | private: | 120 | private: |
| 52 | /// Id to use for the next open file descriptor. | 121 | /// Id to use for the next open file descriptor. |
| 53 | u32 next_fd = 1; | 122 | u32 next_fd = 1; |
| @@ -57,9 +126,12 @@ private: | |||
| 57 | 126 | ||
| 58 | /// Mapping of device node names to their implementation. | 127 | /// Mapping of device node names to their implementation. |
| 59 | std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices; | 128 | std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices; |
| 129 | |||
| 130 | EventInterface events_interface; | ||
| 60 | }; | 131 | }; |
| 61 | 132 | ||
| 62 | /// Registers all NVDRV services with the specified service manager. | 133 | /// Registers all NVDRV services with the specified service manager. |
| 63 | void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger); | 134 | void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger, |
| 135 | Core::System& system); | ||
| 64 | 136 | ||
| 65 | } // namespace Service::Nvidia | 137 | } // namespace Service::Nvidia |
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index 5731e815f..e1a07d3ee 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp | |||
| @@ -34,7 +34,8 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) | |||
| 34 | buffer_wait_event.writable->Signal(); | 34 | buffer_wait_event.writable->Signal(); |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | std::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) { | 37 | std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width, |
| 38 | u32 height) { | ||
| 38 | auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) { | 39 | auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) { |
| 39 | // Only consider free buffers. Buffers become free once again after they've been Acquired | 40 | // Only consider free buffers. Buffers become free once again after they've been Acquired |
| 40 | // and Released by the compositor, see the NVFlinger::Compose method. | 41 | // and Released by the compositor, see the NVFlinger::Compose method. |
| @@ -51,7 +52,7 @@ std::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) { | |||
| 51 | } | 52 | } |
| 52 | 53 | ||
| 53 | itr->status = Buffer::Status::Dequeued; | 54 | itr->status = Buffer::Status::Dequeued; |
| 54 | return itr->slot; | 55 | return {{itr->slot, &itr->multi_fence}}; |
| 55 | } | 56 | } |
| 56 | 57 | ||
| 57 | const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const { | 58 | const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const { |
| @@ -63,7 +64,8 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const { | |||
| 63 | } | 64 | } |
| 64 | 65 | ||
| 65 | void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, | 66 | void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, |
| 66 | const Common::Rectangle<int>& crop_rect) { | 67 | const Common::Rectangle<int>& crop_rect, u32 swap_interval, |
| 68 | Service::Nvidia::MultiFence& multi_fence) { | ||
| 67 | auto itr = std::find_if(queue.begin(), queue.end(), | 69 | auto itr = std::find_if(queue.begin(), queue.end(), |
| 68 | [&](const Buffer& buffer) { return buffer.slot == slot; }); | 70 | [&](const Buffer& buffer) { return buffer.slot == slot; }); |
| 69 | ASSERT(itr != queue.end()); | 71 | ASSERT(itr != queue.end()); |
| @@ -71,12 +73,21 @@ void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, | |||
| 71 | itr->status = Buffer::Status::Queued; | 73 | itr->status = Buffer::Status::Queued; |
| 72 | itr->transform = transform; | 74 | itr->transform = transform; |
| 73 | itr->crop_rect = crop_rect; | 75 | itr->crop_rect = crop_rect; |
| 76 | itr->swap_interval = swap_interval; | ||
| 77 | itr->multi_fence = multi_fence; | ||
| 78 | queue_sequence.push_back(slot); | ||
| 74 | } | 79 | } |
| 75 | 80 | ||
| 76 | std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { | 81 | std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { |
| 77 | auto itr = std::find_if(queue.begin(), queue.end(), [](const Buffer& buffer) { | 82 | auto itr = queue.end(); |
| 78 | return buffer.status == Buffer::Status::Queued; | 83 | // Iterate to find a queued buffer matching the requested slot. |
| 79 | }); | 84 | while (itr == queue.end() && !queue_sequence.empty()) { |
| 85 | u32 slot = queue_sequence.front(); | ||
| 86 | itr = std::find_if(queue.begin(), queue.end(), [&slot](const Buffer& buffer) { | ||
| 87 | return buffer.status == Buffer::Status::Queued && buffer.slot == slot; | ||
| 88 | }); | ||
| 89 | queue_sequence.pop_front(); | ||
| 90 | } | ||
| 80 | if (itr == queue.end()) | 91 | if (itr == queue.end()) |
| 81 | return {}; | 92 | return {}; |
| 82 | itr->status = Buffer::Status::Acquired; | 93 | itr->status = Buffer::Status::Acquired; |
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index e1ccb6171..356bedb81 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <list> | ||
| 7 | #include <optional> | 8 | #include <optional> |
| 8 | #include <vector> | 9 | #include <vector> |
| 9 | 10 | ||
| @@ -12,6 +13,7 @@ | |||
| 12 | #include "common/swap.h" | 13 | #include "common/swap.h" |
| 13 | #include "core/hle/kernel/object.h" | 14 | #include "core/hle/kernel/object.h" |
| 14 | #include "core/hle/kernel/writable_event.h" | 15 | #include "core/hle/kernel/writable_event.h" |
| 16 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 15 | 17 | ||
| 16 | namespace Service::NVFlinger { | 18 | namespace Service::NVFlinger { |
| 17 | 19 | ||
| @@ -68,13 +70,17 @@ public: | |||
| 68 | IGBPBuffer igbp_buffer; | 70 | IGBPBuffer igbp_buffer; |
| 69 | BufferTransformFlags transform; | 71 | BufferTransformFlags transform; |
| 70 | Common::Rectangle<int> crop_rect; | 72 | Common::Rectangle<int> crop_rect; |
| 73 | u32 swap_interval; | ||
| 74 | Service::Nvidia::MultiFence multi_fence; | ||
| 71 | }; | 75 | }; |
| 72 | 76 | ||
| 73 | void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); | 77 | void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); |
| 74 | std::optional<u32> DequeueBuffer(u32 width, u32 height); | 78 | std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> DequeueBuffer(u32 width, |
| 79 | u32 height); | ||
| 75 | const IGBPBuffer& RequestBuffer(u32 slot) const; | 80 | const IGBPBuffer& RequestBuffer(u32 slot) const; |
| 76 | void QueueBuffer(u32 slot, BufferTransformFlags transform, | 81 | void QueueBuffer(u32 slot, BufferTransformFlags transform, |
| 77 | const Common::Rectangle<int>& crop_rect); | 82 | const Common::Rectangle<int>& crop_rect, u32 swap_interval, |
| 83 | Service::Nvidia::MultiFence& multi_fence); | ||
| 78 | std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); | 84 | std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); |
| 79 | void ReleaseBuffer(u32 slot); | 85 | void ReleaseBuffer(u32 slot); |
| 80 | u32 Query(QueryType type); | 86 | u32 Query(QueryType type); |
| @@ -92,6 +98,7 @@ private: | |||
| 92 | u64 layer_id; | 98 | u64 layer_id; |
| 93 | 99 | ||
| 94 | std::vector<Buffer> queue; | 100 | std::vector<Buffer> queue; |
| 101 | std::list<u32> queue_sequence; | ||
| 95 | Kernel::EventPair buffer_wait_event; | 102 | Kernel::EventPair buffer_wait_event; |
| 96 | }; | 103 | }; |
| 97 | 104 | ||
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 3c5c53e24..f9db79370 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp | |||
| @@ -37,15 +37,14 @@ NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_t | |||
| 37 | displays.emplace_back(4, "Null"); | 37 | displays.emplace_back(4, "Null"); |
| 38 | 38 | ||
| 39 | // Schedule the screen composition events | 39 | // Schedule the screen composition events |
| 40 | const auto ticks = Settings::values.force_30fps_mode ? frame_ticks_30fps : frame_ticks; | 40 | composition_event = core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, |
| 41 | 41 | s64 cycles_late) { | |
| 42 | composition_event = core_timing.RegisterEvent( | 42 | Compose(); |
| 43 | "ScreenComposition", [this, ticks](u64 userdata, s64 cycles_late) { | 43 | const auto ticks = Settings::values.force_30fps_mode ? frame_ticks_30fps : GetNextTicks(); |
| 44 | Compose(); | 44 | this->core_timing.ScheduleEvent(std::max<s64>(0LL, ticks - cycles_late), composition_event); |
| 45 | this->core_timing.ScheduleEvent(ticks - cycles_late, composition_event); | 45 | }); |
| 46 | }); | 46 | |
| 47 | 47 | core_timing.ScheduleEvent(frame_ticks, composition_event); | |
| 48 | core_timing.ScheduleEvent(ticks, composition_event); | ||
| 49 | } | 48 | } |
| 50 | 49 | ||
| 51 | NVFlinger::~NVFlinger() { | 50 | NVFlinger::~NVFlinger() { |
| @@ -206,8 +205,14 @@ void NVFlinger::Compose() { | |||
| 206 | igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, | 205 | igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, |
| 207 | buffer->get().transform, buffer->get().crop_rect); | 206 | buffer->get().transform, buffer->get().crop_rect); |
| 208 | 207 | ||
| 208 | swap_interval = buffer->get().swap_interval; | ||
| 209 | buffer_queue.ReleaseBuffer(buffer->get().slot); | 209 | buffer_queue.ReleaseBuffer(buffer->get().slot); |
| 210 | } | 210 | } |
| 211 | } | 211 | } |
| 212 | 212 | ||
| 213 | s64 NVFlinger::GetNextTicks() const { | ||
| 214 | constexpr s64 max_hertz = 120LL; | ||
| 215 | return (Core::Timing::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz; | ||
| 216 | } | ||
| 217 | |||
| 213 | } // namespace Service::NVFlinger | 218 | } // namespace Service::NVFlinger |
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index c0a83fffb..988be8726 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h | |||
| @@ -74,6 +74,8 @@ public: | |||
| 74 | /// finished. | 74 | /// finished. |
| 75 | void Compose(); | 75 | void Compose(); |
| 76 | 76 | ||
| 77 | s64 GetNextTicks() const; | ||
| 78 | |||
| 77 | private: | 79 | private: |
| 78 | /// Finds the display identified by the specified ID. | 80 | /// Finds the display identified by the specified ID. |
| 79 | VI::Display* FindDisplay(u64 display_id); | 81 | VI::Display* FindDisplay(u64 display_id); |
| @@ -98,6 +100,8 @@ private: | |||
| 98 | /// layers. | 100 | /// layers. |
| 99 | u32 next_buffer_queue_id = 1; | 101 | u32 next_buffer_queue_id = 1; |
| 100 | 102 | ||
| 103 | u32 swap_interval = 1; | ||
| 104 | |||
| 101 | /// Event that handles screen composition. | 105 | /// Event that handles screen composition. |
| 102 | Core::Timing::EventType* composition_event; | 106 | Core::Timing::EventType* composition_event; |
| 103 | 107 | ||
diff --git a/src/core/hle/service/pm/pm.cpp b/src/core/hle/service/pm/pm.cpp index ebcc41a43..fe6b5f798 100644 --- a/src/core/hle/service/pm/pm.cpp +++ b/src/core/hle/service/pm/pm.cpp | |||
| @@ -3,11 +3,44 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/hle/ipc_helpers.h" | 5 | #include "core/hle/ipc_helpers.h" |
| 6 | #include "core/hle/kernel/kernel.h" | ||
| 7 | #include "core/hle/kernel/process.h" | ||
| 6 | #include "core/hle/service/pm/pm.h" | 8 | #include "core/hle/service/pm/pm.h" |
| 7 | #include "core/hle/service/service.h" | 9 | #include "core/hle/service/service.h" |
| 8 | 10 | ||
| 9 | namespace Service::PM { | 11 | namespace Service::PM { |
| 10 | 12 | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | constexpr ResultCode ERROR_PROCESS_NOT_FOUND{ErrorModule::PM, 1}; | ||
| 16 | |||
| 17 | constexpr u64 NO_PROCESS_FOUND_PID{0}; | ||
| 18 | |||
| 19 | std::optional<Kernel::SharedPtr<Kernel::Process>> SearchProcessList( | ||
| 20 | const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list, | ||
| 21 | std::function<bool(const Kernel::SharedPtr<Kernel::Process>&)> predicate) { | ||
| 22 | const auto iter = std::find_if(process_list.begin(), process_list.end(), predicate); | ||
| 23 | |||
| 24 | if (iter == process_list.end()) { | ||
| 25 | return std::nullopt; | ||
| 26 | } | ||
| 27 | |||
| 28 | return *iter; | ||
| 29 | } | ||
| 30 | |||
| 31 | void GetApplicationPidGeneric(Kernel::HLERequestContext& ctx, | ||
| 32 | const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list) { | ||
| 33 | const auto process = SearchProcessList(process_list, [](const auto& process) { | ||
| 34 | return process->GetProcessID() == Kernel::Process::ProcessIDMin; | ||
| 35 | }); | ||
| 36 | |||
| 37 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 38 | rb.Push(RESULT_SUCCESS); | ||
| 39 | rb.Push(process.has_value() ? (*process)->GetProcessID() : NO_PROCESS_FOUND_PID); | ||
| 40 | } | ||
| 41 | |||
| 42 | } // Anonymous namespace | ||
| 43 | |||
| 11 | class BootMode final : public ServiceFramework<BootMode> { | 44 | class BootMode final : public ServiceFramework<BootMode> { |
| 12 | public: | 45 | public: |
| 13 | explicit BootMode() : ServiceFramework{"pm:bm"} { | 46 | explicit BootMode() : ServiceFramework{"pm:bm"} { |
| @@ -41,14 +74,15 @@ private: | |||
| 41 | 74 | ||
| 42 | class DebugMonitor final : public ServiceFramework<DebugMonitor> { | 75 | class DebugMonitor final : public ServiceFramework<DebugMonitor> { |
| 43 | public: | 76 | public: |
| 44 | explicit DebugMonitor() : ServiceFramework{"pm:dmnt"} { | 77 | explicit DebugMonitor(const Kernel::KernelCore& kernel) |
| 78 | : ServiceFramework{"pm:dmnt"}, kernel(kernel) { | ||
| 45 | // clang-format off | 79 | // clang-format off |
| 46 | static const FunctionInfo functions[] = { | 80 | static const FunctionInfo functions[] = { |
| 47 | {0, nullptr, "GetDebugProcesses"}, | 81 | {0, nullptr, "GetDebugProcesses"}, |
| 48 | {1, nullptr, "StartDebugProcess"}, | 82 | {1, nullptr, "StartDebugProcess"}, |
| 49 | {2, nullptr, "GetTitlePid"}, | 83 | {2, &DebugMonitor::GetTitlePid, "GetTitlePid"}, |
| 50 | {3, nullptr, "EnableDebugForTitleId"}, | 84 | {3, nullptr, "EnableDebugForTitleId"}, |
| 51 | {4, nullptr, "GetApplicationPid"}, | 85 | {4, &DebugMonitor::GetApplicationPid, "GetApplicationPid"}, |
| 52 | {5, nullptr, "EnableDebugForApplication"}, | 86 | {5, nullptr, "EnableDebugForApplication"}, |
| 53 | {6, nullptr, "DisableDebug"}, | 87 | {6, nullptr, "DisableDebug"}, |
| 54 | }; | 88 | }; |
| @@ -56,21 +90,77 @@ public: | |||
| 56 | 90 | ||
| 57 | RegisterHandlers(functions); | 91 | RegisterHandlers(functions); |
| 58 | } | 92 | } |
| 93 | |||
| 94 | private: | ||
| 95 | void GetTitlePid(Kernel::HLERequestContext& ctx) { | ||
| 96 | IPC::RequestParser rp{ctx}; | ||
| 97 | const auto title_id = rp.PopRaw<u64>(); | ||
| 98 | |||
| 99 | LOG_DEBUG(Service_PM, "called, title_id={:016X}", title_id); | ||
| 100 | |||
| 101 | const auto process = | ||
| 102 | SearchProcessList(kernel.GetProcessList(), [title_id](const auto& process) { | ||
| 103 | return process->GetTitleID() == title_id; | ||
| 104 | }); | ||
| 105 | |||
| 106 | if (!process.has_value()) { | ||
| 107 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 108 | rb.Push(ERROR_PROCESS_NOT_FOUND); | ||
| 109 | return; | ||
| 110 | } | ||
| 111 | |||
| 112 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 113 | rb.Push(RESULT_SUCCESS); | ||
| 114 | rb.Push((*process)->GetProcessID()); | ||
| 115 | } | ||
| 116 | |||
| 117 | void GetApplicationPid(Kernel::HLERequestContext& ctx) { | ||
| 118 | LOG_DEBUG(Service_PM, "called"); | ||
| 119 | GetApplicationPidGeneric(ctx, kernel.GetProcessList()); | ||
| 120 | } | ||
| 121 | |||
| 122 | const Kernel::KernelCore& kernel; | ||
| 59 | }; | 123 | }; |
| 60 | 124 | ||
| 61 | class Info final : public ServiceFramework<Info> { | 125 | class Info final : public ServiceFramework<Info> { |
| 62 | public: | 126 | public: |
| 63 | explicit Info() : ServiceFramework{"pm:info"} { | 127 | explicit Info(const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list) |
| 128 | : ServiceFramework{"pm:info"}, process_list(process_list) { | ||
| 64 | static const FunctionInfo functions[] = { | 129 | static const FunctionInfo functions[] = { |
| 65 | {0, nullptr, "GetTitleId"}, | 130 | {0, &Info::GetTitleId, "GetTitleId"}, |
| 66 | }; | 131 | }; |
| 67 | RegisterHandlers(functions); | 132 | RegisterHandlers(functions); |
| 68 | } | 133 | } |
| 134 | |||
| 135 | private: | ||
| 136 | void GetTitleId(Kernel::HLERequestContext& ctx) { | ||
| 137 | IPC::RequestParser rp{ctx}; | ||
| 138 | const auto process_id = rp.PopRaw<u64>(); | ||
| 139 | |||
| 140 | LOG_DEBUG(Service_PM, "called, process_id={:016X}", process_id); | ||
| 141 | |||
| 142 | const auto process = SearchProcessList(process_list, [process_id](const auto& process) { | ||
| 143 | return process->GetProcessID() == process_id; | ||
| 144 | }); | ||
| 145 | |||
| 146 | if (!process.has_value()) { | ||
| 147 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 148 | rb.Push(ERROR_PROCESS_NOT_FOUND); | ||
| 149 | return; | ||
| 150 | } | ||
| 151 | |||
| 152 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 153 | rb.Push(RESULT_SUCCESS); | ||
| 154 | rb.Push((*process)->GetTitleID()); | ||
| 155 | } | ||
| 156 | |||
| 157 | const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list; | ||
| 69 | }; | 158 | }; |
| 70 | 159 | ||
| 71 | class Shell final : public ServiceFramework<Shell> { | 160 | class Shell final : public ServiceFramework<Shell> { |
| 72 | public: | 161 | public: |
| 73 | explicit Shell() : ServiceFramework{"pm:shell"} { | 162 | explicit Shell(const Kernel::KernelCore& kernel) |
| 163 | : ServiceFramework{"pm:shell"}, kernel(kernel) { | ||
| 74 | // clang-format off | 164 | // clang-format off |
| 75 | static const FunctionInfo functions[] = { | 165 | static const FunctionInfo functions[] = { |
| 76 | {0, nullptr, "LaunchProcess"}, | 166 | {0, nullptr, "LaunchProcess"}, |
| @@ -79,21 +169,31 @@ public: | |||
| 79 | {3, nullptr, "GetProcessEventWaiter"}, | 169 | {3, nullptr, "GetProcessEventWaiter"}, |
| 80 | {4, nullptr, "GetProcessEventType"}, | 170 | {4, nullptr, "GetProcessEventType"}, |
| 81 | {5, nullptr, "NotifyBootFinished"}, | 171 | {5, nullptr, "NotifyBootFinished"}, |
| 82 | {6, nullptr, "GetApplicationPid"}, | 172 | {6, &Shell::GetApplicationPid, "GetApplicationPid"}, |
| 83 | {7, nullptr, "BoostSystemMemoryResourceLimit"}, | 173 | {7, nullptr, "BoostSystemMemoryResourceLimit"}, |
| 84 | {8, nullptr, "EnableAdditionalSystemThreads"}, | 174 | {8, nullptr, "EnableAdditionalSystemThreads"}, |
| 175 | {9, nullptr, "GetUnimplementedEventHandle"}, | ||
| 85 | }; | 176 | }; |
| 86 | // clang-format on | 177 | // clang-format on |
| 87 | 178 | ||
| 88 | RegisterHandlers(functions); | 179 | RegisterHandlers(functions); |
| 89 | } | 180 | } |
| 181 | |||
| 182 | private: | ||
| 183 | void GetApplicationPid(Kernel::HLERequestContext& ctx) { | ||
| 184 | LOG_DEBUG(Service_PM, "called"); | ||
| 185 | GetApplicationPidGeneric(ctx, kernel.GetProcessList()); | ||
| 186 | } | ||
| 187 | |||
| 188 | const Kernel::KernelCore& kernel; | ||
| 90 | }; | 189 | }; |
| 91 | 190 | ||
| 92 | void InstallInterfaces(SM::ServiceManager& sm) { | 191 | void InstallInterfaces(Core::System& system) { |
| 93 | std::make_shared<BootMode>()->InstallAsService(sm); | 192 | std::make_shared<BootMode>()->InstallAsService(system.ServiceManager()); |
| 94 | std::make_shared<DebugMonitor>()->InstallAsService(sm); | 193 | std::make_shared<DebugMonitor>(system.Kernel())->InstallAsService(system.ServiceManager()); |
| 95 | std::make_shared<Info>()->InstallAsService(sm); | 194 | std::make_shared<Info>(system.Kernel().GetProcessList()) |
| 96 | std::make_shared<Shell>()->InstallAsService(sm); | 195 | ->InstallAsService(system.ServiceManager()); |
| 196 | std::make_shared<Shell>(system.Kernel())->InstallAsService(system.ServiceManager()); | ||
| 97 | } | 197 | } |
| 98 | 198 | ||
| 99 | } // namespace Service::PM | 199 | } // namespace Service::PM |
diff --git a/src/core/hle/service/pm/pm.h b/src/core/hle/service/pm/pm.h index cc8d3f215..852e7050c 100644 --- a/src/core/hle/service/pm/pm.h +++ b/src/core/hle/service/pm/pm.h | |||
| @@ -4,8 +4,8 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | namespace Service::SM { | 7 | namespace Core { |
| 8 | class ServiceManager; | 8 | class System; |
| 9 | } | 9 | } |
| 10 | 10 | ||
| 11 | namespace Service::PM { | 11 | namespace Service::PM { |
| @@ -16,6 +16,6 @@ enum class SystemBootMode { | |||
| 16 | }; | 16 | }; |
| 17 | 17 | ||
| 18 | /// Registers all PM services with the specified service manager. | 18 | /// Registers all PM services with the specified service manager. |
| 19 | void InstallInterfaces(SM::ServiceManager& service_manager); | 19 | void InstallInterfaces(Core::System& system); |
| 20 | 20 | ||
| 21 | } // namespace Service::PM | 21 | } // namespace Service::PM |
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp index ec9d755b7..3a0f8c3f6 100644 --- a/src/core/hle/service/service.cpp +++ b/src/core/hle/service/service.cpp | |||
| @@ -195,8 +195,7 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co | |||
| 195 | // Module interface | 195 | // Module interface |
| 196 | 196 | ||
| 197 | /// Initialize ServiceManager | 197 | /// Initialize ServiceManager |
| 198 | void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system, | 198 | void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system) { |
| 199 | FileSys::VfsFilesystem& vfs) { | ||
| 200 | // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it | 199 | // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it |
| 201 | // here and pass it into the respective InstallInterfaces functions. | 200 | // here and pass it into the respective InstallInterfaces functions. |
| 202 | auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system.CoreTiming()); | 201 | auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system.CoreTiming()); |
| @@ -206,8 +205,8 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system, | |||
| 206 | Account::InstallInterfaces(system); | 205 | Account::InstallInterfaces(system); |
| 207 | AM::InstallInterfaces(*sm, nv_flinger, system); | 206 | AM::InstallInterfaces(*sm, nv_flinger, system); |
| 208 | AOC::InstallInterfaces(*sm); | 207 | AOC::InstallInterfaces(*sm); |
| 209 | APM::InstallInterfaces(*sm); | 208 | APM::InstallInterfaces(system); |
| 210 | Audio::InstallInterfaces(*sm); | 209 | Audio::InstallInterfaces(*sm, system); |
| 211 | BCAT::InstallInterfaces(*sm); | 210 | BCAT::InstallInterfaces(*sm); |
| 212 | BPC::InstallInterfaces(*sm); | 211 | BPC::InstallInterfaces(*sm); |
| 213 | BtDrv::InstallInterfaces(*sm); | 212 | BtDrv::InstallInterfaces(*sm); |
| @@ -218,7 +217,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system, | |||
| 218 | EUPLD::InstallInterfaces(*sm); | 217 | EUPLD::InstallInterfaces(*sm); |
| 219 | Fatal::InstallInterfaces(*sm); | 218 | Fatal::InstallInterfaces(*sm); |
| 220 | FGM::InstallInterfaces(*sm); | 219 | FGM::InstallInterfaces(*sm); |
| 221 | FileSystem::InstallInterfaces(*sm, vfs); | 220 | FileSystem::InstallInterfaces(system); |
| 222 | Friend::InstallInterfaces(*sm); | 221 | Friend::InstallInterfaces(*sm); |
| 223 | Glue::InstallInterfaces(system); | 222 | Glue::InstallInterfaces(system); |
| 224 | GRC::InstallInterfaces(*sm); | 223 | GRC::InstallInterfaces(*sm); |
| @@ -237,19 +236,19 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system, | |||
| 237 | NIM::InstallInterfaces(*sm); | 236 | NIM::InstallInterfaces(*sm); |
| 238 | NPNS::InstallInterfaces(*sm); | 237 | NPNS::InstallInterfaces(*sm); |
| 239 | NS::InstallInterfaces(*sm); | 238 | NS::InstallInterfaces(*sm); |
| 240 | Nvidia::InstallInterfaces(*sm, *nv_flinger); | 239 | Nvidia::InstallInterfaces(*sm, *nv_flinger, system); |
| 241 | PCIe::InstallInterfaces(*sm); | 240 | PCIe::InstallInterfaces(*sm); |
| 242 | PCTL::InstallInterfaces(*sm); | 241 | PCTL::InstallInterfaces(*sm); |
| 243 | PCV::InstallInterfaces(*sm); | 242 | PCV::InstallInterfaces(*sm); |
| 244 | PlayReport::InstallInterfaces(*sm); | 243 | PlayReport::InstallInterfaces(*sm); |
| 245 | PM::InstallInterfaces(*sm); | 244 | PM::InstallInterfaces(system); |
| 246 | PSC::InstallInterfaces(*sm); | 245 | PSC::InstallInterfaces(*sm); |
| 247 | PSM::InstallInterfaces(*sm); | 246 | PSM::InstallInterfaces(*sm); |
| 248 | Set::InstallInterfaces(*sm); | 247 | Set::InstallInterfaces(*sm); |
| 249 | Sockets::InstallInterfaces(*sm); | 248 | Sockets::InstallInterfaces(*sm); |
| 250 | SPL::InstallInterfaces(*sm); | 249 | SPL::InstallInterfaces(*sm); |
| 251 | SSL::InstallInterfaces(*sm); | 250 | SSL::InstallInterfaces(*sm); |
| 252 | Time::InstallInterfaces(*sm); | 251 | Time::InstallInterfaces(system); |
| 253 | USB::InstallInterfaces(*sm); | 252 | USB::InstallInterfaces(*sm); |
| 254 | VI::InstallInterfaces(*sm, nv_flinger); | 253 | VI::InstallInterfaces(*sm, nv_flinger); |
| 255 | WLAN::InstallInterfaces(*sm); | 254 | WLAN::InstallInterfaces(*sm); |
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h index abbfe5524..c6c4bdae5 100644 --- a/src/core/hle/service/service.h +++ b/src/core/hle/service/service.h | |||
| @@ -182,8 +182,7 @@ private: | |||
| 182 | }; | 182 | }; |
| 183 | 183 | ||
| 184 | /// Initialize ServiceManager | 184 | /// Initialize ServiceManager |
| 185 | void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system, | 185 | void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system); |
| 186 | FileSys::VfsFilesystem& vfs); | ||
| 187 | 186 | ||
| 188 | /// Shutdown ServiceManager | 187 | /// Shutdown ServiceManager |
| 189 | void Shutdown(); | 188 | void Shutdown(); |
diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp index 298d85011..b54214421 100644 --- a/src/core/hle/service/set/set.cpp +++ b/src/core/hle/service/set/set.cpp | |||
| @@ -95,6 +95,14 @@ void SET::GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx) { | |||
| 95 | PushResponseLanguageCode(ctx, post4_0_0_max_entries); | 95 | PushResponseLanguageCode(ctx, post4_0_0_max_entries); |
| 96 | } | 96 | } |
| 97 | 97 | ||
| 98 | void SET::GetQuestFlag(Kernel::HLERequestContext& ctx) { | ||
| 99 | LOG_DEBUG(Service_SET, "called"); | ||
| 100 | |||
| 101 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 102 | rb.Push(RESULT_SUCCESS); | ||
| 103 | rb.Push(static_cast<u32>(Settings::values.quest_flag)); | ||
| 104 | } | ||
| 105 | |||
| 98 | void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) { | 106 | void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) { |
| 99 | LOG_DEBUG(Service_SET, "called {}", Settings::values.language_index); | 107 | LOG_DEBUG(Service_SET, "called {}", Settings::values.language_index); |
| 100 | 108 | ||
| @@ -114,7 +122,7 @@ SET::SET() : ServiceFramework("set") { | |||
| 114 | {5, &SET::GetAvailableLanguageCodes2, "GetAvailableLanguageCodes2"}, | 122 | {5, &SET::GetAvailableLanguageCodes2, "GetAvailableLanguageCodes2"}, |
| 115 | {6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"}, | 123 | {6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"}, |
| 116 | {7, nullptr, "GetKeyCodeMap"}, | 124 | {7, nullptr, "GetKeyCodeMap"}, |
| 117 | {8, nullptr, "GetQuestFlag"}, | 125 | {8, &SET::GetQuestFlag, "GetQuestFlag"}, |
| 118 | {9, nullptr, "GetKeyCodeMap2"}, | 126 | {9, nullptr, "GetKeyCodeMap2"}, |
| 119 | }; | 127 | }; |
| 120 | // clang-format on | 128 | // clang-format on |
diff --git a/src/core/hle/service/set/set.h b/src/core/hle/service/set/set.h index 31f9cb296..b154e08aa 100644 --- a/src/core/hle/service/set/set.h +++ b/src/core/hle/service/set/set.h | |||
| @@ -42,6 +42,7 @@ private: | |||
| 42 | void GetAvailableLanguageCodes2(Kernel::HLERequestContext& ctx); | 42 | void GetAvailableLanguageCodes2(Kernel::HLERequestContext& ctx); |
| 43 | void GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx); | 43 | void GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx); |
| 44 | void GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx); | 44 | void GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx); |
| 45 | void GetQuestFlag(Kernel::HLERequestContext& ctx); | ||
| 45 | }; | 46 | }; |
| 46 | 47 | ||
| 47 | } // namespace Service::Set | 48 | } // namespace Service::Set |
diff --git a/src/core/hle/service/time/interface.cpp b/src/core/hle/service/time/interface.cpp index 8d122ae33..1030185e0 100644 --- a/src/core/hle/service/time/interface.cpp +++ b/src/core/hle/service/time/interface.cpp | |||
| @@ -6,8 +6,9 @@ | |||
| 6 | 6 | ||
| 7 | namespace Service::Time { | 7 | namespace Service::Time { |
| 8 | 8 | ||
| 9 | Time::Time(std::shared_ptr<Module> time, const char* name) | 9 | Time::Time(std::shared_ptr<Module> time, std::shared_ptr<SharedMemory> shared_memory, |
| 10 | : Module::Interface(std::move(time), name) { | 10 | const char* name) |
| 11 | : Module::Interface(std::move(time), std::move(shared_memory), name) { | ||
| 11 | // clang-format off | 12 | // clang-format off |
| 12 | static const FunctionInfo functions[] = { | 13 | static const FunctionInfo functions[] = { |
| 13 | {0, &Time::GetStandardUserSystemClock, "GetStandardUserSystemClock"}, | 14 | {0, &Time::GetStandardUserSystemClock, "GetStandardUserSystemClock"}, |
| @@ -16,12 +17,12 @@ Time::Time(std::shared_ptr<Module> time, const char* name) | |||
| 16 | {3, &Time::GetTimeZoneService, "GetTimeZoneService"}, | 17 | {3, &Time::GetTimeZoneService, "GetTimeZoneService"}, |
| 17 | {4, &Time::GetStandardLocalSystemClock, "GetStandardLocalSystemClock"}, | 18 | {4, &Time::GetStandardLocalSystemClock, "GetStandardLocalSystemClock"}, |
| 18 | {5, nullptr, "GetEphemeralNetworkSystemClock"}, | 19 | {5, nullptr, "GetEphemeralNetworkSystemClock"}, |
| 19 | {20, nullptr, "GetSharedMemoryNativeHandle"}, | 20 | {20, &Time::GetSharedMemoryNativeHandle, "GetSharedMemoryNativeHandle"}, |
| 20 | {30, nullptr, "GetStandardNetworkClockOperationEventReadableHandle"}, | 21 | {30, nullptr, "GetStandardNetworkClockOperationEventReadableHandle"}, |
| 21 | {31, nullptr, "GetEphemeralNetworkClockOperationEventReadableHandle"}, | 22 | {31, nullptr, "GetEphemeralNetworkClockOperationEventReadableHandle"}, |
| 22 | {50, nullptr, "SetStandardSteadyClockInternalOffset"}, | 23 | {50, nullptr, "SetStandardSteadyClockInternalOffset"}, |
| 23 | {100, nullptr, "IsStandardUserSystemClockAutomaticCorrectionEnabled"}, | 24 | {100, &Time::IsStandardUserSystemClockAutomaticCorrectionEnabled, "IsStandardUserSystemClockAutomaticCorrectionEnabled"}, |
| 24 | {101, nullptr, "SetStandardUserSystemClockAutomaticCorrectionEnabled"}, | 25 | {101, &Time::SetStandardUserSystemClockAutomaticCorrectionEnabled, "SetStandardUserSystemClockAutomaticCorrectionEnabled"}, |
| 25 | {102, nullptr, "GetStandardUserSystemClockInitialYear"}, | 26 | {102, nullptr, "GetStandardUserSystemClockInitialYear"}, |
| 26 | {200, nullptr, "IsStandardNetworkSystemClockAccuracySufficient"}, | 27 | {200, nullptr, "IsStandardNetworkSystemClockAccuracySufficient"}, |
| 27 | {201, nullptr, "GetStandardUserSystemClockAutomaticCorrectionUpdatedTime"}, | 28 | {201, nullptr, "GetStandardUserSystemClockAutomaticCorrectionUpdatedTime"}, |
diff --git a/src/core/hle/service/time/interface.h b/src/core/hle/service/time/interface.h index cd6b44dec..bdf0883e2 100644 --- a/src/core/hle/service/time/interface.h +++ b/src/core/hle/service/time/interface.h | |||
| @@ -8,9 +8,12 @@ | |||
| 8 | 8 | ||
| 9 | namespace Service::Time { | 9 | namespace Service::Time { |
| 10 | 10 | ||
| 11 | class SharedMemory; | ||
| 12 | |||
| 11 | class Time final : public Module::Interface { | 13 | class Time final : public Module::Interface { |
| 12 | public: | 14 | public: |
| 13 | explicit Time(std::shared_ptr<Module> time, const char* name); | 15 | explicit Time(std::shared_ptr<Module> time, std::shared_ptr<SharedMemory> shared_memory, |
| 16 | const char* name); | ||
| 14 | ~Time() override; | 17 | ~Time() override; |
| 15 | }; | 18 | }; |
| 16 | 19 | ||
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp index 346bad80d..ae6446204 100644 --- a/src/core/hle/service/time/time.cpp +++ b/src/core/hle/service/time/time.cpp | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include "core/hle/kernel/client_session.h" | 13 | #include "core/hle/kernel/client_session.h" |
| 14 | #include "core/hle/service/time/interface.h" | 14 | #include "core/hle/service/time/interface.h" |
| 15 | #include "core/hle/service/time/time.h" | 15 | #include "core/hle/service/time/time.h" |
| 16 | #include "core/hle/service/time/time_sharedmemory.h" | ||
| 16 | #include "core/settings.h" | 17 | #include "core/settings.h" |
| 17 | 18 | ||
| 18 | namespace Service::Time { | 19 | namespace Service::Time { |
| @@ -61,9 +62,18 @@ static u64 CalendarToPosix(const CalendarTime& calendar_time, | |||
| 61 | return static_cast<u64>(epoch_time); | 62 | return static_cast<u64>(epoch_time); |
| 62 | } | 63 | } |
| 63 | 64 | ||
| 65 | enum class ClockContextType { | ||
| 66 | StandardSteady, | ||
| 67 | StandardUserSystem, | ||
| 68 | StandardNetworkSystem, | ||
| 69 | StandardLocalSystem, | ||
| 70 | }; | ||
| 71 | |||
| 64 | class ISystemClock final : public ServiceFramework<ISystemClock> { | 72 | class ISystemClock final : public ServiceFramework<ISystemClock> { |
| 65 | public: | 73 | public: |
| 66 | ISystemClock() : ServiceFramework("ISystemClock") { | 74 | ISystemClock(std::shared_ptr<Service::Time::SharedMemory> shared_memory, |
| 75 | ClockContextType clock_type) | ||
| 76 | : ServiceFramework("ISystemClock"), shared_memory(shared_memory), clock_type(clock_type) { | ||
| 67 | static const FunctionInfo functions[] = { | 77 | static const FunctionInfo functions[] = { |
| 68 | {0, &ISystemClock::GetCurrentTime, "GetCurrentTime"}, | 78 | {0, &ISystemClock::GetCurrentTime, "GetCurrentTime"}, |
| 69 | {1, nullptr, "SetCurrentTime"}, | 79 | {1, nullptr, "SetCurrentTime"}, |
| @@ -72,6 +82,8 @@ public: | |||
| 72 | 82 | ||
| 73 | }; | 83 | }; |
| 74 | RegisterHandlers(functions); | 84 | RegisterHandlers(functions); |
| 85 | |||
| 86 | UpdateSharedMemoryContext(system_clock_context); | ||
| 75 | } | 87 | } |
| 76 | 88 | ||
| 77 | private: | 89 | private: |
| @@ -87,34 +99,63 @@ private: | |||
| 87 | void GetSystemClockContext(Kernel::HLERequestContext& ctx) { | 99 | void GetSystemClockContext(Kernel::HLERequestContext& ctx) { |
| 88 | LOG_WARNING(Service_Time, "(STUBBED) called"); | 100 | LOG_WARNING(Service_Time, "(STUBBED) called"); |
| 89 | 101 | ||
| 90 | SystemClockContext system_clock_ontext{}; | 102 | // TODO(ogniK): This should be updated periodically however since we have it stubbed we'll |
| 103 | // only update when we get a new context | ||
| 104 | UpdateSharedMemoryContext(system_clock_context); | ||
| 105 | |||
| 91 | IPC::ResponseBuilder rb{ctx, (sizeof(SystemClockContext) / 4) + 2}; | 106 | IPC::ResponseBuilder rb{ctx, (sizeof(SystemClockContext) / 4) + 2}; |
| 92 | rb.Push(RESULT_SUCCESS); | 107 | rb.Push(RESULT_SUCCESS); |
| 93 | rb.PushRaw(system_clock_ontext); | 108 | rb.PushRaw(system_clock_context); |
| 94 | } | 109 | } |
| 110 | |||
| 111 | void UpdateSharedMemoryContext(const SystemClockContext& clock_context) { | ||
| 112 | switch (clock_type) { | ||
| 113 | case ClockContextType::StandardLocalSystem: | ||
| 114 | shared_memory->SetStandardLocalSystemClockContext(clock_context); | ||
| 115 | break; | ||
| 116 | case ClockContextType::StandardNetworkSystem: | ||
| 117 | shared_memory->SetStandardNetworkSystemClockContext(clock_context); | ||
| 118 | break; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | SystemClockContext system_clock_context{}; | ||
| 123 | std::shared_ptr<Service::Time::SharedMemory> shared_memory; | ||
| 124 | ClockContextType clock_type; | ||
| 95 | }; | 125 | }; |
| 96 | 126 | ||
| 97 | class ISteadyClock final : public ServiceFramework<ISteadyClock> { | 127 | class ISteadyClock final : public ServiceFramework<ISteadyClock> { |
| 98 | public: | 128 | public: |
| 99 | ISteadyClock() : ServiceFramework("ISteadyClock") { | 129 | ISteadyClock(std::shared_ptr<SharedMemory> shared_memory) |
| 130 | : ServiceFramework("ISteadyClock"), shared_memory(shared_memory) { | ||
| 100 | static const FunctionInfo functions[] = { | 131 | static const FunctionInfo functions[] = { |
| 101 | {0, &ISteadyClock::GetCurrentTimePoint, "GetCurrentTimePoint"}, | 132 | {0, &ISteadyClock::GetCurrentTimePoint, "GetCurrentTimePoint"}, |
| 102 | }; | 133 | }; |
| 103 | RegisterHandlers(functions); | 134 | RegisterHandlers(functions); |
| 135 | |||
| 136 | shared_memory->SetStandardSteadyClockTimepoint(GetCurrentTimePoint()); | ||
| 104 | } | 137 | } |
| 105 | 138 | ||
| 106 | private: | 139 | private: |
| 107 | void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) { | 140 | void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) { |
| 108 | LOG_DEBUG(Service_Time, "called"); | 141 | LOG_DEBUG(Service_Time, "called"); |
| 109 | 142 | ||
| 110 | const auto& core_timing = Core::System::GetInstance().CoreTiming(); | 143 | const auto time_point = GetCurrentTimePoint(); |
| 111 | const auto ms = Core::Timing::CyclesToMs(core_timing.GetTicks()); | 144 | // TODO(ogniK): This should be updated periodically |
| 112 | const SteadyClockTimePoint steady_clock_time_point{static_cast<u64_le>(ms.count() / 1000), | 145 | shared_memory->SetStandardSteadyClockTimepoint(time_point); |
| 113 | {}}; | 146 | |
| 114 | IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2}; | 147 | IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2}; |
| 115 | rb.Push(RESULT_SUCCESS); | 148 | rb.Push(RESULT_SUCCESS); |
| 116 | rb.PushRaw(steady_clock_time_point); | 149 | rb.PushRaw(time_point); |
| 117 | } | 150 | } |
| 151 | |||
| 152 | SteadyClockTimePoint GetCurrentTimePoint() const { | ||
| 153 | const auto& core_timing = Core::System::GetInstance().CoreTiming(); | ||
| 154 | const auto ms = Core::Timing::CyclesToMs(core_timing.GetTicks()); | ||
| 155 | return {static_cast<u64_le>(ms.count() / 1000), {}}; | ||
| 156 | } | ||
| 157 | |||
| 158 | std::shared_ptr<SharedMemory> shared_memory; | ||
| 118 | }; | 159 | }; |
| 119 | 160 | ||
| 120 | class ITimeZoneService final : public ServiceFramework<ITimeZoneService> { | 161 | class ITimeZoneService final : public ServiceFramework<ITimeZoneService> { |
| @@ -233,7 +274,7 @@ void Module::Interface::GetStandardUserSystemClock(Kernel::HLERequestContext& ct | |||
| 233 | 274 | ||
| 234 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 275 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 235 | rb.Push(RESULT_SUCCESS); | 276 | rb.Push(RESULT_SUCCESS); |
| 236 | rb.PushIpcInterface<ISystemClock>(); | 277 | rb.PushIpcInterface<ISystemClock>(shared_memory, ClockContextType::StandardUserSystem); |
| 237 | } | 278 | } |
| 238 | 279 | ||
| 239 | void Module::Interface::GetStandardNetworkSystemClock(Kernel::HLERequestContext& ctx) { | 280 | void Module::Interface::GetStandardNetworkSystemClock(Kernel::HLERequestContext& ctx) { |
| @@ -241,7 +282,7 @@ void Module::Interface::GetStandardNetworkSystemClock(Kernel::HLERequestContext& | |||
| 241 | 282 | ||
| 242 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 283 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 243 | rb.Push(RESULT_SUCCESS); | 284 | rb.Push(RESULT_SUCCESS); |
| 244 | rb.PushIpcInterface<ISystemClock>(); | 285 | rb.PushIpcInterface<ISystemClock>(shared_memory, ClockContextType::StandardNetworkSystem); |
| 245 | } | 286 | } |
| 246 | 287 | ||
| 247 | void Module::Interface::GetStandardSteadyClock(Kernel::HLERequestContext& ctx) { | 288 | void Module::Interface::GetStandardSteadyClock(Kernel::HLERequestContext& ctx) { |
| @@ -249,7 +290,7 @@ void Module::Interface::GetStandardSteadyClock(Kernel::HLERequestContext& ctx) { | |||
| 249 | 290 | ||
| 250 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 291 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 251 | rb.Push(RESULT_SUCCESS); | 292 | rb.Push(RESULT_SUCCESS); |
| 252 | rb.PushIpcInterface<ISteadyClock>(); | 293 | rb.PushIpcInterface<ISteadyClock>(shared_memory); |
| 253 | } | 294 | } |
| 254 | 295 | ||
| 255 | void Module::Interface::GetTimeZoneService(Kernel::HLERequestContext& ctx) { | 296 | void Module::Interface::GetTimeZoneService(Kernel::HLERequestContext& ctx) { |
| @@ -265,7 +306,7 @@ void Module::Interface::GetStandardLocalSystemClock(Kernel::HLERequestContext& c | |||
| 265 | 306 | ||
| 266 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 307 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 267 | rb.Push(RESULT_SUCCESS); | 308 | rb.Push(RESULT_SUCCESS); |
| 268 | rb.PushIpcInterface<ISystemClock>(); | 309 | rb.PushIpcInterface<ISystemClock>(shared_memory, ClockContextType::StandardLocalSystem); |
| 269 | } | 310 | } |
| 270 | 311 | ||
| 271 | void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) { | 312 | void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) { |
| @@ -333,16 +374,52 @@ void Module::Interface::CalculateStandardUserSystemClockDifferenceByUser( | |||
| 333 | rb.PushRaw<u64>(difference); | 374 | rb.PushRaw<u64>(difference); |
| 334 | } | 375 | } |
| 335 | 376 | ||
| 336 | Module::Interface::Interface(std::shared_ptr<Module> time, const char* name) | 377 | void Module::Interface::GetSharedMemoryNativeHandle(Kernel::HLERequestContext& ctx) { |
| 337 | : ServiceFramework(name), time(std::move(time)) {} | 378 | LOG_DEBUG(Service_Time, "called"); |
| 379 | IPC::ResponseBuilder rb{ctx, 2, 1}; | ||
| 380 | rb.Push(RESULT_SUCCESS); | ||
| 381 | rb.PushCopyObjects(shared_memory->GetSharedMemoryHolder()); | ||
| 382 | } | ||
| 383 | |||
| 384 | void Module::Interface::IsStandardUserSystemClockAutomaticCorrectionEnabled( | ||
| 385 | Kernel::HLERequestContext& ctx) { | ||
| 386 | // ogniK(TODO): When clock contexts are implemented, the value should be read from the context | ||
| 387 | // instead of our shared memory holder | ||
| 388 | LOG_DEBUG(Service_Time, "called"); | ||
| 389 | |||
| 390 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 391 | rb.Push(RESULT_SUCCESS); | ||
| 392 | rb.Push<u8>(shared_memory->GetStandardUserSystemClockAutomaticCorrectionEnabled()); | ||
| 393 | } | ||
| 394 | |||
| 395 | void Module::Interface::SetStandardUserSystemClockAutomaticCorrectionEnabled( | ||
| 396 | Kernel::HLERequestContext& ctx) { | ||
| 397 | IPC::RequestParser rp{ctx}; | ||
| 398 | const auto enabled = rp.Pop<u8>(); | ||
| 399 | |||
| 400 | LOG_WARNING(Service_Time, "(PARTIAL IMPLEMENTATION) called"); | ||
| 401 | |||
| 402 | // TODO(ogniK): Update clock contexts and correct timespans | ||
| 403 | |||
| 404 | shared_memory->SetStandardUserSystemClockAutomaticCorrectionEnabled(enabled > 0); | ||
| 405 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 406 | rb.Push(RESULT_SUCCESS); | ||
| 407 | } | ||
| 408 | |||
| 409 | Module::Interface::Interface(std::shared_ptr<Module> time, | ||
| 410 | std::shared_ptr<SharedMemory> shared_memory, const char* name) | ||
| 411 | : ServiceFramework(name), time(std::move(time)), shared_memory(std::move(shared_memory)) {} | ||
| 338 | 412 | ||
| 339 | Module::Interface::~Interface() = default; | 413 | Module::Interface::~Interface() = default; |
| 340 | 414 | ||
| 341 | void InstallInterfaces(SM::ServiceManager& service_manager) { | 415 | void InstallInterfaces(Core::System& system) { |
| 342 | auto time = std::make_shared<Module>(); | 416 | auto time = std::make_shared<Module>(); |
| 343 | std::make_shared<Time>(time, "time:a")->InstallAsService(service_manager); | 417 | auto shared_mem = std::make_shared<SharedMemory>(system); |
| 344 | std::make_shared<Time>(time, "time:s")->InstallAsService(service_manager); | 418 | |
| 345 | std::make_shared<Time>(time, "time:u")->InstallAsService(service_manager); | 419 | std::make_shared<Time>(time, shared_mem, "time:a")->InstallAsService(system.ServiceManager()); |
| 420 | std::make_shared<Time>(time, shared_mem, "time:s")->InstallAsService(system.ServiceManager()); | ||
| 421 | std::make_shared<Time>(std::move(time), shared_mem, "time:u") | ||
| 422 | ->InstallAsService(system.ServiceManager()); | ||
| 346 | } | 423 | } |
| 347 | 424 | ||
| 348 | } // namespace Service::Time | 425 | } // namespace Service::Time |
diff --git a/src/core/hle/service/time/time.h b/src/core/hle/service/time/time.h index f11affe95..e0708f856 100644 --- a/src/core/hle/service/time/time.h +++ b/src/core/hle/service/time/time.h | |||
| @@ -10,6 +10,8 @@ | |||
| 10 | 10 | ||
| 11 | namespace Service::Time { | 11 | namespace Service::Time { |
| 12 | 12 | ||
| 13 | class SharedMemory; | ||
| 14 | |||
| 13 | struct LocationName { | 15 | struct LocationName { |
| 14 | std::array<u8, 0x24> name; | 16 | std::array<u8, 0x24> name; |
| 15 | }; | 17 | }; |
| @@ -77,7 +79,8 @@ class Module final { | |||
| 77 | public: | 79 | public: |
| 78 | class Interface : public ServiceFramework<Interface> { | 80 | class Interface : public ServiceFramework<Interface> { |
| 79 | public: | 81 | public: |
| 80 | explicit Interface(std::shared_ptr<Module> time, const char* name); | 82 | explicit Interface(std::shared_ptr<Module> time, |
| 83 | std::shared_ptr<SharedMemory> shared_memory, const char* name); | ||
| 81 | ~Interface() override; | 84 | ~Interface() override; |
| 82 | 85 | ||
| 83 | void GetStandardUserSystemClock(Kernel::HLERequestContext& ctx); | 86 | void GetStandardUserSystemClock(Kernel::HLERequestContext& ctx); |
| @@ -87,13 +90,17 @@ public: | |||
| 87 | void GetStandardLocalSystemClock(Kernel::HLERequestContext& ctx); | 90 | void GetStandardLocalSystemClock(Kernel::HLERequestContext& ctx); |
| 88 | void GetClockSnapshot(Kernel::HLERequestContext& ctx); | 91 | void GetClockSnapshot(Kernel::HLERequestContext& ctx); |
| 89 | void CalculateStandardUserSystemClockDifferenceByUser(Kernel::HLERequestContext& ctx); | 92 | void CalculateStandardUserSystemClockDifferenceByUser(Kernel::HLERequestContext& ctx); |
| 93 | void GetSharedMemoryNativeHandle(Kernel::HLERequestContext& ctx); | ||
| 94 | void IsStandardUserSystemClockAutomaticCorrectionEnabled(Kernel::HLERequestContext& ctx); | ||
| 95 | void SetStandardUserSystemClockAutomaticCorrectionEnabled(Kernel::HLERequestContext& ctx); | ||
| 90 | 96 | ||
| 91 | protected: | 97 | protected: |
| 92 | std::shared_ptr<Module> time; | 98 | std::shared_ptr<Module> time; |
| 99 | std::shared_ptr<SharedMemory> shared_memory; | ||
| 93 | }; | 100 | }; |
| 94 | }; | 101 | }; |
| 95 | 102 | ||
| 96 | /// Registers all Time services with the specified service manager. | 103 | /// Registers all Time services with the specified service manager. |
| 97 | void InstallInterfaces(SM::ServiceManager& service_manager); | 104 | void InstallInterfaces(Core::System& system); |
| 98 | 105 | ||
| 99 | } // namespace Service::Time | 106 | } // namespace Service::Time |
diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp new file mode 100644 index 000000000..bfc81b83c --- /dev/null +++ b/src/core/hle/service/time/time_sharedmemory.cpp | |||
| @@ -0,0 +1,68 @@ | |||
| 1 | // Copyright 2019 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "core/core.h" | ||
| 6 | #include "core/hle/service/time/time_sharedmemory.h" | ||
| 7 | |||
| 8 | namespace Service::Time { | ||
| 9 | const std::size_t SHARED_MEMORY_SIZE = 0x1000; | ||
| 10 | |||
| 11 | SharedMemory::SharedMemory(Core::System& system) : system(system) { | ||
| 12 | shared_memory_holder = Kernel::SharedMemory::Create( | ||
| 13 | system.Kernel(), nullptr, SHARED_MEMORY_SIZE, Kernel::MemoryPermission::ReadWrite, | ||
| 14 | Kernel::MemoryPermission::Read, 0, Kernel::MemoryRegion::BASE, "Time:SharedMemory"); | ||
| 15 | |||
| 16 | // Seems static from 1.0.0 -> 8.1.0. Specific games seem to check this value and crash | ||
| 17 | // if it's set to anything else | ||
| 18 | shared_memory_format.format_version = 14; | ||
| 19 | std::memcpy(shared_memory_holder->GetPointer(), &shared_memory_format, sizeof(Format)); | ||
| 20 | } | ||
| 21 | |||
| 22 | SharedMemory::~SharedMemory() = default; | ||
| 23 | |||
| 24 | Kernel::SharedPtr<Kernel::SharedMemory> SharedMemory::GetSharedMemoryHolder() const { | ||
| 25 | return shared_memory_holder; | ||
| 26 | } | ||
| 27 | |||
| 28 | void SharedMemory::SetStandardSteadyClockTimepoint(const SteadyClockTimePoint& timepoint) { | ||
| 29 | shared_memory_format.standard_steady_clock_timepoint.StoreData( | ||
| 30 | shared_memory_holder->GetPointer(), timepoint); | ||
| 31 | } | ||
| 32 | |||
| 33 | void SharedMemory::SetStandardLocalSystemClockContext(const SystemClockContext& context) { | ||
| 34 | shared_memory_format.standard_local_system_clock_context.StoreData( | ||
| 35 | shared_memory_holder->GetPointer(), context); | ||
| 36 | } | ||
| 37 | |||
| 38 | void SharedMemory::SetStandardNetworkSystemClockContext(const SystemClockContext& context) { | ||
| 39 | shared_memory_format.standard_network_system_clock_context.StoreData( | ||
| 40 | shared_memory_holder->GetPointer(), context); | ||
| 41 | } | ||
| 42 | |||
| 43 | void SharedMemory::SetStandardUserSystemClockAutomaticCorrectionEnabled(bool enabled) { | ||
| 44 | shared_memory_format.standard_user_system_clock_automatic_correction.StoreData( | ||
| 45 | shared_memory_holder->GetPointer(), enabled); | ||
| 46 | } | ||
| 47 | |||
| 48 | SteadyClockTimePoint SharedMemory::GetStandardSteadyClockTimepoint() { | ||
| 49 | return shared_memory_format.standard_steady_clock_timepoint.ReadData( | ||
| 50 | shared_memory_holder->GetPointer()); | ||
| 51 | } | ||
| 52 | |||
| 53 | SystemClockContext SharedMemory::GetStandardLocalSystemClockContext() { | ||
| 54 | return shared_memory_format.standard_local_system_clock_context.ReadData( | ||
| 55 | shared_memory_holder->GetPointer()); | ||
| 56 | } | ||
| 57 | |||
| 58 | SystemClockContext SharedMemory::GetStandardNetworkSystemClockContext() { | ||
| 59 | return shared_memory_format.standard_network_system_clock_context.ReadData( | ||
| 60 | shared_memory_holder->GetPointer()); | ||
| 61 | } | ||
| 62 | |||
| 63 | bool SharedMemory::GetStandardUserSystemClockAutomaticCorrectionEnabled() { | ||
| 64 | return shared_memory_format.standard_user_system_clock_automatic_correction.ReadData( | ||
| 65 | shared_memory_holder->GetPointer()); | ||
| 66 | } | ||
| 67 | |||
| 68 | } // namespace Service::Time | ||
diff --git a/src/core/hle/service/time/time_sharedmemory.h b/src/core/hle/service/time/time_sharedmemory.h new file mode 100644 index 000000000..cb8253541 --- /dev/null +++ b/src/core/hle/service/time/time_sharedmemory.h | |||
| @@ -0,0 +1,74 @@ | |||
| 1 | // Copyright 2019 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "core/hle/kernel/shared_memory.h" | ||
| 9 | #include "core/hle/service/time/time.h" | ||
| 10 | |||
| 11 | namespace Service::Time { | ||
| 12 | class SharedMemory { | ||
| 13 | public: | ||
| 14 | explicit SharedMemory(Core::System& system); | ||
| 15 | ~SharedMemory(); | ||
| 16 | |||
| 17 | // Return the shared memory handle | ||
| 18 | Kernel::SharedPtr<Kernel::SharedMemory> GetSharedMemoryHolder() const; | ||
| 19 | |||
| 20 | // Set memory barriers in shared memory and update them | ||
| 21 | void SetStandardSteadyClockTimepoint(const SteadyClockTimePoint& timepoint); | ||
| 22 | void SetStandardLocalSystemClockContext(const SystemClockContext& context); | ||
| 23 | void SetStandardNetworkSystemClockContext(const SystemClockContext& context); | ||
| 24 | void SetStandardUserSystemClockAutomaticCorrectionEnabled(bool enabled); | ||
| 25 | |||
| 26 | // Pull from memory barriers in the shared memory | ||
| 27 | SteadyClockTimePoint GetStandardSteadyClockTimepoint(); | ||
| 28 | SystemClockContext GetStandardLocalSystemClockContext(); | ||
| 29 | SystemClockContext GetStandardNetworkSystemClockContext(); | ||
| 30 | bool GetStandardUserSystemClockAutomaticCorrectionEnabled(); | ||
| 31 | |||
| 32 | // TODO(ogniK): We have to properly simulate memory barriers, how are we going to do this? | ||
| 33 | template <typename T, std::size_t Offset> | ||
| 34 | struct MemoryBarrier { | ||
| 35 | static_assert(std::is_trivially_constructible_v<T>, "T must be trivially constructible"); | ||
| 36 | u32_le read_attempt{}; | ||
| 37 | std::array<T, 2> data{}; | ||
| 38 | |||
| 39 | // These are not actually memory barriers at the moment as we don't have multicore and all | ||
| 40 | // HLE is mutexed. This will need to properly be implemented when we start updating the time | ||
| 41 | // points on threads. As of right now, we'll be updating both values synchronously and just | ||
| 42 | // incrementing the read_attempt to indicate that we waited. | ||
| 43 | void StoreData(u8* shared_memory, T data_to_store) { | ||
| 44 | std::memcpy(this, shared_memory + Offset, sizeof(*this)); | ||
| 45 | read_attempt++; | ||
| 46 | data[read_attempt & 1] = data_to_store; | ||
| 47 | std::memcpy(shared_memory + Offset, this, sizeof(*this)); | ||
| 48 | } | ||
| 49 | |||
| 50 | // For reading we're just going to read the last stored value. If there was no value stored | ||
| 51 | // it will just end up reading an empty value as intended. | ||
| 52 | T ReadData(u8* shared_memory) { | ||
| 53 | std::memcpy(this, shared_memory + Offset, sizeof(*this)); | ||
| 54 | return data[(read_attempt - 1) & 1]; | ||
| 55 | } | ||
| 56 | }; | ||
| 57 | |||
| 58 | // Shared memory format | ||
| 59 | struct Format { | ||
| 60 | MemoryBarrier<SteadyClockTimePoint, 0x0> standard_steady_clock_timepoint; | ||
| 61 | MemoryBarrier<SystemClockContext, 0x38> standard_local_system_clock_context; | ||
| 62 | MemoryBarrier<SystemClockContext, 0x80> standard_network_system_clock_context; | ||
| 63 | MemoryBarrier<bool, 0xc8> standard_user_system_clock_automatic_correction; | ||
| 64 | u32_le format_version; | ||
| 65 | }; | ||
| 66 | static_assert(sizeof(Format) == 0xd8, "Format is an invalid size"); | ||
| 67 | |||
| 68 | private: | ||
| 69 | Kernel::SharedPtr<Kernel::SharedMemory> shared_memory_holder{}; | ||
| 70 | Core::System& system; | ||
| 71 | Format shared_memory_format{}; | ||
| 72 | }; | ||
| 73 | |||
| 74 | } // namespace Service::Time | ||
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index f1fa6ccd1..199b30635 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include "core/hle/kernel/readable_event.h" | 21 | #include "core/hle/kernel/readable_event.h" |
| 22 | #include "core/hle/kernel/thread.h" | 22 | #include "core/hle/kernel/thread.h" |
| 23 | #include "core/hle/kernel/writable_event.h" | 23 | #include "core/hle/kernel/writable_event.h" |
| 24 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 24 | #include "core/hle/service/nvdrv/nvdrv.h" | 25 | #include "core/hle/service/nvdrv/nvdrv.h" |
| 25 | #include "core/hle/service/nvflinger/buffer_queue.h" | 26 | #include "core/hle/service/nvflinger/buffer_queue.h" |
| 26 | #include "core/hle/service/nvflinger/nvflinger.h" | 27 | #include "core/hle/service/nvflinger/nvflinger.h" |
| @@ -328,32 +329,22 @@ public: | |||
| 328 | Data data; | 329 | Data data; |
| 329 | }; | 330 | }; |
| 330 | 331 | ||
| 331 | struct BufferProducerFence { | ||
| 332 | u32 is_valid; | ||
| 333 | std::array<Nvidia::IoctlFence, 4> fences; | ||
| 334 | }; | ||
| 335 | static_assert(sizeof(BufferProducerFence) == 36, "BufferProducerFence has wrong size"); | ||
| 336 | |||
| 337 | class IGBPDequeueBufferResponseParcel : public Parcel { | 332 | class IGBPDequeueBufferResponseParcel : public Parcel { |
| 338 | public: | 333 | public: |
| 339 | explicit IGBPDequeueBufferResponseParcel(u32 slot) : slot(slot) {} | 334 | explicit IGBPDequeueBufferResponseParcel(u32 slot, Service::Nvidia::MultiFence& multi_fence) |
| 335 | : slot(slot), multi_fence(multi_fence) {} | ||
| 340 | ~IGBPDequeueBufferResponseParcel() override = default; | 336 | ~IGBPDequeueBufferResponseParcel() override = default; |
| 341 | 337 | ||
| 342 | protected: | 338 | protected: |
| 343 | void SerializeData() override { | 339 | void SerializeData() override { |
| 344 | // TODO(Subv): Find out how this Fence is used. | ||
| 345 | BufferProducerFence fence = {}; | ||
| 346 | fence.is_valid = 1; | ||
| 347 | for (auto& fence_ : fence.fences) | ||
| 348 | fence_.id = -1; | ||
| 349 | |||
| 350 | Write(slot); | 340 | Write(slot); |
| 351 | Write<u32_le>(1); | 341 | Write<u32_le>(1); |
| 352 | WriteObject(fence); | 342 | WriteObject(multi_fence); |
| 353 | Write<u32_le>(0); | 343 | Write<u32_le>(0); |
| 354 | } | 344 | } |
| 355 | 345 | ||
| 356 | u32_le slot; | 346 | u32_le slot; |
| 347 | Service::Nvidia::MultiFence multi_fence; | ||
| 357 | }; | 348 | }; |
| 358 | 349 | ||
| 359 | class IGBPRequestBufferRequestParcel : public Parcel { | 350 | class IGBPRequestBufferRequestParcel : public Parcel { |
| @@ -400,12 +391,6 @@ public: | |||
| 400 | data = Read<Data>(); | 391 | data = Read<Data>(); |
| 401 | } | 392 | } |
| 402 | 393 | ||
| 403 | struct Fence { | ||
| 404 | u32_le id; | ||
| 405 | u32_le value; | ||
| 406 | }; | ||
| 407 | static_assert(sizeof(Fence) == 8, "Fence has wrong size"); | ||
| 408 | |||
| 409 | struct Data { | 394 | struct Data { |
| 410 | u32_le slot; | 395 | u32_le slot; |
| 411 | INSERT_PADDING_WORDS(3); | 396 | INSERT_PADDING_WORDS(3); |
| @@ -418,15 +403,15 @@ public: | |||
| 418 | s32_le scaling_mode; | 403 | s32_le scaling_mode; |
| 419 | NVFlinger::BufferQueue::BufferTransformFlags transform; | 404 | NVFlinger::BufferQueue::BufferTransformFlags transform; |
| 420 | u32_le sticky_transform; | 405 | u32_le sticky_transform; |
| 421 | INSERT_PADDING_WORDS(2); | 406 | INSERT_PADDING_WORDS(1); |
| 422 | u32_le fence_is_valid; | 407 | u32_le swap_interval; |
| 423 | std::array<Fence, 2> fences; | 408 | Service::Nvidia::MultiFence multi_fence; |
| 424 | 409 | ||
| 425 | Common::Rectangle<int> GetCropRect() const { | 410 | Common::Rectangle<int> GetCropRect() const { |
| 426 | return {crop_left, crop_top, crop_right, crop_bottom}; | 411 | return {crop_left, crop_top, crop_right, crop_bottom}; |
| 427 | } | 412 | } |
| 428 | }; | 413 | }; |
| 429 | static_assert(sizeof(Data) == 80, "ParcelData has wrong size"); | 414 | static_assert(sizeof(Data) == 96, "ParcelData has wrong size"); |
| 430 | 415 | ||
| 431 | Data data; | 416 | Data data; |
| 432 | }; | 417 | }; |
| @@ -547,11 +532,11 @@ private: | |||
| 547 | IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; | 532 | IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; |
| 548 | const u32 width{request.data.width}; | 533 | const u32 width{request.data.width}; |
| 549 | const u32 height{request.data.height}; | 534 | const u32 height{request.data.height}; |
| 550 | std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height); | 535 | auto result = buffer_queue.DequeueBuffer(width, height); |
| 551 | 536 | ||
| 552 | if (slot) { | 537 | if (result) { |
| 553 | // Buffer is available | 538 | // Buffer is available |
| 554 | IGBPDequeueBufferResponseParcel response{*slot}; | 539 | IGBPDequeueBufferResponseParcel response{result->first, *result->second}; |
| 555 | ctx.WriteBuffer(response.Serialize()); | 540 | ctx.WriteBuffer(response.Serialize()); |
| 556 | } else { | 541 | } else { |
| 557 | // Wait the current thread until a buffer becomes available | 542 | // Wait the current thread until a buffer becomes available |
| @@ -561,10 +546,10 @@ private: | |||
| 561 | Kernel::ThreadWakeupReason reason) { | 546 | Kernel::ThreadWakeupReason reason) { |
| 562 | // Repeat TransactParcel DequeueBuffer when a buffer is available | 547 | // Repeat TransactParcel DequeueBuffer when a buffer is available |
| 563 | auto& buffer_queue = nv_flinger->FindBufferQueue(id); | 548 | auto& buffer_queue = nv_flinger->FindBufferQueue(id); |
| 564 | std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height); | 549 | auto result = buffer_queue.DequeueBuffer(width, height); |
| 565 | ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer."); | 550 | ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer."); |
| 566 | 551 | ||
| 567 | IGBPDequeueBufferResponseParcel response{*slot}; | 552 | IGBPDequeueBufferResponseParcel response{result->first, *result->second}; |
| 568 | ctx.WriteBuffer(response.Serialize()); | 553 | ctx.WriteBuffer(response.Serialize()); |
| 569 | IPC::ResponseBuilder rb{ctx, 2}; | 554 | IPC::ResponseBuilder rb{ctx, 2}; |
| 570 | rb.Push(RESULT_SUCCESS); | 555 | rb.Push(RESULT_SUCCESS); |
| @@ -582,7 +567,8 @@ private: | |||
| 582 | IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; | 567 | IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; |
| 583 | 568 | ||
| 584 | buffer_queue.QueueBuffer(request.data.slot, request.data.transform, | 569 | buffer_queue.QueueBuffer(request.data.slot, request.data.transform, |
| 585 | request.data.GetCropRect()); | 570 | request.data.GetCropRect(), request.data.swap_interval, |
| 571 | request.data.multi_fence); | ||
| 586 | 572 | ||
| 587 | IGBPQueueBufferResponseParcel response{1280, 720}; | 573 | IGBPQueueBufferResponseParcel response{1280, 720}; |
| 588 | ctx.WriteBuffer(response.Serialize()); | 574 | ctx.WriteBuffer(response.Serialize()); |
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index 6d4b02375..f1795fdd6 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp | |||
| @@ -295,7 +295,7 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) { | |||
| 295 | } | 295 | } |
| 296 | } | 296 | } |
| 297 | 297 | ||
| 298 | std::vector<u8> program_image(total_image_size); | 298 | Kernel::PhysicalMemory program_image(total_image_size); |
| 299 | std::size_t current_image_position = 0; | 299 | std::size_t current_image_position = 0; |
| 300 | 300 | ||
| 301 | Kernel::CodeSet codeset; | 301 | Kernel::CodeSet codeset; |
diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp index 70051c13a..474b55cb1 100644 --- a/src/core/loader/kip.cpp +++ b/src/core/loader/kip.cpp | |||
| @@ -69,7 +69,7 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) { | |||
| 69 | 69 | ||
| 70 | const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); | 70 | const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); |
| 71 | Kernel::CodeSet codeset; | 71 | Kernel::CodeSet codeset; |
| 72 | std::vector<u8> program_image; | 72 | Kernel::PhysicalMemory program_image; |
| 73 | 73 | ||
| 74 | const auto load_segment = [&program_image](Kernel::CodeSet::Segment& segment, | 74 | const auto load_segment = [&program_image](Kernel::CodeSet::Segment& segment, |
| 75 | const std::vector<u8>& data, u32 offset) { | 75 | const std::vector<u8>& data, u32 offset) { |
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 6a0ca389b..3a5361fdd 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp | |||
| @@ -143,7 +143,7 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data, | |||
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | // Build program image | 145 | // Build program image |
| 146 | std::vector<u8> program_image(PageAlignSize(nro_header.file_size)); | 146 | Kernel::PhysicalMemory program_image(PageAlignSize(nro_header.file_size)); |
| 147 | std::memcpy(program_image.data(), data.data(), program_image.size()); | 147 | std::memcpy(program_image.data(), data.data(), program_image.size()); |
| 148 | if (program_image.size() != PageAlignSize(nro_header.file_size)) { | 148 | if (program_image.size() != PageAlignSize(nro_header.file_size)) { |
| 149 | return {}; | 149 | return {}; |
| @@ -258,6 +258,15 @@ ResultStatus AppLoader_NRO::ReadTitle(std::string& title) { | |||
| 258 | return ResultStatus::Success; | 258 | return ResultStatus::Success; |
| 259 | } | 259 | } |
| 260 | 260 | ||
| 261 | ResultStatus AppLoader_NRO::ReadControlData(FileSys::NACP& control) { | ||
| 262 | if (nacp == nullptr) { | ||
| 263 | return ResultStatus::ErrorNoControl; | ||
| 264 | } | ||
| 265 | |||
| 266 | control = *nacp; | ||
| 267 | return ResultStatus::Success; | ||
| 268 | } | ||
| 269 | |||
| 261 | bool AppLoader_NRO::IsRomFSUpdatable() const { | 270 | bool AppLoader_NRO::IsRomFSUpdatable() const { |
| 262 | return false; | 271 | return false; |
| 263 | } | 272 | } |
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h index 1ffdae805..71811bc29 100644 --- a/src/core/loader/nro.h +++ b/src/core/loader/nro.h | |||
| @@ -43,6 +43,7 @@ public: | |||
| 43 | ResultStatus ReadProgramId(u64& out_program_id) override; | 43 | ResultStatus ReadProgramId(u64& out_program_id) override; |
| 44 | ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override; | 44 | ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override; |
| 45 | ResultStatus ReadTitle(std::string& title) override; | 45 | ResultStatus ReadTitle(std::string& title) override; |
| 46 | ResultStatus ReadControlData(FileSys::NACP& control) override; | ||
| 46 | bool IsRomFSUpdatable() const override; | 47 | bool IsRomFSUpdatable() const override; |
| 47 | 48 | ||
| 48 | private: | 49 | private: |
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index 29311404a..70c90109f 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp | |||
| @@ -89,7 +89,7 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, | |||
| 89 | 89 | ||
| 90 | // Build program image | 90 | // Build program image |
| 91 | Kernel::CodeSet codeset; | 91 | Kernel::CodeSet codeset; |
| 92 | std::vector<u8> program_image; | 92 | Kernel::PhysicalMemory program_image; |
| 93 | for (std::size_t i = 0; i < nso_header.segments.size(); ++i) { | 93 | for (std::size_t i = 0; i < nso_header.segments.size(); ++i) { |
| 94 | std::vector<u8> data = | 94 | std::vector<u8> data = |
| 95 | file.ReadBytes(nso_header.segments_compressed_size[i], nso_header.segments[i].offset); | 95 | file.ReadBytes(nso_header.segments_compressed_size[i], nso_header.segments[i].offset); |
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index f18f6226b..8555691c0 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -16,11 +16,9 @@ | |||
| 16 | #include "core/core.h" | 16 | #include "core/core.h" |
| 17 | #include "core/hle/kernel/process.h" | 17 | #include "core/hle/kernel/process.h" |
| 18 | #include "core/hle/kernel/vm_manager.h" | 18 | #include "core/hle/kernel/vm_manager.h" |
| 19 | #include "core/hle/lock.h" | ||
| 20 | #include "core/memory.h" | 19 | #include "core/memory.h" |
| 21 | #include "core/memory_setup.h" | 20 | #include "core/memory_setup.h" |
| 22 | #include "video_core/gpu.h" | 21 | #include "video_core/gpu.h" |
| 23 | #include "video_core/renderer_base.h" | ||
| 24 | 22 | ||
| 25 | namespace Memory { | 23 | namespace Memory { |
| 26 | 24 | ||
diff --git a/src/core/memory.h b/src/core/memory.h index 04e2c5f1d..09008e1dd 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -8,10 +8,6 @@ | |||
| 8 | #include <string> | 8 | #include <string> |
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | 10 | ||
| 11 | namespace Common { | ||
| 12 | struct PageTable; | ||
| 13 | } | ||
| 14 | |||
| 15 | namespace Kernel { | 11 | namespace Kernel { |
| 16 | class Process; | 12 | class Process; |
| 17 | } | 13 | } |
diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index 774022569..cfe0771e2 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp | |||
| @@ -2,8 +2,13 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <ctime> | ||
| 5 | #include <fstream> | 6 | #include <fstream> |
| 7 | |||
| 8 | #include <fmt/chrono.h> | ||
| 9 | #include <fmt/format.h> | ||
| 6 | #include <json.hpp> | 10 | #include <json.hpp> |
| 11 | |||
| 7 | #include "common/file_util.h" | 12 | #include "common/file_util.h" |
| 8 | #include "common/hex_util.h" | 13 | #include "common/hex_util.h" |
| 9 | #include "common/scm_rev.h" | 14 | #include "common/scm_rev.h" |
| @@ -14,7 +19,6 @@ | |||
| 14 | #include "core/hle/result.h" | 19 | #include "core/hle/result.h" |
| 15 | #include "core/reporter.h" | 20 | #include "core/reporter.h" |
| 16 | #include "core/settings.h" | 21 | #include "core/settings.h" |
| 17 | #include "fmt/time.h" | ||
| 18 | 22 | ||
| 19 | namespace { | 23 | namespace { |
| 20 | 24 | ||
| @@ -30,9 +34,11 @@ std::string GetTimestamp() { | |||
| 30 | 34 | ||
| 31 | using namespace nlohmann; | 35 | using namespace nlohmann; |
| 32 | 36 | ||
| 33 | void SaveToFile(const json& json, const std::string& filename) { | 37 | void SaveToFile(json json, const std::string& filename) { |
| 34 | if (!FileUtil::CreateFullPath(filename)) | 38 | if (!FileUtil::CreateFullPath(filename)) { |
| 35 | LOG_ERROR(Core, "Failed to create path for '{}' to save report!", filename); | 39 | LOG_ERROR(Core, "Failed to create path for '{}' to save report!", filename); |
| 40 | return; | ||
| 41 | } | ||
| 36 | 42 | ||
| 37 | std::ofstream file( | 43 | std::ofstream file( |
| 38 | FileUtil::SanitizePath(filename, FileUtil::DirectorySeparator::PlatformDefault)); | 44 | FileUtil::SanitizePath(filename, FileUtil::DirectorySeparator::PlatformDefault)); |
| @@ -61,8 +67,11 @@ json GetReportCommonData(u64 title_id, ResultCode result, const std::string& tim | |||
| 61 | {"result_description", fmt::format("{:08X}", result.description.Value())}, | 67 | {"result_description", fmt::format("{:08X}", result.description.Value())}, |
| 62 | {"timestamp", timestamp}, | 68 | {"timestamp", timestamp}, |
| 63 | }; | 69 | }; |
| 64 | if (user_id.has_value()) | 70 | |
| 71 | if (user_id.has_value()) { | ||
| 65 | out["user_id"] = fmt::format("{:016X}{:016X}", (*user_id)[1], (*user_id)[0]); | 72 | out["user_id"] = fmt::format("{:016X}{:016X}", (*user_id)[1], (*user_id)[0]); |
| 73 | } | ||
| 74 | |||
| 66 | return out; | 75 | return out; |
| 67 | } | 76 | } |
| 68 | 77 | ||
| @@ -171,14 +180,14 @@ json GetHLERequestContextData(Kernel::HLERequestContext& ctx) { | |||
| 171 | out["buffer_descriptor_c"] = GetHLEBufferDescriptorData<false>(ctx.BufferDescriptorC()); | 180 | out["buffer_descriptor_c"] = GetHLEBufferDescriptorData<false>(ctx.BufferDescriptorC()); |
| 172 | out["buffer_descriptor_x"] = GetHLEBufferDescriptorData<true>(ctx.BufferDescriptorX()); | 181 | out["buffer_descriptor_x"] = GetHLEBufferDescriptorData<true>(ctx.BufferDescriptorX()); |
| 173 | 182 | ||
| 174 | return std::move(out); | 183 | return out; |
| 175 | } | 184 | } |
| 176 | 185 | ||
| 177 | } // Anonymous namespace | 186 | } // Anonymous namespace |
| 178 | 187 | ||
| 179 | namespace Core { | 188 | namespace Core { |
| 180 | 189 | ||
| 181 | Reporter::Reporter(Core::System& system) : system(system) {} | 190 | Reporter::Reporter(System& system) : system(system) {} |
| 182 | 191 | ||
| 183 | Reporter::~Reporter() = default; | 192 | Reporter::~Reporter() = default; |
| 184 | 193 | ||
| @@ -187,8 +196,9 @@ void Reporter::SaveCrashReport(u64 title_id, ResultCode result, u64 set_flags, u | |||
| 187 | const std::array<u64, 31>& registers, | 196 | const std::array<u64, 31>& registers, |
| 188 | const std::array<u64, 32>& backtrace, u32 backtrace_size, | 197 | const std::array<u64, 32>& backtrace, u32 backtrace_size, |
| 189 | const std::string& arch, u32 unk10) const { | 198 | const std::string& arch, u32 unk10) const { |
| 190 | if (!IsReportingEnabled()) | 199 | if (!IsReportingEnabled()) { |
| 191 | return; | 200 | return; |
| 201 | } | ||
| 192 | 202 | ||
| 193 | const auto timestamp = GetTimestamp(); | 203 | const auto timestamp = GetTimestamp(); |
| 194 | json out; | 204 | json out; |
| @@ -212,8 +222,9 @@ void Reporter::SaveCrashReport(u64 title_id, ResultCode result, u64 set_flags, u | |||
| 212 | 222 | ||
| 213 | void Reporter::SaveSvcBreakReport(u32 type, bool signal_debugger, u64 info1, u64 info2, | 223 | void Reporter::SaveSvcBreakReport(u32 type, bool signal_debugger, u64 info1, u64 info2, |
| 214 | std::optional<std::vector<u8>> resolved_buffer) const { | 224 | std::optional<std::vector<u8>> resolved_buffer) const { |
| 215 | if (!IsReportingEnabled()) | 225 | if (!IsReportingEnabled()) { |
| 216 | return; | 226 | return; |
| 227 | } | ||
| 217 | 228 | ||
| 218 | const auto timestamp = GetTimestamp(); | 229 | const auto timestamp = GetTimestamp(); |
| 219 | const auto title_id = system.CurrentProcess()->GetTitleID(); | 230 | const auto title_id = system.CurrentProcess()->GetTitleID(); |
| @@ -238,8 +249,9 @@ void Reporter::SaveSvcBreakReport(u32 type, bool signal_debugger, u64 info1, u64 | |||
| 238 | void Reporter::SaveUnimplementedFunctionReport(Kernel::HLERequestContext& ctx, u32 command_id, | 249 | void Reporter::SaveUnimplementedFunctionReport(Kernel::HLERequestContext& ctx, u32 command_id, |
| 239 | const std::string& name, | 250 | const std::string& name, |
| 240 | const std::string& service_name) const { | 251 | const std::string& service_name) const { |
| 241 | if (!IsReportingEnabled()) | 252 | if (!IsReportingEnabled()) { |
| 242 | return; | 253 | return; |
| 254 | } | ||
| 243 | 255 | ||
| 244 | const auto timestamp = GetTimestamp(); | 256 | const auto timestamp = GetTimestamp(); |
| 245 | const auto title_id = system.CurrentProcess()->GetTitleID(); | 257 | const auto title_id = system.CurrentProcess()->GetTitleID(); |
| @@ -259,8 +271,9 @@ void Reporter::SaveUnimplementedAppletReport( | |||
| 259 | u32 applet_id, u32 common_args_version, u32 library_version, u32 theme_color, | 271 | u32 applet_id, u32 common_args_version, u32 library_version, u32 theme_color, |
| 260 | bool startup_sound, u64 system_tick, std::vector<std::vector<u8>> normal_channel, | 272 | bool startup_sound, u64 system_tick, std::vector<std::vector<u8>> normal_channel, |
| 261 | std::vector<std::vector<u8>> interactive_channel) const { | 273 | std::vector<std::vector<u8>> interactive_channel) const { |
| 262 | if (!IsReportingEnabled()) | 274 | if (!IsReportingEnabled()) { |
| 263 | return; | 275 | return; |
| 276 | } | ||
| 264 | 277 | ||
| 265 | const auto timestamp = GetTimestamp(); | 278 | const auto timestamp = GetTimestamp(); |
| 266 | const auto title_id = system.CurrentProcess()->GetTitleID(); | 279 | const auto title_id = system.CurrentProcess()->GetTitleID(); |
| @@ -293,8 +306,9 @@ void Reporter::SaveUnimplementedAppletReport( | |||
| 293 | 306 | ||
| 294 | void Reporter::SavePlayReport(u64 title_id, u64 process_id, std::vector<std::vector<u8>> data, | 307 | void Reporter::SavePlayReport(u64 title_id, u64 process_id, std::vector<std::vector<u8>> data, |
| 295 | std::optional<u128> user_id) const { | 308 | std::optional<u128> user_id) const { |
| 296 | if (!IsReportingEnabled()) | 309 | if (!IsReportingEnabled()) { |
| 297 | return; | 310 | return; |
| 311 | } | ||
| 298 | 312 | ||
| 299 | const auto timestamp = GetTimestamp(); | 313 | const auto timestamp = GetTimestamp(); |
| 300 | json out; | 314 | json out; |
| @@ -316,8 +330,9 @@ void Reporter::SavePlayReport(u64 title_id, u64 process_id, std::vector<std::vec | |||
| 316 | void Reporter::SaveErrorReport(u64 title_id, ResultCode result, | 330 | void Reporter::SaveErrorReport(u64 title_id, ResultCode result, |
| 317 | std::optional<std::string> custom_text_main, | 331 | std::optional<std::string> custom_text_main, |
| 318 | std::optional<std::string> custom_text_detail) const { | 332 | std::optional<std::string> custom_text_detail) const { |
| 319 | if (!IsReportingEnabled()) | 333 | if (!IsReportingEnabled()) { |
| 320 | return; | 334 | return; |
| 335 | } | ||
| 321 | 336 | ||
| 322 | const auto timestamp = GetTimestamp(); | 337 | const auto timestamp = GetTimestamp(); |
| 323 | json out; | 338 | json out; |
| @@ -335,12 +350,31 @@ void Reporter::SaveErrorReport(u64 title_id, ResultCode result, | |||
| 335 | SaveToFile(std::move(out), GetPath("error_report", title_id, timestamp)); | 350 | SaveToFile(std::move(out), GetPath("error_report", title_id, timestamp)); |
| 336 | } | 351 | } |
| 337 | 352 | ||
| 338 | void Reporter::SaveUserReport() const { | 353 | void Reporter::SaveFilesystemAccessReport(Service::FileSystem::LogMode log_mode, |
| 354 | std::string log_message) const { | ||
| 339 | if (!IsReportingEnabled()) | 355 | if (!IsReportingEnabled()) |
| 340 | return; | 356 | return; |
| 341 | 357 | ||
| 342 | const auto timestamp = GetTimestamp(); | 358 | const auto timestamp = GetTimestamp(); |
| 343 | const auto title_id = system.CurrentProcess()->GetTitleID(); | 359 | const auto title_id = system.CurrentProcess()->GetTitleID(); |
| 360 | json out; | ||
| 361 | |||
| 362 | out["yuzu_version"] = GetYuzuVersionData(); | ||
| 363 | out["report_common"] = GetReportCommonData(title_id, RESULT_SUCCESS, timestamp); | ||
| 364 | |||
| 365 | out["log_mode"] = fmt::format("{:08X}", static_cast<u32>(log_mode)); | ||
| 366 | out["log_message"] = std::move(log_message); | ||
| 367 | |||
| 368 | SaveToFile(std::move(out), GetPath("filesystem_access_report", title_id, timestamp)); | ||
| 369 | } | ||
| 370 | |||
| 371 | void Reporter::SaveUserReport() const { | ||
| 372 | if (!IsReportingEnabled()) { | ||
| 373 | return; | ||
| 374 | } | ||
| 375 | |||
| 376 | const auto timestamp = GetTimestamp(); | ||
| 377 | const auto title_id = system.CurrentProcess()->GetTitleID(); | ||
| 344 | 378 | ||
| 345 | SaveToFile(GetFullDataAuto(timestamp, title_id, system), | 379 | SaveToFile(GetFullDataAuto(timestamp, title_id, system), |
| 346 | GetPath("user_report", title_id, timestamp)); | 380 | GetPath("user_report", title_id, timestamp)); |
diff --git a/src/core/reporter.h b/src/core/reporter.h index 3de19c0f7..44256de50 100644 --- a/src/core/reporter.h +++ b/src/core/reporter.h | |||
| @@ -4,7 +4,9 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 7 | #include <optional> | 8 | #include <optional> |
| 9 | #include <string> | ||
| 8 | #include <vector> | 10 | #include <vector> |
| 9 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 10 | 12 | ||
| @@ -14,11 +16,17 @@ namespace Kernel { | |||
| 14 | class HLERequestContext; | 16 | class HLERequestContext; |
| 15 | } // namespace Kernel | 17 | } // namespace Kernel |
| 16 | 18 | ||
| 19 | namespace Service::FileSystem { | ||
| 20 | enum class LogMode : u32; | ||
| 21 | } | ||
| 22 | |||
| 17 | namespace Core { | 23 | namespace Core { |
| 18 | 24 | ||
| 25 | class System; | ||
| 26 | |||
| 19 | class Reporter { | 27 | class Reporter { |
| 20 | public: | 28 | public: |
| 21 | explicit Reporter(Core::System& system); | 29 | explicit Reporter(System& system); |
| 22 | ~Reporter(); | 30 | ~Reporter(); |
| 23 | 31 | ||
| 24 | void SaveCrashReport(u64 title_id, ResultCode result, u64 set_flags, u64 entry_point, u64 sp, | 32 | void SaveCrashReport(u64 title_id, ResultCode result, u64 set_flags, u64 entry_point, u64 sp, |
| @@ -45,12 +53,15 @@ public: | |||
| 45 | std::optional<std::string> custom_text_main = {}, | 53 | std::optional<std::string> custom_text_main = {}, |
| 46 | std::optional<std::string> custom_text_detail = {}) const; | 54 | std::optional<std::string> custom_text_detail = {}) const; |
| 47 | 55 | ||
| 56 | void SaveFilesystemAccessReport(Service::FileSystem::LogMode log_mode, | ||
| 57 | std::string log_message) const; | ||
| 58 | |||
| 48 | void SaveUserReport() const; | 59 | void SaveUserReport() const; |
| 49 | 60 | ||
| 50 | private: | 61 | private: |
| 51 | bool IsReportingEnabled() const; | 62 | bool IsReportingEnabled() const; |
| 52 | 63 | ||
| 53 | Core::System& system; | 64 | System& system; |
| 54 | }; | 65 | }; |
| 55 | 66 | ||
| 56 | } // namespace Core | 67 | } // namespace Core |
diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 6d32ebea3..0dd1632ac 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp | |||
| @@ -85,7 +85,6 @@ void LogSettings() { | |||
| 85 | LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0)); | 85 | LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0)); |
| 86 | LogSetting("System_CurrentUser", Settings::values.current_user); | 86 | LogSetting("System_CurrentUser", Settings::values.current_user); |
| 87 | LogSetting("System_LanguageIndex", Settings::values.language_index); | 87 | LogSetting("System_LanguageIndex", Settings::values.language_index); |
| 88 | LogSetting("Core_UseCpuJit", Settings::values.use_cpu_jit); | ||
| 89 | LogSetting("Core_UseMultiCore", Settings::values.use_multi_core); | 88 | LogSetting("Core_UseMultiCore", Settings::values.use_multi_core); |
| 90 | LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); | 89 | LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); |
| 91 | LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); | 90 | LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); |
diff --git a/src/core/settings.h b/src/core/settings.h index e2ffcaaf7..6638ce8f9 100644 --- a/src/core/settings.h +++ b/src/core/settings.h | |||
| @@ -378,7 +378,6 @@ struct Values { | |||
| 378 | std::atomic_bool is_device_reload_pending{true}; | 378 | std::atomic_bool is_device_reload_pending{true}; |
| 379 | 379 | ||
| 380 | // Core | 380 | // Core |
| 381 | bool use_cpu_jit; | ||
| 382 | bool use_multi_core; | 381 | bool use_multi_core; |
| 383 | 382 | ||
| 384 | // Data Storage | 383 | // Data Storage |
| @@ -416,6 +415,7 @@ struct Values { | |||
| 416 | bool dump_exefs; | 415 | bool dump_exefs; |
| 417 | bool dump_nso; | 416 | bool dump_nso; |
| 418 | bool reporting_services; | 417 | bool reporting_services; |
| 418 | bool quest_flag; | ||
| 419 | 419 | ||
| 420 | // WebService | 420 | // WebService |
| 421 | bool enable_telemetry; | 421 | bool enable_telemetry; |
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 90d06830f..793d102d3 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp | |||
| @@ -168,7 +168,6 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { | |||
| 168 | AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id); | 168 | AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id); |
| 169 | AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching", | 169 | AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching", |
| 170 | Settings::values.enable_audio_stretching); | 170 | Settings::values.enable_audio_stretching); |
| 171 | AddField(Telemetry::FieldType::UserConfig, "Core_UseCpuJit", Settings::values.use_cpu_jit); | ||
| 172 | AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore", | 171 | AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore", |
| 173 | Settings::values.use_multi_core); | 172 | Settings::values.use_multi_core); |
| 174 | AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor", | 173 | AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor", |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f8b67cbe1..e2f85c5f1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -1,4 +1,7 @@ | |||
| 1 | add_library(video_core STATIC | 1 | add_library(video_core STATIC |
| 2 | buffer_cache/buffer_block.h | ||
| 3 | buffer_cache/buffer_cache.h | ||
| 4 | buffer_cache/map_interval.h | ||
| 2 | dma_pusher.cpp | 5 | dma_pusher.cpp |
| 3 | dma_pusher.h | 6 | dma_pusher.h |
| 4 | debug_utils/debug_utils.cpp | 7 | debug_utils/debug_utils.cpp |
| @@ -41,12 +44,10 @@ add_library(video_core STATIC | |||
| 41 | renderer_opengl/gl_buffer_cache.h | 44 | renderer_opengl/gl_buffer_cache.h |
| 42 | renderer_opengl/gl_device.cpp | 45 | renderer_opengl/gl_device.cpp |
| 43 | renderer_opengl/gl_device.h | 46 | renderer_opengl/gl_device.h |
| 44 | renderer_opengl/gl_global_cache.cpp | 47 | renderer_opengl/gl_framebuffer_cache.cpp |
| 45 | renderer_opengl/gl_global_cache.h | 48 | renderer_opengl/gl_framebuffer_cache.h |
| 46 | renderer_opengl/gl_rasterizer.cpp | 49 | renderer_opengl/gl_rasterizer.cpp |
| 47 | renderer_opengl/gl_rasterizer.h | 50 | renderer_opengl/gl_rasterizer.h |
| 48 | renderer_opengl/gl_rasterizer_cache.cpp | ||
| 49 | renderer_opengl/gl_rasterizer_cache.h | ||
| 50 | renderer_opengl/gl_resource_manager.cpp | 51 | renderer_opengl/gl_resource_manager.cpp |
| 51 | renderer_opengl/gl_resource_manager.h | 52 | renderer_opengl/gl_resource_manager.h |
| 52 | renderer_opengl/gl_sampler_cache.cpp | 53 | renderer_opengl/gl_sampler_cache.cpp |
| @@ -67,6 +68,8 @@ add_library(video_core STATIC | |||
| 67 | renderer_opengl/gl_state.h | 68 | renderer_opengl/gl_state.h |
| 68 | renderer_opengl/gl_stream_buffer.cpp | 69 | renderer_opengl/gl_stream_buffer.cpp |
| 69 | renderer_opengl/gl_stream_buffer.h | 70 | renderer_opengl/gl_stream_buffer.h |
| 71 | renderer_opengl/gl_texture_cache.cpp | ||
| 72 | renderer_opengl/gl_texture_cache.h | ||
| 70 | renderer_opengl/maxwell_to_gl.h | 73 | renderer_opengl/maxwell_to_gl.h |
| 71 | renderer_opengl/renderer_opengl.cpp | 74 | renderer_opengl/renderer_opengl.cpp |
| 72 | renderer_opengl/renderer_opengl.h | 75 | renderer_opengl/renderer_opengl.h |
| @@ -88,6 +91,7 @@ add_library(video_core STATIC | |||
| 88 | shader/decode/conversion.cpp | 91 | shader/decode/conversion.cpp |
| 89 | shader/decode/memory.cpp | 92 | shader/decode/memory.cpp |
| 90 | shader/decode/texture.cpp | 93 | shader/decode/texture.cpp |
| 94 | shader/decode/image.cpp | ||
| 91 | shader/decode/float_set_predicate.cpp | 95 | shader/decode/float_set_predicate.cpp |
| 92 | shader/decode/integer_set_predicate.cpp | 96 | shader/decode/integer_set_predicate.cpp |
| 93 | shader/decode/half_set_predicate.cpp | 97 | shader/decode/half_set_predicate.cpp |
| @@ -98,8 +102,11 @@ add_library(video_core STATIC | |||
| 98 | shader/decode/integer_set.cpp | 102 | shader/decode/integer_set.cpp |
| 99 | shader/decode/half_set.cpp | 103 | shader/decode/half_set.cpp |
| 100 | shader/decode/video.cpp | 104 | shader/decode/video.cpp |
| 105 | shader/decode/warp.cpp | ||
| 101 | shader/decode/xmad.cpp | 106 | shader/decode/xmad.cpp |
| 102 | shader/decode/other.cpp | 107 | shader/decode/other.cpp |
| 108 | shader/control_flow.cpp | ||
| 109 | shader/control_flow.h | ||
| 103 | shader/decode.cpp | 110 | shader/decode.cpp |
| 104 | shader/node_helper.cpp | 111 | shader/node_helper.cpp |
| 105 | shader/node_helper.h | 112 | shader/node_helper.h |
| @@ -109,6 +116,13 @@ add_library(video_core STATIC | |||
| 109 | shader/track.cpp | 116 | shader/track.cpp |
| 110 | surface.cpp | 117 | surface.cpp |
| 111 | surface.h | 118 | surface.h |
| 119 | texture_cache/surface_base.cpp | ||
| 120 | texture_cache/surface_base.h | ||
| 121 | texture_cache/surface_params.cpp | ||
| 122 | texture_cache/surface_params.h | ||
| 123 | texture_cache/surface_view.cpp | ||
| 124 | texture_cache/surface_view.h | ||
| 125 | texture_cache/texture_cache.h | ||
| 112 | textures/astc.cpp | 126 | textures/astc.cpp |
| 113 | textures/astc.h | 127 | textures/astc.h |
| 114 | textures/convert.cpp | 128 | textures/convert.cpp |
| @@ -116,8 +130,6 @@ add_library(video_core STATIC | |||
| 116 | textures/decoders.cpp | 130 | textures/decoders.cpp |
| 117 | textures/decoders.h | 131 | textures/decoders.h |
| 118 | textures/texture.h | 132 | textures/texture.h |
| 119 | texture_cache.cpp | ||
| 120 | texture_cache.h | ||
| 121 | video_core.cpp | 133 | video_core.cpp |
| 122 | video_core.h | 134 | video_core.h |
| 123 | ) | 135 | ) |
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h new file mode 100644 index 000000000..4b9193182 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_block.h | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <unordered_set> | ||
| 8 | #include <utility> | ||
| 9 | |||
| 10 | #include "common/alignment.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/gpu.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | class BufferBlock { | ||
| 17 | public: | ||
| 18 | bool Overlaps(const CacheAddr start, const CacheAddr end) const { | ||
| 19 | return (cache_addr < end) && (cache_addr_end > start); | ||
| 20 | } | ||
| 21 | |||
| 22 | bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { | ||
| 23 | return cache_addr <= other_start && other_end <= cache_addr_end; | ||
| 24 | } | ||
| 25 | |||
| 26 | u8* GetWritableHostPtr() const { | ||
| 27 | return FromCacheAddr(cache_addr); | ||
| 28 | } | ||
| 29 | |||
| 30 | u8* GetWritableHostPtr(std::size_t offset) const { | ||
| 31 | return FromCacheAddr(cache_addr + offset); | ||
| 32 | } | ||
| 33 | |||
| 34 | std::size_t GetOffset(const CacheAddr in_addr) { | ||
| 35 | return static_cast<std::size_t>(in_addr - cache_addr); | ||
| 36 | } | ||
| 37 | |||
| 38 | CacheAddr GetCacheAddr() const { | ||
| 39 | return cache_addr; | ||
| 40 | } | ||
| 41 | |||
| 42 | CacheAddr GetCacheAddrEnd() const { | ||
| 43 | return cache_addr_end; | ||
| 44 | } | ||
| 45 | |||
| 46 | void SetCacheAddr(const CacheAddr new_addr) { | ||
| 47 | cache_addr = new_addr; | ||
| 48 | cache_addr_end = new_addr + size; | ||
| 49 | } | ||
| 50 | |||
| 51 | std::size_t GetSize() const { | ||
| 52 | return size; | ||
| 53 | } | ||
| 54 | |||
| 55 | void SetEpoch(u64 new_epoch) { | ||
| 56 | epoch = new_epoch; | ||
| 57 | } | ||
| 58 | |||
| 59 | u64 GetEpoch() { | ||
| 60 | return epoch; | ||
| 61 | } | ||
| 62 | |||
| 63 | protected: | ||
| 64 | explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} { | ||
| 65 | SetCacheAddr(cache_addr); | ||
| 66 | } | ||
| 67 | ~BufferBlock() = default; | ||
| 68 | |||
| 69 | private: | ||
| 70 | CacheAddr cache_addr{}; | ||
| 71 | CacheAddr cache_addr_end{}; | ||
| 72 | std::size_t size{}; | ||
| 73 | u64 epoch{}; | ||
| 74 | }; | ||
| 75 | |||
| 76 | } // namespace VideoCommon | ||
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h new file mode 100644 index 000000000..2442ddfd6 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -0,0 +1,447 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <memory> | ||
| 9 | #include <mutex> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <unordered_set> | ||
| 12 | #include <utility> | ||
| 13 | #include <vector> | ||
| 14 | |||
| 15 | #include "common/alignment.h" | ||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "core/core.h" | ||
| 18 | #include "video_core/buffer_cache/buffer_block.h" | ||
| 19 | #include "video_core/buffer_cache/map_interval.h" | ||
| 20 | #include "video_core/memory_manager.h" | ||
| 21 | #include "video_core/rasterizer_interface.h" | ||
| 22 | |||
| 23 | namespace VideoCommon { | ||
| 24 | |||
| 25 | using MapInterval = std::shared_ptr<MapIntervalBase>; | ||
| 26 | |||
/// Generic GPU buffer cache shared by backend renderers.
/// TBuffer is the backend's block handle type, TBufferType the native buffer
/// object, and StreamBuffer the backend's streaming upload buffer.
/// Public entry points lock the recursive mutex; helper methods assume it is
/// already held.
template <typename TBuffer, typename TBufferType, typename StreamBuffer>
class BufferCache {
public:
    // Pair of (native buffer handle, byte offset of the requested data in it).
    using BufferInfo = std::pair<const TBufferType*, u64>;

    /// Uploads guest memory at gpu_addr into a cached block (or the stream
    /// buffer for small, never-written regions) and returns where it landed.
    BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
                            bool is_written = false) {
        std::lock_guard lock{mutex};

        auto& memory_manager = system.GPU().MemoryManager();
        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
        if (!host_ptr) {
            // Unmapped GPU address: hand back a backend-provided empty buffer.
            return {GetEmptyBuffer(size), 0};
        }
        const auto cache_addr = ToCacheAddr(host_ptr);

        // Cache management is a big overhead, so only cache entries with a given size.
        // TODO: Figure out which size is the best for given games.
        constexpr std::size_t max_stream_size = 0x800;
        if (size < max_stream_size) {
            // Small regions the GPU has never written can bypass the cache and
            // stream directly; written regions must go through the cache so
            // GPU-side modifications are not lost.
            if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
                return StreamBufferUpload(host_ptr, size, alignment);
            }
        }

        auto block = GetBlock(cache_addr, size);
        auto map = MapAddress(block, gpu_addr, cache_addr, size);
        if (is_written) {
            map->MarkAsModified(true, GetModifiedTicks());
            if (!map->IsWritten()) {
                map->MarkAsWritten(true);
                MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
            }
        } else {
            if (map->IsWritten()) {
                // Reading a region previously written by the GPU: flush
                // pending device writes first.
                WriteBarrier();
            }
        }

        const u64 offset = static_cast<u64>(block->GetOffset(cache_addr));

        return {ToHandle(block), offset};
    }

    /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
    BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
                                std::size_t alignment = 4) {
        std::lock_guard lock{mutex};
        return StreamBufferUpload(raw_pointer, size, alignment);
    }

    /// Begins an upload batch by mapping up to max_size bytes of the stream
    /// buffer. Subsequent StreamBufferUpload calls append into this mapping.
    void Map(std::size_t max_size) {
        std::lock_guard lock{mutex};

        std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
        buffer_offset = buffer_offset_base;
    }

    /// Finishes the upload stream, returns true on bindings invalidation.
    bool Unmap() {
        std::lock_guard lock{mutex};

        stream_buffer->Unmap(buffer_offset - buffer_offset_base);
        return std::exchange(invalidated, false);
    }

    /// Advances the frame epoch and destroys retired blocks that are at least
    /// one full frame old (so the GPU has finished with them).
    // NOTE(review): unlike the other public methods this does not take the
    // mutex — presumably only ever called from the same thread that uploads;
    // confirm against callers.
    void TickFrame() {
        ++epoch;
        while (!pending_destruction.empty()) {
            // Blocks retired in the previous epoch (or earlier) are safe to free.
            if (pending_destruction.front()->GetEpoch() + 1 > epoch) {
                break;
            }
            pending_destruction.pop_front();
        }
    }

    /// Write any cached resources overlapping the specified region back to memory
    void FlushRegion(CacheAddr addr, std::size_t size) {
        std::lock_guard lock{mutex};

        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
        // Flush in modification order so later writes win.
        std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) {
            return a->GetModificationTick() < b->GetModificationTick();
        });
        for (auto& object : objects) {
            if (object->IsModified() && object->IsRegistered()) {
                FlushMap(object);
            }
        }
    }

    /// Mark the specified region as being invalidated
    void InvalidateRegion(CacheAddr addr, u64 size) {
        std::lock_guard lock{mutex};

        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
        for (auto& object : objects) {
            if (object->IsRegistered()) {
                Unregister(object);
            }
        }
    }

    /// Backend-provided placeholder buffer returned for unmapped addresses.
    virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0;

protected:
    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
                         std::unique_ptr<StreamBuffer> stream_buffer)
        : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)},
          stream_buffer_handle{this->stream_buffer->GetHandle()} {}

    ~BufferCache() = default;

    /// Converts a block into the native handle returned to callers.
    virtual const TBufferType* ToHandle(const TBuffer& storage) = 0;

    /// Backend memory barrier making GPU buffer writes visible.
    virtual void WriteBarrier() = 0;

    /// Allocates a backend block covering [cache_addr, cache_addr + size).
    virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0;

    /// Copies host data into the block at the given offset.
    virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
                                 const u8* data) = 0;

    /// Copies block contents at the given offset back to host memory.
    virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
                                   u8* data) = 0;

    /// GPU-side copy between two blocks.
    virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
                           std::size_t dst_offset, std::size_t size) = 0;

    /// Register an object into the cache
    void Register(const MapInterval& new_map, bool inherit_written = false) {
        const CacheAddr cache_ptr = new_map->GetStart();
        const std::optional<VAddr> cpu_addr =
            system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
        if (!cache_ptr || !cpu_addr) {
            LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
                         new_map->GetGpuAddress());
            return;
        }
        const std::size_t size = new_map->GetEnd() - new_map->GetStart();
        new_map->SetCpuAddress(*cpu_addr);
        new_map->MarkAsRegistered(true);
        const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
        mapped_addresses.insert({interval, new_map});
        // Tell the rasterizer these guest pages are now cached.
        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
        if (inherit_written) {
            MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
            new_map->MarkAsWritten(true);
        }
    }

    /// Unregisters an object from the cache
    void Unregister(MapInterval& map) {
        const std::size_t size = map->GetEnd() - map->GetStart();
        rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1);
        map->MarkAsRegistered(false);
        if (map->IsWritten()) {
            UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
        }
        const IntervalType delete_interval{map->GetStart(), map->GetEnd()};
        mapped_addresses.erase(delete_interval);
    }

private:
    MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) {
        return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
    }

    /// Finds or creates the map interval covering [cache_addr, cache_addr+size)
    /// inside block, merging any overlapping intervals into one and inheriting
    /// their written/modified state.
    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr,
                           const CacheAddr cache_addr, const std::size_t size) {

        std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size);
        if (overlaps.empty()) {
            // No overlap: upload fresh data and register a new interval.
            const CacheAddr cache_addr_end = cache_addr + size;
            MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr);
            u8* host_ptr = FromCacheAddr(cache_addr);
            UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr);
            Register(new_map);
            return new_map;
        }

        const CacheAddr cache_addr_end = cache_addr + size;
        if (overlaps.size() == 1) {
            // A single overlap that already covers the request can be reused as-is.
            MapInterval& current_map = overlaps[0];
            if (current_map->IsInside(cache_addr, cache_addr_end)) {
                return current_map;
            }
        }
        CacheAddr new_start = cache_addr;
        CacheAddr new_end = cache_addr_end;
        bool write_inheritance = false;
        bool modified_inheritance = false;
        // Calculate new buffer parameters
        for (auto& overlap : overlaps) {
            new_start = std::min(overlap->GetStart(), new_start);
            new_end = std::max(overlap->GetEnd(), new_end);
            write_inheritance |= overlap->IsWritten();
            modified_inheritance |= overlap->IsModified();
        }
        GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr;
        for (auto& overlap : overlaps) {
            Unregister(overlap);
        }
        // Upload only the gaps not already covered by the old overlaps.
        UpdateBlock(block, new_start, new_end, overlaps);
        MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
        if (modified_inheritance) {
            new_map->MarkAsModified(true, GetModifiedTicks());
        }
        Register(new_map, write_inheritance);
        return new_map;
    }

    /// Uploads the parts of [start, end) not covered by any interval in
    /// overlaps (their data is already resident in the block).
    void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end,
                     std::vector<MapInterval>& overlaps) {
        const IntervalType base_interval{start, end};
        IntervalSet interval_set{};
        interval_set.add(base_interval);
        for (auto& overlap : overlaps) {
            const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()};
            interval_set.subtract(subtract);
        }
        for (auto& interval : interval_set) {
            std::size_t size = interval.upper() - interval.lower();
            if (size > 0) {
                u8* host_ptr = FromCacheAddr(interval.lower());
                UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr);
            }
        }
    }

    /// Collects all registered intervals overlapping [addr, addr + size).
    std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) {
        if (size == 0) {
            return {};
        }

        std::vector<MapInterval> objects{};
        const IntervalType interval{addr, addr + size};
        for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) {
            objects.push_back(pair.second);
        }

        return objects;
    }

    /// Returns a ticks counter used for tracking when cached objects were last modified
    u64 GetModifiedTicks() {
        return ++modified_ticks;
    }

    /// Downloads a modified interval's data back to guest memory and clears
    /// its modified flag.
    // NOTE(review): blocks[...] assumes the interval's page is present in the
    // block map — appears to hold because registered maps always came through
    // GetBlock; confirm.
    void FlushMap(MapInterval map) {
        std::size_t size = map->GetEnd() - map->GetStart();
        TBuffer block = blocks[map->GetStart() >> block_page_bits];
        u8* host_ptr = FromCacheAddr(map->GetStart());
        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr);
        map->MarkAsModified(false, 0);
    }

    /// Appends raw data to the currently mapped stream buffer region.
    BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
                                  std::size_t alignment) {
        AlignBuffer(alignment);
        const std::size_t uploaded_offset = buffer_offset;
        std::memcpy(buffer_ptr, raw_pointer, size);

        buffer_ptr += size;
        buffer_offset += size;
        return {&stream_buffer_handle, uploaded_offset};
    }

    void AlignBuffer(std::size_t alignment) {
        // Align the offset, not the mapped pointer
        const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
        buffer_ptr += offset_aligned - buffer_offset;
        buffer_offset = offset_aligned;
    }

    /// Grows a block by one page: allocates a bigger block, copies the old
    /// contents, retires the old block, and repoints its pages.
    TBuffer EnlargeBlock(TBuffer buffer) {
        const std::size_t old_size = buffer->GetSize();
        const std::size_t new_size = old_size + block_page_size;
        const CacheAddr cache_addr = buffer->GetCacheAddr();
        TBuffer new_buffer = CreateBlock(cache_addr, new_size);
        CopyBlock(buffer, new_buffer, 0, 0, old_size);
        // Defer destruction until the GPU is done with the old block.
        buffer->SetEpoch(epoch);
        pending_destruction.push_back(buffer);
        const CacheAddr cache_addr_end = cache_addr + new_size - 1;
        u64 page_start = cache_addr >> block_page_bits;
        const u64 page_end = cache_addr_end >> block_page_bits;
        while (page_start <= page_end) {
            blocks[page_start] = new_buffer;
            ++page_start;
        }
        return new_buffer;
    }

    /// Merges two adjacent blocks into a single new block covering both.
    TBuffer MergeBlocks(TBuffer first, TBuffer second) {
        const std::size_t size_1 = first->GetSize();
        const std::size_t size_2 = second->GetSize();
        const CacheAddr first_addr = first->GetCacheAddr();
        const CacheAddr second_addr = second->GetCacheAddr();
        const CacheAddr new_addr = std::min(first_addr, second_addr);
        const std::size_t new_size = size_1 + size_2;
        TBuffer new_buffer = CreateBlock(new_addr, new_size);
        CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
        CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
        first->SetEpoch(epoch);
        second->SetEpoch(epoch);
        pending_destruction.push_back(first);
        pending_destruction.push_back(second);
        const CacheAddr cache_addr_end = new_addr + new_size - 1;
        u64 page_start = new_addr >> block_page_bits;
        const u64 page_end = cache_addr_end >> block_page_bits;
        while (page_start <= page_end) {
            blocks[page_start] = new_buffer;
            ++page_start;
        }
        return new_buffer;
    }

    /// Returns a single block spanning [cache_addr, cache_addr + size),
    /// creating, enlarging, or merging page-mapped blocks as needed.
    TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) {
        TBuffer found{};
        const CacheAddr cache_addr_end = cache_addr + size - 1;
        u64 page_start = cache_addr >> block_page_bits;
        const u64 page_end = cache_addr_end >> block_page_bits;
        while (page_start <= page_end) {
            auto it = blocks.find(page_start);
            if (it == blocks.end()) {
                if (found) {
                    // Extend the block we already have to cover this page.
                    found = EnlargeBlock(found);
                } else {
                    const CacheAddr start_addr = (page_start << block_page_bits);
                    found = CreateBlock(start_addr, block_page_size);
                    blocks[page_start] = found;
                }
            } else {
                if (found) {
                    if (found == it->second) {
                        // Same block already covers this page; keep scanning.
                        ++page_start;
                        continue;
                    }
                    found = MergeBlocks(found, it->second);
                } else {
                    found = it->second;
                }
            }
            ++page_start;
        }
        return found;
    }

    /// Increments the written-page refcount for every page in [start, end].
    void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
        u64 page_start = start >> write_page_bit;
        const u64 page_end = end >> write_page_bit;
        while (page_start <= page_end) {
            auto it = written_pages.find(page_start);
            if (it != written_pages.end()) {
                it->second = it->second + 1;
            } else {
                written_pages[page_start] = 1;
            }
            page_start++;
        }
    }

    /// Decrements the written-page refcount, erasing entries that reach zero.
    void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
        u64 page_start = start >> write_page_bit;
        const u64 page_end = end >> write_page_bit;
        while (page_start <= page_end) {
            auto it = written_pages.find(page_start);
            if (it != written_pages.end()) {
                if (it->second > 1) {
                    it->second = it->second - 1;
                } else {
                    written_pages.erase(it);
                }
            }
            page_start++;
        }
    }

    /// Returns true if any page in [start, end] has been written by the GPU.
    bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const {
        u64 page_start = start >> write_page_bit;
        const u64 page_end = end >> write_page_bit;
        while (page_start <= page_end) {
            if (written_pages.count(page_start) > 0) {
                return true;
            }
            page_start++;
        }
        return false;
    }

    VideoCore::RasterizerInterface& rasterizer;
    Core::System& system;
    std::unique_ptr<StreamBuffer> stream_buffer;

    TBufferType stream_buffer_handle{};

    // Set when the stream buffer wraps; consumed (and reset) by Unmap().
    bool invalidated = false;

    // Current write cursor into the mapped stream buffer region.
    u8* buffer_ptr = nullptr;
    u64 buffer_offset = 0;
    u64 buffer_offset_base = 0;

    // NOTE(review): boost::icl types are used here but no boost header is
    // included in this file — presumably pulled in transitively; confirm.
    using IntervalSet = boost::icl::interval_set<CacheAddr>;
    using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>;
    using IntervalType = typename IntervalCache::interval_type;
    IntervalCache mapped_addresses{};

    // 2 KiB pages for written-region tracking.
    static constexpr u64 write_page_bit{11};
    std::unordered_map<u64, u32> written_pages{};

    // 2 MiB pages mapping cache addresses to buffer blocks.
    static constexpr u64 block_page_bits{21};
    static constexpr u64 block_page_size{1 << block_page_bits};
    std::unordered_map<u64, TBuffer> blocks{};

    // Retired blocks kept alive until their epoch is one frame old.
    std::list<TBuffer> pending_destruction{};
    u64 epoch{};
    u64 modified_ticks{};

    // Recursive: public methods lock it and may call each other.
    std::recursive_mutex mutex;
};
| 446 | |||
| 447 | } // namespace VideoCommon | ||
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h new file mode 100644 index 000000000..3a104d5cd --- /dev/null +++ b/src/video_core/buffer_cache/map_interval.h | |||
| @@ -0,0 +1,89 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/gpu.h" | ||
| 9 | |||
| 10 | namespace VideoCommon { | ||
| 11 | |||
| 12 | class MapIntervalBase { | ||
| 13 | public: | ||
| 14 | MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) | ||
| 15 | : start{start}, end{end}, gpu_addr{gpu_addr} {} | ||
| 16 | |||
| 17 | void SetCpuAddress(VAddr new_cpu_addr) { | ||
| 18 | cpu_addr = new_cpu_addr; | ||
| 19 | } | ||
| 20 | |||
| 21 | VAddr GetCpuAddress() const { | ||
| 22 | return cpu_addr; | ||
| 23 | } | ||
| 24 | |||
| 25 | GPUVAddr GetGpuAddress() const { | ||
| 26 | return gpu_addr; | ||
| 27 | } | ||
| 28 | |||
| 29 | bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { | ||
| 30 | return (start <= other_start && other_end <= end); | ||
| 31 | } | ||
| 32 | |||
| 33 | bool operator==(const MapIntervalBase& rhs) const { | ||
| 34 | return std::tie(start, end) == std::tie(rhs.start, rhs.end); | ||
| 35 | } | ||
| 36 | |||
| 37 | bool operator!=(const MapIntervalBase& rhs) const { | ||
| 38 | return !operator==(rhs); | ||
| 39 | } | ||
| 40 | |||
| 41 | void MarkAsRegistered(const bool registered) { | ||
| 42 | is_registered = registered; | ||
| 43 | } | ||
| 44 | |||
| 45 | bool IsRegistered() const { | ||
| 46 | return is_registered; | ||
| 47 | } | ||
| 48 | |||
| 49 | CacheAddr GetStart() const { | ||
| 50 | return start; | ||
| 51 | } | ||
| 52 | |||
| 53 | CacheAddr GetEnd() const { | ||
| 54 | return end; | ||
| 55 | } | ||
| 56 | |||
| 57 | void MarkAsModified(const bool is_modified_, const u64 tick) { | ||
| 58 | is_modified = is_modified_; | ||
| 59 | ticks = tick; | ||
| 60 | } | ||
| 61 | |||
| 62 | bool IsModified() const { | ||
| 63 | return is_modified; | ||
| 64 | } | ||
| 65 | |||
| 66 | u64 GetModificationTick() const { | ||
| 67 | return ticks; | ||
| 68 | } | ||
| 69 | |||
| 70 | void MarkAsWritten(const bool is_written_) { | ||
| 71 | is_written = is_written_; | ||
| 72 | } | ||
| 73 | |||
| 74 | bool IsWritten() const { | ||
| 75 | return is_written; | ||
| 76 | } | ||
| 77 | |||
| 78 | private: | ||
| 79 | CacheAddr start; | ||
| 80 | CacheAddr end; | ||
| 81 | GPUVAddr gpu_addr; | ||
| 82 | VAddr cpu_addr{}; | ||
| 83 | bool is_written{}; | ||
| 84 | bool is_modified{}; | ||
| 85 | bool is_registered{}; | ||
| 86 | u64 ticks{}; | ||
| 87 | }; | ||
| 88 | |||
| 89 | } // namespace VideoCommon | ||
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 3175579cc..0094fd715 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() { | |||
| 22 | MICROPROFILE_SCOPE(DispatchCalls); | 22 | MICROPROFILE_SCOPE(DispatchCalls); |
| 23 | 23 | ||
| 24 | // On entering GPU code, assume all memory may be touched by the ARM core. | 24 | // On entering GPU code, assume all memory may be touched by the ARM core. |
| 25 | gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); | 25 | gpu.Maxwell3D().dirty.OnMemoryWrite(); |
| 26 | 26 | ||
| 27 | dma_pushbuffer_subindex = 0; | 27 | dma_pushbuffer_subindex = 0; |
| 28 | 28 | ||
| @@ -31,6 +31,7 @@ void DmaPusher::DispatchCalls() { | |||
| 31 | break; | 31 | break; |
| 32 | } | 32 | } |
| 33 | } | 33 | } |
| 34 | gpu.FlushCommands(); | ||
| 34 | } | 35 | } |
| 35 | 36 | ||
| 36 | bool DmaPusher::Step() { | 37 | bool DmaPusher::Step() { |
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index 082a40cd9..d44ad0cd8 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp | |||
| @@ -36,10 +36,10 @@ void State::ProcessData(const u32 data, const bool is_last_call) { | |||
| 36 | } else { | 36 | } else { |
| 37 | UNIMPLEMENTED_IF(regs.dest.z != 0); | 37 | UNIMPLEMENTED_IF(regs.dest.z != 0); |
| 38 | UNIMPLEMENTED_IF(regs.dest.depth != 1); | 38 | UNIMPLEMENTED_IF(regs.dest.depth != 1); |
| 39 | UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); | 39 | UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0); |
| 40 | UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); | 40 | UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0); |
| 41 | const std::size_t dst_size = Tegra::Texture::CalculateSize( | 41 | const std::size_t dst_size = Tegra::Texture::CalculateSize( |
| 42 | true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); | 42 | true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 0); |
| 43 | tmp_buffer.resize(dst_size); | 43 | tmp_buffer.resize(dst_size); |
| 44 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); | 44 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); |
| 45 | Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, | 45 | Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, |
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index ef4f5839a..462da419e 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h | |||
| @@ -39,15 +39,15 @@ struct Registers { | |||
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | u32 BlockWidth() const { | 41 | u32 BlockWidth() const { |
| 42 | return 1U << block_width.Value(); | 42 | return block_width.Value(); |
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | u32 BlockHeight() const { | 45 | u32 BlockHeight() const { |
| 46 | return 1U << block_height.Value(); | 46 | return block_height.Value(); |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | u32 BlockDepth() const { | 49 | u32 BlockDepth() const { |
| 50 | return 1U << block_depth.Value(); | 50 | return block_depth.Value(); |
| 51 | } | 51 | } |
| 52 | } dest; | 52 | } dest; |
| 53 | }; | 53 | }; |
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 55966eef1..98a8b5337 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -4,15 +4,13 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "common/math_util.h" | ||
| 8 | #include "video_core/engines/fermi_2d.h" | 7 | #include "video_core/engines/fermi_2d.h" |
| 9 | #include "video_core/memory_manager.h" | 8 | #include "video_core/memory_manager.h" |
| 10 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| 11 | 10 | ||
| 12 | namespace Tegra::Engines { | 11 | namespace Tegra::Engines { |
| 13 | 12 | ||
| 14 | Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) | 13 | Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} |
| 15 | : rasterizer{rasterizer}, memory_manager{memory_manager} {} | ||
| 16 | 14 | ||
| 17 | void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { | 15 | void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { |
| 18 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, | 16 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, |
| @@ -35,21 +33,31 @@ void Fermi2D::HandleSurfaceCopy() { | |||
| 35 | static_cast<u32>(regs.operation)); | 33 | static_cast<u32>(regs.operation)); |
| 36 | 34 | ||
| 37 | // TODO(Subv): Only raw copies are implemented. | 35 | // TODO(Subv): Only raw copies are implemented. |
| 38 | ASSERT(regs.operation == Regs::Operation::SrcCopy); | 36 | ASSERT(regs.operation == Operation::SrcCopy); |
| 39 | 37 | ||
| 40 | const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)}; | 38 | const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)}; |
| 41 | const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)}; | 39 | const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)}; |
| 42 | const u32 src_blit_x2{ | 40 | u32 src_blit_x2, src_blit_y2; |
| 43 | static_cast<u32>((regs.blit_src_x + (regs.blit_dst_width * regs.blit_du_dx)) >> 32)}; | 41 | if (regs.blit_control.origin == Origin::Corner) { |
| 44 | const u32 src_blit_y2{ | 42 | src_blit_x2 = |
| 45 | static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)}; | 43 | static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32); |
| 46 | 44 | src_blit_y2 = | |
| 45 | static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32); | ||
| 46 | } else { | ||
| 47 | src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width); | ||
| 48 | src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height); | ||
| 49 | } | ||
| 47 | const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; | 50 | const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; |
| 48 | const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, | 51 | const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, |
| 49 | regs.blit_dst_x + regs.blit_dst_width, | 52 | regs.blit_dst_x + regs.blit_dst_width, |
| 50 | regs.blit_dst_y + regs.blit_dst_height}; | 53 | regs.blit_dst_y + regs.blit_dst_height}; |
| 54 | Config copy_config; | ||
| 55 | copy_config.operation = regs.operation; | ||
| 56 | copy_config.filter = regs.blit_control.filter; | ||
| 57 | copy_config.src_rect = src_rect; | ||
| 58 | copy_config.dst_rect = dst_rect; | ||
| 51 | 59 | ||
| 52 | if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) { | 60 | if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { |
| 53 | UNIMPLEMENTED(); | 61 | UNIMPLEMENTED(); |
| 54 | } | 62 | } |
| 55 | } | 63 | } |
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 45f59a4d9..0901cf2fa 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/bit_field.h" | 9 | #include "common/bit_field.h" |
| 10 | #include "common/common_funcs.h" | 10 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/math_util.h" | ||
| 12 | #include "video_core/gpu.h" | 13 | #include "video_core/gpu.h" |
| 13 | 14 | ||
| 14 | namespace Tegra { | 15 | namespace Tegra { |
| @@ -32,12 +33,32 @@ namespace Tegra::Engines { | |||
| 32 | 33 | ||
| 33 | class Fermi2D final { | 34 | class Fermi2D final { |
| 34 | public: | 35 | public: |
| 35 | explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); | 36 | explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer); |
| 36 | ~Fermi2D() = default; | 37 | ~Fermi2D() = default; |
| 37 | 38 | ||
| 38 | /// Write the value to the register identified by method. | 39 | /// Write the value to the register identified by method. |
| 39 | void CallMethod(const GPU::MethodCall& method_call); | 40 | void CallMethod(const GPU::MethodCall& method_call); |
| 40 | 41 | ||
| 42 | enum class Origin : u32 { | ||
| 43 | Center = 0, | ||
| 44 | Corner = 1, | ||
| 45 | }; | ||
| 46 | |||
| 47 | enum class Filter : u32 { | ||
| 48 | PointSample = 0, // Nearest | ||
| 49 | Linear = 1, | ||
| 50 | }; | ||
| 51 | |||
| 52 | enum class Operation : u32 { | ||
| 53 | SrcCopyAnd = 0, | ||
| 54 | ROPAnd = 1, | ||
| 55 | Blend = 2, | ||
| 56 | SrcCopy = 3, | ||
| 57 | ROP = 4, | ||
| 58 | SrcCopyPremult = 5, | ||
| 59 | BlendPremult = 6, | ||
| 60 | }; | ||
| 61 | |||
| 41 | struct Regs { | 62 | struct Regs { |
| 42 | static constexpr std::size_t NUM_REGS = 0x258; | 63 | static constexpr std::size_t NUM_REGS = 0x258; |
| 43 | 64 | ||
| @@ -63,32 +84,19 @@ public: | |||
| 63 | } | 84 | } |
| 64 | 85 | ||
| 65 | u32 BlockWidth() const { | 86 | u32 BlockWidth() const { |
| 66 | // The block width is stored in log2 format. | 87 | return block_width.Value(); |
| 67 | return 1 << block_width; | ||
| 68 | } | 88 | } |
| 69 | 89 | ||
| 70 | u32 BlockHeight() const { | 90 | u32 BlockHeight() const { |
| 71 | // The block height is stored in log2 format. | 91 | return block_height.Value(); |
| 72 | return 1 << block_height; | ||
| 73 | } | 92 | } |
| 74 | 93 | ||
| 75 | u32 BlockDepth() const { | 94 | u32 BlockDepth() const { |
| 76 | // The block depth is stored in log2 format. | 95 | return block_depth.Value(); |
| 77 | return 1 << block_depth; | ||
| 78 | } | 96 | } |
| 79 | }; | 97 | }; |
| 80 | static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); | 98 | static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); |
| 81 | 99 | ||
| 82 | enum class Operation : u32 { | ||
| 83 | SrcCopyAnd = 0, | ||
| 84 | ROPAnd = 1, | ||
| 85 | Blend = 2, | ||
| 86 | SrcCopy = 3, | ||
| 87 | ROP = 4, | ||
| 88 | SrcCopyPremult = 5, | ||
| 89 | BlendPremult = 6, | ||
| 90 | }; | ||
| 91 | |||
| 92 | union { | 100 | union { |
| 93 | struct { | 101 | struct { |
| 94 | INSERT_PADDING_WORDS(0x80); | 102 | INSERT_PADDING_WORDS(0x80); |
| @@ -105,7 +113,11 @@ public: | |||
| 105 | 113 | ||
| 106 | INSERT_PADDING_WORDS(0x177); | 114 | INSERT_PADDING_WORDS(0x177); |
| 107 | 115 | ||
| 108 | u32 blit_control; | 116 | union { |
| 117 | u32 raw; | ||
| 118 | BitField<0, 1, Origin> origin; | ||
| 119 | BitField<4, 1, Filter> filter; | ||
| 120 | } blit_control; | ||
| 109 | 121 | ||
| 110 | INSERT_PADDING_WORDS(0x8); | 122 | INSERT_PADDING_WORDS(0x8); |
| 111 | 123 | ||
| @@ -124,9 +136,15 @@ public: | |||
| 124 | }; | 136 | }; |
| 125 | } regs{}; | 137 | } regs{}; |
| 126 | 138 | ||
| 139 | struct Config { | ||
| 140 | Operation operation; | ||
| 141 | Filter filter; | ||
| 142 | Common::Rectangle<u32> src_rect; | ||
| 143 | Common::Rectangle<u32> dst_rect; | ||
| 144 | }; | ||
| 145 | |||
| 127 | private: | 146 | private: |
| 128 | VideoCore::RasterizerInterface& rasterizer; | 147 | VideoCore::RasterizerInterface& rasterizer; |
| 129 | MemoryManager& memory_manager; | ||
| 130 | 148 | ||
| 131 | /// Performs the copy from the source surface to the destination surface as configured in the | 149 | /// Performs the copy from the source surface to the destination surface as configured in the |
| 132 | /// registers. | 150 | /// registers. |
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 7404a8163..63d449135 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <bitset> | ||
| 5 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 7 | #include "core/core.h" | 8 | #include "core/core.h" |
| @@ -37,7 +38,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 37 | const bool is_last_call = method_call.IsLastCall(); | 38 | const bool is_last_call = method_call.IsLastCall(); |
| 38 | upload_state.ProcessData(method_call.argument, is_last_call); | 39 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 39 | if (is_last_call) { | 40 | if (is_last_call) { |
| 40 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 41 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 41 | } | 42 | } |
| 42 | break; | 43 | break; |
| 43 | } | 44 | } |
| @@ -49,14 +50,67 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 49 | } | 50 | } |
| 50 | } | 51 | } |
| 51 | 52 | ||
| 52 | void KeplerCompute::ProcessLaunch() { | 53 | Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { |
| 54 | const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value(); | ||
| 55 | ASSERT(cbuf_mask[regs.tex_cb_index]); | ||
| 56 | |||
| 57 | const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index]; | ||
| 58 | ASSERT(texinfo.Address() != 0); | ||
| 59 | |||
| 60 | const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle); | ||
| 61 | ASSERT(address < texinfo.Address() + texinfo.size); | ||
| 62 | |||
| 63 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)}; | ||
| 64 | return GetTextureInfo(tex_handle, offset); | ||
| 65 | } | ||
| 53 | 66 | ||
| 67 | Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHandle tex_handle, | ||
| 68 | std::size_t offset) const { | ||
| 69 | return Texture::FullTextureInfo{static_cast<u32>(offset), GetTICEntry(tex_handle.tic_id), | ||
| 70 | GetTSCEntry(tex_handle.tsc_id)}; | ||
| 71 | } | ||
| 72 | |||
| 73 | u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const { | ||
| 74 | const auto& buffer = launch_description.const_buffer_config[const_buffer]; | ||
| 75 | u32 result; | ||
| 76 | std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); | ||
| 77 | return result; | ||
| 78 | } | ||
| 79 | |||
| 80 | void KeplerCompute::ProcessLaunch() { | ||
| 54 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | 81 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); |
| 55 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | 82 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, |
| 56 | LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); | 83 | LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); |
| 57 | 84 | ||
| 58 | const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; | 85 | const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; |
| 59 | LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); | 86 | LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); |
| 87 | |||
| 88 | rasterizer.DispatchCompute(code_addr); | ||
| 89 | } | ||
| 90 | |||
| 91 | Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { | ||
| 92 | const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)}; | ||
| 93 | |||
| 94 | Texture::TICEntry tic_entry; | ||
| 95 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); | ||
| 96 | |||
| 97 | const auto r_type{tic_entry.r_type.Value()}; | ||
| 98 | const auto g_type{tic_entry.g_type.Value()}; | ||
| 99 | const auto b_type{tic_entry.b_type.Value()}; | ||
| 100 | const auto a_type{tic_entry.a_type.Value()}; | ||
| 101 | |||
| 102 | // TODO(Subv): Different data types for separate components are not supported | ||
| 103 | DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); | ||
| 104 | |||
| 105 | return tic_entry; | ||
| 106 | } | ||
| 107 | |||
| 108 | Texture::TSCEntry KeplerCompute::GetTSCEntry(u32 tsc_index) const { | ||
| 109 | const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)}; | ||
| 110 | |||
| 111 | Texture::TSCEntry tsc_entry; | ||
| 112 | memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); | ||
| 113 | return tsc_entry; | ||
| 60 | } | 114 | } |
| 61 | 115 | ||
| 62 | } // namespace Tegra::Engines | 116 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 6a3309a2c..90cf650d2 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/engines/engine_upload.h" | 13 | #include "video_core/engines/engine_upload.h" |
| 14 | #include "video_core/gpu.h" | 14 | #include "video_core/gpu.h" |
| 15 | #include "video_core/textures/texture.h" | ||
| 15 | 16 | ||
| 16 | namespace Core { | 17 | namespace Core { |
| 17 | class System; | 18 | class System; |
| @@ -111,7 +112,7 @@ public: | |||
| 111 | 112 | ||
| 112 | INSERT_PADDING_WORDS(0x3FE); | 113 | INSERT_PADDING_WORDS(0x3FE); |
| 113 | 114 | ||
| 114 | u32 texture_const_buffer_index; | 115 | u32 tex_cb_index; |
| 115 | 116 | ||
| 116 | INSERT_PADDING_WORDS(0x374); | 117 | INSERT_PADDING_WORDS(0x374); |
| 117 | }; | 118 | }; |
| @@ -149,7 +150,7 @@ public: | |||
| 149 | union { | 150 | union { |
| 150 | BitField<0, 8, u32> const_buffer_enable_mask; | 151 | BitField<0, 8, u32> const_buffer_enable_mask; |
| 151 | BitField<29, 2, u32> cache_layout; | 152 | BitField<29, 2, u32> cache_layout; |
| 152 | } memory_config; | 153 | }; |
| 153 | 154 | ||
| 154 | INSERT_PADDING_WORDS(0x8); | 155 | INSERT_PADDING_WORDS(0x8); |
| 155 | 156 | ||
| @@ -194,6 +195,14 @@ public: | |||
| 194 | /// Write the value to the register identified by method. | 195 | /// Write the value to the register identified by method. |
| 195 | void CallMethod(const GPU::MethodCall& method_call); | 196 | void CallMethod(const GPU::MethodCall& method_call); |
| 196 | 197 | ||
| 198 | Tegra::Texture::FullTextureInfo GetTexture(std::size_t offset) const; | ||
| 199 | |||
| 200 | /// Given a Texture Handle, returns the TSC and TIC entries. | ||
| 201 | Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, | ||
| 202 | std::size_t offset) const; | ||
| 203 | |||
| 204 | u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const; | ||
| 205 | |||
| 197 | private: | 206 | private: |
| 198 | Core::System& system; | 207 | Core::System& system; |
| 199 | VideoCore::RasterizerInterface& rasterizer; | 208 | VideoCore::RasterizerInterface& rasterizer; |
| @@ -201,6 +210,12 @@ private: | |||
| 201 | Upload::State upload_state; | 210 | Upload::State upload_state; |
| 202 | 211 | ||
| 203 | void ProcessLaunch(); | 212 | void ProcessLaunch(); |
| 213 | |||
| 214 | /// Retrieves information about a specific TIC entry from the TIC buffer. | ||
| 215 | Texture::TICEntry GetTICEntry(u32 tic_index) const; | ||
| 216 | |||
| 217 | /// Retrieves information about a specific TSC entry from the TSC buffer. | ||
| 218 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; | ||
| 204 | }; | 219 | }; |
| 205 | 220 | ||
| 206 | #define ASSERT_REG_POSITION(field_name, position) \ | 221 | #define ASSERT_REG_POSITION(field_name, position) \ |
| @@ -218,12 +233,12 @@ ASSERT_REG_POSITION(launch, 0xAF); | |||
| 218 | ASSERT_REG_POSITION(tsc, 0x557); | 233 | ASSERT_REG_POSITION(tsc, 0x557); |
| 219 | ASSERT_REG_POSITION(tic, 0x55D); | 234 | ASSERT_REG_POSITION(tic, 0x55D); |
| 220 | ASSERT_REG_POSITION(code_loc, 0x582); | 235 | ASSERT_REG_POSITION(code_loc, 0x582); |
| 221 | ASSERT_REG_POSITION(texture_const_buffer_index, 0x982); | 236 | ASSERT_REG_POSITION(tex_cb_index, 0x982); |
| 222 | ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8); | 237 | ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8); |
| 223 | ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC); | 238 | ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC); |
| 224 | ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11); | 239 | ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11); |
| 225 | ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12); | 240 | ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12); |
| 226 | ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14); | 241 | ASSERT_LAUNCH_PARAM_POSITION(const_buffer_enable_mask, 0x14); |
| 227 | ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D); | 242 | ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D); |
| 228 | 243 | ||
| 229 | #undef ASSERT_REG_POSITION | 244 | #undef ASSERT_REG_POSITION |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 0561f676c..fa4a7c5c1 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -15,7 +15,7 @@ | |||
| 15 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| 16 | 16 | ||
| 17 | KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) | 17 | KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) |
| 18 | : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {} | 18 | : system{system}, upload_state{memory_manager, regs.upload} {} |
| 19 | 19 | ||
| 20 | KeplerMemory::~KeplerMemory() = default; | 20 | KeplerMemory::~KeplerMemory() = default; |
| 21 | 21 | ||
| @@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 34 | const bool is_last_call = method_call.IsLastCall(); | 34 | const bool is_last_call = method_call.IsLastCall(); |
| 35 | upload_state.ProcessData(method_call.argument, is_last_call); | 35 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 36 | if (is_last_call) { | 36 | if (is_last_call) { |
| 37 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 37 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 38 | } | 38 | } |
| 39 | break; | 39 | break; |
| 40 | } | 40 | } |
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index f3bc675a9..e0e25c321 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h | |||
| @@ -65,7 +65,6 @@ public: | |||
| 65 | 65 | ||
| 66 | private: | 66 | private: |
| 67 | Core::System& system; | 67 | Core::System& system; |
| 68 | MemoryManager& memory_manager; | ||
| 69 | Upload::State upload_state; | 68 | Upload::State upload_state; |
| 70 | }; | 69 | }; |
| 71 | 70 | ||
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 08d553696..c8c92757a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste | |||
| 22 | MemoryManager& memory_manager) | 22 | MemoryManager& memory_manager) |
| 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, | 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, |
| 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { | 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { |
| 25 | InitDirtySettings(); | ||
| 25 | InitializeRegisterDefaults(); | 26 | InitializeRegisterDefaults(); |
| 26 | } | 27 | } |
| 27 | 28 | ||
| @@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 69 | regs.stencil_back_func_mask = 0xFFFFFFFF; | 70 | regs.stencil_back_func_mask = 0xFFFFFFFF; |
| 70 | regs.stencil_back_mask = 0xFFFFFFFF; | 71 | regs.stencil_back_mask = 0xFFFFFFFF; |
| 71 | 72 | ||
| 73 | regs.depth_test_func = Regs::ComparisonOp::Always; | ||
| 74 | regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise; | ||
| 75 | regs.cull.cull_face = Regs::Cull::CullFace::Back; | ||
| 76 | |||
| 72 | // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a | 77 | // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a |
| 73 | // register carrying a default value. Assume it's OpenGL's default (1). | 78 | // register carrying a default value. Assume it's OpenGL's default (1). |
| 74 | regs.point_size = 1.0f; | 79 | regs.point_size = 1.0f; |
| @@ -86,21 +91,168 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 86 | regs.rt_separate_frag_data = 1; | 91 | regs.rt_separate_frag_data = 1; |
| 87 | } | 92 | } |
| 88 | 93 | ||
| 89 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { | 94 | #define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) |
| 95 | |||
| 96 | void Maxwell3D::InitDirtySettings() { | ||
| 97 | const auto set_block = [this](const u32 start, const u32 range, const u8 position) { | ||
| 98 | const auto start_itr = dirty_pointers.begin() + start; | ||
| 99 | const auto end_itr = start_itr + range; | ||
| 100 | std::fill(start_itr, end_itr, position); | ||
| 101 | }; | ||
| 102 | dirty.regs.fill(true); | ||
| 103 | |||
| 104 | // Init Render Targets | ||
| 105 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | ||
| 106 | constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); | ||
| 107 | constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; | ||
| 108 | u32 rt_dirty_reg = DIRTY_REGS_POS(render_target); | ||
| 109 | for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { | ||
| 110 | set_block(rt_reg, registers_per_rt, rt_dirty_reg); | ||
| 111 | rt_dirty_reg++; | ||
| 112 | } | ||
| 113 | constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); | ||
| 114 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; | ||
| 115 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag; | ||
| 116 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag; | ||
| 117 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | ||
| 118 | constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta); | ||
| 119 | set_block(zeta_reg, registers_in_zeta, depth_buffer_flag); | ||
| 120 | |||
| 121 | // Init Vertex Arrays | ||
| 122 | constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); | ||
| 123 | constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); | ||
| 124 | constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; | ||
| 125 | u32 va_reg = DIRTY_REGS_POS(vertex_array); | ||
| 126 | u32 vi_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 127 | for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; | ||
| 128 | vertex_reg += vertex_array_size) { | ||
| 129 | set_block(vertex_reg, 3, va_reg); | ||
| 130 | // The divisor concerns vertex array instances | ||
| 131 | dirty_pointers[vertex_reg + 3] = vi_reg; | ||
| 132 | va_reg++; | ||
| 133 | vi_reg++; | ||
| 134 | } | ||
| 135 | constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); | ||
| 136 | constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); | ||
| 137 | constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; | ||
| 138 | va_reg = DIRTY_REGS_POS(vertex_array); | ||
| 139 | for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; | ||
| 140 | vertex_reg += vertex_limit_size) { | ||
| 141 | set_block(vertex_reg, vertex_limit_size, va_reg); | ||
| 142 | va_reg++; | ||
| 143 | } | ||
| 144 | constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); | ||
| 145 | constexpr u32 vertex_instance_size = | ||
| 146 | sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); | ||
| 147 | constexpr u32 vertex_instance_end = | ||
| 148 | vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; | ||
| 149 | vi_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 150 | for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; | ||
| 151 | vertex_reg += vertex_instance_size) { | ||
| 152 | set_block(vertex_reg, vertex_instance_size, vi_reg); | ||
| 153 | vi_reg++; | ||
| 154 | } | ||
| 155 | set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), | ||
| 156 | DIRTY_REGS_POS(vertex_attrib_format)); | ||
| 157 | |||
| 158 | // Init Shaders | ||
| 159 | constexpr u32 shader_registers_count = | ||
| 160 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 161 | set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count, | ||
| 162 | DIRTY_REGS_POS(shaders)); | ||
| 163 | |||
| 164 | // State | ||
| 165 | |||
| 166 | // Viewport | ||
| 167 | constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport); | ||
| 168 | constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); | ||
| 169 | constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); | ||
| 170 | set_block(viewport_start, viewport_size, viewport_dirty_reg); | ||
| 171 | constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control); | ||
| 172 | constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32); | ||
| 173 | set_block(view_volume_start, view_volume_size, viewport_dirty_reg); | ||
| 174 | |||
| 175 | // Viewport transformation | ||
| 176 | constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform); | ||
| 177 | constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32); | ||
| 178 | set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform)); | ||
| 179 | |||
| 180 | // Cullmode | ||
| 181 | constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull); | ||
| 182 | constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32); | ||
| 183 | set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode)); | ||
| 184 | |||
| 185 | // Screen y control | ||
| 186 | dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control); | ||
| 187 | |||
| 188 | // Primitive Restart | ||
| 189 | constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart); | ||
| 190 | constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32); | ||
| 191 | set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); | ||
| 192 | |||
| 193 | // Depth Test | ||
| 194 | constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); | ||
| 195 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; | ||
| 196 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; | ||
| 197 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; | ||
| 198 | |||
| 199 | // Stencil Test | ||
| 200 | constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test); | ||
| 201 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg; | ||
| 202 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg; | ||
| 203 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg; | ||
| 204 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg; | ||
| 205 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg; | ||
| 206 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg; | ||
| 207 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg; | ||
| 208 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg; | ||
| 209 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg; | ||
| 210 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg; | ||
| 211 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg; | ||
| 212 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg; | ||
| 213 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg; | ||
| 214 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg; | ||
| 215 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg; | ||
| 216 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg; | ||
| 217 | |||
| 218 | // Color Mask | ||
| 219 | constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); | ||
| 220 | dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; | ||
| 221 | set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), | ||
| 222 | color_mask_dirty_reg); | ||
| 223 | // Blend State | ||
| 224 | constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); | ||
| 225 | set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), | ||
| 226 | blend_state_dirty_reg); | ||
| 227 | dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; | ||
| 228 | set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg); | ||
| 229 | set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32), | ||
| 230 | blend_state_dirty_reg); | ||
| 231 | |||
| 232 | // Scissor State | ||
| 233 | constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); | ||
| 234 | set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), | ||
| 235 | scissor_test_dirty_reg); | ||
| 236 | |||
| 237 | // Polygon Offset | ||
| 238 | constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); | ||
| 239 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; | ||
| 240 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; | ||
| 241 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; | ||
| 242 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg; | ||
| 243 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg; | ||
| 244 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; | ||
| 245 | } | ||
| 246 | |||
| 247 | void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) { | ||
| 90 | // Reset the current macro. | 248 | // Reset the current macro. |
| 91 | executing_macro = 0; | 249 | executing_macro = 0; |
| 92 | 250 | ||
| 93 | // Lookup the macro offset | 251 | // Lookup the macro offset |
| 94 | const u32 entry{(method - MacroRegistersStart) >> 1}; | 252 | const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size(); |
| 95 | const auto& search{macro_offsets.find(entry)}; | ||
| 96 | if (search == macro_offsets.end()) { | ||
| 97 | LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method); | ||
| 98 | UNREACHABLE(); | ||
| 99 | return; | ||
| 100 | } | ||
| 101 | 253 | ||
| 102 | // Execute the current macro. | 254 | // Execute the current macro. |
| 103 | macro_interpreter.Execute(search->second, std::move(parameters)); | 255 | macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters); |
| 104 | } | 256 | } |
| 105 | 257 | ||
| 106 | void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | 258 | void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { |
| @@ -108,6 +260,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 108 | 260 | ||
| 109 | const u32 method = method_call.method; | 261 | const u32 method = method_call.method; |
| 110 | 262 | ||
| 263 | if (method == cb_data_state.current) { | ||
| 264 | regs.reg_array[method] = method_call.argument; | ||
| 265 | ProcessCBData(method_call.argument); | ||
| 266 | return; | ||
| 267 | } else if (cb_data_state.current != null_cb_data) { | ||
| 268 | FinishCBData(); | ||
| 269 | } | ||
| 270 | |||
| 111 | // It is an error to write to a register other than the current macro's ARG register before it | 271 | // It is an error to write to a register other than the current macro's ARG register before it |
| 112 | // has finished execution. | 272 | // has finished execution. |
| 113 | if (executing_macro != 0) { | 273 | if (executing_macro != 0) { |
| @@ -129,7 +289,8 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 129 | 289 | ||
| 130 | // Call the macro when there are no more parameters in the command buffer | 290 | // Call the macro when there are no more parameters in the command buffer |
| 131 | if (method_call.IsLastCall()) { | 291 | if (method_call.IsLastCall()) { |
| 132 | CallMacroMethod(executing_macro, std::move(macro_params)); | 292 | CallMacroMethod(executing_macro, macro_params.size(), macro_params.data()); |
| 293 | macro_params.clear(); | ||
| 133 | } | 294 | } |
| 134 | return; | 295 | return; |
| 135 | } | 296 | } |
| @@ -143,49 +304,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 143 | 304 | ||
| 144 | if (regs.reg_array[method] != method_call.argument) { | 305 | if (regs.reg_array[method] != method_call.argument) { |
| 145 | regs.reg_array[method] = method_call.argument; | 306 | regs.reg_array[method] = method_call.argument; |
| 146 | // Color buffers | 307 | const std::size_t dirty_reg = dirty_pointers[method]; |
| 147 | constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); | 308 | if (dirty_reg) { |
| 148 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | 309 | dirty.regs[dirty_reg] = true; |
| 149 | if (method >= first_rt_reg && | 310 | if (dirty_reg >= DIRTY_REGS_POS(vertex_array) && |
| 150 | method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { | 311 | dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) { |
| 151 | const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; | 312 | dirty.vertex_array_buffers = true; |
| 152 | dirty_flags.color_buffer.set(rt_index); | 313 | } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) && |
| 153 | } | 314 | dirty_reg < DIRTY_REGS_POS(vertex_instances)) { |
| 154 | 315 | dirty.vertex_instances = true; | |
| 155 | // Zeta buffer | 316 | } else if (dirty_reg >= DIRTY_REGS_POS(render_target) && |
| 156 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | 317 | dirty_reg < DIRTY_REGS_POS(render_settings)) { |
| 157 | if (method == MAXWELL3D_REG_INDEX(zeta_enable) || | 318 | dirty.render_settings = true; |
| 158 | method == MAXWELL3D_REG_INDEX(zeta_width) || | 319 | } |
| 159 | method == MAXWELL3D_REG_INDEX(zeta_height) || | ||
| 160 | (method >= MAXWELL3D_REG_INDEX(zeta) && | ||
| 161 | method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { | ||
| 162 | dirty_flags.zeta_buffer = true; | ||
| 163 | } | ||
| 164 | |||
| 165 | // Shader | ||
| 166 | constexpr u32 shader_registers_count = | ||
| 167 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 168 | if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) && | ||
| 169 | method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { | ||
| 170 | dirty_flags.shaders = true; | ||
| 171 | } | ||
| 172 | |||
| 173 | // Vertex format | ||
| 174 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && | ||
| 175 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { | ||
| 176 | dirty_flags.vertex_attrib_format = true; | ||
| 177 | } | ||
| 178 | |||
| 179 | // Vertex buffer | ||
| 180 | if (method >= MAXWELL3D_REG_INDEX(vertex_array) && | ||
| 181 | method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) { | ||
| 182 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); | ||
| 183 | } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && | ||
| 184 | method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) { | ||
| 185 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); | ||
| 186 | } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && | ||
| 187 | method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) { | ||
| 188 | dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays)); | ||
| 189 | } | 320 | } |
| 190 | } | 321 | } |
| 191 | 322 | ||
| @@ -214,7 +345,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 214 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): | 345 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): |
| 215 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): | 346 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): |
| 216 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { | 347 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { |
| 217 | ProcessCBData(method_call.argument); | 348 | StartCBData(method); |
| 218 | break; | 349 | break; |
| 219 | } | 350 | } |
| 220 | case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { | 351 | case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { |
| @@ -249,6 +380,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 249 | ProcessQueryGet(); | 380 | ProcessQueryGet(); |
| 250 | break; | 381 | break; |
| 251 | } | 382 | } |
| 383 | case MAXWELL3D_REG_INDEX(condition.mode): { | ||
| 384 | ProcessQueryCondition(); | ||
| 385 | break; | ||
| 386 | } | ||
| 252 | case MAXWELL3D_REG_INDEX(sync_info): { | 387 | case MAXWELL3D_REG_INDEX(sync_info): { |
| 253 | ProcessSyncPoint(); | 388 | ProcessSyncPoint(); |
| 254 | break; | 389 | break; |
| @@ -261,7 +396,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 261 | const bool is_last_call = method_call.IsLastCall(); | 396 | const bool is_last_call = method_call.IsLastCall(); |
| 262 | upload_state.ProcessData(method_call.argument, is_last_call); | 397 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 263 | if (is_last_call) { | 398 | if (is_last_call) { |
| 264 | dirty_flags.OnMemoryWrite(); | 399 | dirty.OnMemoryWrite(); |
| 265 | } | 400 | } |
| 266 | break; | 401 | break; |
| 267 | } | 402 | } |
| @@ -281,7 +416,7 @@ void Maxwell3D::ProcessMacroUpload(u32 data) { | |||
| 281 | } | 416 | } |
| 282 | 417 | ||
| 283 | void Maxwell3D::ProcessMacroBind(u32 data) { | 418 | void Maxwell3D::ProcessMacroBind(u32 data) { |
| 284 | macro_offsets[regs.macros.entry] = data; | 419 | macro_positions[regs.macros.entry++] = data; |
| 285 | } | 420 | } |
| 286 | 421 | ||
| 287 | void Maxwell3D::ProcessQueryGet() { | 422 | void Maxwell3D::ProcessQueryGet() { |
| @@ -302,6 +437,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 302 | result = regs.query.query_sequence; | 437 | result = regs.query.query_sequence; |
| 303 | break; | 438 | break; |
| 304 | default: | 439 | default: |
| 440 | result = 1; | ||
| 305 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", | 441 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", |
| 306 | static_cast<u32>(regs.query.query_get.select.Value())); | 442 | static_cast<u32>(regs.query.query_get.select.Value())); |
| 307 | } | 443 | } |
| @@ -333,7 +469,6 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 333 | query_result.timestamp = system.CoreTiming().GetTicks(); | 469 | query_result.timestamp = system.CoreTiming().GetTicks(); |
| 334 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | 470 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); |
| 335 | } | 471 | } |
| 336 | dirty_flags.OnMemoryWrite(); | ||
| 337 | break; | 472 | break; |
| 338 | } | 473 | } |
| 339 | default: | 474 | default: |
| @@ -342,12 +477,52 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 342 | } | 477 | } |
| 343 | } | 478 | } |
| 344 | 479 | ||
| 480 | void Maxwell3D::ProcessQueryCondition() { | ||
| 481 | const GPUVAddr condition_address{regs.condition.Address()}; | ||
| 482 | switch (regs.condition.mode) { | ||
| 483 | case Regs::ConditionMode::Always: { | ||
| 484 | execute_on = true; | ||
| 485 | break; | ||
| 486 | } | ||
| 487 | case Regs::ConditionMode::Never: { | ||
| 488 | execute_on = false; | ||
| 489 | break; | ||
| 490 | } | ||
| 491 | case Regs::ConditionMode::ResNonZero: { | ||
| 492 | Regs::QueryCompare cmp; | ||
| 493 | memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); | ||
| 494 | execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U; | ||
| 495 | break; | ||
| 496 | } | ||
| 497 | case Regs::ConditionMode::Equal: { | ||
| 498 | Regs::QueryCompare cmp; | ||
| 499 | memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); | ||
| 500 | execute_on = | ||
| 501 | cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode; | ||
| 502 | break; | ||
| 503 | } | ||
| 504 | case Regs::ConditionMode::NotEqual: { | ||
| 505 | Regs::QueryCompare cmp; | ||
| 506 | memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); | ||
| 507 | execute_on = | ||
| 508 | cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode; | ||
| 509 | break; | ||
| 510 | } | ||
| 511 | default: { | ||
| 512 | UNIMPLEMENTED_MSG("Uninplemented Condition Mode!"); | ||
| 513 | execute_on = true; | ||
| 514 | break; | ||
| 515 | } | ||
| 516 | } | ||
| 517 | } | ||
| 518 | |||
| 345 | void Maxwell3D::ProcessSyncPoint() { | 519 | void Maxwell3D::ProcessSyncPoint() { |
| 346 | const u32 sync_point = regs.sync_info.sync_point.Value(); | 520 | const u32 sync_point = regs.sync_info.sync_point.Value(); |
| 347 | const u32 increment = regs.sync_info.increment.Value(); | 521 | const u32 increment = regs.sync_info.increment.Value(); |
| 348 | const u32 cache_flush = regs.sync_info.unknown.Value(); | 522 | [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value(); |
| 349 | LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment, | 523 | if (increment) { |
| 350 | cache_flush); | 524 | system.GPU().IncrementSyncPoint(sync_point); |
| 525 | } | ||
| 351 | } | 526 | } |
| 352 | 527 | ||
| 353 | void Maxwell3D::DrawArrays() { | 528 | void Maxwell3D::DrawArrays() { |
| @@ -405,23 +580,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { | |||
| 405 | } | 580 | } |
| 406 | 581 | ||
| 407 | void Maxwell3D::ProcessCBData(u32 value) { | 582 | void Maxwell3D::ProcessCBData(u32 value) { |
| 583 | const u32 id = cb_data_state.id; | ||
| 584 | cb_data_state.buffer[id][cb_data_state.counter] = value; | ||
| 585 | // Increment the current buffer position. | ||
| 586 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | ||
| 587 | cb_data_state.counter++; | ||
| 588 | } | ||
| 589 | |||
| 590 | void Maxwell3D::StartCBData(u32 method) { | ||
| 591 | constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]); | ||
| 592 | cb_data_state.start_pos = regs.const_buffer.cb_pos; | ||
| 593 | cb_data_state.id = method - first_cb_data; | ||
| 594 | cb_data_state.current = method; | ||
| 595 | cb_data_state.counter = 0; | ||
| 596 | ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]); | ||
| 597 | } | ||
| 598 | |||
| 599 | void Maxwell3D::FinishCBData() { | ||
| 408 | // Write the input value to the current const buffer at the current position. | 600 | // Write the input value to the current const buffer at the current position. |
| 409 | const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); | 601 | const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); |
| 410 | ASSERT(buffer_address != 0); | 602 | ASSERT(buffer_address != 0); |
| 411 | 603 | ||
| 412 | // Don't allow writing past the end of the buffer. | 604 | // Don't allow writing past the end of the buffer. |
| 413 | ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); | 605 | ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size); |
| 414 | |||
| 415 | const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos}; | ||
| 416 | 606 | ||
| 417 | u8* ptr{memory_manager.GetPointer(address)}; | 607 | const GPUVAddr address{buffer_address + cb_data_state.start_pos}; |
| 418 | rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); | 608 | const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos; |
| 419 | memory_manager.Write<u32>(address, value); | ||
| 420 | 609 | ||
| 421 | dirty_flags.OnMemoryWrite(); | 610 | const u32 id = cb_data_state.id; |
| 611 | memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); | ||
| 612 | dirty.OnMemoryWrite(); | ||
| 422 | 613 | ||
| 423 | // Increment the current buffer position. | 614 | cb_data_state.id = null_cb_data; |
| 424 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | 615 | cb_data_state.current = null_cb_data; |
| 425 | } | 616 | } |
| 426 | 617 | ||
| 427 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | 618 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { |
| @@ -430,14 +621,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | |||
| 430 | Texture::TICEntry tic_entry; | 621 | Texture::TICEntry tic_entry; |
| 431 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); | 622 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); |
| 432 | 623 | ||
| 433 | ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || | 624 | [[maybe_unused]] const auto r_type{tic_entry.r_type.Value()}; |
| 434 | tic_entry.header_version == Texture::TICHeaderVersion::Pitch, | 625 | [[maybe_unused]] const auto g_type{tic_entry.g_type.Value()}; |
| 435 | "TIC versions other than BlockLinear or Pitch are unimplemented"); | 626 | [[maybe_unused]] const auto b_type{tic_entry.b_type.Value()}; |
| 436 | 627 | [[maybe_unused]] const auto a_type{tic_entry.a_type.Value()}; | |
| 437 | const auto r_type = tic_entry.r_type.Value(); | ||
| 438 | const auto g_type = tic_entry.g_type.Value(); | ||
| 439 | const auto b_type = tic_entry.b_type.Value(); | ||
| 440 | const auto a_type = tic_entry.a_type.Value(); | ||
| 441 | 628 | ||
| 442 | // TODO(Subv): Different data types for separate components are not supported | 629 | // TODO(Subv): Different data types for separate components are not supported |
| 443 | DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); | 630 | DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 13e314944..f67a5389f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -62,11 +62,13 @@ public: | |||
| 62 | static constexpr std::size_t NumVertexAttributes = 32; | 62 | static constexpr std::size_t NumVertexAttributes = 32; |
| 63 | static constexpr std::size_t NumVaryings = 31; | 63 | static constexpr std::size_t NumVaryings = 31; |
| 64 | static constexpr std::size_t NumTextureSamplers = 32; | 64 | static constexpr std::size_t NumTextureSamplers = 32; |
| 65 | static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number | ||
| 65 | static constexpr std::size_t NumClipDistances = 8; | 66 | static constexpr std::size_t NumClipDistances = 8; |
| 66 | static constexpr std::size_t MaxShaderProgram = 6; | 67 | static constexpr std::size_t MaxShaderProgram = 6; |
| 67 | static constexpr std::size_t MaxShaderStage = 5; | 68 | static constexpr std::size_t MaxShaderStage = 5; |
| 68 | // Maximum number of const buffers per shader stage. | 69 | // Maximum number of const buffers per shader stage. |
| 69 | static constexpr std::size_t MaxConstBuffers = 18; | 70 | static constexpr std::size_t MaxConstBuffers = 18; |
| 71 | static constexpr std::size_t MaxConstBufferSize = 0x10000; | ||
| 70 | 72 | ||
| 71 | enum class QueryMode : u32 { | 73 | enum class QueryMode : u32 { |
| 72 | Write = 0, | 74 | Write = 0, |
| @@ -89,6 +91,20 @@ public: | |||
| 89 | 91 | ||
| 90 | enum class QuerySelect : u32 { | 92 | enum class QuerySelect : u32 { |
| 91 | Zero = 0, | 93 | Zero = 0, |
| 94 | TimeElapsed = 2, | ||
| 95 | TransformFeedbackPrimitivesGenerated = 11, | ||
| 96 | PrimitivesGenerated = 18, | ||
| 97 | SamplesPassed = 21, | ||
| 98 | TransformFeedbackUnknown = 26, | ||
| 99 | }; | ||
| 100 | |||
| 101 | struct QueryCompare { | ||
| 102 | u32 initial_sequence; | ||
| 103 | u32 initial_mode; | ||
| 104 | u32 unknown1; | ||
| 105 | u32 unknown2; | ||
| 106 | u32 current_sequence; | ||
| 107 | u32 current_mode; | ||
| 92 | }; | 108 | }; |
| 93 | 109 | ||
| 94 | enum class QuerySyncCondition : u32 { | 110 | enum class QuerySyncCondition : u32 { |
| @@ -96,6 +112,14 @@ public: | |||
| 96 | GreaterThan = 1, | 112 | GreaterThan = 1, |
| 97 | }; | 113 | }; |
| 98 | 114 | ||
| 115 | enum class ConditionMode : u32 { | ||
| 116 | Never = 0, | ||
| 117 | Always = 1, | ||
| 118 | ResNonZero = 2, | ||
| 119 | Equal = 3, | ||
| 120 | NotEqual = 4, | ||
| 121 | }; | ||
| 122 | |||
| 99 | enum class ShaderProgram : u32 { | 123 | enum class ShaderProgram : u32 { |
| 100 | VertexA = 0, | 124 | VertexA = 0, |
| 101 | VertexB = 1, | 125 | VertexB = 1, |
| @@ -814,7 +838,18 @@ public: | |||
| 814 | BitField<4, 1, u32> alpha_to_one; | 838 | BitField<4, 1, u32> alpha_to_one; |
| 815 | } multisample_control; | 839 | } multisample_control; |
| 816 | 840 | ||
| 817 | INSERT_PADDING_WORDS(0x7); | 841 | INSERT_PADDING_WORDS(0x4); |
| 842 | |||
| 843 | struct { | ||
| 844 | u32 address_high; | ||
| 845 | u32 address_low; | ||
| 846 | ConditionMode mode; | ||
| 847 | |||
| 848 | GPUVAddr Address() const { | ||
| 849 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 850 | address_low); | ||
| 851 | } | ||
| 852 | } condition; | ||
| 818 | 853 | ||
| 819 | struct { | 854 | struct { |
| 820 | u32 tsc_address_high; | 855 | u32 tsc_address_high; |
| @@ -1123,23 +1158,77 @@ public: | |||
| 1123 | 1158 | ||
| 1124 | State state{}; | 1159 | State state{}; |
| 1125 | 1160 | ||
| 1126 | struct DirtyFlags { | 1161 | struct DirtyRegs { |
| 1127 | std::bitset<8> color_buffer{0xFF}; | 1162 | static constexpr std::size_t NUM_REGS = 256; |
| 1128 | std::bitset<32> vertex_array{0xFFFFFFFF}; | 1163 | union { |
| 1164 | struct { | ||
| 1165 | bool null_dirty; | ||
| 1166 | |||
| 1167 | // Vertex Attributes | ||
| 1168 | bool vertex_attrib_format; | ||
| 1169 | |||
| 1170 | // Vertex Arrays | ||
| 1171 | std::array<bool, 32> vertex_array; | ||
| 1172 | |||
| 1173 | bool vertex_array_buffers; | ||
| 1174 | |||
| 1175 | // Vertex Instances | ||
| 1176 | std::array<bool, 32> vertex_instance; | ||
| 1129 | 1177 | ||
| 1130 | bool vertex_attrib_format = true; | 1178 | bool vertex_instances; |
| 1131 | bool zeta_buffer = true; | 1179 | |
| 1132 | bool shaders = true; | 1180 | // Render Targets |
| 1181 | std::array<bool, 8> render_target; | ||
| 1182 | bool depth_buffer; | ||
| 1183 | |||
| 1184 | bool render_settings; | ||
| 1185 | |||
| 1186 | // Shaders | ||
| 1187 | bool shaders; | ||
| 1188 | |||
| 1189 | // Rasterizer State | ||
| 1190 | bool viewport; | ||
| 1191 | bool clip_coefficient; | ||
| 1192 | bool cull_mode; | ||
| 1193 | bool primitive_restart; | ||
| 1194 | bool depth_test; | ||
| 1195 | bool stencil_test; | ||
| 1196 | bool blend_state; | ||
| 1197 | bool scissor_test; | ||
| 1198 | bool transform_feedback; | ||
| 1199 | bool color_mask; | ||
| 1200 | bool polygon_offset; | ||
| 1201 | |||
| 1202 | // Complementary | ||
| 1203 | bool viewport_transform; | ||
| 1204 | bool screen_y_control; | ||
| 1205 | |||
| 1206 | bool memory_general; | ||
| 1207 | }; | ||
| 1208 | std::array<bool, NUM_REGS> regs; | ||
| 1209 | }; | ||
| 1210 | |||
| 1211 | void ResetVertexArrays() { | ||
| 1212 | vertex_array.fill(true); | ||
| 1213 | vertex_array_buffers = true; | ||
| 1214 | } | ||
| 1215 | |||
| 1216 | void ResetRenderTargets() { | ||
| 1217 | depth_buffer = true; | ||
| 1218 | render_target.fill(true); | ||
| 1219 | render_settings = true; | ||
| 1220 | } | ||
| 1133 | 1221 | ||
| 1134 | void OnMemoryWrite() { | 1222 | void OnMemoryWrite() { |
| 1135 | zeta_buffer = true; | ||
| 1136 | shaders = true; | 1223 | shaders = true; |
| 1137 | color_buffer.set(); | 1224 | memory_general = true; |
| 1138 | vertex_array.set(); | 1225 | ResetRenderTargets(); |
| 1226 | ResetVertexArrays(); | ||
| 1139 | } | 1227 | } |
| 1140 | }; | ||
| 1141 | 1228 | ||
| 1142 | DirtyFlags dirty_flags; | 1229 | } dirty{}; |
| 1230 | |||
| 1231 | std::array<u8, Regs::NUM_REGS> dirty_pointers{}; | ||
| 1143 | 1232 | ||
| 1144 | /// Reads a register value located at the input method address | 1233 | /// Reads a register value located at the input method address |
| 1145 | u32 GetRegisterValue(u32 method) const; | 1234 | u32 GetRegisterValue(u32 method) const; |
| @@ -1168,6 +1257,10 @@ public: | |||
| 1168 | return macro_memory; | 1257 | return macro_memory; |
| 1169 | } | 1258 | } |
| 1170 | 1259 | ||
| 1260 | bool ShouldExecute() const { | ||
| 1261 | return execute_on; | ||
| 1262 | } | ||
| 1263 | |||
| 1171 | private: | 1264 | private: |
| 1172 | void InitializeRegisterDefaults(); | 1265 | void InitializeRegisterDefaults(); |
| 1173 | 1266 | ||
| @@ -1178,7 +1271,7 @@ private: | |||
| 1178 | MemoryManager& memory_manager; | 1271 | MemoryManager& memory_manager; |
| 1179 | 1272 | ||
| 1180 | /// Start offsets of each macro in macro_memory | 1273 | /// Start offsets of each macro in macro_memory |
| 1181 | std::unordered_map<u32, u32> macro_offsets; | 1274 | std::array<u32, 0x80> macro_positions = {}; |
| 1182 | 1275 | ||
| 1183 | /// Memory for macro code | 1276 | /// Memory for macro code |
| 1184 | MacroMemory macro_memory; | 1277 | MacroMemory macro_memory; |
| @@ -1191,20 +1284,34 @@ private: | |||
| 1191 | /// Interpreter for the macro codes uploaded to the GPU. | 1284 | /// Interpreter for the macro codes uploaded to the GPU. |
| 1192 | MacroInterpreter macro_interpreter; | 1285 | MacroInterpreter macro_interpreter; |
| 1193 | 1286 | ||
| 1287 | static constexpr u32 null_cb_data = 0xFFFFFFFF; | ||
| 1288 | struct { | ||
| 1289 | std::array<std::array<u32, 0x4000>, 16> buffer; | ||
| 1290 | u32 current{null_cb_data}; | ||
| 1291 | u32 id{null_cb_data}; | ||
| 1292 | u32 start_pos{}; | ||
| 1293 | u32 counter{}; | ||
| 1294 | } cb_data_state; | ||
| 1295 | |||
| 1194 | Upload::State upload_state; | 1296 | Upload::State upload_state; |
| 1195 | 1297 | ||
| 1298 | bool execute_on{true}; | ||
| 1299 | |||
| 1196 | /// Retrieves information about a specific TIC entry from the TIC buffer. | 1300 | /// Retrieves information about a specific TIC entry from the TIC buffer. |
| 1197 | Texture::TICEntry GetTICEntry(u32 tic_index) const; | 1301 | Texture::TICEntry GetTICEntry(u32 tic_index) const; |
| 1198 | 1302 | ||
| 1199 | /// Retrieves information about a specific TSC entry from the TSC buffer. | 1303 | /// Retrieves information about a specific TSC entry from the TSC buffer. |
| 1200 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; | 1304 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; |
| 1201 | 1305 | ||
| 1306 | void InitDirtySettings(); | ||
| 1307 | |||
| 1202 | /** | 1308 | /** |
| 1203 | * Call a macro on this engine. | 1309 | * Call a macro on this engine. |
| 1204 | * @param method Method to call | 1310 | * @param method Method to call |
| 1311 | * @param num_parameters Number of arguments | ||
| 1205 | * @param parameters Arguments to the method call | 1312 | * @param parameters Arguments to the method call |
| 1206 | */ | 1313 | */ |
| 1207 | void CallMacroMethod(u32 method, std::vector<u32> parameters); | 1314 | void CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters); |
| 1208 | 1315 | ||
| 1209 | /// Handles writes to the macro uploading register. | 1316 | /// Handles writes to the macro uploading register. |
| 1210 | void ProcessMacroUpload(u32 data); | 1317 | void ProcessMacroUpload(u32 data); |
| @@ -1218,11 +1325,16 @@ private: | |||
| 1218 | /// Handles a write to the QUERY_GET register. | 1325 | /// Handles a write to the QUERY_GET register. |
| 1219 | void ProcessQueryGet(); | 1326 | void ProcessQueryGet(); |
| 1220 | 1327 | ||
| 1328 | // Handles Conditional Rendering | ||
| 1329 | void ProcessQueryCondition(); | ||
| 1330 | |||
| 1221 | /// Handles writes to syncing register. | 1331 | /// Handles writes to syncing register. |
| 1222 | void ProcessSyncPoint(); | 1332 | void ProcessSyncPoint(); |
| 1223 | 1333 | ||
| 1224 | /// Handles a write to the CB_DATA[i] register. | 1334 | /// Handles a write to the CB_DATA[i] register. |
| 1335 | void StartCBData(u32 method); | ||
| 1225 | void ProcessCBData(u32 value); | 1336 | void ProcessCBData(u32 value); |
| 1337 | void FinishCBData(); | ||
| 1226 | 1338 | ||
| 1227 | /// Handles a write to the CB_BIND register. | 1339 | /// Handles a write to the CB_BIND register. |
| 1228 | void ProcessCBBind(Regs::ShaderStage stage); | 1340 | void ProcessCBBind(Regs::ShaderStage stage); |
| @@ -1289,6 +1401,7 @@ ASSERT_REG_POSITION(clip_distance_enabled, 0x544); | |||
| 1289 | ASSERT_REG_POSITION(point_size, 0x546); | 1401 | ASSERT_REG_POSITION(point_size, 0x546); |
| 1290 | ASSERT_REG_POSITION(zeta_enable, 0x54E); | 1402 | ASSERT_REG_POSITION(zeta_enable, 0x54E); |
| 1291 | ASSERT_REG_POSITION(multisample_control, 0x54F); | 1403 | ASSERT_REG_POSITION(multisample_control, 0x54F); |
| 1404 | ASSERT_REG_POSITION(condition, 0x554); | ||
| 1292 | ASSERT_REG_POSITION(tsc, 0x557); | 1405 | ASSERT_REG_POSITION(tsc, 0x557); |
| 1293 | ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); | 1406 | ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); |
| 1294 | ASSERT_REG_POSITION(tic, 0x55D); | 1407 | ASSERT_REG_POSITION(tic, 0x55D); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 3a5dfef0c..ad8453c5f 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -5,18 +5,17 @@ | |||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/settings.h" | ||
| 8 | #include "video_core/engines/maxwell_3d.h" | 9 | #include "video_core/engines/maxwell_3d.h" |
| 9 | #include "video_core/engines/maxwell_dma.h" | 10 | #include "video_core/engines/maxwell_dma.h" |
| 10 | #include "video_core/memory_manager.h" | 11 | #include "video_core/memory_manager.h" |
| 11 | #include "video_core/rasterizer_interface.h" | ||
| 12 | #include "video_core/renderer_base.h" | 12 | #include "video_core/renderer_base.h" |
| 13 | #include "video_core/textures/decoders.h" | 13 | #include "video_core/textures/decoders.h" |
| 14 | 14 | ||
| 15 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| 16 | 16 | ||
| 17 | MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 17 | MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) |
| 18 | MemoryManager& memory_manager) | 18 | : system{system}, memory_manager{memory_manager} {} |
| 19 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {} | ||
| 20 | 19 | ||
| 21 | void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { | 20 | void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { |
| 22 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, | 21 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, |
| @@ -38,7 +37,7 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { | |||
| 38 | } | 37 | } |
| 39 | 38 | ||
| 40 | void MaxwellDMA::HandleCopy() { | 39 | void MaxwellDMA::HandleCopy() { |
| 41 | LOG_WARNING(HW_GPU, "Requested a DMA copy"); | 40 | LOG_TRACE(HW_GPU, "Requested a DMA copy"); |
| 42 | 41 | ||
| 43 | const GPUVAddr source = regs.src_address.Address(); | 42 | const GPUVAddr source = regs.src_address.Address(); |
| 44 | const GPUVAddr dest = regs.dst_address.Address(); | 43 | const GPUVAddr dest = regs.dst_address.Address(); |
| @@ -58,7 +57,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 58 | } | 57 | } |
| 59 | 58 | ||
| 60 | // All copies here update the main memory, so mark all rasterizer states as invalid. | 59 | // All copies here update the main memory, so mark all rasterizer states as invalid. |
| 61 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 60 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 62 | 61 | ||
| 63 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { | 62 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { |
| 64 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D | 63 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D |
| @@ -84,13 +83,17 @@ void MaxwellDMA::HandleCopy() { | |||
| 84 | ASSERT(regs.exec.enable_2d == 1); | 83 | ASSERT(regs.exec.enable_2d == 1); |
| 85 | 84 | ||
| 86 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | 85 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { |
| 87 | ASSERT(regs.src_params.size_z == 1); | 86 | ASSERT(regs.src_params.BlockDepth() == 0); |
| 88 | // If the input is tiled and the output is linear, deswizzle the input and copy it over. | 87 | // If the input is tiled and the output is linear, deswizzle the input and copy it over. |
| 89 | const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; | 88 | const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; |
| 90 | const std::size_t src_size = Texture::CalculateSize( | 89 | const std::size_t src_size = Texture::CalculateSize( |
| 91 | true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, | 90 | true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, |
| 92 | regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); | 91 | regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); |
| 93 | 92 | ||
| 93 | const std::size_t src_layer_size = Texture::CalculateSize( | ||
| 94 | true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 1, | ||
| 95 | regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); | ||
| 96 | |||
| 94 | const std::size_t dst_size = regs.dst_pitch * regs.y_count; | 97 | const std::size_t dst_size = regs.dst_pitch * regs.y_count; |
| 95 | 98 | ||
| 96 | if (read_buffer.size() < src_size) { | 99 | if (read_buffer.size() < src_size) { |
| @@ -104,23 +107,23 @@ void MaxwellDMA::HandleCopy() { | |||
| 104 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); | 107 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); |
| 105 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | 108 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); |
| 106 | 109 | ||
| 107 | Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, | 110 | Texture::UnswizzleSubrect( |
| 108 | regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(), | 111 | regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel, |
| 109 | write_buffer.data(), regs.src_params.BlockHeight(), | 112 | read_buffer.data() + src_layer_size * regs.src_params.pos_z, write_buffer.data(), |
| 110 | regs.src_params.pos_x, regs.src_params.pos_y); | 113 | regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y); |
| 111 | 114 | ||
| 112 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | 115 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); |
| 113 | } else { | 116 | } else { |
| 114 | ASSERT(regs.dst_params.BlockDepth() == 1); | 117 | ASSERT(regs.dst_params.BlockDepth() == 0); |
| 115 | 118 | ||
| 116 | const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; | 119 | const u32 bytes_per_pixel = regs.src_pitch / regs.x_count; |
| 117 | 120 | ||
| 118 | const std::size_t dst_size = Texture::CalculateSize( | 121 | const std::size_t dst_size = Texture::CalculateSize( |
| 119 | true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, | 122 | true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, |
| 120 | regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); | 123 | regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); |
| 121 | 124 | ||
| 122 | const std::size_t dst_layer_size = Texture::CalculateSize( | 125 | const std::size_t dst_layer_size = Texture::CalculateSize( |
| 123 | true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, | 126 | true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, |
| 124 | regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); | 127 | regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); |
| 125 | 128 | ||
| 126 | const std::size_t src_size = regs.src_pitch * regs.y_count; | 129 | const std::size_t src_size = regs.src_pitch * regs.y_count; |
| @@ -133,14 +136,19 @@ void MaxwellDMA::HandleCopy() { | |||
| 133 | write_buffer.resize(dst_size); | 136 | write_buffer.resize(dst_size); |
| 134 | } | 137 | } |
| 135 | 138 | ||
| 136 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); | 139 | if (Settings::values.use_accurate_gpu_emulation) { |
| 137 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | 140 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); |
| 141 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | ||
| 142 | } else { | ||
| 143 | memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size); | ||
| 144 | memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); | ||
| 145 | } | ||
| 138 | 146 | ||
| 139 | // If the input is linear and the output is tiled, swizzle the input and copy it over. | 147 | // If the input is linear and the output is tiled, swizzle the input and copy it over. |
| 140 | Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, | 148 | Texture::SwizzleSubrect( |
| 141 | src_bytes_per_pixel, | 149 | regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, bytes_per_pixel, |
| 142 | write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, | 150 | write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, read_buffer.data(), |
| 143 | read_buffer.data(), regs.dst_params.BlockHeight()); | 151 | regs.dst_params.BlockHeight(), regs.dst_params.pos_x, regs.dst_params.pos_y); |
| 144 | 152 | ||
| 145 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | 153 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); |
| 146 | } | 154 | } |
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index e5942f671..93808a9bb 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -20,10 +20,6 @@ namespace Tegra { | |||
| 20 | class MemoryManager; | 20 | class MemoryManager; |
| 21 | } | 21 | } |
| 22 | 22 | ||
| 23 | namespace VideoCore { | ||
| 24 | class RasterizerInterface; | ||
| 25 | } | ||
| 26 | |||
| 27 | namespace Tegra::Engines { | 23 | namespace Tegra::Engines { |
| 28 | 24 | ||
| 29 | /** | 25 | /** |
| @@ -33,8 +29,7 @@ namespace Tegra::Engines { | |||
| 33 | 29 | ||
| 34 | class MaxwellDMA final { | 30 | class MaxwellDMA final { |
| 35 | public: | 31 | public: |
| 36 | explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 32 | explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); |
| 37 | MemoryManager& memory_manager); | ||
| 38 | ~MaxwellDMA() = default; | 33 | ~MaxwellDMA() = default; |
| 39 | 34 | ||
| 40 | /// Write the value to the register identified by method. | 35 | /// Write the value to the register identified by method. |
| @@ -59,11 +54,11 @@ public: | |||
| 59 | }; | 54 | }; |
| 60 | 55 | ||
| 61 | u32 BlockHeight() const { | 56 | u32 BlockHeight() const { |
| 62 | return 1 << block_height; | 57 | return block_height.Value(); |
| 63 | } | 58 | } |
| 64 | 59 | ||
| 65 | u32 BlockDepth() const { | 60 | u32 BlockDepth() const { |
| 66 | return 1 << block_depth; | 61 | return block_depth.Value(); |
| 67 | } | 62 | } |
| 68 | }; | 63 | }; |
| 69 | 64 | ||
| @@ -180,8 +175,6 @@ public: | |||
| 180 | private: | 175 | private: |
| 181 | Core::System& system; | 176 | Core::System& system; |
| 182 | 177 | ||
| 183 | VideoCore::RasterizerInterface& rasterizer; | ||
| 184 | |||
| 185 | MemoryManager& memory_manager; | 178 | MemoryManager& memory_manager; |
| 186 | 179 | ||
| 187 | std::vector<u8> read_buffer; | 180 | std::vector<u8> read_buffer; |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index ffb3ec3e0..052e6d24e 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 7 | #include <bitset> | 8 | #include <bitset> |
| 8 | #include <optional> | 9 | #include <optional> |
| 9 | #include <tuple> | 10 | #include <tuple> |
| @@ -77,7 +78,7 @@ union Attribute { | |||
| 77 | constexpr explicit Attribute(u64 value) : value(value) {} | 78 | constexpr explicit Attribute(u64 value) : value(value) {} |
| 78 | 79 | ||
| 79 | enum class Index : u64 { | 80 | enum class Index : u64 { |
| 80 | PointSize = 6, | 81 | LayerViewportPointSize = 6, |
| 81 | Position = 7, | 82 | Position = 7, |
| 82 | Attribute_0 = 8, | 83 | Attribute_0 = 8, |
| 83 | Attribute_31 = 39, | 84 | Attribute_31 = 39, |
| @@ -126,6 +127,15 @@ union Sampler { | |||
| 126 | u64 value{}; | 127 | u64 value{}; |
| 127 | }; | 128 | }; |
| 128 | 129 | ||
| 130 | union Image { | ||
| 131 | Image() = default; | ||
| 132 | |||
| 133 | constexpr explicit Image(u64 value) : value{value} {} | ||
| 134 | |||
| 135 | BitField<36, 13, u64> index; | ||
| 136 | u64 value; | ||
| 137 | }; | ||
| 138 | |||
| 129 | } // namespace Tegra::Shader | 139 | } // namespace Tegra::Shader |
| 130 | 140 | ||
| 131 | namespace std { | 141 | namespace std { |
| @@ -344,6 +354,26 @@ enum class TextureMiscMode : u64 { | |||
| 344 | PTP, | 354 | PTP, |
| 345 | }; | 355 | }; |
| 346 | 356 | ||
| 357 | enum class SurfaceDataMode : u64 { | ||
| 358 | P = 0, | ||
| 359 | D_BA = 1, | ||
| 360 | }; | ||
| 361 | |||
| 362 | enum class OutOfBoundsStore : u64 { | ||
| 363 | Ignore = 0, | ||
| 364 | Clamp = 1, | ||
| 365 | Trap = 2, | ||
| 366 | }; | ||
| 367 | |||
| 368 | enum class ImageType : u64 { | ||
| 369 | Texture1D = 0, | ||
| 370 | TextureBuffer = 1, | ||
| 371 | Texture1DArray = 2, | ||
| 372 | Texture2D = 3, | ||
| 373 | Texture2DArray = 4, | ||
| 374 | Texture3D = 5, | ||
| 375 | }; | ||
| 376 | |||
| 347 | enum class IsberdMode : u64 { | 377 | enum class IsberdMode : u64 { |
| 348 | None = 0, | 378 | None = 0, |
| 349 | Patch = 1, | 379 | Patch = 1, |
| @@ -398,7 +428,7 @@ enum class LmemLoadCacheManagement : u64 { | |||
| 398 | CV = 3, | 428 | CV = 3, |
| 399 | }; | 429 | }; |
| 400 | 430 | ||
| 401 | enum class LmemStoreCacheManagement : u64 { | 431 | enum class StoreCacheManagement : u64 { |
| 402 | Default = 0, | 432 | Default = 0, |
| 403 | CG = 1, | 433 | CG = 1, |
| 404 | CS = 2, | 434 | CS = 2, |
| @@ -508,6 +538,34 @@ enum class PhysicalAttributeDirection : u64 { | |||
| 508 | Output = 1, | 538 | Output = 1, |
| 509 | }; | 539 | }; |
| 510 | 540 | ||
| 541 | enum class VoteOperation : u64 { | ||
| 542 | All = 0, // allThreadsNV | ||
| 543 | Any = 1, // anyThreadNV | ||
| 544 | Eq = 2, // allThreadsEqualNV | ||
| 545 | }; | ||
| 546 | |||
| 547 | enum class ImageAtomicSize : u64 { | ||
| 548 | U32 = 0, | ||
| 549 | S32 = 1, | ||
| 550 | U64 = 2, | ||
| 551 | F32 = 3, | ||
| 552 | S64 = 5, | ||
| 553 | SD32 = 6, | ||
| 554 | SD64 = 7, | ||
| 555 | }; | ||
| 556 | |||
| 557 | enum class ImageAtomicOperation : u64 { | ||
| 558 | Add = 0, | ||
| 559 | Min = 1, | ||
| 560 | Max = 2, | ||
| 561 | Inc = 3, | ||
| 562 | Dec = 4, | ||
| 563 | And = 5, | ||
| 564 | Or = 6, | ||
| 565 | Xor = 7, | ||
| 566 | Exch = 8, | ||
| 567 | }; | ||
| 568 | |||
| 511 | union Instruction { | 569 | union Instruction { |
| 512 | Instruction& operator=(const Instruction& instr) { | 570 | Instruction& operator=(const Instruction& instr) { |
| 513 | value = instr.value; | 571 | value = instr.value; |
| @@ -530,6 +588,18 @@ union Instruction { | |||
| 530 | BitField<48, 16, u64> opcode; | 588 | BitField<48, 16, u64> opcode; |
| 531 | 589 | ||
| 532 | union { | 590 | union { |
| 591 | BitField<8, 5, ConditionCode> cc; | ||
| 592 | BitField<13, 1, u64> trigger; | ||
| 593 | } nop; | ||
| 594 | |||
| 595 | union { | ||
| 596 | BitField<48, 2, VoteOperation> operation; | ||
| 597 | BitField<45, 3, u64> dest_pred; | ||
| 598 | BitField<39, 3, u64> value; | ||
| 599 | BitField<42, 1, u64> negate_value; | ||
| 600 | } vote; | ||
| 601 | |||
| 602 | union { | ||
| 533 | BitField<8, 8, Register> gpr; | 603 | BitField<8, 8, Register> gpr; |
| 534 | BitField<20, 24, s64> offset; | 604 | BitField<20, 24, s64> offset; |
| 535 | } gmem; | 605 | } gmem; |
| @@ -627,6 +697,10 @@ union Instruction { | |||
| 627 | } shift; | 697 | } shift; |
| 628 | 698 | ||
| 629 | union { | 699 | union { |
| 700 | BitField<39, 1, u64> wrap; | ||
| 701 | } shr; | ||
| 702 | |||
| 703 | union { | ||
| 630 | BitField<39, 5, u64> shift_amount; | 704 | BitField<39, 5, u64> shift_amount; |
| 631 | BitField<48, 1, u64> negate_b; | 705 | BitField<48, 1, u64> negate_b; |
| 632 | BitField<49, 1, u64> negate_a; | 706 | BitField<49, 1, u64> negate_a; |
| @@ -811,7 +885,7 @@ union Instruction { | |||
| 811 | } ld_l; | 885 | } ld_l; |
| 812 | 886 | ||
| 813 | union { | 887 | union { |
| 814 | BitField<44, 2, LmemStoreCacheManagement> cache_management; | 888 | BitField<44, 2, StoreCacheManagement> cache_management; |
| 815 | } st_l; | 889 | } st_l; |
| 816 | 890 | ||
| 817 | union { | 891 | union { |
| @@ -838,6 +912,7 @@ union Instruction { | |||
| 838 | union { | 912 | union { |
| 839 | BitField<0, 3, u64> pred0; | 913 | BitField<0, 3, u64> pred0; |
| 840 | BitField<3, 3, u64> pred3; | 914 | BitField<3, 3, u64> pred3; |
| 915 | BitField<6, 1, u64> neg_b; | ||
| 841 | BitField<7, 1, u64> abs_a; | 916 | BitField<7, 1, u64> abs_a; |
| 842 | BitField<39, 3, u64> pred39; | 917 | BitField<39, 3, u64> pred39; |
| 843 | BitField<42, 1, u64> neg_pred; | 918 | BitField<42, 1, u64> neg_pred; |
| @@ -901,8 +976,6 @@ union Instruction { | |||
| 901 | } csetp; | 976 | } csetp; |
| 902 | 977 | ||
| 903 | union { | 978 | union { |
| 904 | BitField<35, 4, PredCondition> cond; | ||
| 905 | BitField<49, 1, u64> h_and; | ||
| 906 | BitField<6, 1, u64> ftz; | 979 | BitField<6, 1, u64> ftz; |
| 907 | BitField<45, 2, PredOperation> op; | 980 | BitField<45, 2, PredOperation> op; |
| 908 | BitField<3, 3, u64> pred3; | 981 | BitField<3, 3, u64> pred3; |
| @@ -910,9 +983,21 @@ union Instruction { | |||
| 910 | BitField<43, 1, u64> negate_a; | 983 | BitField<43, 1, u64> negate_a; |
| 911 | BitField<44, 1, u64> abs_a; | 984 | BitField<44, 1, u64> abs_a; |
| 912 | BitField<47, 2, HalfType> type_a; | 985 | BitField<47, 2, HalfType> type_a; |
| 913 | BitField<31, 1, u64> negate_b; | 986 | union { |
| 914 | BitField<30, 1, u64> abs_b; | 987 | BitField<35, 4, PredCondition> cond; |
| 915 | BitField<28, 2, HalfType> type_b; | 988 | BitField<49, 1, u64> h_and; |
| 989 | BitField<31, 1, u64> negate_b; | ||
| 990 | BitField<30, 1, u64> abs_b; | ||
| 991 | BitField<28, 2, HalfType> type_b; | ||
| 992 | } reg; | ||
| 993 | union { | ||
| 994 | BitField<56, 1, u64> negate_b; | ||
| 995 | BitField<54, 1, u64> abs_b; | ||
| 996 | } cbuf; | ||
| 997 | union { | ||
| 998 | BitField<49, 4, PredCondition> cond; | ||
| 999 | BitField<53, 1, u64> h_and; | ||
| 1000 | } cbuf_and_imm; | ||
| 916 | BitField<42, 1, u64> neg_pred; | 1001 | BitField<42, 1, u64> neg_pred; |
| 917 | BitField<39, 3, u64> pred39; | 1002 | BitField<39, 3, u64> pred39; |
| 918 | } hsetp2; | 1003 | } hsetp2; |
| @@ -961,7 +1046,6 @@ union Instruction { | |||
| 961 | } iset; | 1046 | } iset; |
| 962 | 1047 | ||
| 963 | union { | 1048 | union { |
| 964 | BitField<41, 2, u64> selector; // i2i and i2f only | ||
| 965 | BitField<45, 1, u64> negate_a; | 1049 | BitField<45, 1, u64> negate_a; |
| 966 | BitField<49, 1, u64> abs_a; | 1050 | BitField<49, 1, u64> abs_a; |
| 967 | BitField<10, 2, Register::Size> src_size; | 1051 | BitField<10, 2, Register::Size> src_size; |
| @@ -978,8 +1062,6 @@ union Instruction { | |||
| 978 | } f2i; | 1062 | } f2i; |
| 979 | 1063 | ||
| 980 | union { | 1064 | union { |
| 981 | BitField<8, 2, Register::Size> src_size; | ||
| 982 | BitField<10, 2, Register::Size> dst_size; | ||
| 983 | BitField<39, 4, u64> rounding; | 1065 | BitField<39, 4, u64> rounding; |
| 984 | // H0, H1 extract for F16 missing | 1066 | // H0, H1 extract for F16 missing |
| 985 | BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value | 1067 | BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value |
| @@ -989,6 +1071,13 @@ union Instruction { | |||
| 989 | } | 1071 | } |
| 990 | } f2f; | 1072 | } f2f; |
| 991 | 1073 | ||
| 1074 | union { | ||
| 1075 | BitField<41, 2, u64> selector; | ||
| 1076 | } int_src; | ||
| 1077 | |||
| 1078 | union { | ||
| 1079 | BitField<41, 1, u64> selector; | ||
| 1080 | } float_src; | ||
| 992 | } conversion; | 1081 | } conversion; |
| 993 | 1082 | ||
| 994 | union { | 1083 | union { |
| @@ -1232,8 +1321,23 @@ union Instruction { | |||
| 1232 | } texs; | 1321 | } texs; |
| 1233 | 1322 | ||
| 1234 | union { | 1323 | union { |
| 1324 | BitField<28, 1, u64> is_array; | ||
| 1325 | BitField<29, 2, TextureType> texture_type; | ||
| 1326 | BitField<35, 1, u64> aoffi; | ||
| 1327 | BitField<49, 1, u64> nodep_flag; | ||
| 1328 | BitField<50, 1, u64> ms; // Multisample? | ||
| 1329 | BitField<54, 1, u64> cl; | ||
| 1330 | BitField<55, 1, u64> process_mode; | ||
| 1331 | |||
| 1332 | TextureProcessMode GetTextureProcessMode() const { | ||
| 1333 | return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL; | ||
| 1334 | } | ||
| 1335 | } tld; | ||
| 1336 | |||
| 1337 | union { | ||
| 1235 | BitField<49, 1, u64> nodep_flag; | 1338 | BitField<49, 1, u64> nodep_flag; |
| 1236 | BitField<53, 4, u64> texture_info; | 1339 | BitField<53, 4, u64> texture_info; |
| 1340 | BitField<59, 1, u64> fp32_flag; | ||
| 1237 | 1341 | ||
| 1238 | TextureType GetTextureType() const { | 1342 | TextureType GetTextureType() const { |
| 1239 | // The TLDS instruction has a weird encoding for the texture type. | 1343 | // The TLDS instruction has a weird encoding for the texture type. |
| @@ -1281,6 +1385,43 @@ union Instruction { | |||
| 1281 | } tlds; | 1385 | } tlds; |
| 1282 | 1386 | ||
| 1283 | union { | 1387 | union { |
| 1388 | BitField<24, 2, StoreCacheManagement> cache_management; | ||
| 1389 | BitField<33, 3, ImageType> image_type; | ||
| 1390 | BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; | ||
| 1391 | BitField<51, 1, u64> is_immediate; | ||
| 1392 | BitField<52, 1, SurfaceDataMode> mode; | ||
| 1393 | |||
| 1394 | BitField<20, 3, StoreType> store_data_layout; | ||
| 1395 | BitField<20, 4, u64> component_mask_selector; | ||
| 1396 | |||
| 1397 | bool IsComponentEnabled(std::size_t component) const { | ||
| 1398 | ASSERT(mode == SurfaceDataMode::P); | ||
| 1399 | constexpr u8 R = 0b0001; | ||
| 1400 | constexpr u8 G = 0b0010; | ||
| 1401 | constexpr u8 B = 0b0100; | ||
| 1402 | constexpr u8 A = 0b1000; | ||
| 1403 | constexpr std::array<u8, 16> mask = { | ||
| 1404 | 0, (R), (G), (R | G), (B), (R | B), | ||
| 1405 | (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A), | ||
| 1406 | (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; | ||
| 1407 | return std::bitset<4>{mask.at(component_mask_selector)}.test(component); | ||
| 1408 | } | ||
| 1409 | |||
| 1410 | StoreType GetStoreDataLayout() const { | ||
| 1411 | ASSERT(mode == SurfaceDataMode::D_BA); | ||
| 1412 | return store_data_layout; | ||
| 1413 | } | ||
| 1414 | } sust; | ||
| 1415 | |||
| 1416 | union { | ||
| 1417 | BitField<28, 1, u64> is_ba; | ||
| 1418 | BitField<51, 3, ImageAtomicSize> size; | ||
| 1419 | BitField<33, 3, ImageType> image_type; | ||
| 1420 | BitField<29, 4, ImageAtomicOperation> operation; | ||
| 1421 | BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; | ||
| 1422 | } suatom_d; | ||
| 1423 | |||
| 1424 | union { | ||
| 1284 | BitField<20, 24, u64> target; | 1425 | BitField<20, 24, u64> target; |
| 1285 | BitField<5, 1, u64> constant_buffer; | 1426 | BitField<5, 1, u64> constant_buffer; |
| 1286 | 1427 | ||
| @@ -1295,6 +1436,20 @@ union Instruction { | |||
| 1295 | } bra; | 1436 | } bra; |
| 1296 | 1437 | ||
| 1297 | union { | 1438 | union { |
| 1439 | BitField<20, 24, u64> target; | ||
| 1440 | BitField<5, 1, u64> constant_buffer; | ||
| 1441 | |||
| 1442 | s32 GetBranchExtend() const { | ||
| 1443 | // Sign extend the branch target offset | ||
| 1444 | u32 mask = 1U << (24 - 1); | ||
| 1445 | u32 value = static_cast<u32>(target); | ||
| 1446 | // The branch offset is relative to the next instruction and is stored in bytes, so | ||
| 1447 | // divide it by the size of an instruction and add 1 to it. | ||
| 1448 | return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1; | ||
| 1449 | } | ||
| 1450 | } brx; | ||
| 1451 | |||
| 1452 | union { | ||
| 1298 | BitField<39, 1, u64> emit; // EmitVertex | 1453 | BitField<39, 1, u64> emit; // EmitVertex |
| 1299 | BitField<40, 1, u64> cut; // EndPrimitive | 1454 | BitField<40, 1, u64> cut; // EndPrimitive |
| 1300 | } out; | 1455 | } out; |
| @@ -1371,6 +1526,7 @@ union Instruction { | |||
| 1371 | 1526 | ||
| 1372 | Attribute attribute; | 1527 | Attribute attribute; |
| 1373 | Sampler sampler; | 1528 | Sampler sampler; |
| 1529 | Image image; | ||
| 1374 | 1530 | ||
| 1375 | u64 value; | 1531 | u64 value; |
| 1376 | }; | 1532 | }; |
| @@ -1385,11 +1541,13 @@ public: | |||
| 1385 | SYNC, | 1541 | SYNC, |
| 1386 | BRK, | 1542 | BRK, |
| 1387 | DEPBAR, | 1543 | DEPBAR, |
| 1544 | VOTE, | ||
| 1388 | BFE_C, | 1545 | BFE_C, |
| 1389 | BFE_R, | 1546 | BFE_R, |
| 1390 | BFE_IMM, | 1547 | BFE_IMM, |
| 1391 | BFI_IMM_R, | 1548 | BFI_IMM_R, |
| 1392 | BRA, | 1549 | BRA, |
| 1550 | BRX, | ||
| 1393 | PBK, | 1551 | PBK, |
| 1394 | LD_A, | 1552 | LD_A, |
| 1395 | LD_L, | 1553 | LD_L, |
| @@ -1408,12 +1566,16 @@ public: | |||
| 1408 | TXQ, // Texture Query | 1566 | TXQ, // Texture Query |
| 1409 | TXQ_B, // Texture Query Bindless | 1567 | TXQ_B, // Texture Query Bindless |
| 1410 | TEXS, // Texture Fetch with scalar/non-vec4 source/destinations | 1568 | TEXS, // Texture Fetch with scalar/non-vec4 source/destinations |
| 1569 | TLD, // Texture Load | ||
| 1411 | TLDS, // Texture Load with scalar/non-vec4 source/destinations | 1570 | TLDS, // Texture Load with scalar/non-vec4 source/destinations |
| 1412 | TLD4, // Texture Load 4 | 1571 | TLD4, // Texture Load 4 |
| 1413 | TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations | 1572 | TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations |
| 1414 | TMML_B, // Texture Mip Map Level | 1573 | TMML_B, // Texture Mip Map Level |
| 1415 | TMML, // Texture Mip Map Level | 1574 | TMML, // Texture Mip Map Level |
| 1575 | SUST, // Surface Store | ||
| 1576 | SUATOM, // Surface Atomic Operation | ||
| 1416 | EXIT, | 1577 | EXIT, |
| 1578 | NOP, | ||
| 1417 | IPA, | 1579 | IPA, |
| 1418 | OUT_R, // Emit vertex/primitive | 1580 | OUT_R, // Emit vertex/primitive |
| 1419 | ISBERD, | 1581 | ISBERD, |
| @@ -1456,7 +1618,9 @@ public: | |||
| 1456 | HFMA2_RC, | 1618 | HFMA2_RC, |
| 1457 | HFMA2_RR, | 1619 | HFMA2_RR, |
| 1458 | HFMA2_IMM_R, | 1620 | HFMA2_IMM_R, |
| 1621 | HSETP2_C, | ||
| 1459 | HSETP2_R, | 1622 | HSETP2_R, |
| 1623 | HSETP2_IMM, | ||
| 1460 | HSET2_R, | 1624 | HSET2_R, |
| 1461 | POPC_C, | 1625 | POPC_C, |
| 1462 | POPC_R, | 1626 | POPC_R, |
| @@ -1541,8 +1705,10 @@ public: | |||
| 1541 | Hfma2, | 1705 | Hfma2, |
| 1542 | Flow, | 1706 | Flow, |
| 1543 | Synch, | 1707 | Synch, |
| 1708 | Warp, | ||
| 1544 | Memory, | 1709 | Memory, |
| 1545 | Texture, | 1710 | Texture, |
| 1711 | Image, | ||
| 1546 | FloatSet, | 1712 | FloatSet, |
| 1547 | FloatSetPredicate, | 1713 | FloatSetPredicate, |
| 1548 | IntegerSet, | 1714 | IntegerSet, |
| @@ -1661,10 +1827,12 @@ private: | |||
| 1661 | INST("111000101001----", Id::SSY, Type::Flow, "SSY"), | 1827 | INST("111000101001----", Id::SSY, Type::Flow, "SSY"), |
| 1662 | INST("111000101010----", Id::PBK, Type::Flow, "PBK"), | 1828 | INST("111000101010----", Id::PBK, Type::Flow, "PBK"), |
| 1663 | INST("111000100100----", Id::BRA, Type::Flow, "BRA"), | 1829 | INST("111000100100----", Id::BRA, Type::Flow, "BRA"), |
| 1830 | INST("111000100101----", Id::BRX, Type::Flow, "BRX"), | ||
| 1664 | INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), | 1831 | INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), |
| 1665 | INST("111000110100---", Id::BRK, Type::Flow, "BRK"), | 1832 | INST("111000110100---", Id::BRK, Type::Flow, "BRK"), |
| 1666 | INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), | 1833 | INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), |
| 1667 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), | 1834 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), |
| 1835 | INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), | ||
| 1668 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), | 1836 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), |
| 1669 | INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), | 1837 | INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), |
| 1670 | INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), | 1838 | INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), |
| @@ -1682,11 +1850,15 @@ private: | |||
| 1682 | INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), | 1850 | INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), |
| 1683 | INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), | 1851 | INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), |
| 1684 | INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), | 1852 | INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), |
| 1685 | INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), | 1853 | INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), |
| 1854 | INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), | ||
| 1686 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), | 1855 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), |
| 1687 | INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), | 1856 | INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), |
| 1688 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), | 1857 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), |
| 1689 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), | 1858 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), |
| 1859 | INST("11101011001-----", Id::SUST, Type::Image, "SUST"), | ||
| 1860 | INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"), | ||
| 1861 | INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), | ||
| 1690 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), | 1862 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), |
| 1691 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), | 1863 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), |
| 1692 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), | 1864 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), |
| @@ -1735,7 +1907,9 @@ private: | |||
| 1735 | INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), | 1907 | INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), |
| 1736 | INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), | 1908 | INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), |
| 1737 | INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), | 1909 | INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), |
| 1738 | INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"), | 1910 | INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), |
| 1911 | INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), | ||
| 1912 | INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), | ||
| 1739 | INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), | 1913 | INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), |
| 1740 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), | 1914 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), |
| 1741 | INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), | 1915 | INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 52706505b..2c47541cb 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -17,26 +17,15 @@ | |||
| 17 | 17 | ||
| 18 | namespace Tegra { | 18 | namespace Tegra { |
| 19 | 19 | ||
| 20 | u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { | 20 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) |
| 21 | switch (format) { | 21 | : system{system}, renderer{renderer}, is_async{is_async} { |
| 22 | case PixelFormat::ABGR8: | ||
| 23 | case PixelFormat::BGRA8: | ||
| 24 | return 4; | ||
| 25 | default: | ||
| 26 | return 4; | ||
| 27 | } | ||
| 28 | |||
| 29 | UNREACHABLE(); | ||
| 30 | } | ||
| 31 | |||
| 32 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { | ||
| 33 | auto& rasterizer{renderer.Rasterizer()}; | 22 | auto& rasterizer{renderer.Rasterizer()}; |
| 34 | memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer); | 23 | memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); |
| 35 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); | 24 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); |
| 36 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); | 25 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); |
| 37 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); | 26 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); |
| 38 | kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); | 27 | kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); |
| 39 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); | 28 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); |
| 40 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); | 29 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); |
| 41 | } | 30 | } |
| 42 | 31 | ||
| @@ -50,6 +39,14 @@ const Engines::Maxwell3D& GPU::Maxwell3D() const { | |||
| 50 | return *maxwell_3d; | 39 | return *maxwell_3d; |
| 51 | } | 40 | } |
| 52 | 41 | ||
| 42 | Engines::KeplerCompute& GPU::KeplerCompute() { | ||
| 43 | return *kepler_compute; | ||
| 44 | } | ||
| 45 | |||
| 46 | const Engines::KeplerCompute& GPU::KeplerCompute() const { | ||
| 47 | return *kepler_compute; | ||
| 48 | } | ||
| 49 | |||
| 53 | MemoryManager& GPU::MemoryManager() { | 50 | MemoryManager& GPU::MemoryManager() { |
| 54 | return *memory_manager; | 51 | return *memory_manager; |
| 55 | } | 52 | } |
| @@ -66,6 +63,55 @@ const DmaPusher& GPU::DmaPusher() const { | |||
| 66 | return *dma_pusher; | 63 | return *dma_pusher; |
| 67 | } | 64 | } |
| 68 | 65 | ||
| 66 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { | ||
| 67 | syncpoints[syncpoint_id]++; | ||
| 68 | std::lock_guard lock{sync_mutex}; | ||
| 69 | if (!syncpt_interrupts[syncpoint_id].empty()) { | ||
| 70 | u32 value = syncpoints[syncpoint_id].load(); | ||
| 71 | auto it = syncpt_interrupts[syncpoint_id].begin(); | ||
| 72 | while (it != syncpt_interrupts[syncpoint_id].end()) { | ||
| 73 | if (value >= *it) { | ||
| 74 | TriggerCpuInterrupt(syncpoint_id, *it); | ||
| 75 | it = syncpt_interrupts[syncpoint_id].erase(it); | ||
| 76 | continue; | ||
| 77 | } | ||
| 78 | it++; | ||
| 79 | } | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 | u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { | ||
| 84 | return syncpoints[syncpoint_id].load(); | ||
| 85 | } | ||
| 86 | |||
| 87 | void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | ||
| 88 | auto& interrupt = syncpt_interrupts[syncpoint_id]; | ||
| 89 | bool contains = std::any_of(interrupt.begin(), interrupt.end(), | ||
| 90 | [value](u32 in_value) { return in_value == value; }); | ||
| 91 | if (contains) { | ||
| 92 | return; | ||
| 93 | } | ||
| 94 | syncpt_interrupts[syncpoint_id].emplace_back(value); | ||
| 95 | } | ||
| 96 | |||
| 97 | bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | ||
| 98 | std::lock_guard lock{sync_mutex}; | ||
| 99 | auto& interrupt = syncpt_interrupts[syncpoint_id]; | ||
| 100 | const auto iter = | ||
| 101 | std::find_if(interrupt.begin(), interrupt.end(), | ||
| 102 | [value](u32 interrupt_value) { return value == interrupt_value; }); | ||
| 103 | |||
| 104 | if (iter == interrupt.end()) { | ||
| 105 | return false; | ||
| 106 | } | ||
| 107 | interrupt.erase(iter); | ||
| 108 | return true; | ||
| 109 | } | ||
| 110 | |||
| 111 | void GPU::FlushCommands() { | ||
| 112 | renderer.Rasterizer().FlushCommands(); | ||
| 113 | } | ||
| 114 | |||
| 69 | u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { | 115 | u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { |
| 70 | ASSERT(format != RenderTargetFormat::NONE); | 116 | ASSERT(format != RenderTargetFormat::NONE); |
| 71 | 117 | ||
| @@ -143,12 +189,12 @@ enum class BufferMethods { | |||
| 143 | NotifyIntr = 0x8, | 189 | NotifyIntr = 0x8, |
| 144 | WrcacheFlush = 0x9, | 190 | WrcacheFlush = 0x9, |
| 145 | Unk28 = 0xA, | 191 | Unk28 = 0xA, |
| 146 | Unk2c = 0xB, | 192 | UnkCacheFlush = 0xB, |
| 147 | RefCnt = 0x14, | 193 | RefCnt = 0x14, |
| 148 | SemaphoreAcquire = 0x1A, | 194 | SemaphoreAcquire = 0x1A, |
| 149 | SemaphoreRelease = 0x1B, | 195 | SemaphoreRelease = 0x1B, |
| 150 | Unk70 = 0x1C, | 196 | FenceValue = 0x1C, |
| 151 | Unk74 = 0x1D, | 197 | FenceAction = 0x1D, |
| 152 | Unk78 = 0x1E, | 198 | Unk78 = 0x1E, |
| 153 | Unk7c = 0x1F, | 199 | Unk7c = 0x1F, |
| 154 | Yield = 0x20, | 200 | Yield = 0x20, |
| @@ -194,6 +240,10 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { | |||
| 194 | case BufferMethods::SemaphoreAddressLow: | 240 | case BufferMethods::SemaphoreAddressLow: |
| 195 | case BufferMethods::SemaphoreSequence: | 241 | case BufferMethods::SemaphoreSequence: |
| 196 | case BufferMethods::RefCnt: | 242 | case BufferMethods::RefCnt: |
| 243 | case BufferMethods::UnkCacheFlush: | ||
| 244 | case BufferMethods::WrcacheFlush: | ||
| 245 | case BufferMethods::FenceValue: | ||
| 246 | case BufferMethods::FenceAction: | ||
| 197 | break; | 247 | break; |
| 198 | case BufferMethods::SemaphoreTrigger: { | 248 | case BufferMethods::SemaphoreTrigger: { |
| 199 | ProcessSemaphoreTriggerMethod(); | 249 | ProcessSemaphoreTriggerMethod(); |
| @@ -204,21 +254,11 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { | |||
| 204 | LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); | 254 | LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); |
| 205 | break; | 255 | break; |
| 206 | } | 256 | } |
| 207 | case BufferMethods::WrcacheFlush: { | ||
| 208 | // TODO(Kmather73): Research and implement this method. | ||
| 209 | LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented"); | ||
| 210 | break; | ||
| 211 | } | ||
| 212 | case BufferMethods::Unk28: { | 257 | case BufferMethods::Unk28: { |
| 213 | // TODO(Kmather73): Research and implement this method. | 258 | // TODO(Kmather73): Research and implement this method. |
| 214 | LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); | 259 | LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); |
| 215 | break; | 260 | break; |
| 216 | } | 261 | } |
| 217 | case BufferMethods::Unk2c: { | ||
| 218 | // TODO(Kmather73): Research and implement this method. | ||
| 219 | LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented"); | ||
| 220 | break; | ||
| 221 | } | ||
| 222 | case BufferMethods::SemaphoreAcquire: { | 262 | case BufferMethods::SemaphoreAcquire: { |
| 223 | ProcessSemaphoreAcquire(); | 263 | ProcessSemaphoreAcquire(); |
| 224 | break; | 264 | break; |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index fe6628923..78bc0601a 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -5,8 +5,12 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <atomic> | ||
| 9 | #include <list> | ||
| 8 | #include <memory> | 10 | #include <memory> |
| 11 | #include <mutex> | ||
| 9 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 10 | #include "core/hle/service/nvflinger/buffer_queue.h" | 14 | #include "core/hle/service/nvflinger/buffer_queue.h" |
| 11 | #include "video_core/dma_pusher.h" | 15 | #include "video_core/dma_pusher.h" |
| 12 | 16 | ||
| @@ -15,6 +19,10 @@ inline CacheAddr ToCacheAddr(const void* host_ptr) { | |||
| 15 | return reinterpret_cast<CacheAddr>(host_ptr); | 19 | return reinterpret_cast<CacheAddr>(host_ptr); |
| 16 | } | 20 | } |
| 17 | 21 | ||
| 22 | inline u8* FromCacheAddr(CacheAddr cache_addr) { | ||
| 23 | return reinterpret_cast<u8*>(cache_addr); | ||
| 24 | } | ||
| 25 | |||
| 18 | namespace Core { | 26 | namespace Core { |
| 19 | class System; | 27 | class System; |
| 20 | } | 28 | } |
| @@ -87,14 +95,10 @@ class DebugContext; | |||
| 87 | struct FramebufferConfig { | 95 | struct FramebufferConfig { |
| 88 | enum class PixelFormat : u32 { | 96 | enum class PixelFormat : u32 { |
| 89 | ABGR8 = 1, | 97 | ABGR8 = 1, |
| 98 | RGB565 = 4, | ||
| 90 | BGRA8 = 5, | 99 | BGRA8 = 5, |
| 91 | }; | 100 | }; |
| 92 | 101 | ||
| 93 | /** | ||
| 94 | * Returns the number of bytes per pixel. | ||
| 95 | */ | ||
| 96 | static u32 BytesPerPixel(PixelFormat format); | ||
| 97 | |||
| 98 | VAddr address; | 102 | VAddr address; |
| 99 | u32 offset; | 103 | u32 offset; |
| 100 | u32 width; | 104 | u32 width; |
| @@ -127,7 +131,7 @@ class MemoryManager; | |||
| 127 | 131 | ||
| 128 | class GPU { | 132 | class GPU { |
| 129 | public: | 133 | public: |
| 130 | explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); | 134 | explicit GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async); |
| 131 | 135 | ||
| 132 | virtual ~GPU(); | 136 | virtual ~GPU(); |
| 133 | 137 | ||
| @@ -149,12 +153,20 @@ public: | |||
| 149 | /// Calls a GPU method. | 153 | /// Calls a GPU method. |
| 150 | void CallMethod(const MethodCall& method_call); | 154 | void CallMethod(const MethodCall& method_call); |
| 151 | 155 | ||
| 156 | void FlushCommands(); | ||
| 157 | |||
| 152 | /// Returns a reference to the Maxwell3D GPU engine. | 158 | /// Returns a reference to the Maxwell3D GPU engine. |
| 153 | Engines::Maxwell3D& Maxwell3D(); | 159 | Engines::Maxwell3D& Maxwell3D(); |
| 154 | 160 | ||
| 155 | /// Returns a const reference to the Maxwell3D GPU engine. | 161 | /// Returns a const reference to the Maxwell3D GPU engine. |
| 156 | const Engines::Maxwell3D& Maxwell3D() const; | 162 | const Engines::Maxwell3D& Maxwell3D() const; |
| 157 | 163 | ||
| 164 | /// Returns a reference to the KeplerCompute GPU engine. | ||
| 165 | Engines::KeplerCompute& KeplerCompute(); | ||
| 166 | |||
| 167 | /// Returns a reference to the KeplerCompute GPU engine. | ||
| 168 | const Engines::KeplerCompute& KeplerCompute() const; | ||
| 169 | |||
| 158 | /// Returns a reference to the GPU memory manager. | 170 | /// Returns a reference to the GPU memory manager. |
| 159 | Tegra::MemoryManager& MemoryManager(); | 171 | Tegra::MemoryManager& MemoryManager(); |
| 160 | 172 | ||
| @@ -164,6 +176,22 @@ public: | |||
| 164 | /// Returns a reference to the GPU DMA pusher. | 176 | /// Returns a reference to the GPU DMA pusher. |
| 165 | Tegra::DmaPusher& DmaPusher(); | 177 | Tegra::DmaPusher& DmaPusher(); |
| 166 | 178 | ||
| 179 | void IncrementSyncPoint(u32 syncpoint_id); | ||
| 180 | |||
| 181 | u32 GetSyncpointValue(u32 syncpoint_id) const; | ||
| 182 | |||
| 183 | void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value); | ||
| 184 | |||
| 185 | bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); | ||
| 186 | |||
| 187 | std::unique_lock<std::mutex> LockSync() { | ||
| 188 | return std::unique_lock{sync_mutex}; | ||
| 189 | } | ||
| 190 | |||
| 191 | bool IsAsync() const { | ||
| 192 | return is_async; | ||
| 193 | } | ||
| 194 | |||
| 167 | /// Returns a const reference to the GPU DMA pusher. | 195 | /// Returns a const reference to the GPU DMA pusher. |
| 168 | const Tegra::DmaPusher& DmaPusher() const; | 196 | const Tegra::DmaPusher& DmaPusher() const; |
| 169 | 197 | ||
| @@ -194,7 +222,12 @@ public: | |||
| 194 | 222 | ||
| 195 | u32 semaphore_acquire; | 223 | u32 semaphore_acquire; |
| 196 | u32 semaphore_release; | 224 | u32 semaphore_release; |
| 197 | INSERT_PADDING_WORDS(0xE4); | 225 | u32 fence_value; |
| 226 | union { | ||
| 227 | BitField<4, 4, u32> operation; | ||
| 228 | BitField<8, 8, u32> id; | ||
| 229 | } fence_action; | ||
| 230 | INSERT_PADDING_WORDS(0xE2); | ||
| 198 | 231 | ||
| 199 | // Puller state | 232 | // Puller state |
| 200 | u32 acquire_mode; | 233 | u32 acquire_mode; |
| @@ -216,8 +249,7 @@ public: | |||
| 216 | virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; | 249 | virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; |
| 217 | 250 | ||
| 218 | /// Swap buffers (render frame) | 251 | /// Swap buffers (render frame) |
| 219 | virtual void SwapBuffers( | 252 | virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; |
| 220 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0; | ||
| 221 | 253 | ||
| 222 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 254 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 223 | virtual void FlushRegion(CacheAddr addr, u64 size) = 0; | 255 | virtual void FlushRegion(CacheAddr addr, u64 size) = 0; |
| @@ -228,6 +260,9 @@ public: | |||
| 228 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 260 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 229 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; | 261 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; |
| 230 | 262 | ||
| 263 | protected: | ||
| 264 | virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; | ||
| 265 | |||
| 231 | private: | 266 | private: |
| 232 | void ProcessBindMethod(const MethodCall& method_call); | 267 | void ProcessBindMethod(const MethodCall& method_call); |
| 233 | void ProcessSemaphoreTriggerMethod(); | 268 | void ProcessSemaphoreTriggerMethod(); |
| @@ -245,6 +280,7 @@ private: | |||
| 245 | 280 | ||
| 246 | protected: | 281 | protected: |
| 247 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | 282 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; |
| 283 | Core::System& system; | ||
| 248 | VideoCore::RendererBase& renderer; | 284 | VideoCore::RendererBase& renderer; |
| 249 | 285 | ||
| 250 | private: | 286 | private: |
| @@ -262,6 +298,14 @@ private: | |||
| 262 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | 298 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; |
| 263 | /// Inline memory engine | 299 | /// Inline memory engine |
| 264 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; | 300 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; |
| 301 | |||
| 302 | std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; | ||
| 303 | |||
| 304 | std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts; | ||
| 305 | |||
| 306 | std::mutex sync_mutex; | ||
| 307 | |||
| 308 | const bool is_async; | ||
| 265 | }; | 309 | }; |
| 266 | 310 | ||
| 267 | #define ASSERT_REG_POSITION(field_name, position) \ | 311 | #define ASSERT_REG_POSITION(field_name, position) \ |
| @@ -274,6 +318,8 @@ ASSERT_REG_POSITION(semaphore_trigger, 0x7); | |||
| 274 | ASSERT_REG_POSITION(reference_count, 0x14); | 318 | ASSERT_REG_POSITION(reference_count, 0x14); |
| 275 | ASSERT_REG_POSITION(semaphore_acquire, 0x1A); | 319 | ASSERT_REG_POSITION(semaphore_acquire, 0x1A); |
| 276 | ASSERT_REG_POSITION(semaphore_release, 0x1B); | 320 | ASSERT_REG_POSITION(semaphore_release, 0x1B); |
| 321 | ASSERT_REG_POSITION(fence_value, 0x1C); | ||
| 322 | ASSERT_REG_POSITION(fence_action, 0x1D); | ||
| 277 | 323 | ||
| 278 | ASSERT_REG_POSITION(acquire_mode, 0x100); | 324 | ASSERT_REG_POSITION(acquire_mode, 0x100); |
| 279 | ASSERT_REG_POSITION(acquire_source, 0x101); | 325 | ASSERT_REG_POSITION(acquire_source, 0x101); |
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index d4e2553a9..f2a3a390e 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/core.h" | ||
| 6 | #include "core/hardware_interrupt_manager.h" | ||
| 5 | #include "video_core/gpu_asynch.h" | 7 | #include "video_core/gpu_asynch.h" |
| 6 | #include "video_core/gpu_thread.h" | 8 | #include "video_core/gpu_thread.h" |
| 7 | #include "video_core/renderer_base.h" | 9 | #include "video_core/renderer_base.h" |
| @@ -9,7 +11,7 @@ | |||
| 9 | namespace VideoCommon { | 11 | namespace VideoCommon { |
| 10 | 12 | ||
| 11 | GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) | 13 | GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) |
| 12 | : GPU(system, renderer), gpu_thread{system} {} | 14 | : GPU(system, renderer, true), gpu_thread{system} {} |
| 13 | 15 | ||
| 14 | GPUAsynch::~GPUAsynch() = default; | 16 | GPUAsynch::~GPUAsynch() = default; |
| 15 | 17 | ||
| @@ -21,9 +23,8 @@ void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { | |||
| 21 | gpu_thread.SubmitList(std::move(entries)); | 23 | gpu_thread.SubmitList(std::move(entries)); |
| 22 | } | 24 | } |
| 23 | 25 | ||
| 24 | void GPUAsynch::SwapBuffers( | 26 | void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| 25 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | 27 | gpu_thread.SwapBuffers(framebuffer); |
| 26 | gpu_thread.SwapBuffers(std::move(framebuffer)); | ||
| 27 | } | 28 | } |
| 28 | 29 | ||
| 29 | void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) { | 30 | void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) { |
| @@ -38,4 +39,9 @@ void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | |||
| 38 | gpu_thread.FlushAndInvalidateRegion(addr, size); | 39 | gpu_thread.FlushAndInvalidateRegion(addr, size); |
| 39 | } | 40 | } |
| 40 | 41 | ||
| 42 | void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { | ||
| 43 | auto& interrupt_manager = system.InterruptManager(); | ||
| 44 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | ||
| 45 | } | ||
| 46 | |||
| 41 | } // namespace VideoCommon | 47 | } // namespace VideoCommon |
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 30be74cba..a12f9bac4 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h | |||
| @@ -14,19 +14,21 @@ class RendererBase; | |||
| 14 | namespace VideoCommon { | 14 | namespace VideoCommon { |
| 15 | 15 | ||
| 16 | /// Implementation of GPU interface that runs the GPU asynchronously | 16 | /// Implementation of GPU interface that runs the GPU asynchronously |
| 17 | class GPUAsynch : public Tegra::GPU { | 17 | class GPUAsynch final : public Tegra::GPU { |
| 18 | public: | 18 | public: |
| 19 | explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); | 19 | explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); |
| 20 | ~GPUAsynch() override; | 20 | ~GPUAsynch() override; |
| 21 | 21 | ||
| 22 | void Start() override; | 22 | void Start() override; |
| 23 | void PushGPUEntries(Tegra::CommandList&& entries) override; | 23 | void PushGPUEntries(Tegra::CommandList&& entries) override; |
| 24 | void SwapBuffers( | 24 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; |
| 25 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; | ||
| 26 | void FlushRegion(CacheAddr addr, u64 size) override; | 25 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 27 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 26 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 28 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 27 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 29 | 28 | ||
| 29 | protected: | ||
| 30 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; | ||
| 31 | |||
| 30 | private: | 32 | private: |
| 31 | GPUThread::ThreadManager gpu_thread; | 33 | GPUThread::ThreadManager gpu_thread; |
| 32 | }; | 34 | }; |
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index 45e43b1dc..d48221077 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | namespace VideoCommon { | 8 | namespace VideoCommon { |
| 9 | 9 | ||
| 10 | GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) | 10 | GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) |
| 11 | : GPU(system, renderer) {} | 11 | : GPU(system, renderer, false) {} |
| 12 | 12 | ||
| 13 | GPUSynch::~GPUSynch() = default; | 13 | GPUSynch::~GPUSynch() = default; |
| 14 | 14 | ||
| @@ -19,9 +19,8 @@ void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { | |||
| 19 | dma_pusher->DispatchCalls(); | 19 | dma_pusher->DispatchCalls(); |
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | void GPUSynch::SwapBuffers( | 22 | void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| 23 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | 23 | renderer.SwapBuffers(framebuffer); |
| 24 | renderer.SwapBuffers(std::move(framebuffer)); | ||
| 25 | } | 24 | } |
| 26 | 25 | ||
| 27 | void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { | 26 | void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { |
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 3031fcf72..5eb1c461c 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h | |||
| @@ -13,18 +13,21 @@ class RendererBase; | |||
| 13 | namespace VideoCommon { | 13 | namespace VideoCommon { |
| 14 | 14 | ||
| 15 | /// Implementation of GPU interface that runs the GPU synchronously | 15 | /// Implementation of GPU interface that runs the GPU synchronously |
| 16 | class GPUSynch : public Tegra::GPU { | 16 | class GPUSynch final : public Tegra::GPU { |
| 17 | public: | 17 | public: |
| 18 | explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); | 18 | explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); |
| 19 | ~GPUSynch() override; | 19 | ~GPUSynch() override; |
| 20 | 20 | ||
| 21 | void Start() override; | 21 | void Start() override; |
| 22 | void PushGPUEntries(Tegra::CommandList&& entries) override; | 22 | void PushGPUEntries(Tegra::CommandList&& entries) override; |
| 23 | void SwapBuffers( | 23 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; |
| 24 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; | ||
| 25 | void FlushRegion(CacheAddr addr, u64 size) override; | 24 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 26 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 25 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 27 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 26 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 27 | |||
| 28 | protected: | ||
| 29 | void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, | ||
| 30 | [[maybe_unused]] u32 value) const override {} | ||
| 28 | }; | 31 | }; |
| 29 | 32 | ||
| 30 | } // namespace VideoCommon | 33 | } // namespace VideoCommon |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 3f0939ec9..5f039e4fd 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -21,7 +21,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p | |||
| 21 | MicroProfileOnThreadCreate("GpuThread"); | 21 | MicroProfileOnThreadCreate("GpuThread"); |
| 22 | 22 | ||
| 23 | // Wait for first GPU command before acquiring the window context | 23 | // Wait for first GPU command before acquiring the window context |
| 24 | state.WaitForCommands(); | 24 | while (state.queue.Empty()) |
| 25 | ; | ||
| 25 | 26 | ||
| 26 | // If emulation was stopped during disk shader loading, abort before trying to acquire context | 27 | // If emulation was stopped during disk shader loading, abort before trying to acquire context |
| 27 | if (!state.is_running) { | 28 | if (!state.is_running) { |
| @@ -32,14 +33,13 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p | |||
| 32 | 33 | ||
| 33 | CommandDataContainer next; | 34 | CommandDataContainer next; |
| 34 | while (state.is_running) { | 35 | while (state.is_running) { |
| 35 | state.WaitForCommands(); | ||
| 36 | while (!state.queue.Empty()) { | 36 | while (!state.queue.Empty()) { |
| 37 | state.queue.Pop(next); | 37 | state.queue.Pop(next); |
| 38 | if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { | 38 | if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { |
| 39 | dma_pusher.Push(std::move(submit_list->entries)); | 39 | dma_pusher.Push(std::move(submit_list->entries)); |
| 40 | dma_pusher.DispatchCalls(); | 40 | dma_pusher.DispatchCalls(); |
| 41 | } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { | 41 | } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { |
| 42 | renderer.SwapBuffers(std::move(data->framebuffer)); | 42 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); |
| 43 | } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { | 43 | } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { |
| 44 | renderer.Rasterizer().FlushRegion(data->addr, data->size); | 44 | renderer.Rasterizer().FlushRegion(data->addr, data->size); |
| 45 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { | 45 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { |
| @@ -49,8 +49,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p | |||
| 49 | } else { | 49 | } else { |
| 50 | UNREACHABLE(); | 50 | UNREACHABLE(); |
| 51 | } | 51 | } |
| 52 | state.signaled_fence = next.fence; | 52 | state.signaled_fence.store(next.fence); |
| 53 | state.TrySynchronize(); | ||
| 54 | } | 53 | } |
| 55 | } | 54 | } |
| 56 | } | 55 | } |
| @@ -79,9 +78,9 @@ void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | |||
| 79 | system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); | 78 | system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); |
| 80 | } | 79 | } |
| 81 | 80 | ||
| 82 | void ThreadManager::SwapBuffers( | 81 | void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| 83 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | 82 | PushCommand(SwapBuffersCommand(framebuffer ? *framebuffer |
| 84 | PushCommand(SwapBuffersCommand(std::move(framebuffer))); | 83 | : std::optional<const Tegra::FramebufferConfig>{})); |
| 85 | } | 84 | } |
| 86 | 85 | ||
| 87 | void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { | 86 | void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { |
| @@ -89,12 +88,7 @@ void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { | |||
| 89 | } | 88 | } |
| 90 | 89 | ||
| 91 | void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { | 90 | void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { |
| 92 | if (state.queue.Empty()) { | 91 | system.Renderer().Rasterizer().InvalidateRegion(addr, size); |
| 93 | // It's quicker to invalidate a single region on the CPU if the queue is already empty | ||
| 94 | system.Renderer().Rasterizer().InvalidateRegion(addr, size); | ||
| 95 | } else { | ||
| 96 | PushCommand(InvalidateRegionCommand(addr, size)); | ||
| 97 | } | ||
| 98 | } | 92 | } |
| 99 | 93 | ||
| 100 | void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | 94 | void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { |
| @@ -105,22 +99,13 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | |||
| 105 | u64 ThreadManager::PushCommand(CommandData&& command_data) { | 99 | u64 ThreadManager::PushCommand(CommandData&& command_data) { |
| 106 | const u64 fence{++state.last_fence}; | 100 | const u64 fence{++state.last_fence}; |
| 107 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | 101 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); |
| 108 | state.SignalCommands(); | ||
| 109 | return fence; | 102 | return fence; |
| 110 | } | 103 | } |
| 111 | 104 | ||
| 112 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | 105 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); |
| 113 | void SynchState::WaitForSynchronization(u64 fence) { | 106 | void SynchState::WaitForSynchronization(u64 fence) { |
| 114 | if (signaled_fence >= fence) { | 107 | while (signaled_fence.load() < fence) |
| 115 | return; | 108 | ; |
| 116 | } | ||
| 117 | |||
| 118 | // Wait for the GPU to be idle (all commands to be executed) | ||
| 119 | { | ||
| 120 | MICROPROFILE_SCOPE(GPU_wait); | ||
| 121 | std::unique_lock lock{synchronization_mutex}; | ||
| 122 | synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; }); | ||
| 123 | } | ||
| 124 | } | 109 | } |
| 125 | 110 | ||
| 126 | } // namespace VideoCommon::GPUThread | 111 | } // namespace VideoCommon::GPUThread |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 05a168a72..3ae0ec9f3 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -88,41 +88,9 @@ struct CommandDataContainer { | |||
| 88 | /// Struct used to synchronize the GPU thread | 88 | /// Struct used to synchronize the GPU thread |
| 89 | struct SynchState final { | 89 | struct SynchState final { |
| 90 | std::atomic_bool is_running{true}; | 90 | std::atomic_bool is_running{true}; |
| 91 | std::atomic_int queued_frame_count{}; | ||
| 92 | std::mutex synchronization_mutex; | ||
| 93 | std::mutex commands_mutex; | ||
| 94 | std::condition_variable commands_condition; | ||
| 95 | std::condition_variable synchronization_condition; | ||
| 96 | |||
| 97 | /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU | ||
| 98 | /// synchronized. This is entirely empirical. | ||
| 99 | bool IsSynchronized() const { | ||
| 100 | constexpr std::size_t max_queue_gap{5}; | ||
| 101 | return queue.Size() <= max_queue_gap; | ||
| 102 | } | ||
| 103 | |||
| 104 | void TrySynchronize() { | ||
| 105 | if (IsSynchronized()) { | ||
| 106 | std::lock_guard lock{synchronization_mutex}; | ||
| 107 | synchronization_condition.notify_one(); | ||
| 108 | } | ||
| 109 | } | ||
| 110 | 91 | ||
| 111 | void WaitForSynchronization(u64 fence); | 92 | void WaitForSynchronization(u64 fence); |
| 112 | 93 | ||
| 113 | void SignalCommands() { | ||
| 114 | if (queue.Empty()) { | ||
| 115 | return; | ||
| 116 | } | ||
| 117 | |||
| 118 | commands_condition.notify_one(); | ||
| 119 | } | ||
| 120 | |||
| 121 | void WaitForCommands() { | ||
| 122 | std::unique_lock lock{commands_mutex}; | ||
| 123 | commands_condition.wait(lock, [this] { return !queue.Empty(); }); | ||
| 124 | } | ||
| 125 | |||
| 126 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; | 94 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; |
| 127 | CommandQueue queue; | 95 | CommandQueue queue; |
| 128 | u64 last_fence{}; | 96 | u64 last_fence{}; |
| @@ -142,8 +110,7 @@ public: | |||
| 142 | void SubmitList(Tegra::CommandList&& entries); | 110 | void SubmitList(Tegra::CommandList&& entries); |
| 143 | 111 | ||
| 144 | /// Swap buffers (render frame) | 112 | /// Swap buffers (render frame) |
| 145 | void SwapBuffers( | 113 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); |
| 146 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer); | ||
| 147 | 114 | ||
| 148 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 115 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 149 | void FlushRegion(CacheAddr addr, u64 size); | 116 | void FlushRegion(CacheAddr addr, u64 size); |
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index c766ed692..4e1cb98db 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp | |||
| @@ -4,17 +4,28 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "common/microprofile.h" | ||
| 7 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 8 | #include "video_core/macro_interpreter.h" | 9 | #include "video_core/macro_interpreter.h" |
| 9 | 10 | ||
| 11 | MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); | ||
| 12 | |||
| 10 | namespace Tegra { | 13 | namespace Tegra { |
| 11 | 14 | ||
| 12 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} | 15 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} |
| 13 | 16 | ||
| 14 | void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { | 17 | void MacroInterpreter::Execute(u32 offset, std::size_t num_parameters, const u32* parameters) { |
| 18 | MICROPROFILE_SCOPE(MacroInterp); | ||
| 15 | Reset(); | 19 | Reset(); |
| 20 | |||
| 16 | registers[1] = parameters[0]; | 21 | registers[1] = parameters[0]; |
| 17 | this->parameters = std::move(parameters); | 22 | |
| 23 | if (num_parameters > parameters_capacity) { | ||
| 24 | parameters_capacity = num_parameters; | ||
| 25 | this->parameters = std::make_unique<u32[]>(num_parameters); | ||
| 26 | } | ||
| 27 | std::memcpy(this->parameters.get(), parameters, num_parameters * sizeof(u32)); | ||
| 28 | this->num_parameters = num_parameters; | ||
| 18 | 29 | ||
| 19 | // Execute the code until we hit an exit condition. | 30 | // Execute the code until we hit an exit condition. |
| 20 | bool keep_executing = true; | 31 | bool keep_executing = true; |
| @@ -23,7 +34,7 @@ void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { | |||
| 23 | } | 34 | } |
| 24 | 35 | ||
| 25 | // Assert the the macro used all the input parameters | 36 | // Assert the the macro used all the input parameters |
| 26 | ASSERT(next_parameter_index == this->parameters.size()); | 37 | ASSERT(next_parameter_index == num_parameters); |
| 27 | } | 38 | } |
| 28 | 39 | ||
| 29 | void MacroInterpreter::Reset() { | 40 | void MacroInterpreter::Reset() { |
| @@ -31,7 +42,7 @@ void MacroInterpreter::Reset() { | |||
| 31 | pc = 0; | 42 | pc = 0; |
| 32 | delayed_pc = {}; | 43 | delayed_pc = {}; |
| 33 | method_address.raw = 0; | 44 | method_address.raw = 0; |
| 34 | parameters.clear(); | 45 | num_parameters = 0; |
| 35 | // The next parameter index starts at 1, because $r1 already has the value of the first | 46 | // The next parameter index starts at 1, because $r1 already has the value of the first |
| 36 | // parameter. | 47 | // parameter. |
| 37 | next_parameter_index = 1; | 48 | next_parameter_index = 1; |
| @@ -225,7 +236,8 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res | |||
| 225 | } | 236 | } |
| 226 | 237 | ||
| 227 | u32 MacroInterpreter::FetchParameter() { | 238 | u32 MacroInterpreter::FetchParameter() { |
| 228 | return parameters.at(next_parameter_index++); | 239 | ASSERT(next_parameter_index < num_parameters); |
| 240 | return parameters[next_parameter_index++]; | ||
| 229 | } | 241 | } |
| 230 | 242 | ||
| 231 | u32 MacroInterpreter::GetRegister(u32 register_id) const { | 243 | u32 MacroInterpreter::GetRegister(u32 register_id) const { |
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h index cde360288..76b6a895b 100644 --- a/src/video_core/macro_interpreter.h +++ b/src/video_core/macro_interpreter.h | |||
| @@ -25,7 +25,7 @@ public: | |||
| 25 | * @param offset Offset to start execution at. | 25 | * @param offset Offset to start execution at. |
| 26 | * @param parameters The parameters of the macro. | 26 | * @param parameters The parameters of the macro. |
| 27 | */ | 27 | */ |
| 28 | void Execute(u32 offset, std::vector<u32> parameters); | 28 | void Execute(u32 offset, std::size_t num_parameters, const u32* parameters); |
| 29 | 29 | ||
| 30 | private: | 30 | private: |
| 31 | enum class Operation : u32 { | 31 | enum class Operation : u32 { |
| @@ -162,10 +162,12 @@ private: | |||
| 162 | MethodAddress method_address = {}; | 162 | MethodAddress method_address = {}; |
| 163 | 163 | ||
| 164 | /// Input parameters of the current macro. | 164 | /// Input parameters of the current macro. |
| 165 | std::vector<u32> parameters; | 165 | std::unique_ptr<u32[]> parameters; |
| 166 | std::size_t num_parameters = 0; | ||
| 167 | std::size_t parameters_capacity = 0; | ||
| 166 | /// Index of the next parameter that will be fetched by the 'parm' instruction. | 168 | /// Index of the next parameter that will be fetched by the 'parm' instruction. |
| 167 | u32 next_parameter_index = 0; | 169 | u32 next_parameter_index = 0; |
| 168 | 170 | ||
| 169 | bool carry_flag{}; | 171 | bool carry_flag = false; |
| 170 | }; | 172 | }; |
| 171 | } // namespace Tegra | 173 | } // namespace Tegra |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 5d8d126c1..bffae940c 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -5,13 +5,17 @@ | |||
| 5 | #include "common/alignment.h" | 5 | #include "common/alignment.h" |
| 6 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 7 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 8 | #include "core/core.h" | ||
| 9 | #include "core/hle/kernel/process.h" | ||
| 10 | #include "core/hle/kernel/vm_manager.h" | ||
| 8 | #include "core/memory.h" | 11 | #include "core/memory.h" |
| 9 | #include "video_core/memory_manager.h" | 12 | #include "video_core/memory_manager.h" |
| 10 | #include "video_core/rasterizer_interface.h" | 13 | #include "video_core/rasterizer_interface.h" |
| 11 | 14 | ||
| 12 | namespace Tegra { | 15 | namespace Tegra { |
| 13 | 16 | ||
| 14 | MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} { | 17 | MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) |
| 18 | : rasterizer{rasterizer}, system{system} { | ||
| 15 | std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); | 19 | std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); |
| 16 | std::fill(page_table.attributes.begin(), page_table.attributes.end(), | 20 | std::fill(page_table.attributes.begin(), page_table.attributes.end(), |
| 17 | Common::PageType::Unmapped); | 21 | Common::PageType::Unmapped); |
| @@ -49,6 +53,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { | |||
| 49 | const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; | 53 | const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; |
| 50 | 54 | ||
| 51 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); | 55 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); |
| 56 | ASSERT(system.CurrentProcess() | ||
| 57 | ->VMManager() | ||
| 58 | .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, | ||
| 59 | Kernel::MemoryAttribute::DeviceMapped) | ||
| 60 | .IsSuccess()); | ||
| 52 | 61 | ||
| 53 | return gpu_addr; | 62 | return gpu_addr; |
| 54 | } | 63 | } |
| @@ -59,7 +68,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) | |||
| 59 | const u64 aligned_size{Common::AlignUp(size, page_size)}; | 68 | const u64 aligned_size{Common::AlignUp(size, page_size)}; |
| 60 | 69 | ||
| 61 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); | 70 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); |
| 62 | 71 | ASSERT(system.CurrentProcess() | |
| 72 | ->VMManager() | ||
| 73 | .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, | ||
| 74 | Kernel::MemoryAttribute::DeviceMapped) | ||
| 75 | .IsSuccess()); | ||
| 63 | return gpu_addr; | 76 | return gpu_addr; |
| 64 | } | 77 | } |
| 65 | 78 | ||
| @@ -68,9 +81,16 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { | |||
| 68 | 81 | ||
| 69 | const u64 aligned_size{Common::AlignUp(size, page_size)}; | 82 | const u64 aligned_size{Common::AlignUp(size, page_size)}; |
| 70 | const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; | 83 | const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; |
| 84 | const auto cpu_addr = GpuToCpuAddress(gpu_addr); | ||
| 85 | ASSERT(cpu_addr); | ||
| 71 | 86 | ||
| 72 | rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); | 87 | rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); |
| 73 | UnmapRange(gpu_addr, aligned_size); | 88 | UnmapRange(gpu_addr, aligned_size); |
| 89 | ASSERT(system.CurrentProcess() | ||
| 90 | ->VMManager() | ||
| 91 | .SetMemoryAttribute(cpu_addr.value(), size, Kernel::MemoryAttribute::DeviceMapped, | ||
| 92 | Kernel::MemoryAttribute::None) | ||
| 93 | .IsSuccess()); | ||
| 74 | 94 | ||
| 75 | return gpu_addr; | 95 | return gpu_addr; |
| 76 | } | 96 | } |
| @@ -202,11 +222,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { | |||
| 202 | } | 222 | } |
| 203 | 223 | ||
| 204 | bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const { | 224 | bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const { |
| 205 | const GPUVAddr end = start + size; | 225 | const std::size_t inner_size = size - 1; |
| 226 | const GPUVAddr end = start + inner_size; | ||
| 206 | const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start)); | 227 | const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start)); |
| 207 | const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end)); | 228 | const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end)); |
| 208 | const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start); | 229 | const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start); |
| 209 | return range == size; | 230 | return range == inner_size; |
| 210 | } | 231 | } |
| 211 | 232 | ||
| 212 | void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const { | 233 | void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const { |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 43a84bd52..aea010087 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -14,6 +14,10 @@ namespace VideoCore { | |||
| 14 | class RasterizerInterface; | 14 | class RasterizerInterface; |
| 15 | } | 15 | } |
| 16 | 16 | ||
| 17 | namespace Core { | ||
| 18 | class System; | ||
| 19 | } | ||
| 20 | |||
| 17 | namespace Tegra { | 21 | namespace Tegra { |
| 18 | 22 | ||
| 19 | /** | 23 | /** |
| @@ -47,7 +51,7 @@ struct VirtualMemoryArea { | |||
| 47 | 51 | ||
| 48 | class MemoryManager final { | 52 | class MemoryManager final { |
| 49 | public: | 53 | public: |
| 50 | explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer); | 54 | explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer); |
| 51 | ~MemoryManager(); | 55 | ~MemoryManager(); |
| 52 | 56 | ||
| 53 | GPUVAddr AllocateSpace(u64 size, u64 align); | 57 | GPUVAddr AllocateSpace(u64 size, u64 align); |
| @@ -173,6 +177,8 @@ private: | |||
| 173 | Common::PageTable page_table{page_bits}; | 177 | Common::PageTable page_table{page_bits}; |
| 174 | VMAMap vma_map; | 178 | VMAMap vma_map; |
| 175 | VideoCore::RasterizerInterface& rasterizer; | 179 | VideoCore::RasterizerInterface& rasterizer; |
| 180 | |||
| 181 | Core::System& system; | ||
| 176 | }; | 182 | }; |
| 177 | 183 | ||
| 178 | } // namespace Tegra | 184 | } // namespace Tegra |
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 3e91cbc83..084f85e67 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp | |||
| @@ -25,8 +25,8 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth | |||
| 25 | 25 | ||
| 26 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual | 26 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual |
| 27 | // pixel values. | 27 | // pixel values. |
| 28 | const u32 tile_size_x{GetDefaultBlockWidth(format)}; | 28 | constexpr u32 tile_size_x{GetDefaultBlockWidth(format)}; |
| 29 | const u32 tile_size_y{GetDefaultBlockHeight(format)}; | 29 | constexpr u32 tile_size_y{GetDefaultBlockHeight(format)}; |
| 30 | 30 | ||
| 31 | if constexpr (morton_to_linear) { | 31 | if constexpr (morton_to_linear) { |
| 32 | Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, | 32 | Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, |
| @@ -186,99 +186,6 @@ static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFor | |||
| 186 | return morton_to_linear_fns[static_cast<std::size_t>(format)]; | 186 | return morton_to_linear_fns[static_cast<std::size_t>(format)]; |
| 187 | } | 187 | } |
| 188 | 188 | ||
| 189 | static u32 MortonInterleave128(u32 x, u32 y) { | ||
| 190 | // 128x128 Z-Order coordinate from 2D coordinates | ||
| 191 | static constexpr u32 xlut[] = { | ||
| 192 | 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, | ||
| 193 | 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, | ||
| 194 | 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, | ||
| 195 | 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, | ||
| 196 | 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, | ||
| 197 | 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, | ||
| 198 | 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, | ||
| 199 | 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, | ||
| 200 | 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, | ||
| 201 | 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, | ||
| 202 | 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, | ||
| 203 | 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, | ||
| 204 | 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, | ||
| 205 | 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, | ||
| 206 | 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, | ||
| 207 | 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, | ||
| 208 | 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, | ||
| 209 | 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, | ||
| 210 | 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, | ||
| 211 | 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, | ||
| 212 | 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, | ||
| 213 | 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, | ||
| 214 | 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, | ||
| 215 | 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, | ||
| 216 | 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, | ||
| 217 | 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, | ||
| 218 | 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, | ||
| 219 | 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, | ||
| 220 | 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, | ||
| 221 | 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, | ||
| 222 | 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, | ||
| 223 | 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, | ||
| 224 | 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, | ||
| 225 | 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, | ||
| 226 | 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, | ||
| 227 | }; | ||
| 228 | static constexpr u32 ylut[] = { | ||
| 229 | 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, | ||
| 230 | 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, | ||
| 231 | 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, | ||
| 232 | 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, | ||
| 233 | 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, | ||
| 234 | 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, | ||
| 235 | 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, | ||
| 236 | 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, | ||
| 237 | 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, | ||
| 238 | 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, | ||
| 239 | 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, | ||
| 240 | 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, | ||
| 241 | 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, | ||
| 242 | 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, | ||
| 243 | 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, | ||
| 244 | 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, | ||
| 245 | 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, | ||
| 246 | 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, | ||
| 247 | 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, | ||
| 248 | 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, | ||
| 249 | 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, | ||
| 250 | 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, | ||
| 251 | 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, | ||
| 252 | 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, | ||
| 253 | 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, | ||
| 254 | 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, | ||
| 255 | 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, | ||
| 256 | 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, | ||
| 257 | 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, | ||
| 258 | 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, | ||
| 259 | 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, | ||
| 260 | 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, | ||
| 261 | 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, | ||
| 262 | 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, | ||
| 263 | 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, | ||
| 264 | }; | ||
| 265 | return xlut[x % 128] + ylut[y % 128]; | ||
| 266 | } | ||
| 267 | |||
| 268 | static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) { | ||
| 269 | // Calculates the offset of the position of the pixel in Morton order | ||
| 270 | // Framebuffer images are split into 128x128 tiles. | ||
| 271 | |||
| 272 | constexpr u32 block_height = 128; | ||
| 273 | const u32 coarse_x = x & ~127; | ||
| 274 | |||
| 275 | const u32 i = MortonInterleave128(x, y); | ||
| 276 | |||
| 277 | const u32 offset = coarse_x * block_height; | ||
| 278 | |||
| 279 | return (i + offset) * bytes_per_pixel; | ||
| 280 | } | ||
| 281 | |||
| 282 | void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, | 189 | void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, |
| 283 | u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, | 190 | u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, |
| 284 | u8* buffer, u8* addr) { | 191 | u8* buffer, u8* addr) { |
| @@ -286,23 +193,4 @@ void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stri | |||
| 286 | tile_width_spacing, buffer, addr); | 193 | tile_width_spacing, buffer, addr); |
| 287 | } | 194 | } |
| 288 | 195 | ||
| 289 | void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel, | ||
| 290 | u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data) { | ||
| 291 | const bool morton_to_linear = mode == MortonSwizzleMode::MortonToLinear; | ||
| 292 | u8* data_ptrs[2]; | ||
| 293 | for (u32 y = 0; y < height; ++y) { | ||
| 294 | for (u32 x = 0; x < width; ++x) { | ||
| 295 | const u32 coarse_y = y & ~127; | ||
| 296 | const u32 morton_offset = | ||
| 297 | GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; | ||
| 298 | const u32 linear_pixel_index = (x + y * width) * linear_bytes_per_pixel; | ||
| 299 | |||
| 300 | data_ptrs[morton_to_linear ? 1 : 0] = morton_data + morton_offset; | ||
| 301 | data_ptrs[morton_to_linear ? 0 : 1] = &linear_data[linear_pixel_index]; | ||
| 302 | |||
| 303 | std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); | ||
| 304 | } | ||
| 305 | } | ||
| 306 | } | ||
| 307 | |||
| 308 | } // namespace VideoCore | 196 | } // namespace VideoCore |
diff --git a/src/video_core/morton.h b/src/video_core/morton.h index ee5b45555..b714a7e3f 100644 --- a/src/video_core/morton.h +++ b/src/video_core/morton.h | |||
| @@ -15,7 +15,4 @@ void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat forma | |||
| 15 | u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, | 15 | u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, |
| 16 | u8* buffer, u8* addr); | 16 | u8* buffer, u8* addr); |
| 17 | 17 | ||
| 18 | void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel, | ||
| 19 | u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data); | ||
| 20 | |||
| 21 | } // namespace VideoCore | 18 | } // namespace VideoCore |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index d7b86df38..6b3f2d50a 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -10,6 +10,10 @@ | |||
| 10 | #include "video_core/engines/fermi_2d.h" | 10 | #include "video_core/engines/fermi_2d.h" |
| 11 | #include "video_core/gpu.h" | 11 | #include "video_core/gpu.h" |
| 12 | 12 | ||
| 13 | namespace Tegra { | ||
| 14 | class MemoryManager; | ||
| 15 | } | ||
| 16 | |||
| 13 | namespace VideoCore { | 17 | namespace VideoCore { |
| 14 | 18 | ||
| 15 | enum class LoadCallbackStage { | 19 | enum class LoadCallbackStage { |
| @@ -30,6 +34,9 @@ public: | |||
| 30 | /// Clear the current framebuffer | 34 | /// Clear the current framebuffer |
| 31 | virtual void Clear() = 0; | 35 | virtual void Clear() = 0; |
| 32 | 36 | ||
| 37 | /// Dispatches a compute shader invocation | ||
| 38 | virtual void DispatchCompute(GPUVAddr code_addr) = 0; | ||
| 39 | |||
| 33 | /// Notify rasterizer that all caches should be flushed to Switch memory | 40 | /// Notify rasterizer that all caches should be flushed to Switch memory |
| 34 | virtual void FlushAll() = 0; | 41 | virtual void FlushAll() = 0; |
| 35 | 42 | ||
| @@ -43,11 +50,16 @@ public: | |||
| 43 | /// and invalidated | 50 | /// and invalidated |
| 44 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; | 51 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; |
| 45 | 52 | ||
| 53 | /// Notify the rasterizer to send all written commands to the host GPU. | ||
| 54 | virtual void FlushCommands() = 0; | ||
| 55 | |||
| 56 | /// Notify rasterizer that a frame is about to finish | ||
| 57 | virtual void TickFrame() = 0; | ||
| 58 | |||
| 46 | /// Attempt to use a faster method to perform a surface copy | 59 | /// Attempt to use a faster method to perform a surface copy |
| 47 | virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 60 | virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 48 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 61 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
| 49 | const Common::Rectangle<u32>& src_rect, | 62 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 50 | const Common::Rectangle<u32>& dst_rect) { | ||
| 51 | return false; | 63 | return false; |
| 52 | } | 64 | } |
| 53 | 65 | ||
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 1d54c3723..af1bebc4f 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h | |||
| @@ -36,8 +36,7 @@ public: | |||
| 36 | virtual ~RendererBase(); | 36 | virtual ~RendererBase(); |
| 37 | 37 | ||
| 38 | /// Swap buffers (render frame) | 38 | /// Swap buffers (render frame) |
| 39 | virtual void SwapBuffers( | 39 | virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; |
| 40 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0; | ||
| 41 | 40 | ||
| 42 | /// Initialize the renderer | 41 | /// Initialize the renderer |
| 43 | virtual bool Init() = 0; | 42 | virtual bool Init() = 0; |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2b9bd142e..f8a807c84 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -2,103 +2,71 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | ||
| 6 | #include <memory> | 5 | #include <memory> |
| 7 | 6 | ||
| 8 | #include "common/alignment.h" | 7 | #include <glad/glad.h> |
| 9 | #include "core/core.h" | 8 | |
| 10 | #include "video_core/memory_manager.h" | 9 | #include "common/assert.h" |
| 10 | #include "common/microprofile.h" | ||
| 11 | #include "video_core/rasterizer_interface.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 12 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 12 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 13 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 13 | 15 | ||
| 14 | namespace OpenGL { | 16 | namespace OpenGL { |
| 15 | 17 | ||
| 16 | CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, | 18 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); |
| 17 | std::size_t alignment, u8* host_ptr) | 19 | |
| 18 | : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, | 20 | CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) |
| 19 | alignment{alignment} {} | 21 | : VideoCommon::BufferBlock{cache_addr, size} { |
| 20 | 22 | gl_buffer.Create(); | |
| 21 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) | 23 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); |
| 22 | : RasterizerCache{rasterizer}, stream_buffer(size, true) {} | ||
| 23 | |||
| 24 | GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment, | ||
| 25 | bool cache) { | ||
| 26 | std::lock_guard lock{mutex}; | ||
| 27 | auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | ||
| 28 | |||
| 29 | // Cache management is a big overhead, so only cache entries with a given size. | ||
| 30 | // TODO: Figure out which size is the best for given games. | ||
| 31 | cache &= size >= 2048; | ||
| 32 | |||
| 33 | const auto& host_ptr{memory_manager.GetPointer(gpu_addr)}; | ||
| 34 | if (cache) { | ||
| 35 | auto entry = TryGet(host_ptr); | ||
| 36 | if (entry) { | ||
| 37 | if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { | ||
| 38 | return entry->GetOffset(); | ||
| 39 | } | ||
| 40 | Unregister(entry); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | AlignBuffer(alignment); | ||
| 45 | const GLintptr uploaded_offset = buffer_offset; | ||
| 46 | |||
| 47 | if (!host_ptr) { | ||
| 48 | return uploaded_offset; | ||
| 49 | } | ||
| 50 | |||
| 51 | std::memcpy(buffer_ptr, host_ptr, size); | ||
| 52 | buffer_ptr += size; | ||
| 53 | buffer_offset += size; | ||
| 54 | |||
| 55 | if (cache) { | ||
| 56 | auto entry = std::make_shared<CachedBufferEntry>( | ||
| 57 | *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr); | ||
| 58 | Register(entry); | ||
| 59 | } | ||
| 60 | |||
| 61 | return uploaded_offset; | ||
| 62 | } | 24 | } |
| 63 | 25 | ||
| 64 | GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, | 26 | CachedBufferBlock::~CachedBufferBlock() = default; |
| 65 | std::size_t alignment) { | 27 | |
| 66 | std::lock_guard lock{mutex}; | 28 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 67 | AlignBuffer(alignment); | 29 | std::size_t stream_size) |
| 68 | std::memcpy(buffer_ptr, raw_pointer, size); | 30 | : VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{ |
| 69 | const GLintptr uploaded_offset = buffer_offset; | 31 | rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {} |
| 32 | |||
| 33 | OGLBufferCache::~OGLBufferCache() = default; | ||
| 70 | 34 | ||
| 71 | buffer_ptr += size; | 35 | Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { |
| 72 | buffer_offset += size; | 36 | return std::make_shared<CachedBufferBlock>(cache_addr, size); |
| 73 | return uploaded_offset; | ||
| 74 | } | 37 | } |
| 75 | 38 | ||
| 76 | bool OGLBufferCache::Map(std::size_t max_size) { | 39 | void OGLBufferCache::WriteBarrier() { |
| 77 | bool invalidate; | 40 | glMemoryBarrier(GL_ALL_BARRIER_BITS); |
| 78 | std::tie(buffer_ptr, buffer_offset_base, invalidate) = | 41 | } |
| 79 | stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); | 42 | |
| 80 | buffer_offset = buffer_offset_base; | 43 | const GLuint* OGLBufferCache::ToHandle(const Buffer& buffer) { |
| 44 | return buffer->GetHandle(); | ||
| 45 | } | ||
| 81 | 46 | ||
| 82 | if (invalidate) { | 47 | const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { |
| 83 | InvalidateAll(); | 48 | static const GLuint null_buffer = 0; |
| 84 | } | 49 | return &null_buffer; |
| 85 | return invalidate; | ||
| 86 | } | 50 | } |
| 87 | 51 | ||
| 88 | void OGLBufferCache::Unmap() { | 52 | void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 89 | stream_buffer.Unmap(buffer_offset - buffer_offset_base); | 53 | const u8* data) { |
| 54 | glNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset), | ||
| 55 | static_cast<GLsizeiptr>(size), data); | ||
| 90 | } | 56 | } |
| 91 | 57 | ||
| 92 | GLuint OGLBufferCache::GetHandle() const { | 58 | void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 93 | return stream_buffer.GetHandle(); | 59 | u8* data) { |
| 60 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); | ||
| 61 | glGetNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset), | ||
| 62 | static_cast<GLsizeiptr>(size), data); | ||
| 94 | } | 63 | } |
| 95 | 64 | ||
| 96 | void OGLBufferCache::AlignBuffer(std::size_t alignment) { | 65 | void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, |
| 97 | // Align the offset, not the mapped pointer | 66 | std::size_t dst_offset, std::size_t size) { |
| 98 | const GLintptr offset_aligned = | 67 | glCopyNamedBufferSubData(*src->GetHandle(), *dst->GetHandle(), |
| 99 | static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); | 68 | static_cast<GLintptr>(src_offset), static_cast<GLintptr>(dst_offset), |
| 100 | buffer_ptr += offset_aligned - buffer_offset; | 69 | static_cast<GLsizeiptr>(size)); |
| 101 | buffer_offset = offset_aligned; | ||
| 102 | } | 70 | } |
| 103 | 71 | ||
| 104 | } // namespace OpenGL | 72 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index f2347581b..022e7bfa9 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -4,80 +4,63 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstddef> | ||
| 8 | #include <memory> | 7 | #include <memory> |
| 9 | #include <tuple> | ||
| 10 | 8 | ||
| 11 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | ||
| 12 | #include "video_core/rasterizer_cache.h" | 11 | #include "video_core/rasterizer_cache.h" |
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 14 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 13 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 15 | 14 | ||
| 15 | namespace Core { | ||
| 16 | class System; | ||
| 17 | } | ||
| 18 | |||
| 16 | namespace OpenGL { | 19 | namespace OpenGL { |
| 17 | 20 | ||
| 21 | class OGLStreamBuffer; | ||
| 18 | class RasterizerOpenGL; | 22 | class RasterizerOpenGL; |
| 19 | 23 | ||
| 20 | class CachedBufferEntry final : public RasterizerCacheObject { | 24 | class CachedBufferBlock; |
| 21 | public: | ||
| 22 | explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, | ||
| 23 | std::size_t alignment, u8* host_ptr); | ||
| 24 | |||
| 25 | VAddr GetCpuAddr() const override { | ||
| 26 | return cpu_addr; | ||
| 27 | } | ||
| 28 | 25 | ||
| 29 | std::size_t GetSizeInBytes() const override { | 26 | using Buffer = std::shared_ptr<CachedBufferBlock>; |
| 30 | return size; | ||
| 31 | } | ||
| 32 | |||
| 33 | std::size_t GetSize() const { | ||
| 34 | return size; | ||
| 35 | } | ||
| 36 | 27 | ||
| 37 | GLintptr GetOffset() const { | 28 | class CachedBufferBlock : public VideoCommon::BufferBlock { |
| 38 | return offset; | 29 | public: |
| 39 | } | 30 | explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size); |
| 31 | ~CachedBufferBlock(); | ||
| 40 | 32 | ||
| 41 | std::size_t GetAlignment() const { | 33 | const GLuint* GetHandle() const { |
| 42 | return alignment; | 34 | return &gl_buffer.handle; |
| 43 | } | 35 | } |
| 44 | 36 | ||
| 45 | private: | 37 | private: |
| 46 | VAddr cpu_addr{}; | 38 | OGLBuffer gl_buffer{}; |
| 47 | std::size_t size{}; | ||
| 48 | GLintptr offset{}; | ||
| 49 | std::size_t alignment{}; | ||
| 50 | }; | 39 | }; |
| 51 | 40 | ||
| 52 | class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | 41 | class OGLBufferCache final : public VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> { |
| 53 | public: | 42 | public: |
| 54 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); | 43 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 55 | 44 | std::size_t stream_size); | |
| 56 | /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been | 45 | ~OGLBufferCache(); |
| 57 | /// allocated. | ||
| 58 | GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | ||
| 59 | bool cache = true); | ||
| 60 | 46 | ||
| 61 | /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. | 47 | const GLuint* GetEmptyBuffer(std::size_t) override; |
| 62 | GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); | ||
| 63 | 48 | ||
| 64 | bool Map(std::size_t max_size); | 49 | protected: |
| 65 | void Unmap(); | 50 | Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; |
| 66 | 51 | ||
| 67 | GLuint GetHandle() const; | 52 | void WriteBarrier() override; |
| 68 | 53 | ||
| 69 | protected: | 54 | const GLuint* ToHandle(const Buffer& buffer) override; |
| 70 | void AlignBuffer(std::size_t alignment); | ||
| 71 | 55 | ||
| 72 | // We do not have to flush this cache as things in it are never modified by us. | 56 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 73 | void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} | 57 | const u8* data) override; |
| 74 | 58 | ||
| 75 | private: | 59 | void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 76 | OGLStreamBuffer stream_buffer; | 60 | u8* data) override; |
| 77 | 61 | ||
| 78 | u8* buffer_ptr = nullptr; | 62 | void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, |
| 79 | GLintptr buffer_offset = 0; | 63 | std::size_t dst_offset, std::size_t size) override; |
| 80 | GLintptr buffer_offset_base = 0; | ||
| 81 | }; | 64 | }; |
| 82 | 65 | ||
| 83 | } // namespace OpenGL | 66 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a48e14d2e..4f59a87b4 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -14,52 +14,64 @@ | |||
| 14 | namespace OpenGL { | 14 | namespace OpenGL { |
| 15 | 15 | ||
| 16 | namespace { | 16 | namespace { |
| 17 | |||
| 17 | template <typename T> | 18 | template <typename T> |
| 18 | T GetInteger(GLenum pname) { | 19 | T GetInteger(GLenum pname) { |
| 19 | GLint temporary; | 20 | GLint temporary; |
| 20 | glGetIntegerv(pname, &temporary); | 21 | glGetIntegerv(pname, &temporary); |
| 21 | return static_cast<T>(temporary); | 22 | return static_cast<T>(temporary); |
| 22 | } | 23 | } |
| 24 | |||
| 25 | bool TestProgram(const GLchar* glsl) { | ||
| 26 | const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &glsl)}; | ||
| 27 | GLint link_status; | ||
| 28 | glGetProgramiv(shader, GL_LINK_STATUS, &link_status); | ||
| 29 | glDeleteProgram(shader); | ||
| 30 | return link_status == GL_TRUE; | ||
| 31 | } | ||
| 32 | |||
| 23 | } // Anonymous namespace | 33 | } // Anonymous namespace |
| 24 | 34 | ||
| 25 | Device::Device() { | 35 | Device::Device() { |
| 26 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 36 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 37 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | ||
| 27 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 38 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
| 28 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); | 39 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); |
| 40 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && | ||
| 41 | GLAD_GL_NV_shader_thread_shuffle; | ||
| 42 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; | ||
| 29 | has_variable_aoffi = TestVariableAoffi(); | 43 | has_variable_aoffi = TestVariableAoffi(); |
| 30 | has_component_indexing_bug = TestComponentIndexingBug(); | 44 | has_component_indexing_bug = TestComponentIndexingBug(); |
| 45 | has_precise_bug = TestPreciseBug(); | ||
| 46 | |||
| 47 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); | ||
| 48 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); | ||
| 49 | LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); | ||
| 31 | } | 50 | } |
| 32 | 51 | ||
| 33 | Device::Device(std::nullptr_t) { | 52 | Device::Device(std::nullptr_t) { |
| 34 | uniform_buffer_alignment = 0; | 53 | uniform_buffer_alignment = 0; |
| 35 | max_vertex_attributes = 16; | 54 | max_vertex_attributes = 16; |
| 36 | max_varyings = 15; | 55 | max_varyings = 15; |
| 56 | has_warp_intrinsics = true; | ||
| 57 | has_vertex_viewport_layer = true; | ||
| 37 | has_variable_aoffi = true; | 58 | has_variable_aoffi = true; |
| 38 | has_component_indexing_bug = false; | 59 | has_component_indexing_bug = false; |
| 60 | has_precise_bug = false; | ||
| 39 | } | 61 | } |
| 40 | 62 | ||
| 41 | bool Device::TestVariableAoffi() { | 63 | bool Device::TestVariableAoffi() { |
| 42 | const GLchar* AOFFI_TEST = R"(#version 430 core | 64 | return TestProgram(R"(#version 430 core |
| 43 | // This is a unit test, please ignore me on apitrace bug reports. | 65 | // This is a unit test, please ignore me on apitrace bug reports. |
| 44 | uniform sampler2D tex; | 66 | uniform sampler2D tex; |
| 45 | uniform ivec2 variable_offset; | 67 | uniform ivec2 variable_offset; |
| 46 | out vec4 output_attribute; | 68 | out vec4 output_attribute; |
| 47 | void main() { | 69 | void main() { |
| 48 | output_attribute = textureOffset(tex, vec2(0), variable_offset); | 70 | output_attribute = textureOffset(tex, vec2(0), variable_offset); |
| 49 | } | 71 | })"); |
| 50 | )"; | ||
| 51 | const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &AOFFI_TEST)}; | ||
| 52 | GLint link_status{}; | ||
| 53 | glGetProgramiv(shader, GL_LINK_STATUS, &link_status); | ||
| 54 | glDeleteProgram(shader); | ||
| 55 | |||
| 56 | const bool supported{link_status == GL_TRUE}; | ||
| 57 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", supported); | ||
| 58 | return supported; | ||
| 59 | } | 72 | } |
| 60 | 73 | ||
| 61 | bool Device::TestComponentIndexingBug() { | 74 | bool Device::TestComponentIndexingBug() { |
| 62 | constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}"; | ||
| 63 | const GLchar* COMPONENT_TEST = R"(#version 430 core | 75 | const GLchar* COMPONENT_TEST = R"(#version 430 core |
| 64 | layout (std430, binding = 0) buffer OutputBuffer { | 76 | layout (std430, binding = 0) buffer OutputBuffer { |
| 65 | uint output_value; | 77 | uint output_value; |
| @@ -99,12 +111,21 @@ void main() { | |||
| 99 | GLuint result; | 111 | GLuint result; |
| 100 | glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result); | 112 | glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result); |
| 101 | if (result != values.at(index)) { | 113 | if (result != values.at(index)) { |
| 102 | LOG_INFO(Render_OpenGL, log_message, true); | ||
| 103 | return true; | 114 | return true; |
| 104 | } | 115 | } |
| 105 | } | 116 | } |
| 106 | LOG_INFO(Render_OpenGL, log_message, false); | ||
| 107 | return false; | 117 | return false; |
| 108 | } | 118 | } |
| 109 | 119 | ||
| 120 | bool Device::TestPreciseBug() { | ||
| 121 | return !TestProgram(R"(#version 430 core | ||
| 122 | in vec3 coords; | ||
| 123 | out float out_value; | ||
| 124 | uniform sampler2DShadow tex; | ||
| 125 | void main() { | ||
| 126 | precise float tmp_value = vec4(texture(tex, coords)).x; | ||
| 127 | out_value = tmp_value; | ||
| 128 | })"); | ||
| 129 | } | ||
| 130 | |||
| 110 | } // namespace OpenGL | 131 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8c8c93760..ba6dcd3be 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -18,6 +18,10 @@ public: | |||
| 18 | return uniform_buffer_alignment; | 18 | return uniform_buffer_alignment; |
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | std::size_t GetShaderStorageBufferAlignment() const { | ||
| 22 | return shader_storage_alignment; | ||
| 23 | } | ||
| 24 | |||
| 21 | u32 GetMaxVertexAttributes() const { | 25 | u32 GetMaxVertexAttributes() const { |
| 22 | return max_vertex_attributes; | 26 | return max_vertex_attributes; |
| 23 | } | 27 | } |
| @@ -26,6 +30,14 @@ public: | |||
| 26 | return max_varyings; | 30 | return max_varyings; |
| 27 | } | 31 | } |
| 28 | 32 | ||
| 33 | bool HasWarpIntrinsics() const { | ||
| 34 | return has_warp_intrinsics; | ||
| 35 | } | ||
| 36 | |||
| 37 | bool HasVertexViewportLayer() const { | ||
| 38 | return has_vertex_viewport_layer; | ||
| 39 | } | ||
| 40 | |||
| 29 | bool HasVariableAoffi() const { | 41 | bool HasVariableAoffi() const { |
| 30 | return has_variable_aoffi; | 42 | return has_variable_aoffi; |
| 31 | } | 43 | } |
| @@ -34,15 +46,24 @@ public: | |||
| 34 | return has_component_indexing_bug; | 46 | return has_component_indexing_bug; |
| 35 | } | 47 | } |
| 36 | 48 | ||
| 49 | bool HasPreciseBug() const { | ||
| 50 | return has_precise_bug; | ||
| 51 | } | ||
| 52 | |||
| 37 | private: | 53 | private: |
| 38 | static bool TestVariableAoffi(); | 54 | static bool TestVariableAoffi(); |
| 39 | static bool TestComponentIndexingBug(); | 55 | static bool TestComponentIndexingBug(); |
| 56 | static bool TestPreciseBug(); | ||
| 40 | 57 | ||
| 41 | std::size_t uniform_buffer_alignment{}; | 58 | std::size_t uniform_buffer_alignment{}; |
| 59 | std::size_t shader_storage_alignment{}; | ||
| 42 | u32 max_vertex_attributes{}; | 60 | u32 max_vertex_attributes{}; |
| 43 | u32 max_varyings{}; | 61 | u32 max_varyings{}; |
| 62 | bool has_warp_intrinsics{}; | ||
| 63 | bool has_vertex_viewport_layer{}; | ||
| 44 | bool has_variable_aoffi{}; | 64 | bool has_variable_aoffi{}; |
| 45 | bool has_component_indexing_bug{}; | 65 | bool has_component_indexing_bug{}; |
| 66 | bool has_precise_bug{}; | ||
| 46 | }; | 67 | }; |
| 47 | 68 | ||
| 48 | } // namespace OpenGL | 69 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp new file mode 100644 index 000000000..7c926bd48 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp | |||
| @@ -0,0 +1,75 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | |||
| 7 | #include "common/cityhash.h" | ||
| 8 | #include "common/scope_exit.h" | ||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_state.h" | ||
| 12 | |||
| 13 | namespace OpenGL { | ||
| 14 | |||
| 15 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 16 | |||
| 17 | FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default; | ||
| 18 | |||
| 19 | FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default; | ||
| 20 | |||
| 21 | GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) { | ||
| 22 | const auto [entry, is_cache_miss] = cache.try_emplace(key); | ||
| 23 | auto& framebuffer{entry->second}; | ||
| 24 | if (is_cache_miss) { | ||
| 25 | framebuffer = CreateFramebuffer(key); | ||
| 26 | } | ||
| 27 | return framebuffer.handle; | ||
| 28 | } | ||
| 29 | |||
| 30 | OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) { | ||
| 31 | OGLFramebuffer framebuffer; | ||
| 32 | framebuffer.Create(); | ||
| 33 | |||
| 34 | // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. | ||
| 35 | local_state.draw.draw_framebuffer = framebuffer.handle; | ||
| 36 | local_state.ApplyFramebufferState(); | ||
| 37 | |||
| 38 | if (key.is_single_buffer) { | ||
| 39 | if (key.color_attachments[0] != GL_NONE && key.colors[0]) { | ||
| 40 | key.colors[0]->Attach(key.color_attachments[0], GL_DRAW_FRAMEBUFFER); | ||
| 41 | glDrawBuffer(key.color_attachments[0]); | ||
| 42 | } else { | ||
| 43 | glDrawBuffer(GL_NONE); | ||
| 44 | } | ||
| 45 | } else { | ||
| 46 | for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | ||
| 47 | if (key.colors[index]) { | ||
| 48 | key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), | ||
| 49 | GL_DRAW_FRAMEBUFFER); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | glDrawBuffers(key.colors_count, key.color_attachments.data()); | ||
| 53 | } | ||
| 54 | |||
| 55 | if (key.zeta) { | ||
| 56 | key.zeta->Attach(key.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT, | ||
| 57 | GL_DRAW_FRAMEBUFFER); | ||
| 58 | } | ||
| 59 | |||
| 60 | return framebuffer; | ||
| 61 | } | ||
| 62 | |||
| 63 | std::size_t FramebufferCacheKey::Hash() const { | ||
| 64 | static_assert(sizeof(*this) % sizeof(u64) == 0, "Unaligned struct"); | ||
| 65 | return static_cast<std::size_t>( | ||
| 66 | Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this))); | ||
| 67 | } | ||
| 68 | |||
| 69 | bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const { | ||
| 70 | return std::tie(is_single_buffer, stencil_enable, colors_count, color_attachments, colors, | ||
| 71 | zeta) == std::tie(rhs.is_single_buffer, rhs.stencil_enable, rhs.colors_count, | ||
| 72 | rhs.color_attachments, rhs.colors, rhs.zeta); | ||
| 73 | } | ||
| 74 | |||
| 75 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h new file mode 100644 index 000000000..a3a996353 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.h | |||
| @@ -0,0 +1,68 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <unordered_map> | ||
| 10 | |||
| 11 | #include <glad/glad.h> | ||
| 12 | |||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_state.h" | ||
| 17 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 18 | |||
| 19 | namespace OpenGL { | ||
| 20 | |||
| 21 | struct alignas(sizeof(u64)) FramebufferCacheKey { | ||
| 22 | bool is_single_buffer = false; | ||
| 23 | bool stencil_enable = false; | ||
| 24 | u16 colors_count = 0; | ||
| 25 | |||
| 26 | std::array<GLenum, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_attachments{}; | ||
| 27 | std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors; | ||
| 28 | View zeta; | ||
| 29 | |||
| 30 | std::size_t Hash() const; | ||
| 31 | |||
| 32 | bool operator==(const FramebufferCacheKey& rhs) const; | ||
| 33 | |||
| 34 | bool operator!=(const FramebufferCacheKey& rhs) const { | ||
| 35 | return !operator==(rhs); | ||
| 36 | } | ||
| 37 | }; | ||
| 38 | |||
| 39 | } // namespace OpenGL | ||
| 40 | |||
| 41 | namespace std { | ||
| 42 | |||
| 43 | template <> | ||
| 44 | struct hash<OpenGL::FramebufferCacheKey> { | ||
| 45 | std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept { | ||
| 46 | return k.Hash(); | ||
| 47 | } | ||
| 48 | }; | ||
| 49 | |||
| 50 | } // namespace std | ||
| 51 | |||
| 52 | namespace OpenGL { | ||
| 53 | |||
| 54 | class FramebufferCacheOpenGL { | ||
| 55 | public: | ||
| 56 | FramebufferCacheOpenGL(); | ||
| 57 | ~FramebufferCacheOpenGL(); | ||
| 58 | |||
| 59 | GLuint GetFramebuffer(const FramebufferCacheKey& key); | ||
| 60 | |||
| 61 | private: | ||
| 62 | OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); | ||
| 63 | |||
| 64 | OpenGLState local_state; | ||
| 65 | std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache; | ||
| 66 | }; | ||
| 67 | |||
| 68 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp deleted file mode 100644 index d5e385151..000000000 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ /dev/null | |||
| @@ -1,102 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <glad/glad.h> | ||
| 6 | |||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "core/core.h" | ||
| 9 | #include "video_core/memory_manager.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_global_cache.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 13 | #include "video_core/renderer_opengl/utils.h" | ||
| 14 | |||
| 15 | namespace OpenGL { | ||
| 16 | |||
| 17 | CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size) | ||
| 18 | : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size}, | ||
| 19 | max_size{max_size} { | ||
| 20 | buffer.Create(); | ||
| 21 | LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); | ||
| 22 | } | ||
| 23 | |||
| 24 | CachedGlobalRegion::~CachedGlobalRegion() = default; | ||
| 25 | |||
| 26 | void CachedGlobalRegion::Reload(u32 size_) { | ||
| 27 | size = size_; | ||
| 28 | if (size > max_size) { | ||
| 29 | size = max_size; | ||
| 30 | LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_, | ||
| 31 | max_size); | ||
| 32 | } | ||
| 33 | glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW); | ||
| 34 | } | ||
| 35 | |||
| 36 | void CachedGlobalRegion::Flush() { | ||
| 37 | LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr); | ||
| 38 | glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr); | ||
| 39 | } | ||
| 40 | |||
| 41 | GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { | ||
| 42 | const auto search{reserve.find(addr)}; | ||
| 43 | if (search == reserve.end()) { | ||
| 44 | return {}; | ||
| 45 | } | ||
| 46 | return search->second; | ||
| 47 | } | ||
| 48 | |||
| 49 | GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, | ||
| 50 | u32 size) { | ||
| 51 | GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; | ||
| 52 | if (!region) { | ||
| 53 | // No reserved surface available, create a new one and reserve it | ||
| 54 | auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; | ||
| 55 | const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)}; | ||
| 56 | ASSERT(cpu_addr); | ||
| 57 | |||
| 58 | region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size); | ||
| 59 | ReserveGlobalRegion(region); | ||
| 60 | } | ||
| 61 | region->Reload(size); | ||
| 62 | return region; | ||
| 63 | } | ||
| 64 | |||
| 65 | void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { | ||
| 66 | reserve.insert_or_assign(region->GetCacheAddr(), std::move(region)); | ||
| 67 | } | ||
| 68 | |||
| 69 | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) | ||
| 70 | : RasterizerCache{rasterizer} { | ||
| 71 | GLint max_ssbo_size_; | ||
| 72 | glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_); | ||
| 73 | max_ssbo_size = static_cast<u32>(max_ssbo_size_); | ||
| 74 | } | ||
| 75 | |||
| 76 | GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( | ||
| 77 | const GLShader::GlobalMemoryEntry& global_region, | ||
| 78 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { | ||
| 79 | std::lock_guard lock{mutex}; | ||
| 80 | |||
| 81 | auto& gpu{Core::System::GetInstance().GPU()}; | ||
| 82 | auto& memory_manager{gpu.MemoryManager()}; | ||
| 83 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; | ||
| 84 | const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + | ||
| 85 | global_region.GetCbufOffset()}; | ||
| 86 | const auto actual_addr{memory_manager.Read<u64>(addr)}; | ||
| 87 | const auto size{memory_manager.Read<u32>(addr + 8)}; | ||
| 88 | |||
| 89 | // Look up global region in the cache based on address | ||
| 90 | const auto& host_ptr{memory_manager.GetPointer(actual_addr)}; | ||
| 91 | GlobalRegion region{TryGet(host_ptr)}; | ||
| 92 | |||
| 93 | if (!region) { | ||
| 94 | // No global region found - create a new one | ||
| 95 | region = GetUncachedGlobalRegion(actual_addr, host_ptr, size); | ||
| 96 | Register(region); | ||
| 97 | } | ||
| 98 | |||
| 99 | return region; | ||
| 100 | } | ||
| 101 | |||
| 102 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h deleted file mode 100644 index 2d467a240..000000000 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ /dev/null | |||
| @@ -1,82 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <unordered_map> | ||
| 9 | |||
| 10 | #include <glad/glad.h> | ||
| 11 | |||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/rasterizer_cache.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 17 | |||
| 18 | namespace OpenGL { | ||
| 19 | |||
| 20 | namespace GLShader { | ||
| 21 | class GlobalMemoryEntry; | ||
| 22 | } | ||
| 23 | |||
| 24 | class RasterizerOpenGL; | ||
| 25 | class CachedGlobalRegion; | ||
| 26 | using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; | ||
| 27 | |||
| 28 | class CachedGlobalRegion final : public RasterizerCacheObject { | ||
| 29 | public: | ||
| 30 | explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size); | ||
| 31 | ~CachedGlobalRegion(); | ||
| 32 | |||
| 33 | VAddr GetCpuAddr() const override { | ||
| 34 | return cpu_addr; | ||
| 35 | } | ||
| 36 | |||
| 37 | std::size_t GetSizeInBytes() const override { | ||
| 38 | return size; | ||
| 39 | } | ||
| 40 | |||
| 41 | /// Gets the GL program handle for the buffer | ||
| 42 | GLuint GetBufferHandle() const { | ||
| 43 | return buffer.handle; | ||
| 44 | } | ||
| 45 | |||
| 46 | /// Reloads the global region from guest memory | ||
| 47 | void Reload(u32 size_); | ||
| 48 | |||
| 49 | void Flush(); | ||
| 50 | |||
| 51 | private: | ||
| 52 | VAddr cpu_addr{}; | ||
| 53 | u8* host_ptr{}; | ||
| 54 | u32 size{}; | ||
| 55 | u32 max_size{}; | ||
| 56 | |||
| 57 | OGLBuffer buffer; | ||
| 58 | }; | ||
| 59 | |||
| 60 | class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { | ||
| 61 | public: | ||
| 62 | explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); | ||
| 63 | |||
| 64 | /// Gets the current specified shader stage program | ||
| 65 | GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor, | ||
| 66 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); | ||
| 67 | |||
| 68 | protected: | ||
| 69 | void FlushObjectInner(const GlobalRegion& object) override { | ||
| 70 | object->Flush(); | ||
| 71 | } | ||
| 72 | |||
| 73 | private: | ||
| 74 | GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; | ||
| 75 | GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size); | ||
| 76 | void ReserveGlobalRegion(GlobalRegion region); | ||
| 77 | |||
| 78 | std::unordered_map<CacheAddr, GlobalRegion> reserve; | ||
| 79 | u32 max_ssbo_size{}; | ||
| 80 | }; | ||
| 81 | |||
| 82 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d77426067..4e266cdad 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <bitset> | ||
| 7 | #include <memory> | 8 | #include <memory> |
| 8 | #include <string> | 9 | #include <string> |
| 9 | #include <string_view> | 10 | #include <string_view> |
| @@ -19,7 +20,9 @@ | |||
| 19 | #include "core/core.h" | 20 | #include "core/core.h" |
| 20 | #include "core/hle/kernel/process.h" | 21 | #include "core/hle/kernel/process.h" |
| 21 | #include "core/settings.h" | 22 | #include "core/settings.h" |
| 23 | #include "video_core/engines/kepler_compute.h" | ||
| 22 | #include "video_core/engines/maxwell_3d.h" | 24 | #include "video_core/engines/maxwell_3d.h" |
| 25 | #include "video_core/memory_manager.h" | ||
| 23 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 26 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 24 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 27 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 28 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| @@ -29,8 +32,10 @@ | |||
| 29 | namespace OpenGL { | 32 | namespace OpenGL { |
| 30 | 33 | ||
| 31 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 34 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 32 | using PixelFormat = VideoCore::Surface::PixelFormat; | 35 | |
| 33 | using SurfaceType = VideoCore::Surface::SurfaceType; | 36 | using VideoCore::Surface::PixelFormat; |
| 37 | using VideoCore::Surface::SurfaceTarget; | ||
| 38 | using VideoCore::Surface::SurfaceType; | ||
| 34 | 39 | ||
| 35 | MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192)); | 40 | MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192)); |
| 36 | MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192)); | 41 | MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192)); |
| @@ -78,36 +83,31 @@ struct DrawParameters { | |||
| 78 | } | 83 | } |
| 79 | }; | 84 | }; |
| 80 | 85 | ||
| 81 | struct FramebufferCacheKey { | 86 | static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, |
| 82 | bool is_single_buffer = false; | 87 | const GLShader::ConstBufferEntry& entry) { |
| 83 | bool stencil_enable = false; | 88 | if (!entry.IsIndirect()) { |
| 84 | 89 | return entry.GetSize(); | |
| 85 | std::array<GLenum, Maxwell::NumRenderTargets> color_attachments{}; | ||
| 86 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors{}; | ||
| 87 | u32 colors_count = 0; | ||
| 88 | |||
| 89 | GLuint zeta = 0; | ||
| 90 | |||
| 91 | auto Tie() const { | ||
| 92 | return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count, | ||
| 93 | zeta); | ||
| 94 | } | 90 | } |
| 95 | 91 | ||
| 96 | bool operator<(const FramebufferCacheKey& rhs) const { | 92 | if (buffer.size > Maxwell::MaxConstBufferSize) { |
| 97 | return Tie() < rhs.Tie(); | 93 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, |
| 94 | Maxwell::MaxConstBufferSize); | ||
| 95 | return Maxwell::MaxConstBufferSize; | ||
| 98 | } | 96 | } |
| 99 | }; | 97 | |
| 98 | return buffer.size; | ||
| 99 | } | ||
| 100 | 100 | ||
| 101 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 101 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| 102 | ScreenInfo& info) | 102 | ScreenInfo& info) |
| 103 | : res_cache{*this}, shader_cache{*this, system, emu_window, device}, | 103 | : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, |
| 104 | global_cache{*this}, system{system}, screen_info{info}, | 104 | system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} { |
| 105 | buffer_cache(*this, STREAM_BUFFER_SIZE) { | ||
| 106 | OpenGLState::ApplyDefaultState(); | 105 | OpenGLState::ApplyDefaultState(); |
| 107 | 106 | ||
| 108 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); | 107 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); |
| 109 | state.draw.shader_program = 0; | 108 | state.draw.shader_program = 0; |
| 110 | state.Apply(); | 109 | state.Apply(); |
| 110 | clear_framebuffer.Create(); | ||
| 111 | 111 | ||
| 112 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); | 112 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); |
| 113 | CheckExtensions(); | 113 | CheckExtensions(); |
| @@ -121,21 +121,16 @@ void RasterizerOpenGL::CheckExtensions() { | |||
| 121 | Render_OpenGL, | 121 | Render_OpenGL, |
| 122 | "Anisotropic filter is not supported! This can cause graphical issues in some games."); | 122 | "Anisotropic filter is not supported! This can cause graphical issues in some games."); |
| 123 | } | 123 | } |
| 124 | if (!GLAD_GL_ARB_buffer_storage) { | ||
| 125 | LOG_WARNING( | ||
| 126 | Render_OpenGL, | ||
| 127 | "Buffer storage control is not supported! This can cause performance degradation."); | ||
| 128 | } | ||
| 129 | } | 124 | } |
| 130 | 125 | ||
| 131 | GLuint RasterizerOpenGL::SetupVertexFormat() { | 126 | GLuint RasterizerOpenGL::SetupVertexFormat() { |
| 132 | auto& gpu = system.GPU().Maxwell3D(); | 127 | auto& gpu = system.GPU().Maxwell3D(); |
| 133 | const auto& regs = gpu.regs; | 128 | const auto& regs = gpu.regs; |
| 134 | 129 | ||
| 135 | if (!gpu.dirty_flags.vertex_attrib_format) { | 130 | if (!gpu.dirty.vertex_attrib_format) { |
| 136 | return state.draw.vertex_array; | 131 | return state.draw.vertex_array; |
| 137 | } | 132 | } |
| 138 | gpu.dirty_flags.vertex_attrib_format = false; | 133 | gpu.dirty.vertex_attrib_format = false; |
| 139 | 134 | ||
| 140 | MICROPROFILE_SCOPE(OpenGL_VAO); | 135 | MICROPROFILE_SCOPE(OpenGL_VAO); |
| 141 | 136 | ||
| @@ -152,8 +147,6 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 152 | state.draw.vertex_array = vao; | 147 | state.draw.vertex_array = vao; |
| 153 | state.ApplyVertexArrayState(); | 148 | state.ApplyVertexArrayState(); |
| 154 | 149 | ||
| 155 | glVertexArrayElementBuffer(vao, buffer_cache.GetHandle()); | ||
| 156 | |||
| 157 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. | 150 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. |
| 158 | // Enables the first 16 vertex attributes always, as we don't know which ones are actually | 151 | // Enables the first 16 vertex attributes always, as we don't know which ones are actually |
| 159 | // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 | 152 | // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 |
| @@ -191,7 +184,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 191 | } | 184 | } |
| 192 | 185 | ||
| 193 | // Rebinding the VAO invalidates the vertex buffer bindings. | 186 | // Rebinding the VAO invalidates the vertex buffer bindings. |
| 194 | gpu.dirty_flags.vertex_array.set(); | 187 | gpu.dirty.ResetVertexArrays(); |
| 195 | 188 | ||
| 196 | state.draw.vertex_array = vao_entry.handle; | 189 | state.draw.vertex_array = vao_entry.handle; |
| 197 | return vao_entry.handle; | 190 | return vao_entry.handle; |
| @@ -199,17 +192,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 199 | 192 | ||
| 200 | void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | 193 | void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { |
| 201 | auto& gpu = system.GPU().Maxwell3D(); | 194 | auto& gpu = system.GPU().Maxwell3D(); |
| 202 | const auto& regs = gpu.regs; | 195 | if (!gpu.dirty.vertex_array_buffers) |
| 203 | |||
| 204 | if (gpu.dirty_flags.vertex_array.none()) | ||
| 205 | return; | 196 | return; |
| 197 | gpu.dirty.vertex_array_buffers = false; | ||
| 198 | |||
| 199 | const auto& regs = gpu.regs; | ||
| 206 | 200 | ||
| 207 | MICROPROFILE_SCOPE(OpenGL_VB); | 201 | MICROPROFILE_SCOPE(OpenGL_VB); |
| 208 | 202 | ||
| 209 | // Upload all guest vertex arrays sequentially to our buffer | 203 | // Upload all guest vertex arrays sequentially to our buffer |
| 210 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 204 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 211 | if (!gpu.dirty_flags.vertex_array[index]) | 205 | if (!gpu.dirty.vertex_array[index]) |
| 212 | continue; | 206 | continue; |
| 207 | gpu.dirty.vertex_array[index] = false; | ||
| 208 | gpu.dirty.vertex_instance[index] = false; | ||
| 213 | 209 | ||
| 214 | const auto& vertex_array = regs.vertex_array[index]; | 210 | const auto& vertex_array = regs.vertex_array[index]; |
| 215 | if (!vertex_array.IsEnabled()) | 211 | if (!vertex_array.IsEnabled()) |
| @@ -220,11 +216,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 220 | 216 | ||
| 221 | ASSERT(end > start); | 217 | ASSERT(end > start); |
| 222 | const u64 size = end - start + 1; | 218 | const u64 size = end - start + 1; |
| 223 | const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size); | 219 | const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); |
| 224 | 220 | ||
| 225 | // Bind the vertex array to the buffer at the current offset. | 221 | // Bind the vertex array to the buffer at the current offset. |
| 226 | glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset, | 222 | vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset, |
| 227 | vertex_array.stride); | 223 | vertex_array.stride); |
| 228 | 224 | ||
| 229 | if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { | 225 | if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { |
| 230 | // Enable vertex buffer instancing with the specified divisor. | 226 | // Enable vertex buffer instancing with the specified divisor. |
| @@ -234,11 +230,47 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 234 | glVertexArrayBindingDivisor(vao, index, 0); | 230 | glVertexArrayBindingDivisor(vao, index, 0); |
| 235 | } | 231 | } |
| 236 | } | 232 | } |
| 233 | } | ||
| 234 | |||
| 235 | void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { | ||
| 236 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 237 | |||
| 238 | if (!gpu.dirty.vertex_instances) | ||
| 239 | return; | ||
| 240 | gpu.dirty.vertex_instances = false; | ||
| 241 | |||
| 242 | const auto& regs = gpu.regs; | ||
| 243 | // Upload all guest vertex arrays sequentially to our buffer | ||
| 244 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 245 | if (!gpu.dirty.vertex_instance[index]) | ||
| 246 | continue; | ||
| 237 | 247 | ||
| 238 | gpu.dirty_flags.vertex_array.reset(); | 248 | gpu.dirty.vertex_instance[index] = false; |
| 249 | |||
| 250 | if (regs.instanced_arrays.IsInstancingEnabled(index) && | ||
| 251 | regs.vertex_array[index].divisor != 0) { | ||
| 252 | // Enable vertex buffer instancing with the specified divisor. | ||
| 253 | glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor); | ||
| 254 | } else { | ||
| 255 | // Disable the vertex buffer instancing. | ||
| 256 | glVertexArrayBindingDivisor(vao, index, 0); | ||
| 257 | } | ||
| 258 | } | ||
| 259 | } | ||
| 260 | |||
| 261 | GLintptr RasterizerOpenGL::SetupIndexBuffer() { | ||
| 262 | if (accelerate_draw != AccelDraw::Indexed) { | ||
| 263 | return 0; | ||
| 264 | } | ||
| 265 | MICROPROFILE_SCOPE(OpenGL_Index); | ||
| 266 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 267 | const std::size_t size = CalculateIndexBufferSize(); | ||
| 268 | const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); | ||
| 269 | vertex_array_pushbuffer.SetIndexBuffer(buffer); | ||
| 270 | return offset; | ||
| 239 | } | 271 | } |
| 240 | 272 | ||
| 241 | DrawParameters RasterizerOpenGL::SetupDraw() { | 273 | DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) { |
| 242 | const auto& gpu = system.GPU().Maxwell3D(); | 274 | const auto& gpu = system.GPU().Maxwell3D(); |
| 243 | const auto& regs = gpu.regs; | 275 | const auto& regs = gpu.regs; |
| 244 | const bool is_indexed = accelerate_draw == AccelDraw::Indexed; | 276 | const bool is_indexed = accelerate_draw == AccelDraw::Indexed; |
| @@ -250,11 +282,9 @@ DrawParameters RasterizerOpenGL::SetupDraw() { | |||
| 250 | params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); | 282 | params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); |
| 251 | 283 | ||
| 252 | if (is_indexed) { | 284 | if (is_indexed) { |
| 253 | MICROPROFILE_SCOPE(OpenGL_Index); | ||
| 254 | params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); | 285 | params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); |
| 255 | params.count = regs.index_array.count; | 286 | params.count = regs.index_array.count; |
| 256 | params.index_buffer_offset = | 287 | params.index_buffer_offset = index_buffer_offset; |
| 257 | buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize()); | ||
| 258 | params.base_vertex = static_cast<GLint>(regs.vb_element_base); | 288 | params.base_vertex = static_cast<GLint>(regs.vb_element_base); |
| 259 | } else { | 289 | } else { |
| 260 | params.count = regs.vertex_buffer.count; | 290 | params.count = regs.vertex_buffer.count; |
| @@ -270,10 +300,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 270 | BaseBindings base_bindings; | 300 | BaseBindings base_bindings; |
| 271 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 301 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 272 | 302 | ||
| 273 | // Prepare packed bindings | ||
| 274 | bind_ubo_pushbuffer.Setup(base_bindings.cbuf); | ||
| 275 | bind_ssbo_pushbuffer.Setup(base_bindings.gmem); | ||
| 276 | |||
| 277 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 303 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 278 | const auto& shader_config = gpu.regs.shader_config[index]; | 304 | const auto& shader_config = gpu.regs.shader_config[index]; |
| 279 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; | 305 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; |
| @@ -294,16 +320,21 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 294 | 320 | ||
| 295 | GLShader::MaxwellUniformData ubo{}; | 321 | GLShader::MaxwellUniformData ubo{}; |
| 296 | ubo.SetFromRegs(gpu, stage); | 322 | ubo.SetFromRegs(gpu, stage); |
| 297 | const GLintptr offset = | 323 | const auto [buffer, offset] = |
| 298 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); | 324 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); |
| 299 | 325 | ||
| 300 | // Bind the emulation info buffer | 326 | // Bind the emulation info buffer |
| 301 | bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, | 327 | bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo))); |
| 302 | static_cast<GLsizeiptr>(sizeof(ubo))); | ||
| 303 | 328 | ||
| 304 | Shader shader{shader_cache.GetStageProgram(program)}; | 329 | Shader shader{shader_cache.GetStageProgram(program)}; |
| 305 | const auto [program_handle, next_bindings] = | 330 | |
| 306 | shader->GetProgramHandle(primitive_mode, base_bindings); | 331 | const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); |
| 332 | SetupDrawConstBuffers(stage_enum, shader); | ||
| 333 | SetupDrawGlobalMemory(stage_enum, shader); | ||
| 334 | const auto texture_buffer_usage{SetupDrawTextures(stage_enum, shader, base_bindings)}; | ||
| 335 | |||
| 336 | const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; | ||
| 337 | const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant); | ||
| 307 | 338 | ||
| 308 | switch (program) { | 339 | switch (program) { |
| 309 | case Maxwell::ShaderProgram::VertexA: | 340 | case Maxwell::ShaderProgram::VertexA: |
| @@ -321,11 +352,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 321 | shader_config.enable.Value(), shader_config.offset); | 352 | shader_config.enable.Value(), shader_config.offset); |
| 322 | } | 353 | } |
| 323 | 354 | ||
| 324 | const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); | ||
| 325 | SetupDrawConstBuffers(stage_enum, shader); | ||
| 326 | SetupGlobalRegions(stage_enum, shader); | ||
| 327 | SetupTextures(stage_enum, shader, base_bindings); | ||
| 328 | |||
| 329 | // Workaround for Intel drivers. | 355 | // Workaround for Intel drivers. |
| 330 | // When a clip distance is enabled but not set in the shader it crops parts of the screen | 356 | // When a clip distance is enabled but not set in the shader it crops parts of the screen |
| 331 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the | 357 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the |
| @@ -343,50 +369,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 343 | base_bindings = next_bindings; | 369 | base_bindings = next_bindings; |
| 344 | } | 370 | } |
| 345 | 371 | ||
| 346 | bind_ubo_pushbuffer.Bind(); | ||
| 347 | bind_ssbo_pushbuffer.Bind(); | ||
| 348 | |||
| 349 | SyncClipEnabled(clip_distances); | 372 | SyncClipEnabled(clip_distances); |
| 350 | 373 | ||
| 351 | gpu.dirty_flags.shaders = false; | 374 | gpu.dirty.shaders = false; |
| 352 | } | ||
| 353 | |||
| 354 | void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, | ||
| 355 | OpenGLState& current_state) { | ||
| 356 | const auto [entry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey); | ||
| 357 | auto& framebuffer = entry->second; | ||
| 358 | |||
| 359 | if (is_cache_miss) | ||
| 360 | framebuffer.Create(); | ||
| 361 | |||
| 362 | current_state.draw.draw_framebuffer = framebuffer.handle; | ||
| 363 | current_state.ApplyFramebufferState(); | ||
| 364 | |||
| 365 | if (!is_cache_miss) | ||
| 366 | return; | ||
| 367 | |||
| 368 | if (fbkey.is_single_buffer) { | ||
| 369 | if (fbkey.color_attachments[0] != GL_NONE) { | ||
| 370 | glFramebufferTexture(GL_DRAW_FRAMEBUFFER, fbkey.color_attachments[0], fbkey.colors[0], | ||
| 371 | 0); | ||
| 372 | } | ||
| 373 | glDrawBuffer(fbkey.color_attachments[0]); | ||
| 374 | } else { | ||
| 375 | for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | ||
| 376 | if (fbkey.colors[index]) { | ||
| 377 | glFramebufferTexture(GL_DRAW_FRAMEBUFFER, | ||
| 378 | GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), | ||
| 379 | fbkey.colors[index], 0); | ||
| 380 | } | ||
| 381 | } | ||
| 382 | glDrawBuffers(fbkey.colors_count, fbkey.color_attachments.data()); | ||
| 383 | } | ||
| 384 | |||
| 385 | if (fbkey.zeta) { | ||
| 386 | GLenum zeta_attachment = | ||
| 387 | fbkey.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT; | ||
| 388 | glFramebufferTexture(GL_DRAW_FRAMEBUFFER, zeta_attachment, fbkey.zeta, 0); | ||
| 389 | } | ||
| 390 | } | 375 | } |
| 391 | 376 | ||
| 392 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | 377 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { |
| @@ -469,18 +454,22 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 469 | 454 | ||
| 470 | const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, | 455 | const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, |
| 471 | single_color_target}; | 456 | single_color_target}; |
| 472 | if (fb_config_state == current_framebuffer_config_state && | 457 | if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) { |
| 473 | gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { | ||
| 474 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or | 458 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or |
| 475 | // single color targets). This is done because the guest registers may not change but the | 459 | // single color targets). This is done because the guest registers may not change but the |
| 476 | // host framebuffer may contain different attachments | 460 | // host framebuffer may contain different attachments |
| 477 | return current_depth_stencil_usage; | 461 | return current_depth_stencil_usage; |
| 478 | } | 462 | } |
| 463 | gpu.dirty.render_settings = false; | ||
| 479 | current_framebuffer_config_state = fb_config_state; | 464 | current_framebuffer_config_state = fb_config_state; |
| 480 | 465 | ||
| 481 | Surface depth_surface; | 466 | texture_cache.GuardRenderTargets(true); |
| 467 | |||
| 468 | View depth_surface{}; | ||
| 482 | if (using_depth_fb) { | 469 | if (using_depth_fb) { |
| 483 | depth_surface = res_cache.GetDepthBufferSurface(preserve_contents); | 470 | depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); |
| 471 | } else { | ||
| 472 | texture_cache.SetEmptyDepthBuffer(); | ||
| 484 | } | 473 | } |
| 485 | 474 | ||
| 486 | UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); | 475 | UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); |
| @@ -493,13 +482,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 493 | if (using_color_fb) { | 482 | if (using_color_fb) { |
| 494 | if (single_color_target) { | 483 | if (single_color_target) { |
| 495 | // Used when just a single color attachment is enabled, e.g. for clearing a color buffer | 484 | // Used when just a single color attachment is enabled, e.g. for clearing a color buffer |
| 496 | Surface color_surface = | 485 | View color_surface{ |
| 497 | res_cache.GetColorBufferSurface(*single_color_target, preserve_contents); | 486 | texture_cache.GetColorBufferSurface(*single_color_target, preserve_contents)}; |
| 498 | 487 | ||
| 499 | if (color_surface) { | 488 | if (color_surface) { |
| 500 | // Assume that a surface will be written to if it is used as a framebuffer, even if | 489 | // Assume that a surface will be written to if it is used as a framebuffer, even if |
| 501 | // the shader doesn't actually write to it. | 490 | // the shader doesn't actually write to it. |
| 502 | color_surface->MarkAsModified(true, res_cache); | 491 | texture_cache.MarkColorBufferInUse(*single_color_target); |
| 503 | // Workaround for and issue in nvidia drivers | 492 | // Workaround for and issue in nvidia drivers |
| 504 | // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ | 493 | // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ |
| 505 | state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion; | 494 | state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion; |
| @@ -508,16 +497,21 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 508 | fbkey.is_single_buffer = true; | 497 | fbkey.is_single_buffer = true; |
| 509 | fbkey.color_attachments[0] = | 498 | fbkey.color_attachments[0] = |
| 510 | GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target); | 499 | GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target); |
| 511 | fbkey.colors[0] = color_surface != nullptr ? color_surface->Texture().handle : 0; | 500 | fbkey.colors[0] = color_surface; |
| 501 | for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | ||
| 502 | if (index != *single_color_target) { | ||
| 503 | texture_cache.SetEmptyColorBuffer(index); | ||
| 504 | } | ||
| 505 | } | ||
| 512 | } else { | 506 | } else { |
| 513 | // Multiple color attachments are enabled | 507 | // Multiple color attachments are enabled |
| 514 | for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | 508 | for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { |
| 515 | Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents); | 509 | View color_surface{texture_cache.GetColorBufferSurface(index, preserve_contents)}; |
| 516 | 510 | ||
| 517 | if (color_surface) { | 511 | if (color_surface) { |
| 518 | // Assume that a surface will be written to if it is used as a framebuffer, even | 512 | // Assume that a surface will be written to if it is used as a framebuffer, even |
| 519 | // if the shader doesn't actually write to it. | 513 | // if the shader doesn't actually write to it. |
| 520 | color_surface->MarkAsModified(true, res_cache); | 514 | texture_cache.MarkColorBufferInUse(index); |
| 521 | // Enable sRGB only for supported formats | 515 | // Enable sRGB only for supported formats |
| 522 | // Workaround for and issue in nvidia drivers | 516 | // Workaround for and issue in nvidia drivers |
| 523 | // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ | 517 | // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ |
| @@ -527,8 +521,7 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 527 | 521 | ||
| 528 | fbkey.color_attachments[index] = | 522 | fbkey.color_attachments[index] = |
| 529 | GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); | 523 | GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); |
| 530 | fbkey.colors[index] = | 524 | fbkey.colors[index] = color_surface; |
| 531 | color_surface != nullptr ? color_surface->Texture().handle : 0; | ||
| 532 | } | 525 | } |
| 533 | fbkey.is_single_buffer = false; | 526 | fbkey.is_single_buffer = false; |
| 534 | fbkey.colors_count = regs.rt_control.count; | 527 | fbkey.colors_count = regs.rt_control.count; |
| @@ -541,26 +534,84 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 541 | if (depth_surface) { | 534 | if (depth_surface) { |
| 542 | // Assume that a surface will be written to if it is used as a framebuffer, even if | 535 | // Assume that a surface will be written to if it is used as a framebuffer, even if |
| 543 | // the shader doesn't actually write to it. | 536 | // the shader doesn't actually write to it. |
| 544 | depth_surface->MarkAsModified(true, res_cache); | 537 | texture_cache.MarkDepthBufferInUse(); |
| 545 | 538 | ||
| 546 | fbkey.zeta = depth_surface->Texture().handle; | 539 | fbkey.zeta = depth_surface; |
| 547 | fbkey.stencil_enable = regs.stencil_enable && | 540 | fbkey.stencil_enable = depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; |
| 548 | depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; | ||
| 549 | } | 541 | } |
| 550 | 542 | ||
| 551 | SetupCachedFramebuffer(fbkey, current_state); | 543 | texture_cache.GuardRenderTargets(false); |
| 544 | |||
| 545 | current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey); | ||
| 552 | SyncViewport(current_state); | 546 | SyncViewport(current_state); |
| 553 | 547 | ||
| 554 | return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; | 548 | return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; |
| 555 | } | 549 | } |
| 556 | 550 | ||
| 551 | void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, | ||
| 552 | bool using_depth_fb, bool using_stencil_fb) { | ||
| 553 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 554 | const auto& regs = gpu.regs; | ||
| 555 | |||
| 556 | texture_cache.GuardRenderTargets(true); | ||
| 557 | View color_surface{}; | ||
| 558 | if (using_color_fb) { | ||
| 559 | color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false); | ||
| 560 | } | ||
| 561 | View depth_surface{}; | ||
| 562 | if (using_depth_fb || using_stencil_fb) { | ||
| 563 | depth_surface = texture_cache.GetDepthBufferSurface(false); | ||
| 564 | } | ||
| 565 | texture_cache.GuardRenderTargets(false); | ||
| 566 | |||
| 567 | current_state.draw.draw_framebuffer = clear_framebuffer.handle; | ||
| 568 | current_state.ApplyFramebufferState(); | ||
| 569 | |||
| 570 | if (color_surface) { | ||
| 571 | color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); | ||
| 572 | } else { | ||
| 573 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 574 | } | ||
| 575 | |||
| 576 | if (depth_surface) { | ||
| 577 | const auto& params = depth_surface->GetSurfaceParams(); | ||
| 578 | switch (params.type) { | ||
| 579 | case VideoCore::Surface::SurfaceType::Depth: | ||
| 580 | depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | ||
| 581 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 582 | break; | ||
| 583 | case VideoCore::Surface::SurfaceType::DepthStencil: | ||
| 584 | depth_surface->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | ||
| 585 | break; | ||
| 586 | default: | ||
| 587 | UNIMPLEMENTED(); | ||
| 588 | } | ||
| 589 | } else { | ||
| 590 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 591 | 0); | ||
| 592 | } | ||
| 593 | } | ||
| 594 | |||
| 557 | void RasterizerOpenGL::Clear() { | 595 | void RasterizerOpenGL::Clear() { |
| 558 | const auto& regs = system.GPU().Maxwell3D().regs; | 596 | const auto& maxwell3d = system.GPU().Maxwell3D(); |
| 597 | |||
| 598 | if (!maxwell3d.ShouldExecute()) { | ||
| 599 | return; | ||
| 600 | } | ||
| 601 | |||
| 602 | const auto& regs = maxwell3d.regs; | ||
| 559 | bool use_color{}; | 603 | bool use_color{}; |
| 560 | bool use_depth{}; | 604 | bool use_depth{}; |
| 561 | bool use_stencil{}; | 605 | bool use_stencil{}; |
| 562 | 606 | ||
| 563 | OpenGLState clear_state; | 607 | OpenGLState prev_state{OpenGLState::GetCurState()}; |
| 608 | SCOPE_EXIT({ | ||
| 609 | prev_state.AllDirty(); | ||
| 610 | prev_state.Apply(); | ||
| 611 | }); | ||
| 612 | |||
| 613 | OpenGLState clear_state{OpenGLState::GetCurState()}; | ||
| 614 | clear_state.SetDefaultViewports(); | ||
| 564 | if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || | 615 | if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || |
| 565 | regs.clear_buffers.A) { | 616 | regs.clear_buffers.A) { |
| 566 | use_color = true; | 617 | use_color = true; |
| @@ -580,11 +631,13 @@ void RasterizerOpenGL::Clear() { | |||
| 580 | // true. | 631 | // true. |
| 581 | clear_state.depth.test_enabled = true; | 632 | clear_state.depth.test_enabled = true; |
| 582 | clear_state.depth.test_func = GL_ALWAYS; | 633 | clear_state.depth.test_func = GL_ALWAYS; |
| 634 | clear_state.depth.write_mask = GL_TRUE; | ||
| 583 | } | 635 | } |
| 584 | if (regs.clear_buffers.S) { | 636 | if (regs.clear_buffers.S) { |
| 585 | ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); | 637 | ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); |
| 586 | use_stencil = true; | 638 | use_stencil = true; |
| 587 | clear_state.stencil.test_enabled = true; | 639 | clear_state.stencil.test_enabled = true; |
| 640 | |||
| 588 | if (regs.clear_flags.stencil) { | 641 | if (regs.clear_flags.stencil) { |
| 589 | // Stencil affects the clear so fill it with the used masks | 642 | // Stencil affects the clear so fill it with the used masks |
| 590 | clear_state.stencil.front.test_func = GL_ALWAYS; | 643 | clear_state.stencil.front.test_func = GL_ALWAYS; |
| @@ -616,8 +669,9 @@ void RasterizerOpenGL::Clear() { | |||
| 616 | return; | 669 | return; |
| 617 | } | 670 | } |
| 618 | 671 | ||
| 619 | const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( | 672 | ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil); |
| 620 | clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); | 673 | |
| 674 | SyncViewport(clear_state); | ||
| 621 | if (regs.clear_flags.scissor) { | 675 | if (regs.clear_flags.scissor) { |
| 622 | SyncScissorTest(clear_state); | 676 | SyncScissorTest(clear_state); |
| 623 | } | 677 | } |
| @@ -626,20 +680,18 @@ void RasterizerOpenGL::Clear() { | |||
| 626 | clear_state.EmulateViewportWithScissor(); | 680 | clear_state.EmulateViewportWithScissor(); |
| 627 | } | 681 | } |
| 628 | 682 | ||
| 629 | clear_state.ApplyColorMask(); | 683 | clear_state.AllDirty(); |
| 630 | clear_state.ApplyDepth(); | 684 | clear_state.Apply(); |
| 631 | clear_state.ApplyStencilTest(); | ||
| 632 | clear_state.ApplyViewport(); | ||
| 633 | 685 | ||
| 634 | if (use_color) { | 686 | if (use_color) { |
| 635 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); | 687 | glClearBufferfv(GL_COLOR, 0, regs.clear_color); |
| 636 | } | 688 | } |
| 637 | 689 | ||
| 638 | if (clear_depth && clear_stencil) { | 690 | if (use_depth && use_stencil) { |
| 639 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); | 691 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); |
| 640 | } else if (clear_depth) { | 692 | } else if (use_depth) { |
| 641 | glClearBufferfv(GL_DEPTH, 0, ®s.clear_depth); | 693 | glClearBufferfv(GL_DEPTH, 0, ®s.clear_depth); |
| 642 | } else if (clear_stencil) { | 694 | } else if (use_stencil) { |
| 643 | glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); | 695 | glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); |
| 644 | } | 696 | } |
| 645 | } | 697 | } |
| @@ -650,9 +702,11 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 650 | 702 | ||
| 651 | MICROPROFILE_SCOPE(OpenGL_Drawing); | 703 | MICROPROFILE_SCOPE(OpenGL_Drawing); |
| 652 | auto& gpu = system.GPU().Maxwell3D(); | 704 | auto& gpu = system.GPU().Maxwell3D(); |
| 653 | const auto& regs = gpu.regs; | ||
| 654 | 705 | ||
| 655 | ConfigureFramebuffers(state); | 706 | if (!gpu.ShouldExecute()) { |
| 707 | return; | ||
| 708 | } | ||
| 709 | |||
| 656 | SyncColorMask(); | 710 | SyncColorMask(); |
| 657 | SyncFragmentColorClampState(); | 711 | SyncFragmentColorClampState(); |
| 658 | SyncMultiSampleState(); | 712 | SyncMultiSampleState(); |
| @@ -684,31 +738,102 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 684 | Maxwell::MaxShaderStage; | 738 | Maxwell::MaxShaderStage; |
| 685 | 739 | ||
| 686 | // Add space for at least 18 constant buffers | 740 | // Add space for at least 18 constant buffers |
| 687 | buffer_size += | 741 | buffer_size += Maxwell::MaxConstBuffers * |
| 688 | Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); | 742 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); |
| 689 | 743 | ||
| 690 | const bool invalidate = buffer_cache.Map(buffer_size); | 744 | // Prepare the vertex array. |
| 691 | if (invalidate) { | 745 | buffer_cache.Map(buffer_size); |
| 692 | // As all cached buffers are invalidated, we need to recheck their state. | ||
| 693 | gpu.dirty_flags.vertex_array.set(); | ||
| 694 | } | ||
| 695 | 746 | ||
| 747 | // Prepare vertex array format. | ||
| 696 | const GLuint vao = SetupVertexFormat(); | 748 | const GLuint vao = SetupVertexFormat(); |
| 749 | vertex_array_pushbuffer.Setup(vao); | ||
| 750 | |||
| 751 | // Upload vertex and index data. | ||
| 697 | SetupVertexBuffer(vao); | 752 | SetupVertexBuffer(vao); |
| 753 | SetupVertexInstances(vao); | ||
| 754 | const GLintptr index_buffer_offset = SetupIndexBuffer(); | ||
| 755 | |||
| 756 | // Setup draw parameters. It will automatically choose what glDraw* method to use. | ||
| 757 | const DrawParameters params = SetupDraw(index_buffer_offset); | ||
| 698 | 758 | ||
| 699 | DrawParameters params = SetupDraw(); | 759 | // Prepare packed bindings. |
| 760 | bind_ubo_pushbuffer.Setup(0); | ||
| 761 | bind_ssbo_pushbuffer.Setup(0); | ||
| 762 | |||
| 763 | // Setup shaders and their used resources. | ||
| 764 | texture_cache.GuardSamplers(true); | ||
| 700 | SetupShaders(params.primitive_mode); | 765 | SetupShaders(params.primitive_mode); |
| 766 | texture_cache.GuardSamplers(false); | ||
| 701 | 767 | ||
| 702 | buffer_cache.Unmap(); | 768 | ConfigureFramebuffers(state); |
| 769 | |||
| 770 | // Signal the buffer cache that we are not going to upload more things. | ||
| 771 | const bool invalidate = buffer_cache.Unmap(); | ||
| 772 | |||
| 773 | // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. | ||
| 774 | vertex_array_pushbuffer.Bind(); | ||
| 775 | bind_ubo_pushbuffer.Bind(); | ||
| 776 | bind_ssbo_pushbuffer.Bind(); | ||
| 777 | |||
| 778 | if (invalidate) { | ||
| 779 | // As all cached buffers are invalidated, we need to recheck their state. | ||
| 780 | gpu.dirty.ResetVertexArrays(); | ||
| 781 | } | ||
| 703 | 782 | ||
| 704 | shader_program_manager->ApplyTo(state); | 783 | shader_program_manager->ApplyTo(state); |
| 705 | state.Apply(); | 784 | state.Apply(); |
| 706 | 785 | ||
| 707 | res_cache.SignalPreDrawCall(); | 786 | if (texture_cache.TextureBarrier()) { |
| 787 | glTextureBarrier(); | ||
| 788 | } | ||
| 789 | |||
| 708 | params.DispatchDraw(); | 790 | params.DispatchDraw(); |
| 709 | res_cache.SignalPostDrawCall(); | ||
| 710 | 791 | ||
| 711 | accelerate_draw = AccelDraw::Disabled; | 792 | accelerate_draw = AccelDraw::Disabled; |
| 793 | gpu.dirty.memory_general = false; | ||
| 794 | } | ||
| 795 | |||
| 796 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | ||
| 797 | if (!GLAD_GL_ARB_compute_variable_group_size) { | ||
| 798 | LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the " | ||
| 799 | "lack of GL_ARB_compute_variable_group_size"); | ||
| 800 | return; | ||
| 801 | } | ||
| 802 | |||
| 803 | auto kernel = shader_cache.GetComputeKernel(code_addr); | ||
| 804 | ProgramVariant variant; | ||
| 805 | variant.texture_buffer_usage = SetupComputeTextures(kernel); | ||
| 806 | SetupComputeImages(kernel); | ||
| 807 | |||
| 808 | const auto [program, next_bindings] = kernel->GetProgramHandle(variant); | ||
| 809 | state.draw.shader_program = program; | ||
| 810 | state.draw.program_pipeline = 0; | ||
| 811 | |||
| 812 | const std::size_t buffer_size = | ||
| 813 | Tegra::Engines::KeplerCompute::NumConstBuffers * | ||
| 814 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||
| 815 | buffer_cache.Map(buffer_size); | ||
| 816 | |||
| 817 | bind_ubo_pushbuffer.Setup(0); | ||
| 818 | bind_ssbo_pushbuffer.Setup(0); | ||
| 819 | |||
| 820 | SetupComputeConstBuffers(kernel); | ||
| 821 | SetupComputeGlobalMemory(kernel); | ||
| 822 | |||
| 823 | buffer_cache.Unmap(); | ||
| 824 | |||
| 825 | bind_ubo_pushbuffer.Bind(); | ||
| 826 | bind_ssbo_pushbuffer.Bind(); | ||
| 827 | |||
| 828 | state.ApplyTextures(); | ||
| 829 | state.ApplyImages(); | ||
| 830 | state.ApplyShaderProgram(); | ||
| 831 | state.ApplyProgramPipeline(); | ||
| 832 | |||
| 833 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 834 | glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y, | ||
| 835 | launch_desc.grid_dim_z, launch_desc.block_dim_x, | ||
| 836 | launch_desc.block_dim_y, launch_desc.block_dim_z); | ||
| 712 | } | 837 | } |
| 713 | 838 | ||
| 714 | void RasterizerOpenGL::FlushAll() {} | 839 | void RasterizerOpenGL::FlushAll() {} |
| @@ -718,8 +843,8 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | |||
| 718 | if (!addr || !size) { | 843 | if (!addr || !size) { |
| 719 | return; | 844 | return; |
| 720 | } | 845 | } |
| 721 | res_cache.FlushRegion(addr, size); | 846 | texture_cache.FlushRegion(addr, size); |
| 722 | global_cache.FlushRegion(addr, size); | 847 | buffer_cache.FlushRegion(addr, size); |
| 723 | } | 848 | } |
| 724 | 849 | ||
| 725 | void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | 850 | void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { |
| @@ -727,23 +852,31 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | |||
| 727 | if (!addr || !size) { | 852 | if (!addr || !size) { |
| 728 | return; | 853 | return; |
| 729 | } | 854 | } |
| 730 | res_cache.InvalidateRegion(addr, size); | 855 | texture_cache.InvalidateRegion(addr, size); |
| 731 | shader_cache.InvalidateRegion(addr, size); | 856 | shader_cache.InvalidateRegion(addr, size); |
| 732 | global_cache.InvalidateRegion(addr, size); | ||
| 733 | buffer_cache.InvalidateRegion(addr, size); | 857 | buffer_cache.InvalidateRegion(addr, size); |
| 734 | } | 858 | } |
| 735 | 859 | ||
| 736 | void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | 860 | void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { |
| 737 | FlushRegion(addr, size); | 861 | if (Settings::values.use_accurate_gpu_emulation) { |
| 862 | FlushRegion(addr, size); | ||
| 863 | } | ||
| 738 | InvalidateRegion(addr, size); | 864 | InvalidateRegion(addr, size); |
| 739 | } | 865 | } |
| 740 | 866 | ||
| 867 | void RasterizerOpenGL::FlushCommands() { | ||
| 868 | glFlush(); | ||
| 869 | } | ||
| 870 | |||
| 871 | void RasterizerOpenGL::TickFrame() { | ||
| 872 | buffer_cache.TickFrame(); | ||
| 873 | } | ||
| 874 | |||
| 741 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 875 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 742 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 876 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
| 743 | const Common::Rectangle<u32>& src_rect, | 877 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 744 | const Common::Rectangle<u32>& dst_rect) { | ||
| 745 | MICROPROFILE_SCOPE(OpenGL_Blits); | 878 | MICROPROFILE_SCOPE(OpenGL_Blits); |
| 746 | res_cache.FermiCopySurface(src, dst, src_rect, dst_rect); | 879 | texture_cache.DoFermiCopy(src, dst, copy_config); |
| 747 | return true; | 880 | return true; |
| 748 | } | 881 | } |
| 749 | 882 | ||
| @@ -755,7 +888,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 755 | 888 | ||
| 756 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 889 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 757 | 890 | ||
| 758 | const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))}; | 891 | const auto surface{ |
| 892 | texture_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))}; | ||
| 759 | if (!surface) { | 893 | if (!surface) { |
| 760 | return {}; | 894 | return {}; |
| 761 | } | 895 | } |
| @@ -771,7 +905,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 771 | LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); | 905 | LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); |
| 772 | } | 906 | } |
| 773 | 907 | ||
| 774 | screen_info.display_texture = surface->Texture().handle; | 908 | screen_info.display_texture = surface->GetTexture(); |
| 775 | 909 | ||
| 776 | return true; | 910 | return true; |
| 777 | } | 911 | } |
| @@ -779,14 +913,25 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 779 | void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 913 | void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 780 | const Shader& shader) { | 914 | const Shader& shader) { |
| 781 | MICROPROFILE_SCOPE(OpenGL_UBO); | 915 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 782 | const auto stage_index = static_cast<std::size_t>(stage); | 916 | const auto& stages = system.GPU().Maxwell3D().state.shader_stages; |
| 783 | const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; | 917 | const auto& shader_stage = stages[static_cast<std::size_t>(stage)]; |
| 784 | const auto& entries = shader->GetShaderEntries().const_buffers; | 918 | for (const auto& entry : shader->GetShaderEntries().const_buffers) { |
| 919 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; | ||
| 920 | SetupConstBuffer(buffer, entry); | ||
| 921 | } | ||
| 922 | } | ||
| 785 | 923 | ||
| 786 | // Upload only the enabled buffers from the 16 constbuffers of each shader stage | 924 | void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { |
| 787 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 925 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 788 | const auto& entry = entries[bindpoint]; | 926 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 789 | SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); | 927 | for (const auto& entry : kernel->GetShaderEntries().const_buffers) { |
| 928 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||
| 929 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | ||
| 930 | Tegra::Engines::ConstBufferInfo buffer; | ||
| 931 | buffer.address = config.Address(); | ||
| 932 | buffer.size = config.size; | ||
| 933 | buffer.enabled = mask[entry.GetIndex()]; | ||
| 934 | SetupConstBuffer(buffer, entry); | ||
| 790 | } | 935 | } |
| 791 | } | 936 | } |
| 792 | 937 | ||
| @@ -794,84 +939,169 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b | |||
| 794 | const GLShader::ConstBufferEntry& entry) { | 939 | const GLShader::ConstBufferEntry& entry) { |
| 795 | if (!buffer.enabled) { | 940 | if (!buffer.enabled) { |
| 796 | // Set values to zero to unbind buffers | 941 | // Set values to zero to unbind buffers |
| 797 | bind_ubo_pushbuffer.Push(0, 0, 0); | 942 | bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); |
| 798 | return; | 943 | return; |
| 799 | } | 944 | } |
| 800 | 945 | ||
| 801 | std::size_t size; | ||
| 802 | if (entry.IsIndirect()) { | ||
| 803 | // Buffer is accessed indirectly, so upload the entire thing | ||
| 804 | size = buffer.size; | ||
| 805 | |||
| 806 | if (size > MaxConstbufferSize) { | ||
| 807 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, | ||
| 808 | MaxConstbufferSize); | ||
| 809 | size = MaxConstbufferSize; | ||
| 810 | } | ||
| 811 | } else { | ||
| 812 | // Buffer is accessed directly, upload just what we use | ||
| 813 | size = entry.GetSize(); | ||
| 814 | } | ||
| 815 | |||
| 816 | // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 | 946 | // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 |
| 817 | // UBO alignment requirements. | 947 | // UBO alignment requirements. |
| 818 | size = Common::AlignUp(size, sizeof(GLvec4)); | 948 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); |
| 819 | ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big"); | ||
| 820 | 949 | ||
| 821 | const std::size_t alignment = device.GetUniformBufferAlignment(); | 950 | const auto alignment = device.GetUniformBufferAlignment(); |
| 822 | const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment); | 951 | const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment); |
| 823 | bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size); | 952 | bind_ubo_pushbuffer.Push(cbuf, offset, size); |
| 824 | } | 953 | } |
| 825 | 954 | ||
| 826 | void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 955 | void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 827 | const Shader& shader) { | 956 | const Shader& shader) { |
| 828 | const auto& entries = shader->GetShaderEntries().global_memory_entries; | 957 | auto& gpu{system.GPU()}; |
| 829 | for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 958 | auto& memory_manager{gpu.MemoryManager()}; |
| 830 | const auto& entry{entries[bindpoint]}; | 959 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; |
| 831 | const auto& region{global_cache.GetGlobalRegion(entry, stage)}; | 960 | for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { |
| 832 | if (entry.IsWritten()) { | 961 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; |
| 833 | region->MarkAsModified(true, global_cache); | 962 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; |
| 834 | } | 963 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| 835 | bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, | 964 | SetupGlobalMemory(entry, gpu_addr, size); |
| 836 | static_cast<GLsizeiptr>(region->GetSizeInBytes())); | 965 | } |
| 966 | } | ||
| 967 | |||
| 968 | void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { | ||
| 969 | auto& gpu{system.GPU()}; | ||
| 970 | auto& memory_manager{gpu.MemoryManager()}; | ||
| 971 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; | ||
| 972 | for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { | ||
| 973 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | ||
| 974 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; | ||
| 975 | const auto size{memory_manager.Read<u32>(addr + 8)}; | ||
| 976 | SetupGlobalMemory(entry, gpu_addr, size); | ||
| 837 | } | 977 | } |
| 838 | } | 978 | } |
| 839 | 979 | ||
| 840 | void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, | 980 | void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, |
| 841 | BaseBindings base_bindings) { | 981 | GPUVAddr gpu_addr, std::size_t size) { |
| 982 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | ||
| 983 | const auto [ssbo, buffer_offset] = | ||
| 984 | buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten()); | ||
| 985 | bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); | ||
| 986 | } | ||
| 987 | |||
| 988 | TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage, | ||
| 989 | const Shader& shader, | ||
| 990 | BaseBindings base_bindings) { | ||
| 842 | MICROPROFILE_SCOPE(OpenGL_Texture); | 991 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 843 | const auto& gpu = system.GPU(); | 992 | const auto& gpu = system.GPU(); |
| 844 | const auto& maxwell3d = gpu.Maxwell3D(); | 993 | const auto& maxwell3d = gpu.Maxwell3D(); |
| 845 | const auto& entries = shader->GetShaderEntries().samplers; | 994 | const auto& entries = shader->GetShaderEntries().samplers; |
| 846 | 995 | ||
| 847 | ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units), | 996 | ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures), |
| 848 | "Exceeded the number of active textures."); | 997 | "Exceeded the number of active textures."); |
| 849 | 998 | ||
| 999 | TextureBufferUsage texture_buffer_usage{0}; | ||
| 1000 | |||
| 850 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 1001 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { |
| 851 | const auto& entry = entries[bindpoint]; | 1002 | const auto& entry = entries[bindpoint]; |
| 852 | Tegra::Texture::FullTextureInfo texture; | 1003 | const auto texture = [&]() { |
| 853 | if (entry.IsBindless()) { | 1004 | if (!entry.IsBindless()) { |
| 1005 | return maxwell3d.GetStageTexture(stage, entry.GetOffset()); | ||
| 1006 | } | ||
| 854 | const auto cbuf = entry.GetBindlessCBuf(); | 1007 | const auto cbuf = entry.GetBindlessCBuf(); |
| 855 | Tegra::Texture::TextureHandle tex_handle; | 1008 | Tegra::Texture::TextureHandle tex_handle; |
| 856 | tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); | 1009 | tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); |
| 857 | texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); | 1010 | return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); |
| 858 | } else { | 1011 | }(); |
| 859 | texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); | 1012 | |
| 1013 | if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) { | ||
| 1014 | texture_buffer_usage.set(bindpoint); | ||
| 860 | } | 1015 | } |
| 861 | const u32 current_bindpoint = base_bindings.sampler + bindpoint; | 1016 | } |
| 862 | 1017 | ||
| 863 | state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc); | 1018 | return texture_buffer_usage; |
| 1019 | } | ||
| 864 | 1020 | ||
| 865 | if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) { | 1021 | TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { |
| 866 | state.texture_units[current_bindpoint].texture = | 1022 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 867 | surface->Texture(entry.IsArray()).handle; | 1023 | const auto& compute = system.GPU().KeplerCompute(); |
| 868 | surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, | 1024 | const auto& entries = kernel->GetShaderEntries().samplers; |
| 869 | texture.tic.w_source); | 1025 | |
| 870 | } else { | 1026 | ASSERT_MSG(entries.size() <= std::size(state.textures), |
| 871 | // Can occur when texture addr is null or its memory is unmapped/invalid | 1027 | "Exceeded the number of active textures."); |
| 872 | state.texture_units[current_bindpoint].texture = 0; | 1028 | |
| 1029 | TextureBufferUsage texture_buffer_usage{0}; | ||
| 1030 | |||
| 1031 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | ||
| 1032 | const auto& entry = entries[bindpoint]; | ||
| 1033 | const auto texture = [&]() { | ||
| 1034 | if (!entry.IsBindless()) { | ||
| 1035 | return compute.GetTexture(entry.GetOffset()); | ||
| 1036 | } | ||
| 1037 | const auto cbuf = entry.GetBindlessCBuf(); | ||
| 1038 | Tegra::Texture::TextureHandle tex_handle; | ||
| 1039 | tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); | ||
| 1040 | return compute.GetTextureInfo(tex_handle, entry.GetOffset()); | ||
| 1041 | }(); | ||
| 1042 | |||
| 1043 | if (SetupTexture(bindpoint, texture, entry)) { | ||
| 1044 | texture_buffer_usage.set(bindpoint); | ||
| 873 | } | 1045 | } |
| 874 | } | 1046 | } |
| 1047 | |||
| 1048 | return texture_buffer_usage; | ||
| 1049 | } | ||
| 1050 | |||
| 1051 | bool RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, | ||
| 1052 | const GLShader::SamplerEntry& entry) { | ||
| 1053 | state.samplers[binding] = sampler_cache.GetSampler(texture.tsc); | ||
| 1054 | |||
| 1055 | const auto view = texture_cache.GetTextureSurface(texture.tic, entry); | ||
| 1056 | if (!view) { | ||
| 1057 | // Can occur when texture addr is null or its memory is unmapped/invalid | ||
| 1058 | state.textures[binding] = 0; | ||
| 1059 | return false; | ||
| 1060 | } | ||
| 1061 | state.textures[binding] = view->GetTexture(); | ||
| 1062 | |||
| 1063 | if (view->GetSurfaceParams().IsBuffer()) { | ||
| 1064 | return true; | ||
| 1065 | } | ||
| 1066 | |||
| 1067 | // Apply swizzle to textures that are not buffers. | ||
| 1068 | view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, | ||
| 1069 | texture.tic.w_source); | ||
| 1070 | return false; | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { | ||
| 1074 | const auto& compute = system.GPU().KeplerCompute(); | ||
| 1075 | const auto& entries = shader->GetShaderEntries().images; | ||
| 1076 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | ||
| 1077 | const auto& entry = entries[bindpoint]; | ||
| 1078 | const auto tic = [&]() { | ||
| 1079 | if (!entry.IsBindless()) { | ||
| 1080 | return compute.GetTexture(entry.GetOffset()).tic; | ||
| 1081 | } | ||
| 1082 | const auto cbuf = entry.GetBindlessCBuf(); | ||
| 1083 | Tegra::Texture::TextureHandle tex_handle; | ||
| 1084 | tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); | ||
| 1085 | return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic; | ||
| 1086 | }(); | ||
| 1087 | SetupImage(bindpoint, tic, entry); | ||
| 1088 | } | ||
| 1089 | } | ||
| 1090 | |||
| 1091 | void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, | ||
| 1092 | const GLShader::ImageEntry& entry) { | ||
| 1093 | const auto view = texture_cache.GetImageSurface(tic, entry); | ||
| 1094 | if (!view) { | ||
| 1095 | state.images[binding] = 0; | ||
| 1096 | return; | ||
| 1097 | } | ||
| 1098 | if (!tic.IsBuffer()) { | ||
| 1099 | view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); | ||
| 1100 | } | ||
| 1101 | if (entry.IsWritten()) { | ||
| 1102 | view->MarkAsModified(texture_cache.Tick()); | ||
| 1103 | } | ||
| 1104 | state.images[binding] = view->GetTexture(); | ||
| 875 | } | 1105 | } |
| 876 | 1106 | ||
| 877 | void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { | 1107 | void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { |
| @@ -915,10 +1145,11 @@ void RasterizerOpenGL::SyncClipCoef() { | |||
| 915 | } | 1145 | } |
| 916 | 1146 | ||
| 917 | void RasterizerOpenGL::SyncCullMode() { | 1147 | void RasterizerOpenGL::SyncCullMode() { |
| 918 | const auto& regs = system.GPU().Maxwell3D().regs; | 1148 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 919 | 1149 | ||
| 920 | state.cull.enabled = regs.cull.enabled != 0; | 1150 | const auto& regs = maxwell3d.regs; |
| 921 | 1151 | ||
| 1152 | state.cull.enabled = regs.cull.enabled != 0; | ||
| 922 | if (state.cull.enabled) { | 1153 | if (state.cull.enabled) { |
| 923 | state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); | 1154 | state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); |
| 924 | state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); | 1155 | state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); |
| @@ -951,15 +1182,23 @@ void RasterizerOpenGL::SyncDepthTestState() { | |||
| 951 | state.depth.test_enabled = regs.depth_test_enable != 0; | 1182 | state.depth.test_enabled = regs.depth_test_enable != 0; |
| 952 | state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; | 1183 | state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; |
| 953 | 1184 | ||
| 954 | if (!state.depth.test_enabled) | 1185 | if (!state.depth.test_enabled) { |
| 955 | return; | 1186 | return; |
| 1187 | } | ||
| 956 | 1188 | ||
| 957 | state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); | 1189 | state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); |
| 958 | } | 1190 | } |
| 959 | 1191 | ||
| 960 | void RasterizerOpenGL::SyncStencilTestState() { | 1192 | void RasterizerOpenGL::SyncStencilTestState() { |
| 961 | const auto& regs = system.GPU().Maxwell3D().regs; | 1193 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1194 | if (!maxwell3d.dirty.stencil_test) { | ||
| 1195 | return; | ||
| 1196 | } | ||
| 1197 | maxwell3d.dirty.stencil_test = false; | ||
| 1198 | |||
| 1199 | const auto& regs = maxwell3d.regs; | ||
| 962 | state.stencil.test_enabled = regs.stencil_enable != 0; | 1200 | state.stencil.test_enabled = regs.stencil_enable != 0; |
| 1201 | state.MarkDirtyStencilState(); | ||
| 963 | 1202 | ||
| 964 | if (!regs.stencil_enable) { | 1203 | if (!regs.stencil_enable) { |
| 965 | return; | 1204 | return; |
| @@ -992,7 +1231,12 @@ void RasterizerOpenGL::SyncStencilTestState() { | |||
| 992 | } | 1231 | } |
| 993 | 1232 | ||
| 994 | void RasterizerOpenGL::SyncColorMask() { | 1233 | void RasterizerOpenGL::SyncColorMask() { |
| 995 | const auto& regs = system.GPU().Maxwell3D().regs; | 1234 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1235 | if (!maxwell3d.dirty.color_mask) { | ||
| 1236 | return; | ||
| 1237 | } | ||
| 1238 | const auto& regs = maxwell3d.regs; | ||
| 1239 | |||
| 996 | const std::size_t count = | 1240 | const std::size_t count = |
| 997 | regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; | 1241 | regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; |
| 998 | for (std::size_t i = 0; i < count; i++) { | 1242 | for (std::size_t i = 0; i < count; i++) { |
| @@ -1003,6 +1247,9 @@ void RasterizerOpenGL::SyncColorMask() { | |||
| 1003 | dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; | 1247 | dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; |
| 1004 | dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; | 1248 | dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; |
| 1005 | } | 1249 | } |
| 1250 | |||
| 1251 | state.MarkDirtyColorMask(); | ||
| 1252 | maxwell3d.dirty.color_mask = false; | ||
| 1006 | } | 1253 | } |
| 1007 | 1254 | ||
| 1008 | void RasterizerOpenGL::SyncMultiSampleState() { | 1255 | void RasterizerOpenGL::SyncMultiSampleState() { |
| @@ -1017,7 +1264,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() { | |||
| 1017 | } | 1264 | } |
| 1018 | 1265 | ||
| 1019 | void RasterizerOpenGL::SyncBlendState() { | 1266 | void RasterizerOpenGL::SyncBlendState() { |
| 1020 | const auto& regs = system.GPU().Maxwell3D().regs; | 1267 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1268 | if (!maxwell3d.dirty.blend_state) { | ||
| 1269 | return; | ||
| 1270 | } | ||
| 1271 | const auto& regs = maxwell3d.regs; | ||
| 1021 | 1272 | ||
| 1022 | state.blend_color.red = regs.blend_color.r; | 1273 | state.blend_color.red = regs.blend_color.r; |
| 1023 | state.blend_color.green = regs.blend_color.g; | 1274 | state.blend_color.green = regs.blend_color.g; |
| @@ -1040,6 +1291,8 @@ void RasterizerOpenGL::SyncBlendState() { | |||
| 1040 | for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { | 1291 | for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { |
| 1041 | state.blend[i].enabled = false; | 1292 | state.blend[i].enabled = false; |
| 1042 | } | 1293 | } |
| 1294 | maxwell3d.dirty.blend_state = false; | ||
| 1295 | state.MarkDirtyBlendState(); | ||
| 1043 | return; | 1296 | return; |
| 1044 | } | 1297 | } |
| 1045 | 1298 | ||
| @@ -1056,6 +1309,9 @@ void RasterizerOpenGL::SyncBlendState() { | |||
| 1056 | blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); | 1309 | blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); |
| 1057 | blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); | 1310 | blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); |
| 1058 | } | 1311 | } |
| 1312 | |||
| 1313 | state.MarkDirtyBlendState(); | ||
| 1314 | maxwell3d.dirty.blend_state = false; | ||
| 1059 | } | 1315 | } |
| 1060 | 1316 | ||
| 1061 | void RasterizerOpenGL::SyncLogicOpState() { | 1317 | void RasterizerOpenGL::SyncLogicOpState() { |
| @@ -1107,13 +1363,21 @@ void RasterizerOpenGL::SyncPointState() { | |||
| 1107 | } | 1363 | } |
| 1108 | 1364 | ||
| 1109 | void RasterizerOpenGL::SyncPolygonOffset() { | 1365 | void RasterizerOpenGL::SyncPolygonOffset() { |
| 1110 | const auto& regs = system.GPU().Maxwell3D().regs; | 1366 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1367 | if (!maxwell3d.dirty.polygon_offset) { | ||
| 1368 | return; | ||
| 1369 | } | ||
| 1370 | const auto& regs = maxwell3d.regs; | ||
| 1371 | |||
| 1111 | state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; | 1372 | state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; |
| 1112 | state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; | 1373 | state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; |
| 1113 | state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; | 1374 | state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; |
| 1114 | state.polygon_offset.units = regs.polygon_offset_units; | 1375 | state.polygon_offset.units = regs.polygon_offset_units; |
| 1115 | state.polygon_offset.factor = regs.polygon_offset_factor; | 1376 | state.polygon_offset.factor = regs.polygon_offset_factor; |
| 1116 | state.polygon_offset.clamp = regs.polygon_offset_clamp; | 1377 | state.polygon_offset.clamp = regs.polygon_offset_clamp; |
| 1378 | |||
| 1379 | state.MarkDirtyPolygonOffset(); | ||
| 1380 | maxwell3d.dirty.polygon_offset = false; | ||
| 1117 | } | 1381 | } |
| 1118 | 1382 | ||
| 1119 | void RasterizerOpenGL::SyncAlphaTest() { | 1383 | void RasterizerOpenGL::SyncAlphaTest() { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index f7671ff5d..eada752e0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -23,15 +23,16 @@ | |||
| 23 | #include "video_core/rasterizer_interface.h" | 23 | #include "video_core/rasterizer_interface.h" |
| 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_device.h" | 25 | #include "video_core/renderer_opengl/gl_device.h" |
| 26 | #include "video_core/renderer_opengl/gl_global_cache.h" | 26 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" |
| 27 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | ||
| 28 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 27 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 29 | #include "video_core/renderer_opengl/gl_sampler_cache.h" | 28 | #include "video_core/renderer_opengl/gl_sampler_cache.h" |
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 29 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 31 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 30 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 32 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 31 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 33 | #include "video_core/renderer_opengl/gl_state.h" | 32 | #include "video_core/renderer_opengl/gl_state.h" |
| 33 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 34 | #include "video_core/renderer_opengl/utils.h" | 34 | #include "video_core/renderer_opengl/utils.h" |
| 35 | #include "video_core/textures/texture.h" | ||
| 35 | 36 | ||
| 36 | namespace Core { | 37 | namespace Core { |
| 37 | class System; | 38 | class System; |
| @@ -41,11 +42,14 @@ namespace Core::Frontend { | |||
| 41 | class EmuWindow; | 42 | class EmuWindow; |
| 42 | } | 43 | } |
| 43 | 44 | ||
| 45 | namespace Tegra { | ||
| 46 | class MemoryManager; | ||
| 47 | } | ||
| 48 | |||
| 44 | namespace OpenGL { | 49 | namespace OpenGL { |
| 45 | 50 | ||
| 46 | struct ScreenInfo; | 51 | struct ScreenInfo; |
| 47 | struct DrawParameters; | 52 | struct DrawParameters; |
| 48 | struct FramebufferCacheKey; | ||
| 49 | 53 | ||
| 50 | class RasterizerOpenGL : public VideoCore::RasterizerInterface { | 54 | class RasterizerOpenGL : public VideoCore::RasterizerInterface { |
| 51 | public: | 55 | public: |
| @@ -55,14 +59,16 @@ public: | |||
| 55 | 59 | ||
| 56 | void DrawArrays() override; | 60 | void DrawArrays() override; |
| 57 | void Clear() override; | 61 | void Clear() override; |
| 62 | void DispatchCompute(GPUVAddr code_addr) override; | ||
| 58 | void FlushAll() override; | 63 | void FlushAll() override; |
| 59 | void FlushRegion(CacheAddr addr, u64 size) override; | 64 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 60 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 65 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 61 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 66 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 67 | void FlushCommands() override; | ||
| 68 | void TickFrame() override; | ||
| 62 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 69 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 63 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 70 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
| 64 | const Common::Rectangle<u32>& src_rect, | 71 | const Tegra::Engines::Fermi2D::Config& copy_config) override; |
| 65 | const Common::Rectangle<u32>& dst_rect) override; | ||
| 66 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 72 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 67 | u32 pixel_stride) override; | 73 | u32 pixel_stride) override; |
| 68 | bool AccelerateDrawBatch(bool is_indexed) override; | 74 | bool AccelerateDrawBatch(bool is_indexed) override; |
| @@ -70,11 +76,6 @@ public: | |||
| 70 | void LoadDiskResources(const std::atomic_bool& stop_loading, | 76 | void LoadDiskResources(const std::atomic_bool& stop_loading, |
| 71 | const VideoCore::DiskResourceLoadCallback& callback) override; | 77 | const VideoCore::DiskResourceLoadCallback& callback) override; |
| 72 | 78 | ||
| 73 | /// Maximum supported size that a constbuffer can have in bytes. | ||
| 74 | static constexpr std::size_t MaxConstbufferSize = 0x10000; | ||
| 75 | static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, | ||
| 76 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | ||
| 77 | |||
| 78 | private: | 79 | private: |
| 79 | struct FramebufferConfigState { | 80 | struct FramebufferConfigState { |
| 80 | bool using_color_fb{}; | 81 | bool using_color_fb{}; |
| @@ -95,32 +96,64 @@ private: | |||
| 95 | 96 | ||
| 96 | /** | 97 | /** |
| 97 | * Configures the color and depth framebuffer states. | 98 | * Configures the color and depth framebuffer states. |
| 98 | * @param use_color_fb If true, configure color framebuffers. | 99 | * |
| 99 | * @param using_depth_fb If true, configure the depth/stencil framebuffer. | 100 | * @param current_state The current OpenGL state. |
| 100 | * @param preserve_contents If true, tries to preserve data from a previously used framebuffer. | 101 | * @param using_color_fb If true, configure color framebuffers. |
| 102 | * @param using_depth_fb If true, configure the depth/stencil framebuffer. | ||
| 103 | * @param preserve_contents If true, tries to preserve data from a previously used | ||
| 104 | * framebuffer. | ||
| 101 | * @param single_color_target Specifies if a single color buffer target should be used. | 105 | * @param single_color_target Specifies if a single color buffer target should be used. |
| 106 | * | ||
| 102 | * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture | 107 | * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture |
| 103 | * (requires using_depth_fb to be true) | 108 | * (requires using_depth_fb to be true) |
| 104 | */ | 109 | */ |
| 105 | std::pair<bool, bool> ConfigureFramebuffers( | 110 | std::pair<bool, bool> ConfigureFramebuffers( |
| 106 | OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true, | 111 | OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true, |
| 107 | bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); | 112 | bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); |
| 108 | 113 | ||
| 114 | void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, | ||
| 115 | bool using_depth_fb, bool using_stencil_fb); | ||
| 116 | |||
| 109 | /// Configures the current constbuffers to use for the draw command. | 117 | /// Configures the current constbuffers to use for the draw command. |
| 110 | void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 118 | void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 111 | const Shader& shader); | 119 | const Shader& shader); |
| 112 | 120 | ||
| 121 | /// Configures the current constbuffers to use for the kernel invocation. | ||
| 122 | void SetupComputeConstBuffers(const Shader& kernel); | ||
| 123 | |||
| 113 | /// Configures a constant buffer. | 124 | /// Configures a constant buffer. |
| 114 | void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, | 125 | void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, |
| 115 | const GLShader::ConstBufferEntry& entry); | 126 | const GLShader::ConstBufferEntry& entry); |
| 116 | 127 | ||
| 117 | /// Configures the current global memory entries to use for the draw command. | 128 | /// Configures the current global memory entries to use for the draw command. |
| 118 | void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 129 | void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 119 | const Shader& shader); | 130 | const Shader& shader); |
| 131 | |||
| 132 | /// Configures the current global memory entries to use for the kernel invocation. | ||
| 133 | void SetupComputeGlobalMemory(const Shader& kernel); | ||
| 134 | |||
| 135 | /// Configures a constant buffer. | ||
| 136 | void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | ||
| 137 | std::size_t size); | ||
| 138 | |||
| 139 | /// Configures the current textures to use for the draw command. Returns shaders texture buffer | ||
| 140 | /// usage. | ||
| 141 | TextureBufferUsage SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | ||
| 142 | const Shader& shader, BaseBindings base_bindings); | ||
| 143 | |||
| 144 | /// Configures the textures used in a compute shader. Returns texture buffer usage. | ||
| 145 | TextureBufferUsage SetupComputeTextures(const Shader& kernel); | ||
| 146 | |||
| 147 | /// Configures a texture. Returns true when the texture is a texture buffer. | ||
| 148 | bool SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, | ||
| 149 | const GLShader::SamplerEntry& entry); | ||
| 120 | 150 | ||
| 121 | /// Configures the current textures to use for the draw command. | 151 | /// Configures images in a compute shader. |
| 122 | void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, | 152 | void SetupComputeImages(const Shader& shader); |
| 123 | BaseBindings base_bindings); | 153 | |
| 154 | /// Configures an image. | ||
| 155 | void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, | ||
| 156 | const GLShader::ImageEntry& entry); | ||
| 124 | 157 | ||
| 125 | /// Syncs the viewport and depth range to match the guest state | 158 | /// Syncs the viewport and depth range to match the guest state |
| 126 | void SyncViewport(OpenGLState& current_state); | 159 | void SyncViewport(OpenGLState& current_state); |
| @@ -181,10 +214,10 @@ private: | |||
| 181 | const Device device; | 214 | const Device device; |
| 182 | OpenGLState state; | 215 | OpenGLState state; |
| 183 | 216 | ||
| 184 | RasterizerCacheOpenGL res_cache; | 217 | TextureCacheOpenGL texture_cache; |
| 185 | ShaderCacheOpenGL shader_cache; | 218 | ShaderCacheOpenGL shader_cache; |
| 186 | GlobalRegionCacheOpenGL global_cache; | ||
| 187 | SamplerCacheOpenGL sampler_cache; | 219 | SamplerCacheOpenGL sampler_cache; |
| 220 | FramebufferCacheOpenGL framebuffer_cache; | ||
| 188 | 221 | ||
| 189 | Core::System& system; | 222 | Core::System& system; |
| 190 | ScreenInfo& screen_info; | 223 | ScreenInfo& screen_info; |
| @@ -195,13 +228,13 @@ private: | |||
| 195 | OGLVertexArray> | 228 | OGLVertexArray> |
| 196 | vertex_array_cache; | 229 | vertex_array_cache; |
| 197 | 230 | ||
| 198 | std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache; | ||
| 199 | FramebufferConfigState current_framebuffer_config_state; | 231 | FramebufferConfigState current_framebuffer_config_state; |
| 200 | std::pair<bool, bool> current_depth_stencil_usage{}; | 232 | std::pair<bool, bool> current_depth_stencil_usage{}; |
| 201 | 233 | ||
| 202 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 234 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; |
| 203 | OGLBufferCache buffer_cache; | 235 | OGLBufferCache buffer_cache; |
| 204 | 236 | ||
| 237 | VertexArrayPushBuffer vertex_array_pushbuffer; | ||
| 205 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; | 238 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; |
| 206 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; | 239 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; |
| 207 | 240 | ||
| @@ -213,16 +246,19 @@ private: | |||
| 213 | GLuint SetupVertexFormat(); | 246 | GLuint SetupVertexFormat(); |
| 214 | 247 | ||
| 215 | void SetupVertexBuffer(GLuint vao); | 248 | void SetupVertexBuffer(GLuint vao); |
| 249 | void SetupVertexInstances(GLuint vao); | ||
| 216 | 250 | ||
| 217 | DrawParameters SetupDraw(); | 251 | GLintptr SetupIndexBuffer(); |
| 218 | 252 | ||
| 219 | void SetupShaders(GLenum primitive_mode); | 253 | DrawParameters SetupDraw(GLintptr index_buffer_offset); |
| 220 | 254 | ||
| 221 | void SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, OpenGLState& current_state); | 255 | void SetupShaders(GLenum primitive_mode); |
| 222 | 256 | ||
| 223 | enum class AccelDraw { Disabled, Arrays, Indexed }; | 257 | enum class AccelDraw { Disabled, Arrays, Indexed }; |
| 224 | AccelDraw accelerate_draw = AccelDraw::Disabled; | 258 | AccelDraw accelerate_draw = AccelDraw::Disabled; |
| 225 | 259 | ||
| 260 | OGLFramebuffer clear_framebuffer; | ||
| 261 | |||
| 226 | using CachedPageMap = boost::icl::interval_map<u64, int>; | 262 | using CachedPageMap = boost::icl::interval_map<u64, int>; |
| 227 | CachedPageMap cached_pages; | 263 | CachedPageMap cached_pages; |
| 228 | }; | 264 | }; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp deleted file mode 100644 index a7681902e..000000000 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ /dev/null | |||
| @@ -1,1362 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <optional> | ||
| 7 | #include <glad/glad.h> | ||
| 8 | |||
| 9 | #include "common/alignment.h" | ||
| 10 | #include "common/assert.h" | ||
| 11 | #include "common/logging/log.h" | ||
| 12 | #include "common/microprofile.h" | ||
| 13 | #include "common/scope_exit.h" | ||
| 14 | #include "core/core.h" | ||
| 15 | #include "core/hle/kernel/process.h" | ||
| 16 | #include "core/settings.h" | ||
| 17 | #include "video_core/engines/maxwell_3d.h" | ||
| 18 | #include "video_core/memory_manager.h" | ||
| 19 | #include "video_core/morton.h" | ||
| 20 | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||
| 21 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | ||
| 22 | #include "video_core/renderer_opengl/utils.h" | ||
| 23 | #include "video_core/surface.h" | ||
| 24 | #include "video_core/textures/convert.h" | ||
| 25 | #include "video_core/textures/decoders.h" | ||
| 26 | |||
| 27 | namespace OpenGL { | ||
| 28 | |||
| 29 | using VideoCore::MortonSwizzle; | ||
| 30 | using VideoCore::MortonSwizzleMode; | ||
| 31 | using VideoCore::Surface::ComponentTypeFromDepthFormat; | ||
| 32 | using VideoCore::Surface::ComponentTypeFromRenderTarget; | ||
| 33 | using VideoCore::Surface::ComponentTypeFromTexture; | ||
| 34 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 35 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 36 | using VideoCore::Surface::PixelFormatFromTextureFormat; | ||
| 37 | using VideoCore::Surface::SurfaceTargetFromTextureType; | ||
| 38 | |||
| 39 | struct FormatTuple { | ||
| 40 | GLint internal_format; | ||
| 41 | GLenum format; | ||
| 42 | GLenum type; | ||
| 43 | ComponentType component_type; | ||
| 44 | bool compressed; | ||
| 45 | }; | ||
| 46 | |||
| 47 | static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) { | ||
| 48 | glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | ||
| 49 | glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | ||
| 50 | glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); | ||
| 51 | glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); | ||
| 52 | glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1); | ||
| 53 | if (max_mip_level == 1) { | ||
| 54 | glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0); | ||
| 55 | } | ||
| 56 | } | ||
| 57 | |||
| 58 | void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) { | ||
| 59 | auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; | ||
| 60 | |||
| 61 | gpu_addr = gpu_addr_; | ||
| 62 | host_ptr = memory_manager.GetPointer(gpu_addr_); | ||
| 63 | size_in_bytes = SizeInBytesRaw(); | ||
| 64 | |||
| 65 | if (IsPixelFormatASTC(pixel_format)) { | ||
| 66 | // ASTC is uncompressed in software, in emulated as RGBA8 | ||
| 67 | size_in_bytes_gl = width * height * depth * 4; | ||
| 68 | } else { | ||
| 69 | size_in_bytes_gl = SizeInBytesGL(); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | |||
| 73 | std::size_t SurfaceParams::InnerMipmapMemorySize(u32 mip_level, bool force_gl, bool layer_only, | ||
| 74 | bool uncompressed) const { | ||
| 75 | const u32 tile_x{GetDefaultBlockWidth(pixel_format)}; | ||
| 76 | const u32 tile_y{GetDefaultBlockHeight(pixel_format)}; | ||
| 77 | const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)}; | ||
| 78 | u32 m_depth = (layer_only ? 1U : depth); | ||
| 79 | u32 m_width = MipWidth(mip_level); | ||
| 80 | u32 m_height = MipHeight(mip_level); | ||
| 81 | m_width = uncompressed ? m_width : std::max(1U, (m_width + tile_x - 1) / tile_x); | ||
| 82 | m_height = uncompressed ? m_height : std::max(1U, (m_height + tile_y - 1) / tile_y); | ||
| 83 | m_depth = std::max(1U, m_depth >> mip_level); | ||
| 84 | u32 m_block_height = MipBlockHeight(mip_level); | ||
| 85 | u32 m_block_depth = MipBlockDepth(mip_level); | ||
| 86 | return Tegra::Texture::CalculateSize(force_gl ? false : is_tiled, bytes_per_pixel, m_width, | ||
| 87 | m_height, m_depth, m_block_height, m_block_depth); | ||
| 88 | } | ||
| 89 | |||
| 90 | std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, | ||
| 91 | bool uncompressed) const { | ||
| 92 | std::size_t block_size_bytes = Tegra::Texture::GetGOBSize() * block_height * block_depth; | ||
| 93 | std::size_t size = 0; | ||
| 94 | for (u32 i = 0; i < max_mip_level; i++) { | ||
| 95 | size += InnerMipmapMemorySize(i, force_gl, layer_only, uncompressed); | ||
| 96 | } | ||
| 97 | if (!force_gl && is_tiled) { | ||
| 98 | size = Common::AlignUp(size, block_size_bytes); | ||
| 99 | } | ||
| 100 | return size; | ||
| 101 | } | ||
| 102 | |||
| 103 | /*static*/ SurfaceParams SurfaceParams::CreateForTexture( | ||
| 104 | const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) { | ||
| 105 | SurfaceParams params{}; | ||
| 106 | params.is_tiled = config.tic.IsTiled(); | ||
| 107 | params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, | ||
| 108 | params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, | ||
| 109 | params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, | ||
| 110 | params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1; | ||
| 111 | params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); | ||
| 112 | params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), | ||
| 113 | params.srgb_conversion); | ||
| 114 | |||
| 115 | if (config.tsc.depth_compare_enabled) { | ||
| 116 | // Some titles create a 'R16U' (normalized 16-bit) texture with depth_compare enabled, | ||
| 117 | // then attempt to sample from it via a shadow sampler. Convert format to Z16 (which also | ||
| 118 | // causes GetFormatType to properly return 'Depth' below). | ||
| 119 | if (GetFormatType(params.pixel_format) == SurfaceType::ColorTexture) { | ||
| 120 | switch (params.pixel_format) { | ||
| 121 | case PixelFormat::R16S: | ||
| 122 | case PixelFormat::R16U: | ||
| 123 | case PixelFormat::R16F: | ||
| 124 | params.pixel_format = PixelFormat::Z16; | ||
| 125 | break; | ||
| 126 | case PixelFormat::R32F: | ||
| 127 | params.pixel_format = PixelFormat::Z32F; | ||
| 128 | break; | ||
| 129 | default: | ||
| 130 | LOG_WARNING(HW_GPU, "Color texture format being used with depth compare: {}", | ||
| 131 | static_cast<u32>(params.pixel_format)); | ||
| 132 | break; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | } | ||
| 136 | |||
| 137 | params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); | ||
| 138 | params.type = GetFormatType(params.pixel_format); | ||
| 139 | UNIMPLEMENTED_IF(params.type == SurfaceType::ColorTexture && config.tsc.depth_compare_enabled); | ||
| 140 | |||
| 141 | params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); | ||
| 142 | params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); | ||
| 143 | if (!params.is_tiled) { | ||
| 144 | params.pitch = config.tic.Pitch(); | ||
| 145 | } | ||
| 146 | params.unaligned_height = config.tic.Height(); | ||
| 147 | params.target = SurfaceTargetFromTextureType(config.tic.texture_type); | ||
| 148 | params.identity = SurfaceClass::Uploaded; | ||
| 149 | |||
| 150 | switch (params.target) { | ||
| 151 | case SurfaceTarget::Texture1D: | ||
| 152 | case SurfaceTarget::Texture2D: | ||
| 153 | params.depth = 1; | ||
| 154 | break; | ||
| 155 | case SurfaceTarget::TextureCubemap: | ||
| 156 | params.depth = config.tic.Depth() * 6; | ||
| 157 | break; | ||
| 158 | case SurfaceTarget::Texture3D: | ||
| 159 | params.depth = config.tic.Depth(); | ||
| 160 | break; | ||
| 161 | case SurfaceTarget::Texture2DArray: | ||
| 162 | params.depth = config.tic.Depth(); | ||
| 163 | if (!entry.IsArray()) { | ||
| 164 | // TODO(bunnei): We have seen games re-use a Texture2D as Texture2DArray with depth of | ||
| 165 | // one, but sample the texture in the shader as if it were not an array texture. This | ||
| 166 | // probably is valid on hardware, but we still need to write a test to confirm this. In | ||
| 167 | // emulation, the workaround here is to continue to treat this as a Texture2D. An | ||
| 168 | // example game that does this is Super Mario Odyssey (in Cloud Kingdom). | ||
| 169 | ASSERT(params.depth == 1); | ||
| 170 | params.target = SurfaceTarget::Texture2D; | ||
| 171 | } | ||
| 172 | break; | ||
| 173 | case SurfaceTarget::TextureCubeArray: | ||
| 174 | params.depth = config.tic.Depth() * 6; | ||
| 175 | if (!entry.IsArray()) { | ||
| 176 | ASSERT(params.depth == 6); | ||
| 177 | params.target = SurfaceTarget::TextureCubemap; | ||
| 178 | } | ||
| 179 | break; | ||
| 180 | default: | ||
| 181 | LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target)); | ||
| 182 | UNREACHABLE(); | ||
| 183 | params.depth = 1; | ||
| 184 | break; | ||
| 185 | } | ||
| 186 | |||
| 187 | params.is_layered = SurfaceTargetIsLayered(params.target); | ||
| 188 | params.is_array = SurfaceTargetIsArray(params.target); | ||
| 189 | params.max_mip_level = config.tic.max_mip_level + 1; | ||
| 190 | params.rt = {}; | ||
| 191 | |||
| 192 | params.InitCacheParameters(config.tic.Address()); | ||
| 193 | |||
| 194 | return params; | ||
| 195 | } | ||
| 196 | |||
| 197 | /*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) { | ||
| 198 | const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]}; | ||
| 199 | SurfaceParams params{}; | ||
| 200 | |||
| 201 | params.is_tiled = | ||
| 202 | config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | ||
| 203 | params.block_width = 1 << config.memory_layout.block_width; | ||
| 204 | params.block_height = 1 << config.memory_layout.block_height; | ||
| 205 | params.block_depth = 1 << config.memory_layout.block_depth; | ||
| 206 | params.tile_width_spacing = 1; | ||
| 207 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 208 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || | ||
| 209 | config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; | ||
| 210 | params.component_type = ComponentTypeFromRenderTarget(config.format); | ||
| 211 | params.type = GetFormatType(params.pixel_format); | ||
| 212 | if (params.is_tiled) { | ||
| 213 | params.width = config.width; | ||
| 214 | } else { | ||
| 215 | params.pitch = config.width; | ||
| 216 | const u32 bpp = params.GetFormatBpp() / 8; | ||
| 217 | params.width = params.pitch / bpp; | ||
| 218 | } | ||
| 219 | params.height = config.height; | ||
| 220 | params.unaligned_height = config.height; | ||
| 221 | params.target = SurfaceTarget::Texture2D; | ||
| 222 | params.identity = SurfaceClass::RenderTarget; | ||
| 223 | params.depth = 1; | ||
| 224 | params.max_mip_level = 1; | ||
| 225 | params.is_layered = false; | ||
| 226 | |||
| 227 | // Render target specific parameters, not used for caching | ||
| 228 | params.rt.index = static_cast<u32>(index); | ||
| 229 | params.rt.array_mode = config.array_mode; | ||
| 230 | params.rt.layer_stride = config.layer_stride; | ||
| 231 | params.rt.volume = config.volume; | ||
| 232 | params.rt.base_layer = config.base_layer; | ||
| 233 | |||
| 234 | params.InitCacheParameters(config.Address()); | ||
| 235 | |||
| 236 | return params; | ||
| 237 | } | ||
| 238 | |||
| 239 | /*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer( | ||
| 240 | u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format, | ||
| 241 | u32 block_width, u32 block_height, u32 block_depth, | ||
| 242 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { | ||
| 243 | SurfaceParams params{}; | ||
| 244 | |||
| 245 | params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | ||
| 246 | params.block_width = 1 << std::min(block_width, 5U); | ||
| 247 | params.block_height = 1 << std::min(block_height, 5U); | ||
| 248 | params.block_depth = 1 << std::min(block_depth, 5U); | ||
| 249 | params.tile_width_spacing = 1; | ||
| 250 | params.pixel_format = PixelFormatFromDepthFormat(format); | ||
| 251 | params.component_type = ComponentTypeFromDepthFormat(format); | ||
| 252 | params.type = GetFormatType(params.pixel_format); | ||
| 253 | params.srgb_conversion = false; | ||
| 254 | params.width = zeta_width; | ||
| 255 | params.height = zeta_height; | ||
| 256 | params.unaligned_height = zeta_height; | ||
| 257 | params.target = SurfaceTarget::Texture2D; | ||
| 258 | params.identity = SurfaceClass::DepthBuffer; | ||
| 259 | params.depth = 1; | ||
| 260 | params.max_mip_level = 1; | ||
| 261 | params.is_layered = false; | ||
| 262 | params.rt = {}; | ||
| 263 | |||
| 264 | params.InitCacheParameters(zeta_address); | ||
| 265 | |||
| 266 | return params; | ||
| 267 | } | ||
| 268 | |||
| 269 | /*static*/ SurfaceParams SurfaceParams::CreateForFermiCopySurface( | ||
| 270 | const Tegra::Engines::Fermi2D::Regs::Surface& config) { | ||
| 271 | SurfaceParams params{}; | ||
| 272 | |||
| 273 | params.is_tiled = !config.linear; | ||
| 274 | params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, | ||
| 275 | params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, | ||
| 276 | params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, | ||
| 277 | params.tile_width_spacing = 1; | ||
| 278 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 279 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || | ||
| 280 | config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; | ||
| 281 | params.component_type = ComponentTypeFromRenderTarget(config.format); | ||
| 282 | params.type = GetFormatType(params.pixel_format); | ||
| 283 | params.width = config.width; | ||
| 284 | params.pitch = config.pitch; | ||
| 285 | params.height = config.height; | ||
| 286 | params.unaligned_height = config.height; | ||
| 287 | params.target = SurfaceTarget::Texture2D; | ||
| 288 | params.identity = SurfaceClass::Copy; | ||
| 289 | params.depth = 1; | ||
| 290 | params.max_mip_level = 1; | ||
| 291 | params.rt = {}; | ||
| 292 | |||
| 293 | params.InitCacheParameters(config.Address()); | ||
| 294 | |||
| 295 | return params; | ||
| 296 | } | ||
| 297 | |||
| 298 | static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ | ||
| 299 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U | ||
| 300 | {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S | ||
| 301 | {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI | ||
| 302 | {GL_RGB8, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U | ||
| 303 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm, | ||
| 304 | false}, // A2B10G10R10U | ||
| 305 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U | ||
| 306 | {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U | ||
| 307 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI | ||
| 308 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F | ||
| 309 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U | ||
| 310 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI | ||
| 311 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, | ||
| 312 | false}, // R11FG11FB10F | ||
| 313 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI | ||
| 314 | {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 315 | true}, // DXT1 | ||
| 316 | {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 317 | true}, // DXT23 | ||
| 318 | {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 319 | true}, // DXT45 | ||
| 320 | {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 | ||
| 321 | {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 322 | true}, // DXN2UNORM | ||
| 323 | {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM | ||
| 324 | {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 325 | true}, // BC7U | ||
| 326 | {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, | ||
| 327 | true}, // BC6H_UF16 | ||
| 328 | {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, | ||
| 329 | true}, // BC6H_SF16 | ||
| 330 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 | ||
| 331 | {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 | ||
| 332 | {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F | ||
| 333 | {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F | ||
| 334 | {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F | ||
| 335 | {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F | ||
| 336 | {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U | ||
| 337 | {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S | ||
| 338 | {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI | ||
| 339 | {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I | ||
| 340 | {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16 | ||
| 341 | {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F | ||
| 342 | {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI | ||
| 343 | {GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I | ||
| 344 | {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S | ||
| 345 | {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F | ||
| 346 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, | ||
| 347 | false}, // RGBA8_SRGB | ||
| 348 | {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U | ||
| 349 | {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S | ||
| 350 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI | ||
| 351 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI | ||
| 352 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8 | ||
| 353 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5 | ||
| 354 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4 | ||
| 355 | {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 | ||
| 356 | // Compressed sRGB formats | ||
| 357 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 358 | true}, // DXT1_SRGB | ||
| 359 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 360 | true}, // DXT23_SRGB | ||
| 361 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 362 | true}, // DXT45_SRGB | ||
| 363 | {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 364 | true}, // BC7U_SRGB | ||
| 365 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB | ||
| 366 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB | ||
| 367 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB | ||
| 368 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB | ||
| 369 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5 | ||
| 370 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB | ||
| 371 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8 | ||
| 372 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB | ||
| 373 | |||
| 374 | // Depth formats | ||
| 375 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F | ||
| 376 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm, | ||
| 377 | false}, // Z16 | ||
| 378 | |||
| 379 | // DepthStencil formats | ||
| 380 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, | ||
| 381 | false}, // Z24S8 | ||
| 382 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, | ||
| 383 | false}, // S8Z24 | ||
| 384 | {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, | ||
| 385 | ComponentType::Float, false}, // Z32FS8 | ||
| 386 | }}; | ||
| 387 | |||
| 388 | static GLenum SurfaceTargetToGL(SurfaceTarget target) { | ||
| 389 | switch (target) { | ||
| 390 | case SurfaceTarget::Texture1D: | ||
| 391 | return GL_TEXTURE_1D; | ||
| 392 | case SurfaceTarget::Texture2D: | ||
| 393 | return GL_TEXTURE_2D; | ||
| 394 | case SurfaceTarget::Texture3D: | ||
| 395 | return GL_TEXTURE_3D; | ||
| 396 | case SurfaceTarget::Texture1DArray: | ||
| 397 | return GL_TEXTURE_1D_ARRAY; | ||
| 398 | case SurfaceTarget::Texture2DArray: | ||
| 399 | return GL_TEXTURE_2D_ARRAY; | ||
| 400 | case SurfaceTarget::TextureCubemap: | ||
| 401 | return GL_TEXTURE_CUBE_MAP; | ||
| 402 | case SurfaceTarget::TextureCubeArray: | ||
| 403 | return GL_TEXTURE_CUBE_MAP_ARRAY; | ||
| 404 | } | ||
| 405 | LOG_CRITICAL(Render_OpenGL, "Unimplemented texture target={}", static_cast<u32>(target)); | ||
| 406 | UNREACHABLE(); | ||
| 407 | return {}; | ||
| 408 | } | ||
| 409 | |||
| 410 | static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { | ||
| 411 | ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); | ||
| 412 | auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)]; | ||
| 413 | ASSERT(component_type == format.component_type); | ||
| 414 | |||
| 415 | return format; | ||
| 416 | } | ||
| 417 | |||
| 418 | /// Returns the discrepant array target | ||
| 419 | constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) { | ||
| 420 | switch (target) { | ||
| 421 | case SurfaceTarget::Texture1D: | ||
| 422 | return GL_TEXTURE_1D_ARRAY; | ||
| 423 | case SurfaceTarget::Texture2D: | ||
| 424 | return GL_TEXTURE_2D_ARRAY; | ||
| 425 | case SurfaceTarget::Texture3D: | ||
| 426 | return GL_NONE; | ||
| 427 | case SurfaceTarget::Texture1DArray: | ||
| 428 | return GL_TEXTURE_1D; | ||
| 429 | case SurfaceTarget::Texture2DArray: | ||
| 430 | return GL_TEXTURE_2D; | ||
| 431 | case SurfaceTarget::TextureCubemap: | ||
| 432 | return GL_TEXTURE_CUBE_MAP_ARRAY; | ||
| 433 | case SurfaceTarget::TextureCubeArray: | ||
| 434 | return GL_TEXTURE_CUBE_MAP; | ||
| 435 | } | ||
| 436 | return GL_NONE; | ||
| 437 | } | ||
| 438 | |||
| 439 | Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { | ||
| 440 | u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; | ||
| 441 | if (IsPixelFormatASTC(pixel_format)) { | ||
| 442 | // ASTC formats must stop at the ATSC block size boundary | ||
| 443 | actual_height = Common::AlignDown(actual_height, GetASTCBlockSize(pixel_format).second); | ||
| 444 | } | ||
| 445 | return {0, actual_height, MipWidth(mip_level), 0}; | ||
| 446 | } | ||
| 447 | |||
/// De/swizzles one mip level of a surface between the guest's tiled (Morton) layout and the
/// host-linear layout held in gl_buffer. `mode` selects the direction of the conversion.
void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
                 std::vector<u8>& gl_buffer, u32 mip_level) {
    u32 depth = params.MipDepth(mip_level);
    if (params.target == SurfaceTarget::Texture2D) {
        // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
        depth = 1U;
    }
    if (params.is_layered) {
        // Layered targets are converted one layer at a time: guest layers are spaced
        // layer_size apart in memory, while host layers are packed gl_size apart.
        u64 offset = params.GetMipmapLevelOffset(mip_level);
        u64 offset_gl = 0;
        const u64 layer_size = params.LayerMemorySize();
        const u64 gl_size = params.LayerSizeGL(mip_level);
        for (u32 i = 0; i < params.depth; i++) {
            MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
                          params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
                          params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
                          gl_buffer.data() + offset_gl, params.host_ptr + offset);
            offset += layer_size;
            offset_gl += gl_size;
        }
    } else {
        // Non-layered surfaces are converted in a single pass covering the whole level.
        const u64 offset = params.GetMipmapLevelOffset(mip_level);
        MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
                      params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
                      params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
                      gl_buffer.data(), params.host_ptr + offset);
    }
}
| 476 | |||
| 477 | void RasterizerCacheOpenGL::FastCopySurface(const Surface& src_surface, | ||
| 478 | const Surface& dst_surface) { | ||
| 479 | const auto& src_params{src_surface->GetSurfaceParams()}; | ||
| 480 | const auto& dst_params{dst_surface->GetSurfaceParams()}; | ||
| 481 | |||
| 482 | const u32 width{std::min(src_params.width, dst_params.width)}; | ||
| 483 | const u32 height{std::min(src_params.height, dst_params.height)}; | ||
| 484 | |||
| 485 | glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0, | ||
| 486 | 0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0, | ||
| 487 | 0, 0, width, height, 1); | ||
| 488 | |||
| 489 | dst_surface->MarkAsModified(true, *this); | ||
| 490 | } | ||
| 491 | |||
MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
/// Copies src_surface into dst_surface through a pixel buffer object, reinterpreting the
/// texel data into the destination format. Used when surface parameters changed but the
/// previous contents must be preserved.
void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
                                        const GLuint copy_pbo_handle, const GLenum src_attachment,
                                        const GLenum dst_attachment,
                                        const std::size_t cubemap_face) {
    MICROPROFILE_SCOPE(OpenGL_CopySurface);
    ASSERT_MSG(dst_attachment == 0, "Unimplemented");

    const auto& src_params{src_surface->GetSurfaceParams()};
    const auto& dst_params{dst_surface->GetSurfaceParams()};

    const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
    const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);

    // The PBO must be able to hold whichever surface is larger.
    const std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes);

    // Step 1: read the source texture back into the PBO (GL_PIXEL_PACK_BUFFER).
    glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
    glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_COPY);
    if (source_format.compressed) {
        glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment,
                                    static_cast<GLsizei>(src_params.size_in_bytes), nullptr);
    } else {
        glGetTextureImage(src_surface->Texture().handle, src_attachment, source_format.format,
                          source_format.type, static_cast<GLsizei>(src_params.size_in_bytes),
                          nullptr);
    }
    // If the new texture is bigger than the previous one, we need to fill in the rest with data
    // from the CPU.
    if (src_params.size_in_bytes < dst_params.size_in_bytes) {
        // Upload the rest of the memory.
        if (dst_params.is_tiled) {
            // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest
            // of the data in this case. Games like Super Mario Odyssey seem to hit this case
            // when drawing, it re-uses the memory of a previous texture as a bigger framebuffer
            // but it doesn't clear it beforehand, the texture is already full of zeros.
            LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
                              "reinterpretation but the texture is tiled.");
        }
        const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
        auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
        glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
                        memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
    }

    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

    // Step 2: upload from the PBO into the destination texture, clamped to the overlap.
    const GLsizei width{static_cast<GLsizei>(
        std::min(src_params.GetRect().GetWidth(), dst_params.GetRect().GetWidth()))};
    const GLsizei height{static_cast<GLsizei>(
        std::min(src_params.GetRect().GetHeight(), dst_params.GetRect().GetHeight()))};

    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
    if (dest_format.compressed) {
        LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!");
        UNREACHABLE();
    } else {
        switch (dst_params.target) {
        case SurfaceTarget::Texture1D:
            glTextureSubImage1D(dst_surface->Texture().handle, 0, 0, width, dest_format.format,
                                dest_format.type, nullptr);
            break;
        case SurfaceTarget::Texture2D:
            glTextureSubImage2D(dst_surface->Texture().handle, 0, 0, 0, width, height,
                                dest_format.format, dest_format.type, nullptr);
            break;
        case SurfaceTarget::Texture3D:
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
            glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, 0, width, height,
                                static_cast<GLsizei>(dst_params.depth), dest_format.format,
                                dest_format.type, nullptr);
            break;
        case SurfaceTarget::TextureCubemap:
            // Cubemaps upload into a single face selected by the z offset.
            glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0,
                                static_cast<GLint>(cubemap_face), width, height, 1,
                                dest_format.format, dest_format.type, nullptr);
            break;
        default:
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(dst_params.target));
            UNREACHABLE();
        }
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
    }

    dst_surface->MarkAsModified(true, *this);
}
| 579 | |||
/// Creates the backing OpenGL texture storage for a surface described by `params`.
CachedSurface::CachedSurface(const SurfaceParams& params)
    : RasterizerCacheObject{params.host_ptr}, params{params},
      gl_target{SurfaceTargetToGL(params.target)}, cached_size_in_bytes{params.size_in_bytes} {

    // Resolve the CPU address backing this surface's GPU address.
    const auto optional_cpu_addr{
        Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)};
    ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid");
    cpu_addr = *optional_cpu_addr;

    texture.Create(gl_target);

    // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
    // alternatives. This signals a bug on those functions.
    const auto width = static_cast<GLsizei>(params.MipWidth(0));
    const auto height = static_cast<GLsizei>(params.MipHeight(0));
    memory_size = params.MemorySize();
    reinterpreted = false;

    const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
    gl_internal_format = format_tuple.internal_format;

    // Allocate immutable storage with the dimensionality matching the surface target.
    switch (params.target) {
    case SurfaceTarget::Texture1D:
        glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format,
                           width);
        break;
    case SurfaceTarget::Texture2D:
    case SurfaceTarget::TextureCubemap:
        glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
                           width, height);
        break;
    case SurfaceTarget::Texture3D:
    case SurfaceTarget::Texture2DArray:
    case SurfaceTarget::TextureCubeArray:
        glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format,
                           width, height, params.depth);
        break;
    default:
        // Unknown target: fall back to 2D storage so the handle is still usable.
        LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                     static_cast<u32>(params.target));
        UNREACHABLE();
        glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
                           width, height);
    }

    ApplyTextureDefaults(texture.handle, params.max_mip_level);

    OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
}
| 629 | |||
| 630 | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); | ||
| 631 | void CachedSurface::LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) { | ||
| 632 | MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); | ||
| 633 | auto& gl_buffer = res_cache_tmp_mem.gl_buffer; | ||
| 634 | if (gl_buffer.size() < params.max_mip_level) | ||
| 635 | gl_buffer.resize(params.max_mip_level); | ||
| 636 | for (u32 i = 0; i < params.max_mip_level; i++) | ||
| 637 | gl_buffer[i].resize(params.GetMipmapSizeGL(i)); | ||
| 638 | if (params.is_tiled) { | ||
| 639 | ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", | ||
| 640 | params.block_width, static_cast<u32>(params.target)); | ||
| 641 | for (u32 i = 0; i < params.max_mip_level; i++) | ||
| 642 | SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i); | ||
| 643 | } else { | ||
| 644 | const u32 bpp = params.GetFormatBpp() / 8; | ||
| 645 | const u32 copy_size = (params.width * bpp + GetDefaultBlockWidth(params.pixel_format) - 1) / | ||
| 646 | GetDefaultBlockWidth(params.pixel_format); | ||
| 647 | if (params.pitch == copy_size) { | ||
| 648 | std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl); | ||
| 649 | } else { | ||
| 650 | const u32 height = (params.height + GetDefaultBlockHeight(params.pixel_format) - 1) / | ||
| 651 | GetDefaultBlockHeight(params.pixel_format); | ||
| 652 | const u8* start{params.host_ptr}; | ||
| 653 | u8* write_to = gl_buffer[0].data(); | ||
| 654 | for (u32 h = height; h > 0; h--) { | ||
| 655 | std::memcpy(write_to, start, copy_size); | ||
| 656 | start += params.pitch; | ||
| 657 | write_to += copy_size; | ||
| 658 | } | ||
| 659 | } | ||
| 660 | } | ||
| 661 | for (u32 i = 0; i < params.max_mip_level; i++) { | ||
| 662 | const u32 width = params.MipWidth(i); | ||
| 663 | const u32 height = params.MipHeight(i); | ||
| 664 | const u32 depth = params.MipDepth(i); | ||
| 665 | if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) { | ||
| 666 | // Reserve size for RGBA8 conversion | ||
| 667 | constexpr std::size_t rgba_bpp = 4; | ||
| 668 | gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp)); | ||
| 669 | } | ||
| 670 | Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width, | ||
| 671 | height, depth, true, true); | ||
| 672 | } | ||
| 673 | } | ||
| 674 | |||
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
/// Writes the surface's level-0 texel data back from the GL texture into guest memory,
/// re-tiling it if the guest layout is tiled.
void CachedSurface::FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
    MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);

    ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");

    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
    // OpenGL temporary buffer needs to be big enough to store raw texture size
    gl_buffer[0].resize(GetSizeInBytes());

    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
    // Configure pack state so row alignment/stride match the guest expectations.
    const u32 align = std::clamp(params.RowAlign(0), 1U, 8U);
    glPixelStorei(GL_PACK_ALIGNMENT, align);
    glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
    ASSERT(!tuple.compressed);
    // Unbind any PBO so glGetTextureImage writes to client memory.
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
    glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
                      static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
    Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
                                           params.height, params.depth, true, true);
    if (params.is_tiled) {
        ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
                   params.block_width, static_cast<u32>(params.target));

        // Re-tile the linear data back into the guest's Morton layout.
        SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
    } else {
        // Pitch-linear write-back: fast path when pitch equals the row size, otherwise
        // copy row by row preserving the pitch padding.
        const u32 bpp = params.GetFormatBpp() / 8;
        const u32 copy_size = params.width * bpp;
        if (params.pitch == copy_size) {
            std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
        } else {
            u8* start{params.host_ptr};
            const u8* read_to = gl_buffer[0].data();
            for (u32 h = params.height; h > 0; h--) {
                std::memcpy(start, read_to, copy_size);
                start += params.pitch;
                read_to += copy_size;
            }
        }
    }
}
| 717 | |||
/// Uploads one mip level from the staging buffer into the GL texture, dispatching on the
/// surface target and on whether the host format is compressed.
void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
                                          GLuint read_fb_handle, GLuint draw_fb_handle) {
    const auto& rect{params.GetRect(mip_map)};

    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;

    // Load data from memory to the surface
    const auto x0 = static_cast<GLint>(rect.left);
    const auto y0 = static_cast<GLint>(rect.bottom);
    // Byte offset of (x0, y0) within the staging buffer for this mip level.
    auto buffer_offset =
        static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) +
                                 static_cast<std::size_t>(x0)) *
        GetBytesPerPixel(params.pixel_format);

    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);

    // Configure unpack state so row alignment/stride match the staging layout.
    const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U);
    glPixelStorei(GL_UNPACK_ALIGNMENT, align);
    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));

    const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
    if (tuple.compressed) {
        switch (params.target) {
        case SurfaceTarget::Texture2D:
            glCompressedTextureSubImage2D(
                texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
                static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format, image_size,
                &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture3D:
            glCompressedTextureSubImage3D(
                texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
                static_cast<GLsizei>(params.MipHeight(mip_map)),
                static_cast<GLsizei>(params.MipDepth(mip_map)), tuple.internal_format, image_size,
                &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
            glCompressedTextureSubImage3D(
                texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
                static_cast<GLsizei>(params.MipHeight(mip_map)), static_cast<GLsizei>(params.depth),
                tuple.internal_format, image_size, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::TextureCubemap: {
            // Cubemap faces are uploaded one at a time, advancing the staging offset by
            // one layer per face.
            const auto layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
            for (std::size_t face = 0; face < params.depth; ++face) {
                glCompressedTextureSubImage3D(
                    texture.handle, mip_map, 0, 0, static_cast<GLint>(face),
                    static_cast<GLsizei>(params.MipWidth(mip_map)),
                    static_cast<GLsizei>(params.MipHeight(mip_map)), 1, tuple.internal_format,
                    layer_size, &gl_buffer[mip_map][buffer_offset]);
                buffer_offset += layer_size;
            }
            break;
        }
        default:
            // Unknown target: fall back to a 2D upload so data is not silently dropped.
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(params.target));
            UNREACHABLE();
            glCompressedTextureSubImage2D(
                texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
                static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format,
                static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[mip_map][buffer_offset]);
        }
    } else {
        switch (params.target) {
        case SurfaceTarget::Texture1D:
            glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
                                tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture2D:
            glTextureSubImage2D(texture.handle, mip_map, x0, y0,
                                static_cast<GLsizei>(rect.GetWidth()),
                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
                                &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture3D:
            glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
                                static_cast<GLsizei>(rect.GetWidth()),
                                static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
                                tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
            glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
                                static_cast<GLsizei>(rect.GetWidth()),
                                static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
                                tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::TextureCubemap: {
            // Upload each cube face individually, stepping the staging offset per layer.
            for (std::size_t face = 0; face < params.depth; ++face) {
                glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
                                    static_cast<GLsizei>(rect.GetWidth()),
                                    static_cast<GLsizei>(rect.GetHeight()), 1, tuple.format,
                                    tuple.type, &gl_buffer[mip_map][buffer_offset]);
                buffer_offset += params.LayerSizeGL(mip_map);
            }
            break;
        }
        default:
            // Unknown target: fall back to a 2D upload so data is not silently dropped.
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(params.target));
            UNREACHABLE();
            glTextureSubImage2D(texture.handle, mip_map, x0, y0,
                                static_cast<GLsizei>(rect.GetWidth()),
                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
                                &gl_buffer[mip_map][buffer_offset]);
        }
    }

    // Restore default unpack row length so later uploads are unaffected.
    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
| 830 | |||
| 831 | void CachedSurface::EnsureTextureDiscrepantView() { | ||
| 832 | if (discrepant_view.handle != 0) | ||
| 833 | return; | ||
| 834 | |||
| 835 | const GLenum target{GetArrayDiscrepantTarget(params.target)}; | ||
| 836 | ASSERT(target != GL_NONE); | ||
| 837 | |||
| 838 | const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; | ||
| 839 | constexpr GLuint min_layer = 0; | ||
| 840 | constexpr GLuint min_level = 0; | ||
| 841 | |||
| 842 | glGenTextures(1, &discrepant_view.handle); | ||
| 843 | glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level, | ||
| 844 | params.max_mip_level, min_layer, num_layers); | ||
| 845 | ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level); | ||
| 846 | glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, | ||
| 847 | reinterpret_cast<const GLint*>(swizzle.data())); | ||
| 848 | } | ||
| 849 | |||
| 850 | MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); | ||
| 851 | void CachedSurface::UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, | ||
| 852 | GLuint read_fb_handle, GLuint draw_fb_handle) { | ||
| 853 | MICROPROFILE_SCOPE(OpenGL_TextureUL); | ||
| 854 | |||
| 855 | for (u32 i = 0; i < params.max_mip_level; i++) | ||
| 856 | UploadGLMipmapTexture(res_cache_tmp_mem, i, read_fb_handle, draw_fb_handle); | ||
| 857 | } | ||
| 858 | |||
| 859 | void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, | ||
| 860 | Tegra::Texture::SwizzleSource swizzle_y, | ||
| 861 | Tegra::Texture::SwizzleSource swizzle_z, | ||
| 862 | Tegra::Texture::SwizzleSource swizzle_w) { | ||
| 863 | const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x); | ||
| 864 | const GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y); | ||
| 865 | const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z); | ||
| 866 | const GLenum new_w = MaxwellToGL::SwizzleSource(swizzle_w); | ||
| 867 | if (swizzle[0] == new_x && swizzle[1] == new_y && swizzle[2] == new_z && swizzle[3] == new_w) { | ||
| 868 | return; | ||
| 869 | } | ||
| 870 | swizzle = {new_x, new_y, new_z, new_w}; | ||
| 871 | const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data()); | ||
| 872 | glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); | ||
| 873 | if (discrepant_view.handle != 0) { | ||
| 874 | glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); | ||
| 875 | } | ||
| 876 | } | ||
| 877 | |||
/// Creates the GL helper objects (scratch framebuffers and the copy PBO) used by the cache.
RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer)
    : RasterizerCache{rasterizer} {
    read_framebuffer.Create();
    draw_framebuffer.Create();
    // Pixel buffer object used by CopySurface for format reinterpretation.
    copy_pbo.Create();
}
| 884 | |||
| 885 | Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, | ||
| 886 | const GLShader::SamplerEntry& entry) { | ||
| 887 | return GetSurface(SurfaceParams::CreateForTexture(config, entry)); | ||
| 888 | } | ||
| 889 | |||
/// Returns the surface for the currently bound depth (zeta) buffer, reusing the cached
/// result while the zeta registers are clean. Returns an empty surface when no depth
/// buffer is bound.
Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
    auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
    const auto& regs{gpu.regs};

    // Zeta state unchanged since the last lookup; reuse the cached surface.
    if (!gpu.dirty_flags.zeta_buffer) {
        return last_depth_buffer;
    }
    gpu.dirty_flags.zeta_buffer = false;

    // No depth buffer bound.
    if (!regs.zeta.Address() || !regs.zeta_enable) {
        return last_depth_buffer = {};
    }

    SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer(
        regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format,
        regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
        regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};

    return last_depth_buffer = GetSurface(depth_params, preserve_contents);
}
| 910 | |||
| 911 | Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) { | ||
| 912 | auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; | ||
| 913 | const auto& regs{gpu.regs}; | ||
| 914 | |||
| 915 | if (!gpu.dirty_flags.color_buffer[index]) { | ||
| 916 | return current_color_buffers[index]; | ||
| 917 | } | ||
| 918 | gpu.dirty_flags.color_buffer.reset(index); | ||
| 919 | |||
| 920 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | ||
| 921 | |||
| 922 | if (index >= regs.rt_control.count) { | ||
| 923 | return current_color_buffers[index] = {}; | ||
| 924 | } | ||
| 925 | |||
| 926 | if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { | ||
| 927 | return current_color_buffers[index] = {}; | ||
| 928 | } | ||
| 929 | |||
| 930 | const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; | ||
| 931 | |||
| 932 | return current_color_buffers[index] = GetSurface(color_params, preserve_contents); | ||
| 933 | } | ||
| 934 | |||
/// Synchronizes a surface with guest memory: stages the data, uploads it to the GL
/// texture, and clears the modified/reload flags.
void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
    surface->LoadGLBuffer(temporal_memory);
    surface->UploadGLTexture(temporal_memory, read_framebuffer.handle, draw_framebuffer.handle);
    // The surface now mirrors guest memory; nothing pending in either direction.
    surface->MarkAsModified(false, *this);
    surface->MarkForReload(false);
}
| 941 | |||
/// Core cache lookup: returns a surface matching `params`, reusing, recreating, or
/// allocating as needed. When preserve_contents is set, existing texel data survives a
/// parameter change (via RecreateSurface) and new surfaces are filled from guest memory.
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
    if (!params.IsValid()) {
        return {};
    }

    // Look up surface in the cache based on address
    Surface surface{TryGet(params.host_ptr)};
    if (surface) {
        if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
            // Use the cached surface as-is unless it's not synced with memory
            if (surface->MustReload())
                LoadSurface(surface);
            return surface;
        } else if (preserve_contents) {
            // If surface parameters changed and we care about keeping the previous data, recreate
            // the surface from the old one
            Surface new_surface{RecreateSurface(surface, params)};
            // Unregister before registering to avoid address-range collisions in the cache.
            Unregister(surface);
            Register(new_surface);
            if (new_surface->IsUploaded()) {
                RegisterReinterpretSurface(new_surface);
            }
            return new_surface;
        } else {
            // Delete the old surface before creating a new one to prevent collisions.
            Unregister(surface);
        }
    }

    // No cached surface found - get a new one
    surface = GetUncachedSurface(params);
    Register(surface);

    // Only load surface from memory if we care about the contents
    if (preserve_contents) {
        LoadSurface(surface);
    }

    return surface;
}
| 982 | |||
| 983 | Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) { | ||
| 984 | Surface surface{TryGetReservedSurface(params)}; | ||
| 985 | if (!surface) { | ||
| 986 | // No reserved surface available, create a new one and reserve it | ||
| 987 | surface = std::make_shared<CachedSurface>(params); | ||
| 988 | ReserveSurface(surface); | ||
| 989 | } | ||
| 990 | return surface; | ||
| 991 | } | ||
| 992 | |||
| 993 | void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface, | ||
| 994 | const Surface& dst_surface) { | ||
| 995 | const auto& init_params{src_surface->GetSurfaceParams()}; | ||
| 996 | const auto& dst_params{dst_surface->GetSurfaceParams()}; | ||
| 997 | auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; | ||
| 998 | GPUVAddr address{init_params.gpu_addr}; | ||
| 999 | const std::size_t layer_size{dst_params.LayerMemorySize()}; | ||
| 1000 | for (u32 layer = 0; layer < dst_params.depth; layer++) { | ||
| 1001 | for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { | ||
| 1002 | const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)}; | ||
| 1003 | const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))}; | ||
| 1004 | if (!copy) { | ||
| 1005 | continue; | ||
| 1006 | } | ||
| 1007 | const auto& src_params{copy->GetSurfaceParams()}; | ||
| 1008 | const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))}; | ||
| 1009 | const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))}; | ||
| 1010 | |||
| 1011 | glCopyImageSubData(copy->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, | ||
| 1012 | 0, 0, dst_surface->Texture().handle, | ||
| 1013 | SurfaceTargetToGL(dst_params.target), mipmap, 0, 0, layer, width, | ||
| 1014 | height, 1); | ||
| 1015 | } | ||
| 1016 | address += layer_size; | ||
| 1017 | } | ||
| 1018 | |||
| 1019 | dst_surface->MarkAsModified(true, *this); | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, | ||
| 1023 | const Common::Rectangle<u32>& src_rect, | ||
| 1024 | const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle, | ||
| 1025 | GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, | ||
| 1026 | std::size_t cubemap_face = 0) { | ||
| 1027 | |||
| 1028 | const auto& src_params{src_surface->GetSurfaceParams()}; | ||
| 1029 | const auto& dst_params{dst_surface->GetSurfaceParams()}; | ||
| 1030 | |||
| 1031 | OpenGLState prev_state{OpenGLState::GetCurState()}; | ||
| 1032 | SCOPE_EXIT({ prev_state.Apply(); }); | ||
| 1033 | |||
| 1034 | OpenGLState state; | ||
| 1035 | state.draw.read_framebuffer = read_fb_handle; | ||
| 1036 | state.draw.draw_framebuffer = draw_fb_handle; | ||
| 1037 | state.Apply(); | ||
| 1038 | |||
| 1039 | u32 buffers{}; | ||
| 1040 | |||
| 1041 | if (src_params.type == SurfaceType::ColorTexture) { | ||
| 1042 | switch (src_params.target) { | ||
| 1043 | case SurfaceTarget::Texture2D: | ||
| 1044 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||
| 1045 | GL_TEXTURE_2D, src_surface->Texture().handle, 0); | ||
| 1046 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 1047 | 0, 0); | ||
| 1048 | break; | ||
| 1049 | case SurfaceTarget::TextureCubemap: | ||
| 1050 | glFramebufferTexture2D( | ||
| 1051 | GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||
| 1052 | static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), | ||
| 1053 | src_surface->Texture().handle, 0); | ||
| 1054 | glFramebufferTexture2D( | ||
| 1055 | GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, | ||
| 1056 | static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); | ||
| 1057 | break; | ||
| 1058 | case SurfaceTarget::Texture2DArray: | ||
| 1059 | glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||
| 1060 | src_surface->Texture().handle, 0, 0); | ||
| 1061 | glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); | ||
| 1062 | break; | ||
| 1063 | case SurfaceTarget::Texture3D: | ||
| 1064 | glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||
| 1065 | SurfaceTargetToGL(src_params.target), | ||
| 1066 | src_surface->Texture().handle, 0, 0); | ||
| 1067 | glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, | ||
| 1068 | SurfaceTargetToGL(src_params.target), 0, 0, 0); | ||
| 1069 | break; | ||
| 1070 | default: | ||
| 1071 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||
| 1072 | GL_TEXTURE_2D, src_surface->Texture().handle, 0); | ||
| 1073 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 1074 | 0, 0); | ||
| 1075 | break; | ||
| 1076 | } | ||
| 1077 | |||
| 1078 | switch (dst_params.target) { | ||
| 1079 | case SurfaceTarget::Texture2D: | ||
| 1080 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||
| 1081 | GL_TEXTURE_2D, dst_surface->Texture().handle, 0); | ||
| 1082 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 1083 | 0, 0); | ||
| 1084 | break; | ||
| 1085 | case SurfaceTarget::TextureCubemap: | ||
| 1086 | glFramebufferTexture2D( | ||
| 1087 | GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||
| 1088 | static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), | ||
| 1089 | dst_surface->Texture().handle, 0); | ||
| 1090 | glFramebufferTexture2D( | ||
| 1091 | GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, | ||
| 1092 | static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); | ||
| 1093 | break; | ||
| 1094 | case SurfaceTarget::Texture2DArray: | ||
| 1095 | glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||
| 1096 | dst_surface->Texture().handle, 0, 0); | ||
| 1097 | glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); | ||
| 1098 | break; | ||
| 1099 | |||
| 1100 | case SurfaceTarget::Texture3D: | ||
| 1101 | glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||
| 1102 | SurfaceTargetToGL(dst_params.target), | ||
| 1103 | dst_surface->Texture().handle, 0, 0); | ||
| 1104 | glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, | ||
| 1105 | SurfaceTargetToGL(dst_params.target), 0, 0, 0); | ||
| 1106 | break; | ||
| 1107 | default: | ||
| 1108 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||
| 1109 | GL_TEXTURE_2D, dst_surface->Texture().handle, 0); | ||
| 1110 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 1111 | 0, 0); | ||
| 1112 | break; | ||
| 1113 | } | ||
| 1114 | |||
| 1115 | buffers = GL_COLOR_BUFFER_BIT; | ||
| 1116 | } else if (src_params.type == SurfaceType::Depth) { | ||
| 1117 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||
| 1118 | GL_TEXTURE_2D, 0, 0); | ||
| 1119 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | ||
| 1120 | src_surface->Texture().handle, 0); | ||
| 1121 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 1122 | |||
| 1123 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||
| 1124 | GL_TEXTURE_2D, 0, 0); | ||
| 1125 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | ||
| 1126 | dst_surface->Texture().handle, 0); | ||
| 1127 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 1128 | |||
| 1129 | buffers = GL_DEPTH_BUFFER_BIT; | ||
| 1130 | } else if (src_params.type == SurfaceType::DepthStencil) { | ||
| 1131 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||
| 1132 | GL_TEXTURE_2D, 0, 0); | ||
| 1133 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 1134 | src_surface->Texture().handle, 0); | ||
| 1135 | |||
| 1136 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||
| 1137 | GL_TEXTURE_2D, 0, 0); | ||
| 1138 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 1139 | dst_surface->Texture().handle, 0); | ||
| 1140 | |||
| 1141 | buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; | ||
| 1142 | } | ||
| 1143 | |||
| 1144 | glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, | ||
| 1145 | dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, | ||
| 1146 | buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); | ||
| 1147 | |||
| 1148 | return true; | ||
| 1149 | } | ||
| 1150 | |||
| 1151 | void RasterizerCacheOpenGL::FermiCopySurface( | ||
| 1152 | const Tegra::Engines::Fermi2D::Regs::Surface& src_config, | ||
| 1153 | const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, | ||
| 1154 | const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) { | ||
| 1155 | |||
| 1156 | const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); | ||
| 1157 | const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); | ||
| 1158 | |||
| 1159 | ASSERT(src_params.pixel_format == dst_params.pixel_format); | ||
| 1160 | ASSERT(src_params.block_height == dst_params.block_height); | ||
| 1161 | ASSERT(src_params.is_tiled == dst_params.is_tiled); | ||
| 1162 | ASSERT(src_params.depth == dst_params.depth); | ||
| 1163 | ASSERT(src_params.target == dst_params.target); | ||
| 1164 | ASSERT(src_params.rt.index == dst_params.rt.index); | ||
| 1165 | |||
| 1166 | auto src_surface = GetSurface(src_params, true); | ||
| 1167 | auto dst_surface = GetSurface(dst_params, true); | ||
| 1168 | |||
| 1169 | BlitSurface(src_surface, dst_surface, src_rect, dst_rect, read_framebuffer.handle, | ||
| 1170 | draw_framebuffer.handle); | ||
| 1171 | |||
| 1172 | dst_surface->MarkAsModified(true, *this); | ||
| 1173 | } | ||
| 1174 | |||
| 1175 | void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, | ||
| 1176 | const Surface& dst_surface) { | ||
| 1177 | const auto& src_params{src_surface->GetSurfaceParams()}; | ||
| 1178 | const auto& dst_params{dst_surface->GetSurfaceParams()}; | ||
| 1179 | |||
| 1180 | // Flush enough memory for both the source and destination surface | ||
| 1181 | FlushRegion(ToCacheAddr(src_params.host_ptr), | ||
| 1182 | std::max(src_params.MemorySize(), dst_params.MemorySize())); | ||
| 1183 | |||
| 1184 | LoadSurface(dst_surface); | ||
| 1185 | } | ||
| 1186 | |||
| 1187 | Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, | ||
| 1188 | const SurfaceParams& new_params) { | ||
| 1189 | // Verify surface is compatible for blitting | ||
| 1190 | auto old_params{old_surface->GetSurfaceParams()}; | ||
| 1191 | |||
| 1192 | // Get a new surface with the new parameters, and blit the previous surface to it | ||
| 1193 | Surface new_surface{GetUncachedSurface(new_params)}; | ||
| 1194 | |||
| 1195 | // With use_accurate_gpu_emulation enabled, do an accurate surface copy | ||
| 1196 | if (Settings::values.use_accurate_gpu_emulation) { | ||
| 1197 | AccurateCopySurface(old_surface, new_surface); | ||
| 1198 | return new_surface; | ||
| 1199 | } | ||
| 1200 | |||
| 1201 | const bool old_compressed = | ||
| 1202 | GetFormatTuple(old_params.pixel_format, old_params.component_type).compressed; | ||
| 1203 | const bool new_compressed = | ||
| 1204 | GetFormatTuple(new_params.pixel_format, new_params.component_type).compressed; | ||
| 1205 | const bool compatible_formats = | ||
| 1206 | GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format) && | ||
| 1207 | !(old_compressed || new_compressed); | ||
| 1208 | // For compatible surfaces, we can just do fast glCopyImageSubData based copy | ||
| 1209 | if (old_params.target == new_params.target && old_params.depth == new_params.depth && | ||
| 1210 | old_params.depth == 1 && compatible_formats) { | ||
| 1211 | FastCopySurface(old_surface, new_surface); | ||
| 1212 | return new_surface; | ||
| 1213 | } | ||
| 1214 | |||
| 1215 | switch (new_params.target) { | ||
| 1216 | case SurfaceTarget::Texture2D: | ||
| 1217 | CopySurface(old_surface, new_surface, copy_pbo.handle); | ||
| 1218 | break; | ||
| 1219 | case SurfaceTarget::Texture3D: | ||
| 1220 | AccurateCopySurface(old_surface, new_surface); | ||
| 1221 | break; | ||
| 1222 | case SurfaceTarget::TextureCubemap: | ||
| 1223 | case SurfaceTarget::Texture2DArray: | ||
| 1224 | case SurfaceTarget::TextureCubeArray: | ||
| 1225 | if (compatible_formats) | ||
| 1226 | FastLayeredCopySurface(old_surface, new_surface); | ||
| 1227 | else { | ||
| 1228 | AccurateCopySurface(old_surface, new_surface); | ||
| 1229 | } | ||
| 1230 | break; | ||
| 1231 | default: | ||
| 1232 | LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", | ||
| 1233 | static_cast<u32>(new_params.target)); | ||
| 1234 | UNREACHABLE(); | ||
| 1235 | } | ||
| 1236 | |||
| 1237 | return new_surface; | ||
| 1238 | } | ||
| 1239 | |||
| 1240 | Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const { | ||
| 1241 | return TryGet(host_ptr); | ||
| 1242 | } | ||
| 1243 | |||
| 1244 | void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { | ||
| 1245 | const auto& surface_reserve_key{SurfaceReserveKey::Create(surface->GetSurfaceParams())}; | ||
| 1246 | surface_reserve[surface_reserve_key] = surface; | ||
| 1247 | } | ||
| 1248 | |||
| 1249 | Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params) { | ||
| 1250 | const auto& surface_reserve_key{SurfaceReserveKey::Create(params)}; | ||
| 1251 | auto search{surface_reserve.find(surface_reserve_key)}; | ||
| 1252 | if (search != surface_reserve.end()) { | ||
| 1253 | return search->second; | ||
| 1254 | } | ||
| 1255 | return {}; | ||
| 1256 | } | ||
| 1257 | |||
| 1258 | static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params, | ||
| 1259 | u32 height) { | ||
| 1260 | for (u32 i = 0; i < params.max_mip_level; i++) { | ||
| 1261 | if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) { | ||
| 1262 | return {i}; | ||
| 1263 | } | ||
| 1264 | } | ||
| 1265 | return {}; | ||
| 1266 | } | ||
| 1267 | |||
| 1268 | static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams params, u32 mipmap) { | ||
| 1269 | const std::size_t size{params.LayerMemorySize()}; | ||
| 1270 | GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)}; | ||
| 1271 | for (u32 i = 0; i < params.depth; i++) { | ||
| 1272 | if (start == addr) { | ||
| 1273 | return {i}; | ||
| 1274 | } | ||
| 1275 | start += size; | ||
| 1276 | } | ||
| 1277 | return {}; | ||
| 1278 | } | ||
| 1279 | |||
| 1280 | static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface, | ||
| 1281 | const Surface blitted_surface) { | ||
| 1282 | const auto& dst_params = blitted_surface->GetSurfaceParams(); | ||
| 1283 | const auto& src_params = render_surface->GetSurfaceParams(); | ||
| 1284 | const std::size_t src_memory_size = src_params.size_in_bytes; | ||
| 1285 | const std::optional<u32> level = | ||
| 1286 | TryFindBestMipMap(src_memory_size, dst_params, src_params.height); | ||
| 1287 | if (level.has_value()) { | ||
| 1288 | if (src_params.width == dst_params.MipWidthGobAligned(*level) && | ||
| 1289 | src_params.height == dst_params.MipHeight(*level) && | ||
| 1290 | src_params.block_height >= dst_params.MipBlockHeight(*level)) { | ||
| 1291 | const std::optional<u32> slot = | ||
| 1292 | TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level); | ||
| 1293 | if (slot.has_value()) { | ||
| 1294 | glCopyImageSubData(render_surface->Texture().handle, | ||
| 1295 | SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, | ||
| 1296 | blitted_surface->Texture().handle, | ||
| 1297 | SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot, | ||
| 1298 | dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1); | ||
| 1299 | blitted_surface->MarkAsModified(true, cache); | ||
| 1300 | return true; | ||
| 1301 | } | ||
| 1302 | } | ||
| 1303 | } | ||
| 1304 | return false; | ||
| 1305 | } | ||
| 1306 | |||
| 1307 | static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) { | ||
| 1308 | const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize(); | ||
| 1309 | const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize(); | ||
| 1310 | if (bound2 > bound1) | ||
| 1311 | return true; | ||
| 1312 | const auto& dst_params = blitted_surface->GetSurfaceParams(); | ||
| 1313 | const auto& src_params = render_surface->GetSurfaceParams(); | ||
| 1314 | return (dst_params.component_type != src_params.component_type); | ||
| 1315 | } | ||
| 1316 | |||
| 1317 | static bool IsReinterpretInvalidSecond(const Surface render_surface, | ||
| 1318 | const Surface blitted_surface) { | ||
| 1319 | const auto& dst_params = blitted_surface->GetSurfaceParams(); | ||
| 1320 | const auto& src_params = render_surface->GetSurfaceParams(); | ||
| 1321 | return (dst_params.height > src_params.height && dst_params.width > src_params.width); | ||
| 1322 | } | ||
| 1323 | |||
| 1324 | bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface, | ||
| 1325 | Surface intersect) { | ||
| 1326 | if (IsReinterpretInvalid(triggering_surface, intersect)) { | ||
| 1327 | Unregister(intersect); | ||
| 1328 | return false; | ||
| 1329 | } | ||
| 1330 | if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) { | ||
| 1331 | if (IsReinterpretInvalidSecond(triggering_surface, intersect)) { | ||
| 1332 | Unregister(intersect); | ||
| 1333 | return false; | ||
| 1334 | } | ||
| 1335 | FlushObject(intersect); | ||
| 1336 | FlushObject(triggering_surface); | ||
| 1337 | intersect->MarkForReload(true); | ||
| 1338 | } | ||
| 1339 | return true; | ||
| 1340 | } | ||
| 1341 | |||
| 1342 | void RasterizerCacheOpenGL::SignalPreDrawCall() { | ||
| 1343 | if (texception && GLAD_GL_ARB_texture_barrier) { | ||
| 1344 | glTextureBarrier(); | ||
| 1345 | } | ||
| 1346 | texception = false; | ||
| 1347 | } | ||
| 1348 | |||
| 1349 | void RasterizerCacheOpenGL::SignalPostDrawCall() { | ||
| 1350 | for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) { | ||
| 1351 | if (current_color_buffers[i] != nullptr) { | ||
| 1352 | Surface intersect = | ||
| 1353 | CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr()); | ||
| 1354 | if (intersect != nullptr) { | ||
| 1355 | PartialReinterpretSurface(current_color_buffers[i], intersect); | ||
| 1356 | texception = true; | ||
| 1357 | } | ||
| 1358 | } | ||
| 1359 | } | ||
| 1360 | } | ||
| 1361 | |||
| 1362 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h deleted file mode 100644 index 6263ef3e7..000000000 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ /dev/null | |||
| @@ -1,572 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <memory> | ||
| 9 | #include <string> | ||
| 10 | #include <tuple> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "common/alignment.h" | ||
| 14 | #include "common/bit_util.h" | ||
| 15 | #include "common/common_types.h" | ||
| 16 | #include "common/hash.h" | ||
| 17 | #include "common/math_util.h" | ||
| 18 | #include "video_core/engines/fermi_2d.h" | ||
| 19 | #include "video_core/engines/maxwell_3d.h" | ||
| 20 | #include "video_core/rasterizer_cache.h" | ||
| 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 23 | #include "video_core/surface.h" | ||
| 24 | #include "video_core/textures/decoders.h" | ||
| 25 | #include "video_core/textures/texture.h" | ||
| 26 | |||
| 27 | namespace OpenGL { | ||
| 28 | |||
| 29 | class CachedSurface; | ||
| 30 | using Surface = std::shared_ptr<CachedSurface>; | ||
| 31 | using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>; | ||
| 32 | |||
| 33 | using SurfaceTarget = VideoCore::Surface::SurfaceTarget; | ||
| 34 | using SurfaceType = VideoCore::Surface::SurfaceType; | ||
| 35 | using PixelFormat = VideoCore::Surface::PixelFormat; | ||
| 36 | using ComponentType = VideoCore::Surface::ComponentType; | ||
| 37 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 38 | |||
| 39 | struct SurfaceParams { | ||
| 40 | enum class SurfaceClass { | ||
| 41 | Uploaded, | ||
| 42 | RenderTarget, | ||
| 43 | DepthBuffer, | ||
| 44 | Copy, | ||
| 45 | }; | ||
| 46 | |||
| 47 | static std::string SurfaceTargetName(SurfaceTarget target) { | ||
| 48 | switch (target) { | ||
| 49 | case SurfaceTarget::Texture1D: | ||
| 50 | return "Texture1D"; | ||
| 51 | case SurfaceTarget::Texture2D: | ||
| 52 | return "Texture2D"; | ||
| 53 | case SurfaceTarget::Texture3D: | ||
| 54 | return "Texture3D"; | ||
| 55 | case SurfaceTarget::Texture1DArray: | ||
| 56 | return "Texture1DArray"; | ||
| 57 | case SurfaceTarget::Texture2DArray: | ||
| 58 | return "Texture2DArray"; | ||
| 59 | case SurfaceTarget::TextureCubemap: | ||
| 60 | return "TextureCubemap"; | ||
| 61 | case SurfaceTarget::TextureCubeArray: | ||
| 62 | return "TextureCubeArray"; | ||
| 63 | default: | ||
| 64 | LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); | ||
| 65 | UNREACHABLE(); | ||
| 66 | return fmt::format("TextureUnknown({})", static_cast<u32>(target)); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | u32 GetFormatBpp() const { | ||
| 71 | return VideoCore::Surface::GetFormatBpp(pixel_format); | ||
| 72 | } | ||
| 73 | |||
| 74 | /// Returns the rectangle corresponding to this surface | ||
| 75 | Common::Rectangle<u32> GetRect(u32 mip_level = 0) const; | ||
| 76 | |||
| 77 | /// Returns the total size of this surface in bytes, adjusted for compression | ||
| 78 | std::size_t SizeInBytesRaw(bool ignore_tiled = false) const { | ||
| 79 | const u32 compression_factor{GetCompressionFactor(pixel_format)}; | ||
| 80 | const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)}; | ||
| 81 | const size_t uncompressed_size{ | ||
| 82 | Tegra::Texture::CalculateSize((ignore_tiled ? false : is_tiled), bytes_per_pixel, width, | ||
| 83 | height, depth, block_height, block_depth)}; | ||
| 84 | |||
| 85 | // Divide by compression_factor^2, as height and width are factored by this | ||
| 86 | return uncompressed_size / (compression_factor * compression_factor); | ||
| 87 | } | ||
| 88 | |||
| 89 | /// Returns the size of this surface as an OpenGL texture in bytes | ||
| 90 | std::size_t SizeInBytesGL() const { | ||
| 91 | return SizeInBytesRaw(true); | ||
| 92 | } | ||
| 93 | |||
| 94 | /// Returns the size of this surface as a cube face in bytes | ||
| 95 | std::size_t SizeInBytesCubeFace() const { | ||
| 96 | return size_in_bytes / 6; | ||
| 97 | } | ||
| 98 | |||
| 99 | /// Returns the size of this surface as an OpenGL cube face in bytes | ||
| 100 | std::size_t SizeInBytesCubeFaceGL() const { | ||
| 101 | return size_in_bytes_gl / 6; | ||
| 102 | } | ||
| 103 | |||
| 104 | /// Returns the exact size of memory occupied by the texture in VRAM, including mipmaps. | ||
| 105 | std::size_t MemorySize() const { | ||
| 106 | std::size_t size = InnerMemorySize(false, is_layered); | ||
| 107 | if (is_layered) | ||
| 108 | return size * depth; | ||
| 109 | return size; | ||
| 110 | } | ||
| 111 | |||
| 112 | /// Returns true if the parameters constitute a valid rasterizer surface. | ||
| 113 | bool IsValid() const { | ||
| 114 | return gpu_addr && host_ptr && height && width; | ||
| 115 | } | ||
| 116 | |||
| 117 | /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including | ||
| 118 | /// mipmaps. | ||
| 119 | std::size_t LayerMemorySize() const { | ||
| 120 | return InnerMemorySize(false, true); | ||
| 121 | } | ||
| 122 | |||
| 123 | /// Returns the size of a layer of this surface in OpenGL. | ||
| 124 | std::size_t LayerSizeGL(u32 mip_level) const { | ||
| 125 | return InnerMipmapMemorySize(mip_level, true, is_layered, false); | ||
| 126 | } | ||
| 127 | |||
| 128 | std::size_t GetMipmapSizeGL(u32 mip_level, bool ignore_compressed = true) const { | ||
| 129 | std::size_t size = InnerMipmapMemorySize(mip_level, true, is_layered, ignore_compressed); | ||
| 130 | if (is_layered) | ||
| 131 | return size * depth; | ||
| 132 | return size; | ||
| 133 | } | ||
| 134 | |||
| 135 | std::size_t GetMipmapLevelOffset(u32 mip_level) const { | ||
| 136 | std::size_t offset = 0; | ||
| 137 | for (u32 i = 0; i < mip_level; i++) | ||
| 138 | offset += InnerMipmapMemorySize(i, false, is_layered); | ||
| 139 | return offset; | ||
| 140 | } | ||
| 141 | |||
| 142 | std::size_t GetMipmapLevelOffsetGL(u32 mip_level) const { | ||
| 143 | std::size_t offset = 0; | ||
| 144 | for (u32 i = 0; i < mip_level; i++) | ||
| 145 | offset += InnerMipmapMemorySize(i, true, is_layered); | ||
| 146 | return offset; | ||
| 147 | } | ||
| 148 | |||
| 149 | std::size_t GetMipmapSingleSize(u32 mip_level) const { | ||
| 150 | return InnerMipmapMemorySize(mip_level, false, is_layered); | ||
| 151 | } | ||
| 152 | |||
| 153 | u32 MipWidth(u32 mip_level) const { | ||
| 154 | return std::max(1U, width >> mip_level); | ||
| 155 | } | ||
| 156 | |||
| 157 | u32 MipWidthGobAligned(u32 mip_level) const { | ||
| 158 | return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp()); | ||
| 159 | } | ||
| 160 | |||
| 161 | u32 MipHeight(u32 mip_level) const { | ||
| 162 | return std::max(1U, height >> mip_level); | ||
| 163 | } | ||
| 164 | |||
| 165 | u32 MipDepth(u32 mip_level) const { | ||
| 166 | return is_layered ? depth : std::max(1U, depth >> mip_level); | ||
| 167 | } | ||
| 168 | |||
| 169 | // Auto block resizing algorithm from: | ||
| 170 | // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c | ||
| 171 | u32 MipBlockHeight(u32 mip_level) const { | ||
| 172 | if (mip_level == 0) | ||
| 173 | return block_height; | ||
| 174 | u32 alt_height = MipHeight(mip_level); | ||
| 175 | u32 h = GetDefaultBlockHeight(pixel_format); | ||
| 176 | u32 blocks_in_y = (alt_height + h - 1) / h; | ||
| 177 | u32 bh = 16; | ||
| 178 | while (bh > 1 && blocks_in_y <= bh * 4) { | ||
| 179 | bh >>= 1; | ||
| 180 | } | ||
| 181 | return bh; | ||
| 182 | } | ||
| 183 | |||
| 184 | u32 MipBlockDepth(u32 mip_level) const { | ||
| 185 | if (mip_level == 0) { | ||
| 186 | return block_depth; | ||
| 187 | } | ||
| 188 | |||
| 189 | if (is_layered) { | ||
| 190 | return 1; | ||
| 191 | } | ||
| 192 | |||
| 193 | const u32 mip_depth = MipDepth(mip_level); | ||
| 194 | u32 bd = 32; | ||
| 195 | while (bd > 1 && mip_depth * 2 <= bd) { | ||
| 196 | bd >>= 1; | ||
| 197 | } | ||
| 198 | |||
| 199 | if (bd == 32) { | ||
| 200 | const u32 bh = MipBlockHeight(mip_level); | ||
| 201 | if (bh >= 4) { | ||
| 202 | return 16; | ||
| 203 | } | ||
| 204 | } | ||
| 205 | |||
| 206 | return bd; | ||
| 207 | } | ||
| 208 | |||
| 209 | u32 RowAlign(u32 mip_level) const { | ||
| 210 | const u32 m_width = MipWidth(mip_level); | ||
| 211 | const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format); | ||
| 212 | const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel); | ||
| 213 | return (1U << l2); | ||
| 214 | } | ||
| 215 | |||
| 216 | /// Creates SurfaceParams from a texture configuration | ||
| 217 | static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config, | ||
| 218 | const GLShader::SamplerEntry& entry); | ||
| 219 | |||
| 220 | /// Creates SurfaceParams from a framebuffer configuration | ||
| 221 | static SurfaceParams CreateForFramebuffer(std::size_t index); | ||
| 222 | |||
| 223 | /// Creates SurfaceParams for a depth buffer configuration | ||
| 224 | static SurfaceParams CreateForDepthBuffer( | ||
| 225 | u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format, | ||
| 226 | u32 block_width, u32 block_height, u32 block_depth, | ||
| 227 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); | ||
| 228 | |||
| 229 | /// Creates SurfaceParams for a Fermi2D surface copy | ||
| 230 | static SurfaceParams CreateForFermiCopySurface( | ||
| 231 | const Tegra::Engines::Fermi2D::Regs::Surface& config); | ||
| 232 | |||
| 233 | /// Checks if surfaces are compatible for caching | ||
| 234 | bool IsCompatibleSurface(const SurfaceParams& other) const { | ||
| 235 | if (std::tie(pixel_format, type, width, height, target, depth, is_tiled) == | ||
| 236 | std::tie(other.pixel_format, other.type, other.width, other.height, other.target, | ||
| 237 | other.depth, other.is_tiled)) { | ||
| 238 | if (!is_tiled) | ||
| 239 | return true; | ||
| 240 | return std::tie(block_height, block_depth, tile_width_spacing) == | ||
| 241 | std::tie(other.block_height, other.block_depth, other.tile_width_spacing); | ||
| 242 | } | ||
| 243 | return false; | ||
| 244 | } | ||
| 245 | |||
| 246 | /// Initializes parameters for caching, should be called after everything has been initialized | ||
| 247 | void InitCacheParameters(GPUVAddr gpu_addr); | ||
| 248 | |||
| 249 | std::string TargetName() const { | ||
| 250 | switch (target) { | ||
| 251 | case SurfaceTarget::Texture1D: | ||
| 252 | return "1D"; | ||
| 253 | case SurfaceTarget::Texture2D: | ||
| 254 | return "2D"; | ||
| 255 | case SurfaceTarget::Texture3D: | ||
| 256 | return "3D"; | ||
| 257 | case SurfaceTarget::Texture1DArray: | ||
| 258 | return "1DArray"; | ||
| 259 | case SurfaceTarget::Texture2DArray: | ||
| 260 | return "2DArray"; | ||
| 261 | case SurfaceTarget::TextureCubemap: | ||
| 262 | return "Cube"; | ||
| 263 | default: | ||
| 264 | LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); | ||
| 265 | UNREACHABLE(); | ||
| 266 | return fmt::format("TUK({})", static_cast<u32>(target)); | ||
| 267 | } | ||
| 268 | } | ||
| 269 | |||
| 270 | std::string ClassName() const { | ||
| 271 | switch (identity) { | ||
| 272 | case SurfaceClass::Uploaded: | ||
| 273 | return "UP"; | ||
| 274 | case SurfaceClass::RenderTarget: | ||
| 275 | return "RT"; | ||
| 276 | case SurfaceClass::DepthBuffer: | ||
| 277 | return "DB"; | ||
| 278 | case SurfaceClass::Copy: | ||
| 279 | return "CP"; | ||
| 280 | default: | ||
| 281 | LOG_CRITICAL(HW_GPU, "Unimplemented surface_class={}", static_cast<u32>(identity)); | ||
| 282 | UNREACHABLE(); | ||
| 283 | return fmt::format("CUK({})", static_cast<u32>(identity)); | ||
| 284 | } | ||
| 285 | } | ||
| 286 | |||
| 287 | std::string IdentityString() const { | ||
| 288 | return ClassName() + '_' + TargetName() + '_' + (is_tiled ? 'T' : 'L'); | ||
| 289 | } | ||
| 290 | |||
| 291 | bool is_tiled; | ||
| 292 | u32 block_width; | ||
| 293 | u32 block_height; | ||
| 294 | u32 block_depth; | ||
| 295 | u32 tile_width_spacing; | ||
| 296 | PixelFormat pixel_format; | ||
| 297 | ComponentType component_type; | ||
| 298 | SurfaceType type; | ||
| 299 | u32 width; | ||
| 300 | u32 height; | ||
| 301 | u32 depth; | ||
| 302 | u32 unaligned_height; | ||
| 303 | u32 pitch; | ||
| 304 | SurfaceTarget target; | ||
| 305 | SurfaceClass identity; | ||
| 306 | u32 max_mip_level; | ||
| 307 | bool is_layered; | ||
| 308 | bool is_array; | ||
| 309 | bool srgb_conversion; | ||
| 310 | // Parameters used for caching | ||
| 311 | u8* host_ptr; | ||
| 312 | GPUVAddr gpu_addr; | ||
| 313 | std::size_t size_in_bytes; | ||
| 314 | std::size_t size_in_bytes_gl; | ||
| 315 | |||
| 316 | // Render target specific parameters, not used in caching | ||
| 317 | struct { | ||
| 318 | u32 index; | ||
| 319 | u32 array_mode; | ||
| 320 | u32 volume; | ||
| 321 | u32 layer_stride; | ||
| 322 | u32 base_layer; | ||
| 323 | } rt; | ||
| 324 | |||
| 325 | private: | ||
| 326 | std::size_t InnerMipmapMemorySize(u32 mip_level, bool force_gl = false, bool layer_only = false, | ||
| 327 | bool uncompressed = false) const; | ||
| 328 | std::size_t InnerMemorySize(bool force_gl = false, bool layer_only = false, | ||
| 329 | bool uncompressed = false) const; | ||
| 330 | }; | ||
| 331 | |||
| 332 | }; // namespace OpenGL | ||
| 333 | |||
| 334 | /// Hashable variation of SurfaceParams, used for a key in the surface cache | ||
| 335 | struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> { | ||
| 336 | static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) { | ||
| 337 | SurfaceReserveKey res; | ||
| 338 | res.state = params; | ||
| 339 | res.state.identity = {}; // Ignore the origin of the texture | ||
| 340 | res.state.gpu_addr = {}; // Ignore GPU vaddr in caching | ||
| 341 | res.state.rt = {}; // Ignore rt config in caching | ||
| 342 | return res; | ||
| 343 | } | ||
| 344 | }; | ||
| 345 | namespace std { | ||
| 346 | template <> | ||
| 347 | struct hash<SurfaceReserveKey> { | ||
| 348 | std::size_t operator()(const SurfaceReserveKey& k) const { | ||
| 349 | return k.Hash(); | ||
| 350 | } | ||
| 351 | }; | ||
| 352 | } // namespace std | ||
| 353 | |||
| 354 | namespace OpenGL { | ||
| 355 | |||
| 356 | class RasterizerOpenGL; | ||
| 357 | |||
| 358 | // This is used to store temporary big buffers, | ||
| 359 | // instead of creating/destroying all the time | ||
| 360 | struct RasterizerTemporaryMemory { | ||
| 361 | std::vector<std::vector<u8>> gl_buffer; | ||
| 362 | }; | ||
| 363 | |||
| 364 | class CachedSurface final : public RasterizerCacheObject { | ||
| 365 | public: | ||
| 366 | explicit CachedSurface(const SurfaceParams& params); | ||
| 367 | |||
| 368 | VAddr GetCpuAddr() const override { | ||
| 369 | return cpu_addr; | ||
| 370 | } | ||
| 371 | |||
| 372 | std::size_t GetSizeInBytes() const override { | ||
| 373 | return cached_size_in_bytes; | ||
| 374 | } | ||
| 375 | |||
| 376 | std::size_t GetMemorySize() const { | ||
| 377 | return memory_size; | ||
| 378 | } | ||
| 379 | |||
| 380 | const OGLTexture& Texture() const { | ||
| 381 | return texture; | ||
| 382 | } | ||
| 383 | |||
| 384 | const OGLTexture& Texture(bool as_array) { | ||
| 385 | if (params.is_array == as_array) { | ||
| 386 | return texture; | ||
| 387 | } else { | ||
| 388 | EnsureTextureDiscrepantView(); | ||
| 389 | return discrepant_view; | ||
| 390 | } | ||
| 391 | } | ||
| 392 | |||
| 393 | GLenum Target() const { | ||
| 394 | return gl_target; | ||
| 395 | } | ||
| 396 | |||
| 397 | const SurfaceParams& GetSurfaceParams() const { | ||
| 398 | return params; | ||
| 399 | } | ||
| 400 | |||
| 401 | // Read/Write data in Switch memory to/from gl_buffer | ||
| 402 | void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem); | ||
| 403 | void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem); | ||
| 404 | |||
| 405 | // Upload data in gl_buffer to this surface's texture | ||
| 406 | void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle, | ||
| 407 | GLuint draw_fb_handle); | ||
| 408 | |||
| 409 | void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, | ||
| 410 | Tegra::Texture::SwizzleSource swizzle_y, | ||
| 411 | Tegra::Texture::SwizzleSource swizzle_z, | ||
| 412 | Tegra::Texture::SwizzleSource swizzle_w); | ||
| 413 | |||
| 414 | void MarkReinterpreted() { | ||
| 415 | reinterpreted = true; | ||
| 416 | } | ||
| 417 | |||
| 418 | bool IsReinterpreted() const { | ||
| 419 | return reinterpreted; | ||
| 420 | } | ||
| 421 | |||
| 422 | void MarkForReload(bool reload) { | ||
| 423 | must_reload = reload; | ||
| 424 | } | ||
| 425 | |||
| 426 | bool MustReload() const { | ||
| 427 | return must_reload; | ||
| 428 | } | ||
| 429 | |||
| 430 | bool IsUploaded() const { | ||
| 431 | return params.identity == SurfaceParams::SurfaceClass::Uploaded; | ||
| 432 | } | ||
| 433 | |||
| 434 | private: | ||
| 435 | void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map, | ||
| 436 | GLuint read_fb_handle, GLuint draw_fb_handle); | ||
| 437 | |||
| 438 | void EnsureTextureDiscrepantView(); | ||
| 439 | |||
| 440 | OGLTexture texture; | ||
| 441 | OGLTexture discrepant_view; | ||
| 442 | SurfaceParams params{}; | ||
| 443 | GLenum gl_target{}; | ||
| 444 | GLenum gl_internal_format{}; | ||
| 445 | std::size_t cached_size_in_bytes{}; | ||
| 446 | std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA}; | ||
| 447 | std::size_t memory_size; | ||
| 448 | bool reinterpreted = false; | ||
| 449 | bool must_reload = false; | ||
| 450 | VAddr cpu_addr{}; | ||
| 451 | }; | ||
| 452 | |||
| 453 | class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { | ||
| 454 | public: | ||
| 455 | explicit RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer); | ||
| 456 | |||
| 457 | /// Get a surface based on the texture configuration | ||
| 458 | Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, | ||
| 459 | const GLShader::SamplerEntry& entry); | ||
| 460 | |||
| 461 | /// Get the depth surface based on the framebuffer configuration | ||
| 462 | Surface GetDepthBufferSurface(bool preserve_contents); | ||
| 463 | |||
| 464 | /// Get the color surface based on the framebuffer configuration and the specified render target | ||
| 465 | Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); | ||
| 466 | |||
| 467 | /// Tries to find a framebuffer using on the provided CPU address | ||
| 468 | Surface TryFindFramebufferSurface(const u8* host_ptr) const; | ||
| 469 | |||
| 470 | /// Copies the contents of one surface to another | ||
| 471 | void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, | ||
| 472 | const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, | ||
| 473 | const Common::Rectangle<u32>& src_rect, | ||
| 474 | const Common::Rectangle<u32>& dst_rect); | ||
| 475 | |||
| 476 | void SignalPreDrawCall(); | ||
| 477 | void SignalPostDrawCall(); | ||
| 478 | |||
| 479 | protected: | ||
| 480 | void FlushObjectInner(const Surface& object) override { | ||
| 481 | object->FlushGLBuffer(temporal_memory); | ||
| 482 | } | ||
| 483 | |||
| 484 | private: | ||
| 485 | void LoadSurface(const Surface& surface); | ||
| 486 | Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); | ||
| 487 | |||
| 488 | /// Gets an uncached surface, creating it if need be | ||
| 489 | Surface GetUncachedSurface(const SurfaceParams& params); | ||
| 490 | |||
| 491 | /// Recreates a surface with new parameters | ||
| 492 | Surface RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params); | ||
| 493 | |||
| 494 | /// Reserves a unique surface that can be reused later | ||
| 495 | void ReserveSurface(const Surface& surface); | ||
| 496 | |||
| 497 | /// Tries to get a reserved surface for the specified parameters | ||
| 498 | Surface TryGetReservedSurface(const SurfaceParams& params); | ||
| 499 | |||
| 500 | // Partialy reinterpret a surface based on a triggering_surface that collides with it. | ||
| 501 | // returns true if the reinterpret was successful, false in case it was not. | ||
| 502 | bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect); | ||
| 503 | |||
| 504 | /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data | ||
| 505 | void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); | ||
| 506 | void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); | ||
| 507 | void FastCopySurface(const Surface& src_surface, const Surface& dst_surface); | ||
| 508 | void CopySurface(const Surface& src_surface, const Surface& dst_surface, | ||
| 509 | const GLuint copy_pbo_handle, const GLenum src_attachment = 0, | ||
| 510 | const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0); | ||
| 511 | |||
| 512 | /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have | ||
| 513 | /// previously been used. This is to prevent surfaces from being constantly created and | ||
| 514 | /// destroyed when used with different surface parameters. | ||
| 515 | std::unordered_map<SurfaceReserveKey, Surface> surface_reserve; | ||
| 516 | |||
| 517 | OGLFramebuffer read_framebuffer; | ||
| 518 | OGLFramebuffer draw_framebuffer; | ||
| 519 | |||
| 520 | bool texception = false; | ||
| 521 | |||
| 522 | /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one | ||
| 523 | /// using the new format. | ||
| 524 | OGLBuffer copy_pbo; | ||
| 525 | |||
| 526 | std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers; | ||
| 527 | std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers; | ||
| 528 | Surface last_depth_buffer; | ||
| 529 | |||
| 530 | RasterizerTemporaryMemory temporal_memory; | ||
| 531 | |||
| 532 | using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>; | ||
| 533 | using SurfaceInterval = typename SurfaceIntervalCache::interval_type; | ||
| 534 | |||
| 535 | static auto GetReinterpretInterval(const Surface& object) { | ||
| 536 | return SurfaceInterval::right_open(object->GetCacheAddr() + 1, | ||
| 537 | object->GetCacheAddr() + object->GetMemorySize() - 1); | ||
| 538 | } | ||
| 539 | |||
| 540 | // Reinterpreted surfaces are very fragil as the game may keep rendering into them. | ||
| 541 | SurfaceIntervalCache reinterpreted_surfaces; | ||
| 542 | |||
| 543 | void RegisterReinterpretSurface(Surface reinterpret_surface) { | ||
| 544 | auto interval = GetReinterpretInterval(reinterpret_surface); | ||
| 545 | reinterpreted_surfaces.insert({interval, reinterpret_surface}); | ||
| 546 | reinterpret_surface->MarkReinterpreted(); | ||
| 547 | } | ||
| 548 | |||
| 549 | Surface CollideOnReinterpretedSurface(CacheAddr addr) const { | ||
| 550 | const SurfaceInterval interval{addr}; | ||
| 551 | for (auto& pair : | ||
| 552 | boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) { | ||
| 553 | return pair.second; | ||
| 554 | } | ||
| 555 | return nullptr; | ||
| 556 | } | ||
| 557 | |||
| 558 | void Register(const Surface& object) override { | ||
| 559 | RasterizerCache<Surface>::Register(object); | ||
| 560 | } | ||
| 561 | |||
| 562 | /// Unregisters an object from the cache | ||
| 563 | void Unregister(const Surface& object) override { | ||
| 564 | if (object->IsReinterpreted()) { | ||
| 565 | auto interval = GetReinterpretInterval(object); | ||
| 566 | reinterpreted_surfaces.erase(interval); | ||
| 567 | } | ||
| 568 | RasterizerCache<Surface>::Unregister(object); | ||
| 569 | } | ||
| 570 | }; | ||
| 571 | |||
| 572 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index bfe666a73..5c96c1d46 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp | |||
| @@ -33,6 +33,24 @@ void OGLTexture::Release() { | |||
| 33 | handle = 0; | 33 | handle = 0; |
| 34 | } | 34 | } |
| 35 | 35 | ||
| 36 | void OGLTextureView::Create() { | ||
| 37 | if (handle != 0) | ||
| 38 | return; | ||
| 39 | |||
| 40 | MICROPROFILE_SCOPE(OpenGL_ResourceCreation); | ||
| 41 | glGenTextures(1, &handle); | ||
| 42 | } | ||
| 43 | |||
| 44 | void OGLTextureView::Release() { | ||
| 45 | if (handle == 0) | ||
| 46 | return; | ||
| 47 | |||
| 48 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); | ||
| 49 | glDeleteTextures(1, &handle); | ||
| 50 | OpenGLState::GetCurState().UnbindTexture(handle).Apply(); | ||
| 51 | handle = 0; | ||
| 52 | } | ||
| 53 | |||
| 36 | void OGLSampler::Create() { | 54 | void OGLSampler::Create() { |
| 37 | if (handle != 0) | 55 | if (handle != 0) |
| 38 | return; | 56 | return; |
| @@ -130,6 +148,12 @@ void OGLBuffer::Release() { | |||
| 130 | handle = 0; | 148 | handle = 0; |
| 131 | } | 149 | } |
| 132 | 150 | ||
| 151 | void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) { | ||
| 152 | ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; }); | ||
| 153 | |||
| 154 | glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY); | ||
| 155 | } | ||
| 156 | |||
| 133 | void OGLSync::Create() { | 157 | void OGLSync::Create() { |
| 134 | if (handle != 0) | 158 | if (handle != 0) |
| 135 | return; | 159 | return; |
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index fbb93ee49..3a85a1d4c 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h | |||
| @@ -36,6 +36,31 @@ public: | |||
| 36 | GLuint handle = 0; | 36 | GLuint handle = 0; |
| 37 | }; | 37 | }; |
| 38 | 38 | ||
| 39 | class OGLTextureView : private NonCopyable { | ||
| 40 | public: | ||
| 41 | OGLTextureView() = default; | ||
| 42 | |||
| 43 | OGLTextureView(OGLTextureView&& o) noexcept : handle(std::exchange(o.handle, 0)) {} | ||
| 44 | |||
| 45 | ~OGLTextureView() { | ||
| 46 | Release(); | ||
| 47 | } | ||
| 48 | |||
| 49 | OGLTextureView& operator=(OGLTextureView&& o) noexcept { | ||
| 50 | Release(); | ||
| 51 | handle = std::exchange(o.handle, 0); | ||
| 52 | return *this; | ||
| 53 | } | ||
| 54 | |||
| 55 | /// Creates a new internal OpenGL resource and stores the handle | ||
| 56 | void Create(); | ||
| 57 | |||
| 58 | /// Deletes the internal OpenGL resource | ||
| 59 | void Release(); | ||
| 60 | |||
| 61 | GLuint handle = 0; | ||
| 62 | }; | ||
| 63 | |||
| 39 | class OGLSampler : private NonCopyable { | 64 | class OGLSampler : private NonCopyable { |
| 40 | public: | 65 | public: |
| 41 | OGLSampler() = default; | 66 | OGLSampler() = default; |
| @@ -161,6 +186,9 @@ public: | |||
| 161 | /// Deletes the internal OpenGL resource | 186 | /// Deletes the internal OpenGL resource |
| 162 | void Release(); | 187 | void Release(); |
| 163 | 188 | ||
| 189 | // Converts the buffer into a stream copy buffer with a fixed size | ||
| 190 | void MakeStreamCopy(std::size_t buffer_size); | ||
| 191 | |||
| 164 | GLuint handle = 0; | 192 | GLuint handle = 0; |
| 165 | }; | 193 | }; |
| 166 | 194 | ||
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h index defbc2d81..34ee37f00 100644 --- a/src/video_core/renderer_opengl/gl_sampler_cache.h +++ b/src/video_core/renderer_opengl/gl_sampler_cache.h | |||
| @@ -17,9 +17,9 @@ public: | |||
| 17 | ~SamplerCacheOpenGL(); | 17 | ~SamplerCacheOpenGL(); |
| 18 | 18 | ||
| 19 | protected: | 19 | protected: |
| 20 | OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; | 20 | OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; |
| 21 | 21 | ||
| 22 | GLuint ToSamplerType(const OGLSampler& sampler) const; | 22 | GLuint ToSamplerType(const OGLSampler& sampler) const override; |
| 23 | }; | 23 | }; |
| 24 | 24 | ||
| 25 | } // namespace OpenGL | 25 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index ac8a9e6b7..909ccb82c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -23,13 +23,13 @@ namespace OpenGL { | |||
| 23 | 23 | ||
| 24 | using VideoCommon::Shader::ProgramCode; | 24 | using VideoCommon::Shader::ProgramCode; |
| 25 | 25 | ||
| 26 | // One UBO is always reserved for emulation values | 26 | // One UBO is always reserved for emulation values on staged shaders |
| 27 | constexpr u32 RESERVED_UBOS = 1; | 27 | constexpr u32 STAGE_RESERVED_UBOS = 1; |
| 28 | 28 | ||
| 29 | struct UnspecializedShader { | 29 | struct UnspecializedShader { |
| 30 | std::string code; | 30 | std::string code; |
| 31 | GLShader::ShaderEntries entries; | 31 | GLShader::ShaderEntries entries; |
| 32 | Maxwell::ShaderProgram program_type; | 32 | ProgramType program_type; |
| 33 | }; | 33 | }; |
| 34 | 34 | ||
| 35 | namespace { | 35 | namespace { |
| @@ -55,15 +55,17 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g | |||
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | /// Gets the shader type from a Maxwell program type | 57 | /// Gets the shader type from a Maxwell program type |
| 58 | constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) { | 58 | constexpr GLenum GetShaderType(ProgramType program_type) { |
| 59 | switch (program_type) { | 59 | switch (program_type) { |
| 60 | case Maxwell::ShaderProgram::VertexA: | 60 | case ProgramType::VertexA: |
| 61 | case Maxwell::ShaderProgram::VertexB: | 61 | case ProgramType::VertexB: |
| 62 | return GL_VERTEX_SHADER; | 62 | return GL_VERTEX_SHADER; |
| 63 | case Maxwell::ShaderProgram::Geometry: | 63 | case ProgramType::Geometry: |
| 64 | return GL_GEOMETRY_SHADER; | 64 | return GL_GEOMETRY_SHADER; |
| 65 | case Maxwell::ShaderProgram::Fragment: | 65 | case ProgramType::Fragment: |
| 66 | return GL_FRAGMENT_SHADER; | 66 | return GL_FRAGMENT_SHADER; |
| 67 | case ProgramType::Compute: | ||
| 68 | return GL_COMPUTE_SHADER; | ||
| 67 | default: | 69 | default: |
| 68 | return GL_NONE; | 70 | return GL_NONE; |
| 69 | } | 71 | } |
| @@ -100,18 +102,44 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen | |||
| 100 | } | 102 | } |
| 101 | } | 103 | } |
| 102 | 104 | ||
| 105 | ProgramType GetProgramType(Maxwell::ShaderProgram program) { | ||
| 106 | switch (program) { | ||
| 107 | case Maxwell::ShaderProgram::VertexA: | ||
| 108 | return ProgramType::VertexA; | ||
| 109 | case Maxwell::ShaderProgram::VertexB: | ||
| 110 | return ProgramType::VertexB; | ||
| 111 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 112 | return ProgramType::TessellationControl; | ||
| 113 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 114 | return ProgramType::TessellationEval; | ||
| 115 | case Maxwell::ShaderProgram::Geometry: | ||
| 116 | return ProgramType::Geometry; | ||
| 117 | case Maxwell::ShaderProgram::Fragment: | ||
| 118 | return ProgramType::Fragment; | ||
| 119 | } | ||
| 120 | UNREACHABLE(); | ||
| 121 | return {}; | ||
| 122 | } | ||
| 123 | |||
| 103 | /// Calculates the size of a program stream | 124 | /// Calculates the size of a program stream |
| 104 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | 125 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { |
| 105 | constexpr std::size_t start_offset = 10; | 126 | constexpr std::size_t start_offset = 10; |
| 127 | // This is the encoded version of BRA that jumps to itself. All Nvidia | ||
| 128 | // shaders end with one. | ||
| 129 | constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; | ||
| 130 | constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; | ||
| 106 | std::size_t offset = start_offset; | 131 | std::size_t offset = start_offset; |
| 107 | std::size_t size = start_offset * sizeof(u64); | 132 | std::size_t size = start_offset * sizeof(u64); |
| 108 | while (offset < program.size()) { | 133 | while (offset < program.size()) { |
| 109 | const u64 instruction = program[offset]; | 134 | const u64 instruction = program[offset]; |
| 110 | if (!IsSchedInstruction(offset, start_offset)) { | 135 | if (!IsSchedInstruction(offset, start_offset)) { |
| 111 | if (instruction == 0 || (instruction >> 52) == 0x50b) { | 136 | if ((instruction & mask) == self_jumping_branch) { |
| 112 | // End on Maxwell's "nop" instruction | 137 | // End on Maxwell's "nop" instruction |
| 113 | break; | 138 | break; |
| 114 | } | 139 | } |
| 140 | if (instruction == 0) { | ||
| 141 | break; | ||
| 142 | } | ||
| 115 | } | 143 | } |
| 116 | size += sizeof(u64); | 144 | size += sizeof(u64); |
| 117 | offset++; | 145 | offset++; |
| @@ -121,11 +149,13 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | |||
| 121 | } | 149 | } |
| 122 | 150 | ||
| 123 | /// Hashes one (or two) program streams | 151 | /// Hashes one (or two) program streams |
| 124 | u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, | 152 | u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, |
| 125 | const ProgramCode& code_b) { | 153 | const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { |
| 126 | u64 unique_identifier = | 154 | if (size_a == 0) { |
| 127 | Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code)); | 155 | size_a = CalculateProgramSize(code); |
| 128 | if (program_type != Maxwell::ShaderProgram::VertexA) { | 156 | } |
| 157 | u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); | ||
| 158 | if (program_type != ProgramType::VertexA) { | ||
| 129 | return unique_identifier; | 159 | return unique_identifier; |
| 130 | } | 160 | } |
| 131 | // VertexA programs include two programs | 161 | // VertexA programs include two programs |
| @@ -133,46 +163,69 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& | |||
| 133 | std::size_t seed = 0; | 163 | std::size_t seed = 0; |
| 134 | boost::hash_combine(seed, unique_identifier); | 164 | boost::hash_combine(seed, unique_identifier); |
| 135 | 165 | ||
| 136 | const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), | 166 | if (size_b == 0) { |
| 137 | CalculateProgramSize(code_b)); | 167 | size_b = CalculateProgramSize(code_b); |
| 168 | } | ||
| 169 | const u64 identifier_b = | ||
| 170 | Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b); | ||
| 138 | boost::hash_combine(seed, identifier_b); | 171 | boost::hash_combine(seed, identifier_b); |
| 139 | return static_cast<u64>(seed); | 172 | return static_cast<u64>(seed); |
| 140 | } | 173 | } |
| 141 | 174 | ||
| 142 | /// Creates an unspecialized program from code streams | 175 | /// Creates an unspecialized program from code streams |
| 143 | GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, | 176 | GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type, |
| 144 | ProgramCode program_code, ProgramCode program_code_b) { | 177 | ProgramCode program_code, ProgramCode program_code_b) { |
| 145 | GLShader::ShaderSetup setup(program_code); | 178 | GLShader::ShaderSetup setup(program_code); |
| 146 | if (program_type == Maxwell::ShaderProgram::VertexA) { | 179 | setup.program.size_a = CalculateProgramSize(program_code); |
| 180 | setup.program.size_b = 0; | ||
| 181 | if (program_type == ProgramType::VertexA) { | ||
| 147 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. | 182 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. |
| 148 | // Conventional HW does not support this, so we combine VertexA and VertexB into one | 183 | // Conventional HW does not support this, so we combine VertexA and VertexB into one |
| 149 | // stage here. | 184 | // stage here. |
| 150 | setup.SetProgramB(program_code_b); | 185 | setup.SetProgramB(program_code_b); |
| 186 | setup.program.size_b = CalculateProgramSize(program_code_b); | ||
| 151 | } | 187 | } |
| 152 | setup.program.unique_identifier = | 188 | setup.program.unique_identifier = GetUniqueIdentifier( |
| 153 | GetUniqueIdentifier(program_type, program_code, program_code_b); | 189 | program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); |
| 154 | 190 | ||
| 155 | switch (program_type) { | 191 | switch (program_type) { |
| 156 | case Maxwell::ShaderProgram::VertexA: | 192 | case ProgramType::VertexA: |
| 157 | case Maxwell::ShaderProgram::VertexB: | 193 | case ProgramType::VertexB: |
| 158 | return GLShader::GenerateVertexShader(device, setup); | 194 | return GLShader::GenerateVertexShader(device, setup); |
| 159 | case Maxwell::ShaderProgram::Geometry: | 195 | case ProgramType::Geometry: |
| 160 | return GLShader::GenerateGeometryShader(device, setup); | 196 | return GLShader::GenerateGeometryShader(device, setup); |
| 161 | case Maxwell::ShaderProgram::Fragment: | 197 | case ProgramType::Fragment: |
| 162 | return GLShader::GenerateFragmentShader(device, setup); | 198 | return GLShader::GenerateFragmentShader(device, setup); |
| 199 | case ProgramType::Compute: | ||
| 200 | return GLShader::GenerateComputeShader(device, setup); | ||
| 163 | default: | 201 | default: |
| 164 | LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); | 202 | UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); |
| 165 | UNREACHABLE(); | ||
| 166 | return {}; | 203 | return {}; |
| 167 | } | 204 | } |
| 168 | } | 205 | } |
| 169 | 206 | ||
| 170 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, | 207 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, |
| 171 | Maxwell::ShaderProgram program_type, BaseBindings base_bindings, | 208 | ProgramType program_type, const ProgramVariant& variant, |
| 172 | GLenum primitive_mode, bool hint_retrievable = false) { | 209 | bool hint_retrievable = false) { |
| 210 | auto base_bindings{variant.base_bindings}; | ||
| 211 | const auto primitive_mode{variant.primitive_mode}; | ||
| 212 | const auto texture_buffer_usage{variant.texture_buffer_usage}; | ||
| 213 | |||
| 173 | std::string source = "#version 430 core\n" | 214 | std::string source = "#version 430 core\n" |
| 174 | "#extension GL_ARB_separate_shader_objects : enable\n\n"; | 215 | "#extension GL_ARB_separate_shader_objects : enable\n" |
| 175 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | 216 | "#extension GL_NV_gpu_shader5 : enable\n" |
| 217 | "#extension GL_NV_shader_thread_group : enable\n"; | ||
| 218 | if (entries.shader_viewport_layer_array) { | ||
| 219 | source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; | ||
| 220 | } | ||
| 221 | if (program_type == ProgramType::Compute) { | ||
| 222 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; | ||
| 223 | } | ||
| 224 | source += '\n'; | ||
| 225 | |||
| 226 | if (program_type != ProgramType::Compute) { | ||
| 227 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | ||
| 228 | } | ||
| 176 | 229 | ||
| 177 | for (const auto& cbuf : entries.const_buffers) { | 230 | for (const auto& cbuf : entries.const_buffers) { |
| 178 | source += | 231 | source += |
| @@ -186,15 +239,34 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 186 | source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(), | 239 | source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(), |
| 187 | base_bindings.sampler++); | 240 | base_bindings.sampler++); |
| 188 | } | 241 | } |
| 242 | for (const auto& image : entries.images) { | ||
| 243 | source += | ||
| 244 | fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++); | ||
| 245 | } | ||
| 246 | |||
| 247 | // Transform 1D textures to texture samplers by declaring its preprocessor macros. | ||
| 248 | for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) { | ||
| 249 | if (!texture_buffer_usage.test(i)) { | ||
| 250 | continue; | ||
| 251 | } | ||
| 252 | source += fmt::format("#define SAMPLER_{}_IS_BUFFER\n", i); | ||
| 253 | } | ||
| 254 | if (texture_buffer_usage.any()) { | ||
| 255 | source += '\n'; | ||
| 256 | } | ||
| 189 | 257 | ||
| 190 | if (program_type == Maxwell::ShaderProgram::Geometry) { | 258 | if (program_type == ProgramType::Geometry) { |
| 191 | const auto [glsl_topology, debug_name, max_vertices] = | 259 | const auto [glsl_topology, debug_name, max_vertices] = |
| 192 | GetPrimitiveDescription(primitive_mode); | 260 | GetPrimitiveDescription(primitive_mode); |
| 193 | 261 | ||
| 194 | source += "layout (" + std::string(glsl_topology) + ") in;\n"; | 262 | source += "layout (" + std::string(glsl_topology) + ") in;\n\n"; |
| 195 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; | 263 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; |
| 196 | } | 264 | } |
| 265 | if (program_type == ProgramType::Compute) { | ||
| 266 | source += "layout (local_size_variable) in;\n"; | ||
| 267 | } | ||
| 197 | 268 | ||
| 269 | source += '\n'; | ||
| 198 | source += code; | 270 | source += code; |
| 199 | 271 | ||
| 200 | OGLShader shader; | 272 | OGLShader shader; |
| @@ -221,131 +293,97 @@ std::set<GLenum> GetSupportedFormats() { | |||
| 221 | 293 | ||
| 222 | } // Anonymous namespace | 294 | } // Anonymous namespace |
| 223 | 295 | ||
| 224 | CachedShader::CachedShader(const Device& device, VAddr cpu_addr, u64 unique_identifier, | 296 | CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 225 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | 297 | GLShader::ProgramResult result) |
| 226 | const PrecompiledPrograms& precompiled_programs, | 298 | : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr}, |
| 227 | ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr) | 299 | unique_identifier{params.unique_identifier}, program_type{program_type}, |
| 228 | : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr}, | 300 | disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, |
| 229 | unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache}, | 301 | entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} |
| 230 | precompiled_programs{precompiled_programs} { | 302 | |
| 231 | const std::size_t code_size{CalculateProgramSize(program_code)}; | 303 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, |
| 232 | const std::size_t code_size_b{program_code_b.empty() ? 0 | 304 | Maxwell::ShaderProgram program_type, |
| 233 | : CalculateProgramSize(program_code_b)}; | 305 | ProgramCode&& program_code, |
| 234 | GLShader::ProgramResult program_result{ | 306 | ProgramCode&& program_code_b) { |
| 235 | CreateProgram(device, program_type, program_code, program_code_b)}; | 307 | const auto code_size{CalculateProgramSize(program_code)}; |
| 236 | if (program_result.first.empty()) { | 308 | const auto code_size_b{CalculateProgramSize(program_code_b)}; |
| 309 | auto result{ | ||
| 310 | CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)}; | ||
| 311 | if (result.first.empty()) { | ||
| 237 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now | 312 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now |
| 238 | return; | 313 | return {}; |
| 239 | } | 314 | } |
| 240 | 315 | ||
| 241 | code = program_result.first; | 316 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw( |
| 242 | entries = program_result.second; | 317 | params.unique_identifier, GetProgramType(program_type), |
| 243 | shader_length = entries.shader_length; | 318 | static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), |
| 319 | std::move(program_code), std::move(program_code_b))); | ||
| 244 | 320 | ||
| 245 | const ShaderDiskCacheRaw raw(unique_identifier, program_type, | 321 | return std::shared_ptr<CachedShader>( |
| 246 | static_cast<u32>(code_size / sizeof(u64)), | 322 | new CachedShader(params, GetProgramType(program_type), std::move(result))); |
| 247 | static_cast<u32>(code_size_b / sizeof(u64)), | ||
| 248 | std::move(program_code), std::move(program_code_b)); | ||
| 249 | disk_cache.SaveRaw(raw); | ||
| 250 | } | 323 | } |
| 251 | 324 | ||
| 252 | CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier, | 325 | Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, |
| 253 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | 326 | Maxwell::ShaderProgram program_type, |
| 254 | const PrecompiledPrograms& precompiled_programs, | 327 | GLShader::ProgramResult result) { |
| 255 | GLShader::ProgramResult result, u8* host_ptr) | 328 | return std::shared_ptr<CachedShader>( |
| 256 | : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, | 329 | new CachedShader(params, GetProgramType(program_type), std::move(result))); |
| 257 | program_type{program_type}, disk_cache{disk_cache}, precompiled_programs{ | ||
| 258 | precompiled_programs} { | ||
| 259 | code = std::move(result.first); | ||
| 260 | entries = result.second; | ||
| 261 | shader_length = entries.shader_length; | ||
| 262 | } | 330 | } |
| 263 | 331 | ||
| 264 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode, | 332 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { |
| 265 | BaseBindings base_bindings) { | 333 | auto result{CreateProgram(params.device, ProgramType::Compute, code, {})}; |
| 266 | GLuint handle{}; | ||
| 267 | if (program_type == Maxwell::ShaderProgram::Geometry) { | ||
| 268 | handle = GetGeometryShader(primitive_mode, base_bindings); | ||
| 269 | } else { | ||
| 270 | const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings); | ||
| 271 | auto& program = entry->second; | ||
| 272 | if (is_cache_miss) { | ||
| 273 | program = TryLoadProgram(primitive_mode, base_bindings); | ||
| 274 | if (!program) { | ||
| 275 | program = | ||
| 276 | SpecializeShader(code, entries, program_type, base_bindings, primitive_mode); | ||
| 277 | disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); | ||
| 278 | } | ||
| 279 | |||
| 280 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); | ||
| 281 | } | ||
| 282 | 334 | ||
| 283 | handle = program->handle; | 335 | const auto code_size{CalculateProgramSize(code)}; |
| 284 | } | 336 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, |
| 337 | static_cast<u32>(code_size / sizeof(u64)), 0, | ||
| 338 | std::move(code), {})); | ||
| 285 | 339 | ||
| 286 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS; | 340 | return std::shared_ptr<CachedShader>( |
| 287 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); | 341 | new CachedShader(params, ProgramType::Compute, std::move(result))); |
| 288 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); | 342 | } |
| 289 | 343 | ||
| 290 | return {handle, base_bindings}; | 344 | Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, |
| 345 | GLShader::ProgramResult result) { | ||
| 346 | return std::shared_ptr<CachedShader>( | ||
| 347 | new CachedShader(params, ProgramType::Compute, std::move(result))); | ||
| 291 | } | 348 | } |
| 292 | 349 | ||
| 293 | GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) { | 350 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { |
| 294 | const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings); | 351 | const auto [entry, is_cache_miss] = programs.try_emplace(variant); |
| 295 | auto& programs = entry->second; | 352 | auto& program = entry->second; |
| 353 | if (is_cache_miss) { | ||
| 354 | program = TryLoadProgram(variant); | ||
| 355 | if (!program) { | ||
| 356 | program = SpecializeShader(code, entries, program_type, variant); | ||
| 357 | disk_cache.SaveUsage(GetUsage(variant)); | ||
| 358 | } | ||
| 296 | 359 | ||
| 297 | switch (primitive_mode) { | 360 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); |
| 298 | case GL_POINTS: | ||
| 299 | return LazyGeometryProgram(programs.points, base_bindings, primitive_mode); | ||
| 300 | case GL_LINES: | ||
| 301 | case GL_LINE_STRIP: | ||
| 302 | return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode); | ||
| 303 | case GL_LINES_ADJACENCY: | ||
| 304 | case GL_LINE_STRIP_ADJACENCY: | ||
| 305 | return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode); | ||
| 306 | case GL_TRIANGLES: | ||
| 307 | case GL_TRIANGLE_STRIP: | ||
| 308 | case GL_TRIANGLE_FAN: | ||
| 309 | return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode); | ||
| 310 | case GL_TRIANGLES_ADJACENCY: | ||
| 311 | case GL_TRIANGLE_STRIP_ADJACENCY: | ||
| 312 | return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode); | ||
| 313 | default: | ||
| 314 | UNREACHABLE_MSG("Unknown primitive mode."); | ||
| 315 | return LazyGeometryProgram(programs.points, base_bindings, primitive_mode); | ||
| 316 | } | 361 | } |
| 317 | } | ||
| 318 | 362 | ||
| 319 | GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings, | 363 | auto base_bindings = variant.base_bindings; |
| 320 | GLenum primitive_mode) { | 364 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()); |
| 321 | if (target_program) { | 365 | if (program_type != ProgramType::Compute) { |
| 322 | return target_program->handle; | 366 | base_bindings.cbuf += STAGE_RESERVED_UBOS; |
| 323 | } | ||
| 324 | const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode); | ||
| 325 | target_program = TryLoadProgram(primitive_mode, base_bindings); | ||
| 326 | if (!target_program) { | ||
| 327 | target_program = | ||
| 328 | SpecializeShader(code, entries, program_type, base_bindings, primitive_mode); | ||
| 329 | disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); | ||
| 330 | } | 367 | } |
| 368 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); | ||
| 369 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); | ||
| 331 | 370 | ||
| 332 | LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name); | 371 | return {program->handle, base_bindings}; |
| 333 | 372 | } | |
| 334 | return target_program->handle; | ||
| 335 | }; | ||
| 336 | 373 | ||
| 337 | CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode, | 374 | CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const { |
| 338 | BaseBindings base_bindings) const { | 375 | const auto found = precompiled_programs.find(GetUsage(variant)); |
| 339 | const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings)); | ||
| 340 | if (found == precompiled_programs.end()) { | 376 | if (found == precompiled_programs.end()) { |
| 341 | return {}; | 377 | return {}; |
| 342 | } | 378 | } |
| 343 | return found->second; | 379 | return found->second; |
| 344 | } | 380 | } |
| 345 | 381 | ||
| 346 | ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, | 382 | ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const { |
| 347 | BaseBindings base_bindings) const { | 383 | ShaderDiskCacheUsage usage; |
| 348 | return {unique_identifier, base_bindings, primitive_mode}; | 384 | usage.unique_identifier = unique_identifier; |
| 385 | usage.variant = variant; | ||
| 386 | return usage; | ||
| 349 | } | 387 | } |
| 350 | 388 | ||
| 351 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, | 389 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, |
| @@ -411,8 +449,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 411 | } | 449 | } |
| 412 | if (!shader) { | 450 | if (!shader) { |
| 413 | shader = SpecializeShader(unspecialized.code, unspecialized.entries, | 451 | shader = SpecializeShader(unspecialized.code, unspecialized.entries, |
| 414 | unspecialized.program_type, usage.bindings, | 452 | unspecialized.program_type, usage.variant, true); |
| 415 | usage.primitive, true); | ||
| 416 | } | 453 | } |
| 417 | 454 | ||
| 418 | std::scoped_lock lock(mutex); | 455 | std::scoped_lock lock(mutex); |
| @@ -547,7 +584,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia | |||
| 547 | } | 584 | } |
| 548 | 585 | ||
| 549 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 586 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| 550 | if (!system.GPU().Maxwell3D().dirty_flags.shaders) { | 587 | if (!system.GPU().Maxwell3D().dirty.shaders) { |
| 551 | return last_shaders[static_cast<std::size_t>(program)]; | 588 | return last_shaders[static_cast<std::size_t>(program)]; |
| 552 | } | 589 | } |
| 553 | 590 | ||
| @@ -564,28 +601,55 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 564 | // No shader found - create a new one | 601 | // No shader found - create a new one |
| 565 | ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; | 602 | ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; |
| 566 | ProgramCode program_code_b; | 603 | ProgramCode program_code_b; |
| 567 | if (program == Maxwell::ShaderProgram::VertexA) { | 604 | const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; |
| 605 | if (is_program_a) { | ||
| 568 | const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; | 606 | const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; |
| 569 | program_code_b = GetShaderCode(memory_manager, program_addr_b, | 607 | program_code_b = GetShaderCode(memory_manager, program_addr_b, |
| 570 | memory_manager.GetPointer(program_addr_b)); | 608 | memory_manager.GetPointer(program_addr_b)); |
| 571 | } | 609 | } |
| 572 | 610 | ||
| 573 | const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); | 611 | const auto unique_identifier = |
| 574 | const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; | 612 | GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); |
| 613 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; | ||
| 614 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, | ||
| 615 | host_ptr, unique_identifier}; | ||
| 616 | |||
| 575 | const auto found = precompiled_shaders.find(unique_identifier); | 617 | const auto found = precompiled_shaders.find(unique_identifier); |
| 576 | if (found != precompiled_shaders.end()) { | 618 | if (found == precompiled_shaders.end()) { |
| 577 | // Create a shader from the cache | 619 | shader = CachedShader::CreateStageFromMemory(params, program, std::move(program_code), |
| 578 | shader = std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache, | 620 | std::move(program_code_b)); |
| 579 | precompiled_programs, found->second, host_ptr); | ||
| 580 | } else { | 621 | } else { |
| 581 | // Create a shader from guest memory | 622 | shader = CachedShader::CreateStageFromCache(params, program, found->second); |
| 582 | shader = std::make_shared<CachedShader>( | ||
| 583 | device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, | ||
| 584 | std::move(program_code), std::move(program_code_b), host_ptr); | ||
| 585 | } | 623 | } |
| 586 | Register(shader); | 624 | Register(shader); |
| 587 | 625 | ||
| 588 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 626 | return last_shaders[static_cast<std::size_t>(program)] = shader; |
| 589 | } | 627 | } |
| 590 | 628 | ||
| 629 | Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | ||
| 630 | auto& memory_manager{system.GPU().MemoryManager()}; | ||
| 631 | const auto host_ptr{memory_manager.GetPointer(code_addr)}; | ||
| 632 | auto kernel = TryGet(host_ptr); | ||
| 633 | if (kernel) { | ||
| 634 | return kernel; | ||
| 635 | } | ||
| 636 | |||
| 637 | // No kernel found - create a new one | ||
| 638 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | ||
| 639 | const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; | ||
| 640 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | ||
| 641 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, | ||
| 642 | host_ptr, unique_identifier}; | ||
| 643 | |||
| 644 | const auto found = precompiled_shaders.find(unique_identifier); | ||
| 645 | if (found == precompiled_shaders.end()) { | ||
| 646 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); | ||
| 647 | } else { | ||
| 648 | kernel = CachedShader::CreateKernelFromCache(params, found->second); | ||
| 649 | } | ||
| 650 | |||
| 651 | Register(kernel); | ||
| 652 | return kernel; | ||
| 653 | } | ||
| 654 | |||
| 591 | } // namespace OpenGL | 655 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 09bd0761d..de195cc5d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <atomic> | 8 | #include <atomic> |
| 9 | #include <bitset> | ||
| 9 | #include <memory> | 10 | #include <memory> |
| 10 | #include <set> | 11 | #include <set> |
| 11 | #include <tuple> | 12 | #include <tuple> |
| @@ -41,17 +42,29 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; | |||
| 41 | using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; | 42 | using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; |
| 42 | using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; | 43 | using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; |
| 43 | 44 | ||
| 45 | struct ShaderParameters { | ||
| 46 | ShaderDiskCacheOpenGL& disk_cache; | ||
| 47 | const PrecompiledPrograms& precompiled_programs; | ||
| 48 | const Device& device; | ||
| 49 | VAddr cpu_addr; | ||
| 50 | u8* host_ptr; | ||
| 51 | u64 unique_identifier; | ||
| 52 | }; | ||
| 53 | |||
| 44 | class CachedShader final : public RasterizerCacheObject { | 54 | class CachedShader final : public RasterizerCacheObject { |
| 45 | public: | 55 | public: |
| 46 | explicit CachedShader(const Device& device, VAddr cpu_addr, u64 unique_identifier, | 56 | static Shader CreateStageFromMemory(const ShaderParameters& params, |
| 47 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | 57 | Maxwell::ShaderProgram program_type, |
| 48 | const PrecompiledPrograms& precompiled_programs, | 58 | ProgramCode&& program_code, ProgramCode&& program_code_b); |
| 49 | ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr); | 59 | |
| 60 | static Shader CreateStageFromCache(const ShaderParameters& params, | ||
| 61 | Maxwell::ShaderProgram program_type, | ||
| 62 | GLShader::ProgramResult result); | ||
| 63 | |||
| 64 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code); | ||
| 50 | 65 | ||
| 51 | explicit CachedShader(VAddr cpu_addr, u64 unique_identifier, | 66 | static Shader CreateKernelFromCache(const ShaderParameters& params, |
| 52 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | 67 | GLShader::ProgramResult result); |
| 53 | const PrecompiledPrograms& precompiled_programs, | ||
| 54 | GLShader::ProgramResult result, u8* host_ptr); | ||
| 55 | 68 | ||
| 56 | VAddr GetCpuAddr() const override { | 69 | VAddr GetCpuAddr() const override { |
| 57 | return cpu_addr; | 70 | return cpu_addr; |
| @@ -67,49 +80,27 @@ public: | |||
| 67 | } | 80 | } |
| 68 | 81 | ||
| 69 | /// Gets the GL program handle for the shader | 82 | /// Gets the GL program handle for the shader |
| 70 | std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode, | 83 | std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); |
| 71 | BaseBindings base_bindings); | ||
| 72 | 84 | ||
| 73 | private: | 85 | private: |
| 74 | // Geometry programs. These are needed because GLSL needs an input topology but it's not | 86 | explicit CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 75 | // declared by the hardware. Workaround this issue by generating a different shader per input | 87 | GLShader::ProgramResult result); |
| 76 | // topology class. | ||
| 77 | struct GeometryPrograms { | ||
| 78 | CachedProgram points; | ||
| 79 | CachedProgram lines; | ||
| 80 | CachedProgram lines_adjacency; | ||
| 81 | CachedProgram triangles; | ||
| 82 | CachedProgram triangles_adjacency; | ||
| 83 | }; | ||
| 84 | 88 | ||
| 85 | GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings); | 89 | CachedProgram TryLoadProgram(const ProgramVariant& variant) const; |
| 86 | 90 | ||
| 87 | /// Generates a geometry shader or returns one that already exists. | 91 | ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; |
| 88 | GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings, | ||
| 89 | GLenum primitive_mode); | ||
| 90 | 92 | ||
| 91 | CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const; | ||
| 92 | |||
| 93 | ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; | ||
| 94 | |||
| 95 | u8* host_ptr{}; | ||
| 96 | VAddr cpu_addr{}; | 93 | VAddr cpu_addr{}; |
| 97 | u64 unique_identifier{}; | 94 | u64 unique_identifier{}; |
| 98 | Maxwell::ShaderProgram program_type{}; | 95 | ProgramType program_type{}; |
| 99 | ShaderDiskCacheOpenGL& disk_cache; | 96 | ShaderDiskCacheOpenGL& disk_cache; |
| 100 | const PrecompiledPrograms& precompiled_programs; | 97 | const PrecompiledPrograms& precompiled_programs; |
| 101 | 98 | ||
| 102 | std::size_t shader_length{}; | ||
| 103 | GLShader::ShaderEntries entries; | 99 | GLShader::ShaderEntries entries; |
| 104 | |||
| 105 | std::string code; | 100 | std::string code; |
| 101 | std::size_t shader_length{}; | ||
| 106 | 102 | ||
| 107 | std::unordered_map<BaseBindings, CachedProgram> programs; | 103 | std::unordered_map<ProgramVariant, CachedProgram> programs; |
| 108 | std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs; | ||
| 109 | |||
| 110 | std::unordered_map<u32, GLuint> cbuf_resource_cache; | ||
| 111 | std::unordered_map<u32, GLuint> gmem_resource_cache; | ||
| 112 | std::unordered_map<u32, GLint> uniform_cache; | ||
| 113 | }; | 104 | }; |
| 114 | 105 | ||
| 115 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { | 106 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { |
| @@ -124,6 +115,9 @@ public: | |||
| 124 | /// Gets the current specified shader stage program | 115 | /// Gets the current specified shader stage program |
| 125 | Shader GetStageProgram(Maxwell::ShaderProgram program); | 116 | Shader GetStageProgram(Maxwell::ShaderProgram program); |
| 126 | 117 | ||
| 118 | /// Gets a compute kernel in the passed address | ||
| 119 | Shader GetComputeKernel(GPUVAddr code_addr); | ||
| 120 | |||
| 127 | protected: | 121 | protected: |
| 128 | // We do not have to flush this cache as things in it are never modified by us. | 122 | // We do not have to flush this cache as things in it are never modified by us. |
| 129 | void FlushObjectInner(const Shader& object) override {} | 123 | void FlushObjectInner(const Shader& object) override {} |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 7dc2e0560..137b23740 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "common/alignment.h" | 14 | #include "common/alignment.h" |
| 15 | #include "common/assert.h" | 15 | #include "common/assert.h" |
| 16 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 17 | #include "common/logging/log.h" | ||
| 17 | #include "video_core/engines/maxwell_3d.h" | 18 | #include "video_core/engines/maxwell_3d.h" |
| 18 | #include "video_core/renderer_opengl/gl_device.h" | 19 | #include "video_core/renderer_opengl/gl_device.h" |
| 19 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 20 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| @@ -36,19 +37,18 @@ using namespace std::string_literals; | |||
| 36 | using namespace VideoCommon::Shader; | 37 | using namespace VideoCommon::Shader; |
| 37 | 38 | ||
| 38 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 39 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 39 | using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | ||
| 40 | using Operation = const OperationNode&; | 40 | using Operation = const OperationNode&; |
| 41 | 41 | ||
| 42 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | 42 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| 43 | 43 | ||
| 44 | struct TextureAoffi {}; | 44 | struct TextureAoffi {}; |
| 45 | using TextureArgument = std::pair<Type, Node>; | 45 | using TextureArgument = std::pair<Type, Node>; |
| 46 | using TextureIR = std::variant<TextureAoffi, TextureArgument>; | 46 | using TextureIR = std::variant<TextureAoffi, TextureArgument>; |
| 47 | 47 | ||
| 48 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | 48 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = |
| 49 | static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); | 49 | static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); |
| 50 | 50 | ||
| 51 | class ShaderWriter { | 51 | class ShaderWriter final { |
| 52 | public: | 52 | public: |
| 53 | void AddExpression(std::string_view text) { | 53 | void AddExpression(std::string_view text) { |
| 54 | DEBUG_ASSERT(scope >= 0); | 54 | DEBUG_ASSERT(scope >= 0); |
| @@ -93,9 +93,157 @@ private: | |||
| 93 | u32 temporary_index = 1; | 93 | u32 temporary_index = 1; |
| 94 | }; | 94 | }; |
| 95 | 95 | ||
| 96 | class Expression final { | ||
| 97 | public: | ||
| 98 | Expression(std::string code, Type type) : code{std::move(code)}, type{type} { | ||
| 99 | ASSERT(type != Type::Void); | ||
| 100 | } | ||
| 101 | Expression() : type{Type::Void} {} | ||
| 102 | |||
| 103 | Type GetType() const { | ||
| 104 | return type; | ||
| 105 | } | ||
| 106 | |||
| 107 | std::string GetCode() const { | ||
| 108 | return code; | ||
| 109 | } | ||
| 110 | |||
| 111 | void CheckVoid() const { | ||
| 112 | ASSERT(type == Type::Void); | ||
| 113 | } | ||
| 114 | |||
| 115 | std::string As(Type type) const { | ||
| 116 | switch (type) { | ||
| 117 | case Type::Bool: | ||
| 118 | return AsBool(); | ||
| 119 | case Type::Bool2: | ||
| 120 | return AsBool2(); | ||
| 121 | case Type::Float: | ||
| 122 | return AsFloat(); | ||
| 123 | case Type::Int: | ||
| 124 | return AsInt(); | ||
| 125 | case Type::Uint: | ||
| 126 | return AsUint(); | ||
| 127 | case Type::HalfFloat: | ||
| 128 | return AsHalfFloat(); | ||
| 129 | default: | ||
| 130 | UNREACHABLE_MSG("Invalid type"); | ||
| 131 | return code; | ||
| 132 | } | ||
| 133 | } | ||
| 134 | |||
| 135 | std::string AsBool() const { | ||
| 136 | switch (type) { | ||
| 137 | case Type::Bool: | ||
| 138 | return code; | ||
| 139 | default: | ||
| 140 | UNREACHABLE_MSG("Incompatible types"); | ||
| 141 | return code; | ||
| 142 | } | ||
| 143 | } | ||
| 144 | |||
| 145 | std::string AsBool2() const { | ||
| 146 | switch (type) { | ||
| 147 | case Type::Bool2: | ||
| 148 | return code; | ||
| 149 | default: | ||
| 150 | UNREACHABLE_MSG("Incompatible types"); | ||
| 151 | return code; | ||
| 152 | } | ||
| 153 | } | ||
| 154 | |||
| 155 | std::string AsFloat() const { | ||
| 156 | switch (type) { | ||
| 157 | case Type::Float: | ||
| 158 | return code; | ||
| 159 | case Type::Uint: | ||
| 160 | return fmt::format("utof({})", code); | ||
| 161 | case Type::Int: | ||
| 162 | return fmt::format("itof({})", code); | ||
| 163 | case Type::HalfFloat: | ||
| 164 | return fmt::format("utof(packHalf2x16({}))", code); | ||
| 165 | default: | ||
| 166 | UNREACHABLE_MSG("Incompatible types"); | ||
| 167 | return code; | ||
| 168 | } | ||
| 169 | } | ||
| 170 | |||
| 171 | std::string AsInt() const { | ||
| 172 | switch (type) { | ||
| 173 | case Type::Float: | ||
| 174 | return fmt::format("ftoi({})", code); | ||
| 175 | case Type::Uint: | ||
| 176 | return fmt::format("int({})", code); | ||
| 177 | case Type::Int: | ||
| 178 | return code; | ||
| 179 | case Type::HalfFloat: | ||
| 180 | return fmt::format("int(packHalf2x16({}))", code); | ||
| 181 | default: | ||
| 182 | UNREACHABLE_MSG("Incompatible types"); | ||
| 183 | return code; | ||
| 184 | } | ||
| 185 | } | ||
| 186 | |||
| 187 | std::string AsUint() const { | ||
| 188 | switch (type) { | ||
| 189 | case Type::Float: | ||
| 190 | return fmt::format("ftou({})", code); | ||
| 191 | case Type::Uint: | ||
| 192 | return code; | ||
| 193 | case Type::Int: | ||
| 194 | return fmt::format("uint({})", code); | ||
| 195 | case Type::HalfFloat: | ||
| 196 | return fmt::format("packHalf2x16({})", code); | ||
| 197 | default: | ||
| 198 | UNREACHABLE_MSG("Incompatible types"); | ||
| 199 | return code; | ||
| 200 | } | ||
| 201 | } | ||
| 202 | |||
| 203 | std::string AsHalfFloat() const { | ||
| 204 | switch (type) { | ||
| 205 | case Type::Float: | ||
| 206 | return fmt::format("unpackHalf2x16(ftou({}))", code); | ||
| 207 | case Type::Uint: | ||
| 208 | return fmt::format("unpackHalf2x16({})", code); | ||
| 209 | case Type::Int: | ||
| 210 | return fmt::format("unpackHalf2x16(int({}))", code); | ||
| 211 | case Type::HalfFloat: | ||
| 212 | return code; | ||
| 213 | default: | ||
| 214 | UNREACHABLE_MSG("Incompatible types"); | ||
| 215 | return code; | ||
| 216 | } | ||
| 217 | } | ||
| 218 | |||
| 219 | private: | ||
| 220 | std::string code; | ||
| 221 | Type type{}; | ||
| 222 | }; | ||
| 223 | |||
| 224 | constexpr const char* GetTypeString(Type type) { | ||
| 225 | switch (type) { | ||
| 226 | case Type::Bool: | ||
| 227 | return "bool"; | ||
| 228 | case Type::Bool2: | ||
| 229 | return "bvec2"; | ||
| 230 | case Type::Float: | ||
| 231 | return "float"; | ||
| 232 | case Type::Int: | ||
| 233 | return "int"; | ||
| 234 | case Type::Uint: | ||
| 235 | return "uint"; | ||
| 236 | case Type::HalfFloat: | ||
| 237 | return "vec2"; | ||
| 238 | default: | ||
| 239 | UNREACHABLE_MSG("Invalid type"); | ||
| 240 | return "<invalid type>"; | ||
| 241 | } | ||
| 242 | } | ||
| 243 | |||
| 96 | /// Generates code to use for a swizzle operation. | 244 | /// Generates code to use for a swizzle operation. |
| 97 | constexpr const char* GetSwizzle(u32 element) { | 245 | constexpr const char* GetSwizzle(u32 element) { |
| 98 | constexpr std::array<const char*, 4> swizzle = {".x", ".y", ".z", ".w"}; | 246 | constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; |
| 99 | return swizzle.at(element); | 247 | return swizzle.at(element); |
| 100 | } | 248 | } |
| 101 | 249 | ||
| @@ -134,8 +282,8 @@ constexpr bool IsGenericAttribute(Attribute::Index index) { | |||
| 134 | return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; | 282 | return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; |
| 135 | } | 283 | } |
| 136 | 284 | ||
| 137 | constexpr Attribute::Index ToGenericAttribute(u32 value) { | 285 | constexpr Attribute::Index ToGenericAttribute(u64 value) { |
| 138 | return static_cast<Attribute::Index>(value + static_cast<u32>(Attribute::Index::Attribute_0)); | 286 | return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0)); |
| 139 | } | 287 | } |
| 140 | 288 | ||
| 141 | u32 GetGenericAttributeIndex(Attribute::Index index) { | 289 | u32 GetGenericAttributeIndex(Attribute::Index index) { |
| @@ -161,9 +309,13 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 161 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | 309 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); |
| 162 | } | 310 | } |
| 163 | 311 | ||
| 312 | constexpr bool IsVertexShader(ProgramType stage) { | ||
| 313 | return stage == ProgramType::VertexA || stage == ProgramType::VertexB; | ||
| 314 | } | ||
| 315 | |||
| 164 | class GLSLDecompiler final { | 316 | class GLSLDecompiler final { |
| 165 | public: | 317 | public: |
| 166 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, | 318 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage, |
| 167 | std::string suffix) | 319 | std::string suffix) |
| 168 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} | 320 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} |
| 169 | 321 | ||
| @@ -180,20 +332,23 @@ public: | |||
| 180 | DeclareGlobalMemory(); | 332 | DeclareGlobalMemory(); |
| 181 | DeclareSamplers(); | 333 | DeclareSamplers(); |
| 182 | DeclarePhysicalAttributeReader(); | 334 | DeclarePhysicalAttributeReader(); |
| 335 | DeclareImages(); | ||
| 183 | 336 | ||
| 184 | code.AddLine("void execute_{}() {{", suffix); | 337 | code.AddLine("void execute_{}() {{", suffix); |
| 185 | ++code.scope; | 338 | ++code.scope; |
| 186 | 339 | ||
| 187 | // VM's program counter | 340 | // VM's program counter |
| 188 | const auto first_address = ir.GetBasicBlocks().begin()->first; | 341 | const auto first_address = ir.GetBasicBlocks().begin()->first; |
| 189 | code.AddLine("uint jmp_to = {}u;", first_address); | 342 | code.AddLine("uint jmp_to = {}U;", first_address); |
| 190 | 343 | ||
| 191 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems | 344 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems |
| 192 | // unlikely that shaders will use 20 nested SSYs and PBKs. | 345 | // unlikely that shaders will use 20 nested SSYs and PBKs. |
| 193 | constexpr u32 FLOW_STACK_SIZE = 20; | 346 | if (!ir.IsFlowStackDisabled()) { |
| 194 | for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { | 347 | constexpr u32 FLOW_STACK_SIZE = 20; |
| 195 | code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); | 348 | for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { |
| 196 | code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); | 349 | code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); |
| 350 | code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); | ||
| 351 | } | ||
| 197 | } | 352 | } |
| 198 | 353 | ||
| 199 | code.AddLine("while (true) {{"); | 354 | code.AddLine("while (true) {{"); |
| @@ -203,7 +358,7 @@ public: | |||
| 203 | 358 | ||
| 204 | for (const auto& pair : ir.GetBasicBlocks()) { | 359 | for (const auto& pair : ir.GetBasicBlocks()) { |
| 205 | const auto [address, bb] = pair; | 360 | const auto [address, bb] = pair; |
| 206 | code.AddLine("case 0x{:x}u: {{", address); | 361 | code.AddLine("case 0x{:X}U: {{", address); |
| 207 | ++code.scope; | 362 | ++code.scope; |
| 208 | 363 | ||
| 209 | VisitBlock(bb); | 364 | VisitBlock(bb); |
| @@ -234,30 +389,30 @@ public: | |||
| 234 | for (const auto& sampler : ir.GetSamplers()) { | 389 | for (const auto& sampler : ir.GetSamplers()) { |
| 235 | entries.samplers.emplace_back(sampler); | 390 | entries.samplers.emplace_back(sampler); |
| 236 | } | 391 | } |
| 237 | for (const auto& gmem_pair : ir.GetGlobalMemory()) { | 392 | for (const auto& [offset, image] : ir.GetImages()) { |
| 238 | const auto& [base, usage] = gmem_pair; | 393 | entries.images.emplace_back(image); |
| 394 | } | ||
| 395 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||
| 239 | entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, | 396 | entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, |
| 240 | usage.is_read, usage.is_written); | 397 | usage.is_read, usage.is_written); |
| 241 | } | 398 | } |
| 242 | entries.clip_distances = ir.GetClipDistances(); | 399 | entries.clip_distances = ir.GetClipDistances(); |
| 400 | entries.shader_viewport_layer_array = | ||
| 401 | IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex()); | ||
| 243 | entries.shader_length = ir.GetLength(); | 402 | entries.shader_length = ir.GetLength(); |
| 244 | return entries; | 403 | return entries; |
| 245 | } | 404 | } |
| 246 | 405 | ||
| 247 | private: | 406 | private: |
| 248 | using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation); | ||
| 249 | using OperationDecompilersArray = | ||
| 250 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; | ||
| 251 | |||
| 252 | void DeclareVertex() { | 407 | void DeclareVertex() { |
| 253 | if (stage != ShaderStage::Vertex) | 408 | if (!IsVertexShader(stage)) |
| 254 | return; | 409 | return; |
| 255 | 410 | ||
| 256 | DeclareVertexRedeclarations(); | 411 | DeclareVertexRedeclarations(); |
| 257 | } | 412 | } |
| 258 | 413 | ||
| 259 | void DeclareGeometry() { | 414 | void DeclareGeometry() { |
| 260 | if (stage != ShaderStage::Geometry) { | 415 | if (stage != ProgramType::Geometry) { |
| 261 | return; | 416 | return; |
| 262 | } | 417 | } |
| 263 | 418 | ||
| @@ -276,21 +431,34 @@ private: | |||
| 276 | } | 431 | } |
| 277 | 432 | ||
| 278 | void DeclareVertexRedeclarations() { | 433 | void DeclareVertexRedeclarations() { |
| 279 | bool clip_distances_declared = false; | ||
| 280 | |||
| 281 | code.AddLine("out gl_PerVertex {{"); | 434 | code.AddLine("out gl_PerVertex {{"); |
| 282 | ++code.scope; | 435 | ++code.scope; |
| 283 | 436 | ||
| 284 | code.AddLine("vec4 gl_Position;"); | 437 | code.AddLine("vec4 gl_Position;"); |
| 285 | 438 | ||
| 286 | for (const auto o : ir.GetOutputAttributes()) { | 439 | for (const auto attribute : ir.GetOutputAttributes()) { |
| 287 | if (o == Attribute::Index::PointSize) | 440 | if (attribute == Attribute::Index::ClipDistances0123 || |
| 288 | code.AddLine("float gl_PointSize;"); | 441 | attribute == Attribute::Index::ClipDistances4567) { |
| 289 | if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 || | ||
| 290 | o == Attribute::Index::ClipDistances4567)) { | ||
| 291 | code.AddLine("float gl_ClipDistance[];"); | 442 | code.AddLine("float gl_ClipDistance[];"); |
| 292 | clip_distances_declared = true; | 443 | break; |
| 444 | } | ||
| 445 | } | ||
| 446 | if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) { | ||
| 447 | if (ir.UsesLayer()) { | ||
| 448 | code.AddLine("int gl_Layer;"); | ||
| 449 | } | ||
| 450 | if (ir.UsesViewportIndex()) { | ||
| 451 | code.AddLine("int gl_ViewportIndex;"); | ||
| 293 | } | 452 | } |
| 453 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) && | ||
| 454 | !device.HasVertexViewportLayer()) { | ||
| 455 | LOG_ERROR( | ||
| 456 | Render_OpenGL, | ||
| 457 | "GL_ARB_shader_viewport_layer_array is not available and its required by a shader"); | ||
| 458 | } | ||
| 459 | |||
| 460 | if (ir.UsesPointSize()) { | ||
| 461 | code.AddLine("float gl_PointSize;"); | ||
| 294 | } | 462 | } |
| 295 | 463 | ||
| 296 | --code.scope; | 464 | --code.scope; |
| @@ -301,7 +469,7 @@ private: | |||
| 301 | void DeclareRegisters() { | 469 | void DeclareRegisters() { |
| 302 | const auto& registers = ir.GetRegisters(); | 470 | const auto& registers = ir.GetRegisters(); |
| 303 | for (const u32 gpr : registers) { | 471 | for (const u32 gpr : registers) { |
| 304 | code.AddLine("float {} = 0;", GetRegister(gpr)); | 472 | code.AddLine("float {} = 0.0f;", GetRegister(gpr)); |
| 305 | } | 473 | } |
| 306 | if (!registers.empty()) { | 474 | if (!registers.empty()) { |
| 307 | code.AddNewLine(); | 475 | code.AddNewLine(); |
| @@ -319,11 +487,16 @@ private: | |||
| 319 | } | 487 | } |
| 320 | 488 | ||
| 321 | void DeclareLocalMemory() { | 489 | void DeclareLocalMemory() { |
| 322 | if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { | 490 | // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at |
| 323 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; | 491 | // specialization time. |
| 324 | code.AddLine("float {}[{}];", GetLocalMemory(), element_count); | 492 | const u64 local_memory_size = |
| 325 | code.AddNewLine(); | 493 | stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize(); |
| 494 | if (local_memory_size == 0) { | ||
| 495 | return; | ||
| 326 | } | 496 | } |
| 497 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; | ||
| 498 | code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); | ||
| 499 | code.AddNewLine(); | ||
| 327 | } | 500 | } |
| 328 | 501 | ||
| 329 | void DeclareInternalFlags() { | 502 | void DeclareInternalFlags() { |
| @@ -345,8 +518,6 @@ private: | |||
| 345 | return "noperspective "; | 518 | return "noperspective "; |
| 346 | default: | 519 | default: |
| 347 | case AttributeUse::Unused: | 520 | case AttributeUse::Unused: |
| 348 | UNREACHABLE_MSG("Unused attribute being fetched"); | ||
| 349 | return {}; | ||
| 350 | UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute)); | 521 | UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute)); |
| 351 | return {}; | 522 | return {}; |
| 352 | } | 523 | } |
| @@ -377,12 +548,12 @@ private: | |||
| 377 | const u32 location{GetGenericAttributeIndex(index)}; | 548 | const u32 location{GetGenericAttributeIndex(index)}; |
| 378 | 549 | ||
| 379 | std::string name{GetInputAttribute(index)}; | 550 | std::string name{GetInputAttribute(index)}; |
| 380 | if (stage == ShaderStage::Geometry) { | 551 | if (stage == ProgramType::Geometry) { |
| 381 | name = "gs_" + name + "[]"; | 552 | name = "gs_" + name + "[]"; |
| 382 | } | 553 | } |
| 383 | 554 | ||
| 384 | std::string suffix; | 555 | std::string suffix; |
| 385 | if (stage == ShaderStage::Fragment) { | 556 | if (stage == ProgramType::Fragment) { |
| 386 | const auto input_mode{header.ps.GetAttributeUse(location)}; | 557 | const auto input_mode{header.ps.GetAttributeUse(location)}; |
| 387 | if (skip_unused && input_mode == AttributeUse::Unused) { | 558 | if (skip_unused && input_mode == AttributeUse::Unused) { |
| 388 | return; | 559 | return; |
| @@ -394,7 +565,7 @@ private: | |||
| 394 | } | 565 | } |
| 395 | 566 | ||
| 396 | void DeclareOutputAttributes() { | 567 | void DeclareOutputAttributes() { |
| 397 | if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) { | 568 | if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) { |
| 398 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { | 569 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { |
| 399 | DeclareOutputAttribute(ToGenericAttribute(i)); | 570 | DeclareOutputAttribute(ToGenericAttribute(i)); |
| 400 | } | 571 | } |
| @@ -423,7 +594,7 @@ private: | |||
| 423 | const auto [index, size] = entry; | 594 | const auto [index, size] = entry; |
| 424 | code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index, | 595 | code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index, |
| 425 | GetConstBufferBlock(index)); | 596 | GetConstBufferBlock(index)); |
| 426 | code.AddLine(" vec4 {}[MAX_CONSTBUFFER_ELEMENTS];", GetConstBuffer(index)); | 597 | code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); |
| 427 | code.AddLine("}};"); | 598 | code.AddLine("}};"); |
| 428 | code.AddNewLine(); | 599 | code.AddNewLine(); |
| 429 | } | 600 | } |
| @@ -444,7 +615,7 @@ private: | |||
| 444 | 615 | ||
| 445 | code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{", | 616 | code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{", |
| 446 | base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base)); | 617 | base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base)); |
| 447 | code.AddLine(" float {}[];", GetGlobalMemory(base)); | 618 | code.AddLine(" uint {}[];", GetGlobalMemory(base)); |
| 448 | code.AddLine("}};"); | 619 | code.AddLine("}};"); |
| 449 | code.AddNewLine(); | 620 | code.AddNewLine(); |
| 450 | } | 621 | } |
| @@ -453,9 +624,13 @@ private: | |||
| 453 | void DeclareSamplers() { | 624 | void DeclareSamplers() { |
| 454 | const auto& samplers = ir.GetSamplers(); | 625 | const auto& samplers = ir.GetSamplers(); |
| 455 | for (const auto& sampler : samplers) { | 626 | for (const auto& sampler : samplers) { |
| 456 | std::string sampler_type = [&sampler] { | 627 | const std::string name{GetSampler(sampler)}; |
| 628 | const std::string description{"layout (binding = SAMPLER_BINDING_" + | ||
| 629 | std::to_string(sampler.GetIndex()) + ") uniform"}; | ||
| 630 | std::string sampler_type = [&]() { | ||
| 457 | switch (sampler.GetType()) { | 631 | switch (sampler.GetType()) { |
| 458 | case Tegra::Shader::TextureType::Texture1D: | 632 | case Tegra::Shader::TextureType::Texture1D: |
| 633 | // Special cased, read below. | ||
| 459 | return "sampler1D"; | 634 | return "sampler1D"; |
| 460 | case Tegra::Shader::TextureType::Texture2D: | 635 | case Tegra::Shader::TextureType::Texture2D: |
| 461 | return "sampler2D"; | 636 | return "sampler2D"; |
| @@ -475,8 +650,19 @@ private: | |||
| 475 | sampler_type += "Shadow"; | 650 | sampler_type += "Shadow"; |
| 476 | } | 651 | } |
| 477 | 652 | ||
| 478 | code.AddLine("layout (binding = SAMPLER_BINDING_{}) uniform {} {};", sampler.GetIndex(), | 653 | if (sampler.GetType() == Tegra::Shader::TextureType::Texture1D) { |
| 479 | sampler_type, GetSampler(sampler)); | 654 | // 1D textures can be aliased to texture buffers, hide the declarations behind a |
| 655 | // preprocessor flag and use one or the other from the GPU state. This has to be | ||
| 656 | // done because shaders don't have enough information to determine the texture type. | ||
| 657 | EmitIfdefIsBuffer(sampler); | ||
| 658 | code.AddLine("{} samplerBuffer {};", description, name); | ||
| 659 | code.AddLine("#else"); | ||
| 660 | code.AddLine("{} {} {};", description, sampler_type, name); | ||
| 661 | code.AddLine("#endif"); | ||
| 662 | } else { | ||
| 663 | // The other texture types (2D, 3D and cubes) don't have this issue. | ||
| 664 | code.AddLine("{} {} {};", description, sampler_type, name); | ||
| 665 | } | ||
| 480 | } | 666 | } |
| 481 | if (!samplers.empty()) { | 667 | if (!samplers.empty()) { |
| 482 | code.AddNewLine(); | 668 | code.AddNewLine(); |
| @@ -487,7 +673,7 @@ private: | |||
| 487 | if (!ir.HasPhysicalAttributes()) { | 673 | if (!ir.HasPhysicalAttributes()) { |
| 488 | return; | 674 | return; |
| 489 | } | 675 | } |
| 490 | code.AddLine("float readPhysicalAttribute(uint physical_address) {{"); | 676 | code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{"); |
| 491 | ++code.scope; | 677 | ++code.scope; |
| 492 | code.AddLine("switch (physical_address) {{"); | 678 | code.AddLine("switch (physical_address) {{"); |
| 493 | 679 | ||
| @@ -496,15 +682,16 @@ private: | |||
| 496 | for (u32 index = 0; index < num_attributes; ++index) { | 682 | for (u32 index = 0; index < num_attributes; ++index) { |
| 497 | const auto attribute{ToGenericAttribute(index)}; | 683 | const auto attribute{ToGenericAttribute(index)}; |
| 498 | for (u32 element = 0; element < 4; ++element) { | 684 | for (u32 element = 0; element < 4; ++element) { |
| 499 | constexpr u32 generic_base{0x80}; | 685 | constexpr u32 generic_base = 0x80; |
| 500 | constexpr u32 generic_stride{16}; | 686 | constexpr u32 generic_stride = 16; |
| 501 | constexpr u32 element_stride{4}; | 687 | constexpr u32 element_stride = 4; |
| 502 | const u32 address{generic_base + index * generic_stride + element * element_stride}; | 688 | const u32 address{generic_base + index * generic_stride + element * element_stride}; |
| 503 | 689 | ||
| 504 | const bool declared{stage != ShaderStage::Fragment || | 690 | const bool declared = stage != ProgramType::Fragment || |
| 505 | header.ps.GetAttributeUse(index) != AttributeUse::Unused}; | 691 | header.ps.GetAttributeUse(index) != AttributeUse::Unused; |
| 506 | const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; | 692 | const std::string value = |
| 507 | code.AddLine("case 0x{:x}: return {};", address, value); | 693 | declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; |
| 694 | code.AddLine("case 0x{:X}U: return {};", address, value); | ||
| 508 | } | 695 | } |
| 509 | } | 696 | } |
| 510 | 697 | ||
| @@ -516,15 +703,68 @@ private: | |||
| 516 | code.AddNewLine(); | 703 | code.AddNewLine(); |
| 517 | } | 704 | } |
| 518 | 705 | ||
| 706 | void DeclareImages() { | ||
| 707 | const auto& images{ir.GetImages()}; | ||
| 708 | for (const auto& [offset, image] : images) { | ||
| 709 | const char* image_type = [&] { | ||
| 710 | switch (image.GetType()) { | ||
| 711 | case Tegra::Shader::ImageType::Texture1D: | ||
| 712 | return "image1D"; | ||
| 713 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 714 | return "imageBuffer"; | ||
| 715 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 716 | return "image1DArray"; | ||
| 717 | case Tegra::Shader::ImageType::Texture2D: | ||
| 718 | return "image2D"; | ||
| 719 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 720 | return "image2DArray"; | ||
| 721 | case Tegra::Shader::ImageType::Texture3D: | ||
| 722 | return "image3D"; | ||
| 723 | default: | ||
| 724 | UNREACHABLE(); | ||
| 725 | return "image1D"; | ||
| 726 | } | ||
| 727 | }(); | ||
| 728 | |||
| 729 | const auto [type_prefix, format] = [&]() -> std::pair<const char*, const char*> { | ||
| 730 | if (!image.IsSizeKnown()) { | ||
| 731 | return {"", ""}; | ||
| 732 | } | ||
| 733 | switch (image.GetSize()) { | ||
| 734 | case Tegra::Shader::ImageAtomicSize::U32: | ||
| 735 | return {"u", "r32ui, "}; | ||
| 736 | case Tegra::Shader::ImageAtomicSize::S32: | ||
| 737 | return {"i", "r32i, "}; | ||
| 738 | default: | ||
| 739 | UNIMPLEMENTED_MSG("Unimplemented atomic size={}", | ||
| 740 | static_cast<u32>(image.GetSize())); | ||
| 741 | return {"", ""}; | ||
| 742 | } | ||
| 743 | }(); | ||
| 744 | |||
| 745 | std::string qualifier = "coherent volatile"; | ||
| 746 | if (image.IsRead() && !image.IsWritten()) { | ||
| 747 | qualifier += " readonly"; | ||
| 748 | } else if (image.IsWritten() && !image.IsRead()) { | ||
| 749 | qualifier += " writeonly"; | ||
| 750 | } | ||
| 751 | |||
| 752 | code.AddLine("layout (binding = IMAGE_BINDING_{}) {} uniform " | ||
| 753 | "{} {};", | ||
| 754 | image.GetIndex(), qualifier, image_type, GetImage(image)); | ||
| 755 | } | ||
| 756 | if (!images.empty()) { | ||
| 757 | code.AddNewLine(); | ||
| 758 | } | ||
| 759 | } | ||
| 760 | |||
| 519 | void VisitBlock(const NodeBlock& bb) { | 761 | void VisitBlock(const NodeBlock& bb) { |
| 520 | for (const auto& node : bb) { | 762 | for (const auto& node : bb) { |
| 521 | if (const std::string expr = Visit(node); !expr.empty()) { | 763 | Visit(node).CheckVoid(); |
| 522 | code.AddLine(expr); | ||
| 523 | } | ||
| 524 | } | 764 | } |
| 525 | } | 765 | } |
| 526 | 766 | ||
| 527 | std::string Visit(const Node& node) { | 767 | Expression Visit(const Node& node) { |
| 528 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | 768 | if (const auto operation = std::get_if<OperationNode>(&*node)) { |
| 529 | const auto operation_index = static_cast<std::size_t>(operation->GetCode()); | 769 | const auto operation_index = static_cast<std::size_t>(operation->GetCode()); |
| 530 | if (operation_index >= operation_decompilers.size()) { | 770 | if (operation_index >= operation_decompilers.size()) { |
| @@ -542,18 +782,18 @@ private: | |||
| 542 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | 782 | if (const auto gpr = std::get_if<GprNode>(&*node)) { |
| 543 | const u32 index = gpr->GetIndex(); | 783 | const u32 index = gpr->GetIndex(); |
| 544 | if (index == Register::ZeroIndex) { | 784 | if (index == Register::ZeroIndex) { |
| 545 | return "0"; | 785 | return {"0U", Type::Uint}; |
| 546 | } | 786 | } |
| 547 | return GetRegister(index); | 787 | return {GetRegister(index), Type::Float}; |
| 548 | } | 788 | } |
| 549 | 789 | ||
| 550 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { | 790 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { |
| 551 | const u32 value = immediate->GetValue(); | 791 | const u32 value = immediate->GetValue(); |
| 552 | if (value < 10) { | 792 | if (value < 10) { |
| 553 | // For eyecandy avoid using hex numbers on single digits | 793 | // For eyecandy avoid using hex numbers on single digits |
| 554 | return fmt::format("utof({}u)", immediate->GetValue()); | 794 | return {fmt::format("{}U", immediate->GetValue()), Type::Uint}; |
| 555 | } | 795 | } |
| 556 | return fmt::format("utof(0x{:x}u)", immediate->GetValue()); | 796 | return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint}; |
| 557 | } | 797 | } |
| 558 | 798 | ||
| 559 | if (const auto predicate = std::get_if<PredicateNode>(&*node)) { | 799 | if (const auto predicate = std::get_if<PredicateNode>(&*node)) { |
| @@ -568,17 +808,18 @@ private: | |||
| 568 | } | 808 | } |
| 569 | }(); | 809 | }(); |
| 570 | if (predicate->IsNegated()) { | 810 | if (predicate->IsNegated()) { |
| 571 | return fmt::format("!({})", value); | 811 | return {fmt::format("!({})", value), Type::Bool}; |
| 572 | } | 812 | } |
| 573 | return value; | 813 | return {value, Type::Bool}; |
| 574 | } | 814 | } |
| 575 | 815 | ||
| 576 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | 816 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { |
| 577 | UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, | 817 | UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry, |
| 578 | "Physical attributes in geometry shaders are not implemented"); | 818 | "Physical attributes in geometry shaders are not implemented"); |
| 579 | if (abuf->IsPhysicalBuffer()) { | 819 | if (abuf->IsPhysicalBuffer()) { |
| 580 | return fmt::format("readPhysicalAttribute(ftou({}))", | 820 | return {fmt::format("ReadPhysicalAttribute({})", |
| 581 | Visit(abuf->GetPhysicalAddress())); | 821 | Visit(abuf->GetPhysicalAddress()).AsUint()), |
| 822 | Type::Float}; | ||
| 582 | } | 823 | } |
| 583 | return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); | 824 | return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); |
| 584 | } | 825 | } |
| @@ -589,56 +830,64 @@ private: | |||
| 589 | // Direct access | 830 | // Direct access |
| 590 | const u32 offset_imm = immediate->GetValue(); | 831 | const u32 offset_imm = immediate->GetValue(); |
| 591 | ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); | 832 | ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); |
| 592 | return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), | 833 | return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), |
| 593 | offset_imm / (4 * 4), (offset_imm / 4) % 4); | 834 | offset_imm / (4 * 4), (offset_imm / 4) % 4), |
| 835 | Type::Uint}; | ||
| 594 | } | 836 | } |
| 595 | 837 | ||
| 596 | if (std::holds_alternative<OperationNode>(*offset)) { | 838 | if (std::holds_alternative<OperationNode>(*offset)) { |
| 597 | // Indirect access | 839 | // Indirect access |
| 598 | const std::string final_offset = code.GenerateTemporary(); | 840 | const std::string final_offset = code.GenerateTemporary(); |
| 599 | code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset)); | 841 | code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); |
| 600 | 842 | ||
| 601 | if (!device.HasComponentIndexingBug()) { | 843 | if (!device.HasComponentIndexingBug()) { |
| 602 | return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), | 844 | return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), |
| 603 | final_offset, final_offset); | 845 | final_offset, final_offset), |
| 846 | Type::Uint}; | ||
| 604 | } | 847 | } |
| 605 | 848 | ||
| 606 | // AMD's proprietary GLSL compiler emits ill code for variable component access. | 849 | // AMD's proprietary GLSL compiler emits ill code for variable component access. |
| 607 | // To bypass this driver bug generate 4 ifs, one per each component. | 850 | // To bypass this driver bug generate 4 ifs, one per each component. |
| 608 | const std::string pack = code.GenerateTemporary(); | 851 | const std::string pack = code.GenerateTemporary(); |
| 609 | code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), | 852 | code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), |
| 610 | final_offset); | 853 | final_offset); |
| 611 | 854 | ||
| 612 | const std::string result = code.GenerateTemporary(); | 855 | const std::string result = code.GenerateTemporary(); |
| 613 | code.AddLine("float {};", result); | 856 | code.AddLine("uint {};", result); |
| 614 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | 857 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { |
| 615 | code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, | 858 | code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, |
| 616 | pack, GetSwizzle(swizzle)); | 859 | pack, GetSwizzle(swizzle)); |
| 617 | } | 860 | } |
| 618 | return result; | 861 | return {result, Type::Uint}; |
| 619 | } | 862 | } |
| 620 | 863 | ||
| 621 | UNREACHABLE_MSG("Unmanaged offset node type"); | 864 | UNREACHABLE_MSG("Unmanaged offset node type"); |
| 622 | } | 865 | } |
| 623 | 866 | ||
| 624 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | 867 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { |
| 625 | const std::string real = Visit(gmem->GetRealAddress()); | 868 | const std::string real = Visit(gmem->GetRealAddress()).AsUint(); |
| 626 | const std::string base = Visit(gmem->GetBaseAddress()); | 869 | const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); |
| 627 | const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); | 870 | const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); |
| 628 | return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); | 871 | return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), |
| 872 | Type::Uint}; | ||
| 629 | } | 873 | } |
| 630 | 874 | ||
| 631 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | 875 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { |
| 632 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | 876 | if (stage == ProgramType::Compute) { |
| 877 | LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); | ||
| 878 | } | ||
| 879 | return { | ||
| 880 | fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), | ||
| 881 | Type::Uint}; | ||
| 633 | } | 882 | } |
| 634 | 883 | ||
| 635 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | 884 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { |
| 636 | return GetInternalFlag(internal_flag->GetFlag()); | 885 | return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool}; |
| 637 | } | 886 | } |
| 638 | 887 | ||
| 639 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | 888 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { |
| 640 | // It's invalid to call conditional on nested nodes, use an operation instead | 889 | // It's invalid to call conditional on nested nodes, use an operation instead |
| 641 | code.AddLine("if ({}) {{", Visit(conditional->GetCondition())); | 890 | code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); |
| 642 | ++code.scope; | 891 | ++code.scope; |
| 643 | 892 | ||
| 644 | VisitBlock(conditional->GetCode()); | 893 | VisitBlock(conditional->GetCode()); |
| @@ -649,20 +898,21 @@ private: | |||
| 649 | } | 898 | } |
| 650 | 899 | ||
| 651 | if (const auto comment = std::get_if<CommentNode>(&*node)) { | 900 | if (const auto comment = std::get_if<CommentNode>(&*node)) { |
| 652 | return "// " + comment->GetText(); | 901 | code.AddLine("// " + comment->GetText()); |
| 902 | return {}; | ||
| 653 | } | 903 | } |
| 654 | 904 | ||
| 655 | UNREACHABLE(); | 905 | UNREACHABLE(); |
| 656 | return {}; | 906 | return {}; |
| 657 | } | 907 | } |
| 658 | 908 | ||
| 659 | std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { | 909 | Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { |
| 660 | const auto GeometryPass = [&](std::string_view name) { | 910 | const auto GeometryPass = [&](std::string_view name) { |
| 661 | if (stage == ShaderStage::Geometry && buffer) { | 911 | if (stage == ProgramType::Geometry && buffer) { |
| 662 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games | 912 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games |
| 663 | // set an 0x80000000 index for those and the shader fails to build. Find out why | 913 | // set an 0x80000000 index for those and the shader fails to build. Find out why |
| 664 | // this happens and what's its intent. | 914 | // this happens and what's its intent. |
| 665 | return fmt::format("gs_{}[ftou({}) % MAX_VERTEX_INPUT]", name, Visit(buffer)); | 915 | return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint()); |
| 666 | } | 916 | } |
| 667 | return std::string(name); | 917 | return std::string(name); |
| 668 | }; | 918 | }; |
| @@ -670,72 +920,79 @@ private: | |||
| 670 | switch (attribute) { | 920 | switch (attribute) { |
| 671 | case Attribute::Index::Position: | 921 | case Attribute::Index::Position: |
| 672 | switch (stage) { | 922 | switch (stage) { |
| 673 | case ShaderStage::Geometry: | 923 | case ProgramType::Geometry: |
| 674 | return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), | 924 | return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(), |
| 675 | GetSwizzle(element)); | 925 | GetSwizzle(element)), |
| 676 | case ShaderStage::Fragment: | 926 | Type::Float}; |
| 677 | return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); | 927 | case ProgramType::Fragment: |
| 928 | return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)), | ||
| 929 | Type::Float}; | ||
| 678 | default: | 930 | default: |
| 679 | UNREACHABLE(); | 931 | UNREACHABLE(); |
| 680 | } | 932 | } |
| 681 | case Attribute::Index::PointCoord: | 933 | case Attribute::Index::PointCoord: |
| 682 | switch (element) { | 934 | switch (element) { |
| 683 | case 0: | 935 | case 0: |
| 684 | return "gl_PointCoord.x"; | 936 | return {"gl_PointCoord.x", Type::Float}; |
| 685 | case 1: | 937 | case 1: |
| 686 | return "gl_PointCoord.y"; | 938 | return {"gl_PointCoord.y", Type::Float}; |
| 687 | case 2: | 939 | case 2: |
| 688 | case 3: | 940 | case 3: |
| 689 | return "0"; | 941 | return {"0.0f", Type::Float}; |
| 690 | } | 942 | } |
| 691 | UNREACHABLE(); | 943 | UNREACHABLE(); |
| 692 | return "0"; | 944 | return {"0", Type::Int}; |
| 693 | case Attribute::Index::TessCoordInstanceIDVertexID: | 945 | case Attribute::Index::TessCoordInstanceIDVertexID: |
| 694 | // TODO(Subv): Find out what the values are for the first two elements when inside a | 946 | // TODO(Subv): Find out what the values are for the first two elements when inside a |
| 695 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | 947 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval |
| 696 | // shader. | 948 | // shader. |
| 697 | ASSERT(stage == ShaderStage::Vertex); | 949 | ASSERT(IsVertexShader(stage)); |
| 698 | switch (element) { | 950 | switch (element) { |
| 699 | case 2: | 951 | case 2: |
| 700 | // Config pack's first value is instance_id. | 952 | // Config pack's first value is instance_id. |
| 701 | return "uintBitsToFloat(config_pack[0])"; | 953 | return {"config_pack[0]", Type::Uint}; |
| 702 | case 3: | 954 | case 3: |
| 703 | return "uintBitsToFloat(gl_VertexID)"; | 955 | return {"gl_VertexID", Type::Int}; |
| 704 | } | 956 | } |
| 705 | UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); | 957 | UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); |
| 706 | return "0"; | 958 | return {"0", Type::Int}; |
| 707 | case Attribute::Index::FrontFacing: | 959 | case Attribute::Index::FrontFacing: |
| 708 | // TODO(Subv): Find out what the values are for the other elements. | 960 | // TODO(Subv): Find out what the values are for the other elements. |
| 709 | ASSERT(stage == ShaderStage::Fragment); | 961 | ASSERT(stage == ProgramType::Fragment); |
| 710 | switch (element) { | 962 | switch (element) { |
| 711 | case 3: | 963 | case 3: |
| 712 | return "itof(gl_FrontFacing ? -1 : 0)"; | 964 | return {"(gl_FrontFacing ? -1 : 0)", Type::Int}; |
| 713 | } | 965 | } |
| 714 | UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); | 966 | UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); |
| 715 | return "0"; | 967 | return {"0", Type::Int}; |
| 716 | default: | 968 | default: |
| 717 | if (IsGenericAttribute(attribute)) { | 969 | if (IsGenericAttribute(attribute)) { |
| 718 | return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element); | 970 | return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element), |
| 971 | Type::Float}; | ||
| 719 | } | 972 | } |
| 720 | break; | 973 | break; |
| 721 | } | 974 | } |
| 722 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); | 975 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); |
| 723 | return "0"; | 976 | return {"0", Type::Int}; |
| 724 | } | 977 | } |
| 725 | 978 | ||
| 726 | std::string ApplyPrecise(Operation operation, const std::string& value) { | 979 | Expression ApplyPrecise(Operation operation, std::string value, Type type) { |
| 727 | if (!IsPrecise(operation)) { | 980 | if (!IsPrecise(operation)) { |
| 728 | return value; | 981 | return {std::move(value), type}; |
| 729 | } | 982 | } |
| 730 | // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders | 983 | // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to |
| 731 | const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; | 984 | // be found in fragment shaders, so we disable precise there. There are vertex shaders that |
| 985 | // also fail to build but nobody seems to care about those. | ||
| 986 | // Note: Only bugged drivers will skip precise. | ||
| 987 | const bool disable_precise = device.HasPreciseBug() && stage == ProgramType::Fragment; | ||
| 732 | 988 | ||
| 733 | const std::string temporary = code.GenerateTemporary(); | 989 | std::string temporary = code.GenerateTemporary(); |
| 734 | code.AddLine("{}float {} = {};", precise, temporary, value); | 990 | code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type), |
| 735 | return temporary; | 991 | temporary, value); |
| 992 | return {std::move(temporary), type}; | ||
| 736 | } | 993 | } |
| 737 | 994 | ||
| 738 | std::string VisitOperand(Operation operation, std::size_t operand_index) { | 995 | Expression VisitOperand(Operation operation, std::size_t operand_index) { |
| 739 | const auto& operand = operation[operand_index]; | 996 | const auto& operand = operation[operand_index]; |
| 740 | const bool parent_precise = IsPrecise(operation); | 997 | const bool parent_precise = IsPrecise(operation); |
| 741 | const bool child_precise = IsPrecise(operand); | 998 | const bool child_precise = IsPrecise(operand); |
| @@ -744,102 +1001,98 @@ private: | |||
| 744 | return Visit(operand); | 1001 | return Visit(operand); |
| 745 | } | 1002 | } |
| 746 | 1003 | ||
| 747 | const std::string temporary = code.GenerateTemporary(); | 1004 | Expression value = Visit(operand); |
| 748 | code.AddLine("float {} = {};", temporary, Visit(operand)); | 1005 | std::string temporary = code.GenerateTemporary(); |
| 749 | return temporary; | 1006 | code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode()); |
| 1007 | return {std::move(temporary), value.GetType()}; | ||
| 750 | } | 1008 | } |
| 751 | 1009 | ||
| 752 | std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { | 1010 | Expression GetOutputAttribute(const AbufNode* abuf) { |
| 753 | return CastOperand(VisitOperand(operation, operand_index), type); | 1011 | switch (const auto attribute = abuf->GetIndex()) { |
| 754 | } | 1012 | case Attribute::Index::Position: |
| 755 | 1013 | return {"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float}; | |
| 756 | std::string CastOperand(const std::string& value, Type type) const { | 1014 | case Attribute::Index::LayerViewportPointSize: |
| 757 | switch (type) { | 1015 | switch (abuf->GetElement()) { |
| 758 | case Type::Bool: | 1016 | case 0: |
| 759 | case Type::Bool2: | 1017 | UNIMPLEMENTED(); |
| 760 | case Type::Float: | 1018 | return {}; |
| 761 | return value; | 1019 | case 1: |
| 762 | case Type::Int: | 1020 | if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { |
| 763 | return fmt::format("ftoi({})", value); | 1021 | return {}; |
| 764 | case Type::Uint: | 1022 | } |
| 765 | return fmt::format("ftou({})", value); | 1023 | return {"gl_Layer", Type::Int}; |
| 766 | case Type::HalfFloat: | 1024 | case 2: |
| 767 | return fmt::format("toHalf2({})", value); | 1025 | if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { |
| 768 | } | 1026 | return {}; |
| 769 | UNREACHABLE(); | 1027 | } |
| 770 | return value; | 1028 | return {"gl_ViewportIndex", Type::Int}; |
| 771 | } | 1029 | case 3: |
| 772 | 1030 | UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader"); | |
| 773 | std::string BitwiseCastResult(const std::string& value, Type type, | 1031 | return {"gl_PointSize", Type::Float}; |
| 774 | bool needs_parenthesis = false) { | ||
| 775 | switch (type) { | ||
| 776 | case Type::Bool: | ||
| 777 | case Type::Bool2: | ||
| 778 | case Type::Float: | ||
| 779 | if (needs_parenthesis) { | ||
| 780 | return fmt::format("({})", value); | ||
| 781 | } | 1032 | } |
| 782 | return value; | 1033 | return {}; |
| 783 | case Type::Int: | 1034 | case Attribute::Index::ClipDistances0123: |
| 784 | return fmt::format("itof({})", value); | 1035 | return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float}; |
| 785 | case Type::Uint: | 1036 | case Attribute::Index::ClipDistances4567: |
| 786 | return fmt::format("utof({})", value); | 1037 | return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}; |
| 787 | case Type::HalfFloat: | 1038 | default: |
| 788 | return fmt::format("fromHalf2({})", value); | 1039 | if (IsGenericAttribute(attribute)) { |
| 1040 | return {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), | ||
| 1041 | Type::Float}; | ||
| 1042 | } | ||
| 1043 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); | ||
| 1044 | return {}; | ||
| 789 | } | 1045 | } |
| 790 | UNREACHABLE(); | ||
| 791 | return value; | ||
| 792 | } | 1046 | } |
| 793 | 1047 | ||
| 794 | std::string GenerateUnary(Operation operation, const std::string& func, Type result_type, | 1048 | Expression GenerateUnary(Operation operation, std::string_view func, Type result_type, |
| 795 | Type type_a, bool needs_parenthesis = true) { | 1049 | Type type_a) { |
| 796 | const std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0, type_a)); | 1050 | std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a)); |
| 797 | 1051 | return ApplyPrecise(operation, std::move(op_str), result_type); | |
| 798 | return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type, needs_parenthesis)); | ||
| 799 | } | 1052 | } |
| 800 | 1053 | ||
| 801 | std::string GenerateBinaryInfix(Operation operation, const std::string& func, Type result_type, | 1054 | Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type, |
| 802 | Type type_a, Type type_b) { | 1055 | Type type_a, Type type_b) { |
| 803 | const std::string op_a = VisitOperand(operation, 0, type_a); | 1056 | const std::string op_a = VisitOperand(operation, 0).As(type_a); |
| 804 | const std::string op_b = VisitOperand(operation, 1, type_b); | 1057 | const std::string op_b = VisitOperand(operation, 1).As(type_b); |
| 805 | const std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); | 1058 | std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); |
| 806 | 1059 | ||
| 807 | return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); | 1060 | return ApplyPrecise(operation, std::move(op_str), result_type); |
| 808 | } | 1061 | } |
| 809 | 1062 | ||
| 810 | std::string GenerateBinaryCall(Operation operation, const std::string& func, Type result_type, | 1063 | Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type, |
| 811 | Type type_a, Type type_b) { | 1064 | Type type_a, Type type_b) { |
| 812 | const std::string op_a = VisitOperand(operation, 0, type_a); | 1065 | const std::string op_a = VisitOperand(operation, 0).As(type_a); |
| 813 | const std::string op_b = VisitOperand(operation, 1, type_b); | 1066 | const std::string op_b = VisitOperand(operation, 1).As(type_b); |
| 814 | const std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b); | 1067 | std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b); |
| 815 | 1068 | ||
| 816 | return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); | 1069 | return ApplyPrecise(operation, std::move(op_str), result_type); |
| 817 | } | 1070 | } |
| 818 | 1071 | ||
| 819 | std::string GenerateTernary(Operation operation, const std::string& func, Type result_type, | 1072 | Expression GenerateTernary(Operation operation, std::string_view func, Type result_type, |
| 820 | Type type_a, Type type_b, Type type_c) { | 1073 | Type type_a, Type type_b, Type type_c) { |
| 821 | const std::string op_a = VisitOperand(operation, 0, type_a); | 1074 | const std::string op_a = VisitOperand(operation, 0).As(type_a); |
| 822 | const std::string op_b = VisitOperand(operation, 1, type_b); | 1075 | const std::string op_b = VisitOperand(operation, 1).As(type_b); |
| 823 | const std::string op_c = VisitOperand(operation, 2, type_c); | 1076 | const std::string op_c = VisitOperand(operation, 2).As(type_c); |
| 824 | const std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); | 1077 | std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); |
| 825 | 1078 | ||
| 826 | return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); | 1079 | return ApplyPrecise(operation, std::move(op_str), result_type); |
| 827 | } | 1080 | } |
| 828 | 1081 | ||
| 829 | std::string GenerateQuaternary(Operation operation, const std::string& func, Type result_type, | 1082 | Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type, |
| 830 | Type type_a, Type type_b, Type type_c, Type type_d) { | 1083 | Type type_a, Type type_b, Type type_c, Type type_d) { |
| 831 | const std::string op_a = VisitOperand(operation, 0, type_a); | 1084 | const std::string op_a = VisitOperand(operation, 0).As(type_a); |
| 832 | const std::string op_b = VisitOperand(operation, 1, type_b); | 1085 | const std::string op_b = VisitOperand(operation, 1).As(type_b); |
| 833 | const std::string op_c = VisitOperand(operation, 2, type_c); | 1086 | const std::string op_c = VisitOperand(operation, 2).As(type_c); |
| 834 | const std::string op_d = VisitOperand(operation, 3, type_d); | 1087 | const std::string op_d = VisitOperand(operation, 3).As(type_d); |
| 835 | const std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); | 1088 | std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); |
| 836 | 1089 | ||
| 837 | return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); | 1090 | return ApplyPrecise(operation, std::move(op_str), result_type); |
| 838 | } | 1091 | } |
| 839 | 1092 | ||
| 840 | std::string GenerateTexture(Operation operation, const std::string& function_suffix, | 1093 | std::string GenerateTexture(Operation operation, const std::string& function_suffix, |
| 841 | const std::vector<TextureIR>& extras) { | 1094 | const std::vector<TextureIR>& extras) { |
| 842 | constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; | 1095 | constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"}; |
| 843 | 1096 | ||
| 844 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1097 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 845 | ASSERT(meta); | 1098 | ASSERT(meta); |
| @@ -856,17 +1109,17 @@ private: | |||
| 856 | expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); | 1109 | expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); |
| 857 | expr += '('; | 1110 | expr += '('; |
| 858 | for (std::size_t i = 0; i < count; ++i) { | 1111 | for (std::size_t i = 0; i < count; ++i) { |
| 859 | expr += Visit(operation[i]); | 1112 | expr += Visit(operation[i]).AsFloat(); |
| 860 | 1113 | ||
| 861 | const std::size_t next = i + 1; | 1114 | const std::size_t next = i + 1; |
| 862 | if (next < count) | 1115 | if (next < count) |
| 863 | expr += ", "; | 1116 | expr += ", "; |
| 864 | } | 1117 | } |
| 865 | if (has_array) { | 1118 | if (has_array) { |
| 866 | expr += ", float(ftoi(" + Visit(meta->array) + "))"; | 1119 | expr += ", float(" + Visit(meta->array).AsInt() + ')'; |
| 867 | } | 1120 | } |
| 868 | if (has_shadow) { | 1121 | if (has_shadow) { |
| 869 | expr += ", " + Visit(meta->depth_compare); | 1122 | expr += ", " + Visit(meta->depth_compare).AsFloat(); |
| 870 | } | 1123 | } |
| 871 | expr += ')'; | 1124 | expr += ')'; |
| 872 | 1125 | ||
| @@ -897,11 +1150,11 @@ private: | |||
| 897 | // required to be constant) | 1150 | // required to be constant) |
| 898 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); | 1151 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); |
| 899 | } else { | 1152 | } else { |
| 900 | expr += fmt::format("ftoi({})", Visit(operand)); | 1153 | expr += Visit(operand).AsInt(); |
| 901 | } | 1154 | } |
| 902 | break; | 1155 | break; |
| 903 | case Type::Float: | 1156 | case Type::Float: |
| 904 | expr += Visit(operand); | 1157 | expr += Visit(operand).AsFloat(); |
| 905 | break; | 1158 | break; |
| 906 | default: { | 1159 | default: { |
| 907 | const auto type_int = static_cast<u32>(type); | 1160 | const auto type_int = static_cast<u32>(type); |
| @@ -917,7 +1170,7 @@ private: | |||
| 917 | if (aoffi.empty()) { | 1170 | if (aoffi.empty()) { |
| 918 | return {}; | 1171 | return {}; |
| 919 | } | 1172 | } |
| 920 | constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"}; | 1173 | constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"}; |
| 921 | std::string expr = ", "; | 1174 | std::string expr = ", "; |
| 922 | expr += coord_constructors.at(aoffi.size() - 1); | 1175 | expr += coord_constructors.at(aoffi.size() - 1); |
| 923 | expr += '('; | 1176 | expr += '('; |
| @@ -930,7 +1183,7 @@ private: | |||
| 930 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); | 1183 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); |
| 931 | } else if (device.HasVariableAoffi()) { | 1184 | } else if (device.HasVariableAoffi()) { |
| 932 | // Avoid using variable AOFFI on unsupported devices. | 1185 | // Avoid using variable AOFFI on unsupported devices. |
| 933 | expr += fmt::format("ftoi({})", Visit(operand)); | 1186 | expr += Visit(operand).AsInt(); |
| 934 | } else { | 1187 | } else { |
| 935 | // Insert 0 on devices not supporting variable AOFFI. | 1188 | // Insert 0 on devices not supporting variable AOFFI. |
| 936 | expr += '0'; | 1189 | expr += '0'; |
| @@ -944,318 +1197,382 @@ private: | |||
| 944 | return expr; | 1197 | return expr; |
| 945 | } | 1198 | } |
| 946 | 1199 | ||
| 947 | std::string Assign(Operation operation) { | 1200 | std::string BuildIntegerCoordinates(Operation operation) { |
| 1201 | constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; | ||
| 1202 | const std::size_t coords_count{operation.GetOperandsCount()}; | ||
| 1203 | std::string expr = constructors.at(coords_count - 1); | ||
| 1204 | for (std::size_t i = 0; i < coords_count; ++i) { | ||
| 1205 | expr += VisitOperand(operation, i).AsInt(); | ||
| 1206 | if (i + 1 < coords_count) { | ||
| 1207 | expr += ", "; | ||
| 1208 | } | ||
| 1209 | } | ||
| 1210 | expr += ')'; | ||
| 1211 | return expr; | ||
| 1212 | } | ||
| 1213 | |||
| 1214 | std::string BuildImageValues(Operation operation) { | ||
| 1215 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 1216 | const auto [constructors, type] = [&]() -> std::pair<std::array<const char*, 4>, Type> { | ||
| 1217 | constexpr std::array float_constructors{"float", "vec2", "vec3", "vec4"}; | ||
| 1218 | if (!meta.image.IsSizeKnown()) { | ||
| 1219 | return {float_constructors, Type::Float}; | ||
| 1220 | } | ||
| 1221 | switch (meta.image.GetSize()) { | ||
| 1222 | case Tegra::Shader::ImageAtomicSize::U32: | ||
| 1223 | return {{"uint", "uvec2", "uvec3", "uvec4"}, Type::Uint}; | ||
| 1224 | case Tegra::Shader::ImageAtomicSize::S32: | ||
| 1225 | return {{"int", "ivec2", "ivec3", "ivec4"}, Type::Uint}; | ||
| 1226 | default: | ||
| 1227 | UNIMPLEMENTED_MSG("Unimplemented image size={}", | ||
| 1228 | static_cast<u32>(meta.image.GetSize())); | ||
| 1229 | return {float_constructors, Type::Float}; | ||
| 1230 | } | ||
| 1231 | }(); | ||
| 1232 | |||
| 1233 | const std::size_t values_count{meta.values.size()}; | ||
| 1234 | std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); | ||
| 1235 | for (std::size_t i = 0; i < values_count; ++i) { | ||
| 1236 | expr += Visit(meta.values.at(i)).As(type); | ||
| 1237 | if (i + 1 < values_count) { | ||
| 1238 | expr += ", "; | ||
| 1239 | } | ||
| 1240 | } | ||
| 1241 | expr += ')'; | ||
| 1242 | return expr; | ||
| 1243 | } | ||
| 1244 | |||
| 1245 | Expression AtomicImage(Operation operation, const char* opname) { | ||
| 1246 | constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; | ||
| 1247 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 1248 | ASSERT(meta.values.size() == 1); | ||
| 1249 | ASSERT(meta.image.IsSizeKnown()); | ||
| 1250 | |||
| 1251 | const auto type = [&]() { | ||
| 1252 | switch (const auto size = meta.image.GetSize()) { | ||
| 1253 | case Tegra::Shader::ImageAtomicSize::U32: | ||
| 1254 | return Type::Uint; | ||
| 1255 | case Tegra::Shader::ImageAtomicSize::S32: | ||
| 1256 | return Type::Int; | ||
| 1257 | default: | ||
| 1258 | UNIMPLEMENTED_MSG("Unimplemented image size={}", static_cast<u32>(size)); | ||
| 1259 | return Type::Uint; | ||
| 1260 | } | ||
| 1261 | }(); | ||
| 1262 | |||
| 1263 | return {fmt::format("{}({}, {}, {})", opname, GetImage(meta.image), | ||
| 1264 | BuildIntegerCoordinates(operation), Visit(meta.values[0]).As(type)), | ||
| 1265 | type}; | ||
| 1266 | } | ||
| 1267 | |||
| 1268 | Expression Assign(Operation operation) { | ||
| 948 | const Node& dest = operation[0]; | 1269 | const Node& dest = operation[0]; |
| 949 | const Node& src = operation[1]; | 1270 | const Node& src = operation[1]; |
| 950 | 1271 | ||
| 951 | std::string target; | 1272 | Expression target; |
| 952 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | 1273 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { |
| 953 | if (gpr->GetIndex() == Register::ZeroIndex) { | 1274 | if (gpr->GetIndex() == Register::ZeroIndex) { |
| 954 | // Writing to Register::ZeroIndex is a no op | 1275 | // Writing to Register::ZeroIndex is a no op |
| 955 | return {}; | 1276 | return {}; |
| 956 | } | 1277 | } |
| 957 | target = GetRegister(gpr->GetIndex()); | 1278 | target = {GetRegister(gpr->GetIndex()), Type::Float}; |
| 958 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { | 1279 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { |
| 959 | UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); | 1280 | UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); |
| 960 | 1281 | target = GetOutputAttribute(abuf); | |
| 961 | target = [&]() -> std::string { | ||
| 962 | switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { | ||
| 963 | case Attribute::Index::Position: | ||
| 964 | return "gl_Position"s + GetSwizzle(abuf->GetElement()); | ||
| 965 | case Attribute::Index::PointSize: | ||
| 966 | return "gl_PointSize"; | ||
| 967 | case Attribute::Index::ClipDistances0123: | ||
| 968 | return fmt::format("gl_ClipDistance[{}]", abuf->GetElement()); | ||
| 969 | case Attribute::Index::ClipDistances4567: | ||
| 970 | return fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4); | ||
| 971 | default: | ||
| 972 | if (IsGenericAttribute(attribute)) { | ||
| 973 | return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()); | ||
| 974 | } | ||
| 975 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", | ||
| 976 | static_cast<u32>(attribute)); | ||
| 977 | return "0"; | ||
| 978 | } | ||
| 979 | }(); | ||
| 980 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | 1282 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { |
| 981 | target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | 1283 | if (stage == ProgramType::Compute) { |
| 1284 | LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); | ||
| 1285 | } | ||
| 1286 | target = { | ||
| 1287 | fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), | ||
| 1288 | Type::Uint}; | ||
| 982 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | 1289 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |
| 983 | const std::string real = Visit(gmem->GetRealAddress()); | 1290 | const std::string real = Visit(gmem->GetRealAddress()).AsUint(); |
| 984 | const std::string base = Visit(gmem->GetBaseAddress()); | 1291 | const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); |
| 985 | const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); | 1292 | const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); |
| 986 | target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); | 1293 | target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), |
| 1294 | Type::Uint}; | ||
| 987 | } else { | 1295 | } else { |
| 988 | UNREACHABLE_MSG("Assign called without a proper target"); | 1296 | UNREACHABLE_MSG("Assign called without a proper target"); |
| 989 | } | 1297 | } |
| 990 | 1298 | ||
| 991 | code.AddLine("{} = {};", target, Visit(src)); | 1299 | code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType())); |
| 992 | return {}; | 1300 | return {}; |
| 993 | } | 1301 | } |
| 994 | 1302 | ||
| 995 | template <Type type> | 1303 | template <Type type> |
| 996 | std::string Add(Operation operation) { | 1304 | Expression Add(Operation operation) { |
| 997 | return GenerateBinaryInfix(operation, "+", type, type, type); | 1305 | return GenerateBinaryInfix(operation, "+", type, type, type); |
| 998 | } | 1306 | } |
| 999 | 1307 | ||
| 1000 | template <Type type> | 1308 | template <Type type> |
| 1001 | std::string Mul(Operation operation) { | 1309 | Expression Mul(Operation operation) { |
| 1002 | return GenerateBinaryInfix(operation, "*", type, type, type); | 1310 | return GenerateBinaryInfix(operation, "*", type, type, type); |
| 1003 | } | 1311 | } |
| 1004 | 1312 | ||
| 1005 | template <Type type> | 1313 | template <Type type> |
| 1006 | std::string Div(Operation operation) { | 1314 | Expression Div(Operation operation) { |
| 1007 | return GenerateBinaryInfix(operation, "/", type, type, type); | 1315 | return GenerateBinaryInfix(operation, "/", type, type, type); |
| 1008 | } | 1316 | } |
| 1009 | 1317 | ||
| 1010 | template <Type type> | 1318 | template <Type type> |
| 1011 | std::string Fma(Operation operation) { | 1319 | Expression Fma(Operation operation) { |
| 1012 | return GenerateTernary(operation, "fma", type, type, type, type); | 1320 | return GenerateTernary(operation, "fma", type, type, type, type); |
| 1013 | } | 1321 | } |
| 1014 | 1322 | ||
| 1015 | template <Type type> | 1323 | template <Type type> |
| 1016 | std::string Negate(Operation operation) { | 1324 | Expression Negate(Operation operation) { |
| 1017 | return GenerateUnary(operation, "-", type, type, true); | 1325 | return GenerateUnary(operation, "-", type, type); |
| 1018 | } | 1326 | } |
| 1019 | 1327 | ||
| 1020 | template <Type type> | 1328 | template <Type type> |
| 1021 | std::string Absolute(Operation operation) { | 1329 | Expression Absolute(Operation operation) { |
| 1022 | return GenerateUnary(operation, "abs", type, type, false); | 1330 | return GenerateUnary(operation, "abs", type, type); |
| 1023 | } | 1331 | } |
| 1024 | 1332 | ||
| 1025 | std::string FClamp(Operation operation) { | 1333 | Expression FClamp(Operation operation) { |
| 1026 | return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float, | 1334 | return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float, |
| 1027 | Type::Float); | 1335 | Type::Float); |
| 1028 | } | 1336 | } |
| 1029 | 1337 | ||
| 1338 | Expression FCastHalf0(Operation operation) { | ||
| 1339 | return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; | ||
| 1340 | } | ||
| 1341 | |||
| 1342 | Expression FCastHalf1(Operation operation) { | ||
| 1343 | return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; | ||
| 1344 | } | ||
| 1345 | |||
| 1030 | template <Type type> | 1346 | template <Type type> |
| 1031 | std::string Min(Operation operation) { | 1347 | Expression Min(Operation operation) { |
| 1032 | return GenerateBinaryCall(operation, "min", type, type, type); | 1348 | return GenerateBinaryCall(operation, "min", type, type, type); |
| 1033 | } | 1349 | } |
| 1034 | 1350 | ||
| 1035 | template <Type type> | 1351 | template <Type type> |
| 1036 | std::string Max(Operation operation) { | 1352 | Expression Max(Operation operation) { |
| 1037 | return GenerateBinaryCall(operation, "max", type, type, type); | 1353 | return GenerateBinaryCall(operation, "max", type, type, type); |
| 1038 | } | 1354 | } |
| 1039 | 1355 | ||
| 1040 | std::string Select(Operation operation) { | 1356 | Expression Select(Operation operation) { |
| 1041 | const std::string condition = Visit(operation[0]); | 1357 | const std::string condition = Visit(operation[0]).AsBool(); |
| 1042 | const std::string true_case = Visit(operation[1]); | 1358 | const std::string true_case = Visit(operation[1]).AsUint(); |
| 1043 | const std::string false_case = Visit(operation[2]); | 1359 | const std::string false_case = Visit(operation[2]).AsUint(); |
| 1044 | const std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case); | 1360 | std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case); |
| 1045 | 1361 | ||
| 1046 | return ApplyPrecise(operation, op_str); | 1362 | return ApplyPrecise(operation, std::move(op_str), Type::Uint); |
| 1047 | } | 1363 | } |
| 1048 | 1364 | ||
| 1049 | std::string FCos(Operation operation) { | 1365 | Expression FCos(Operation operation) { |
| 1050 | return GenerateUnary(operation, "cos", Type::Float, Type::Float, false); | 1366 | return GenerateUnary(operation, "cos", Type::Float, Type::Float); |
| 1051 | } | 1367 | } |
| 1052 | 1368 | ||
| 1053 | std::string FSin(Operation operation) { | 1369 | Expression FSin(Operation operation) { |
| 1054 | return GenerateUnary(operation, "sin", Type::Float, Type::Float, false); | 1370 | return GenerateUnary(operation, "sin", Type::Float, Type::Float); |
| 1055 | } | 1371 | } |
| 1056 | 1372 | ||
| 1057 | std::string FExp2(Operation operation) { | 1373 | Expression FExp2(Operation operation) { |
| 1058 | return GenerateUnary(operation, "exp2", Type::Float, Type::Float, false); | 1374 | return GenerateUnary(operation, "exp2", Type::Float, Type::Float); |
| 1059 | } | 1375 | } |
| 1060 | 1376 | ||
| 1061 | std::string FLog2(Operation operation) { | 1377 | Expression FLog2(Operation operation) { |
| 1062 | return GenerateUnary(operation, "log2", Type::Float, Type::Float, false); | 1378 | return GenerateUnary(operation, "log2", Type::Float, Type::Float); |
| 1063 | } | 1379 | } |
| 1064 | 1380 | ||
| 1065 | std::string FInverseSqrt(Operation operation) { | 1381 | Expression FInverseSqrt(Operation operation) { |
| 1066 | return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float, false); | 1382 | return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float); |
| 1067 | } | 1383 | } |
| 1068 | 1384 | ||
| 1069 | std::string FSqrt(Operation operation) { | 1385 | Expression FSqrt(Operation operation) { |
| 1070 | return GenerateUnary(operation, "sqrt", Type::Float, Type::Float, false); | 1386 | return GenerateUnary(operation, "sqrt", Type::Float, Type::Float); |
| 1071 | } | 1387 | } |
| 1072 | 1388 | ||
| 1073 | std::string FRoundEven(Operation operation) { | 1389 | Expression FRoundEven(Operation operation) { |
| 1074 | return GenerateUnary(operation, "roundEven", Type::Float, Type::Float, false); | 1390 | return GenerateUnary(operation, "roundEven", Type::Float, Type::Float); |
| 1075 | } | 1391 | } |
| 1076 | 1392 | ||
| 1077 | std::string FFloor(Operation operation) { | 1393 | Expression FFloor(Operation operation) { |
| 1078 | return GenerateUnary(operation, "floor", Type::Float, Type::Float, false); | 1394 | return GenerateUnary(operation, "floor", Type::Float, Type::Float); |
| 1079 | } | 1395 | } |
| 1080 | 1396 | ||
| 1081 | std::string FCeil(Operation operation) { | 1397 | Expression FCeil(Operation operation) { |
| 1082 | return GenerateUnary(operation, "ceil", Type::Float, Type::Float, false); | 1398 | return GenerateUnary(operation, "ceil", Type::Float, Type::Float); |
| 1083 | } | 1399 | } |
| 1084 | 1400 | ||
| 1085 | std::string FTrunc(Operation operation) { | 1401 | Expression FTrunc(Operation operation) { |
| 1086 | return GenerateUnary(operation, "trunc", Type::Float, Type::Float, false); | 1402 | return GenerateUnary(operation, "trunc", Type::Float, Type::Float); |
| 1087 | } | 1403 | } |
| 1088 | 1404 | ||
| 1089 | template <Type type> | 1405 | template <Type type> |
| 1090 | std::string FCastInteger(Operation operation) { | 1406 | Expression FCastInteger(Operation operation) { |
| 1091 | return GenerateUnary(operation, "float", Type::Float, type, false); | 1407 | return GenerateUnary(operation, "float", Type::Float, type); |
| 1092 | } | 1408 | } |
| 1093 | 1409 | ||
| 1094 | std::string ICastFloat(Operation operation) { | 1410 | Expression ICastFloat(Operation operation) { |
| 1095 | return GenerateUnary(operation, "int", Type::Int, Type::Float, false); | 1411 | return GenerateUnary(operation, "int", Type::Int, Type::Float); |
| 1096 | } | 1412 | } |
| 1097 | 1413 | ||
| 1098 | std::string ICastUnsigned(Operation operation) { | 1414 | Expression ICastUnsigned(Operation operation) { |
| 1099 | return GenerateUnary(operation, "int", Type::Int, Type::Uint, false); | 1415 | return GenerateUnary(operation, "int", Type::Int, Type::Uint); |
| 1100 | } | 1416 | } |
| 1101 | 1417 | ||
| 1102 | template <Type type> | 1418 | template <Type type> |
| 1103 | std::string LogicalShiftLeft(Operation operation) { | 1419 | Expression LogicalShiftLeft(Operation operation) { |
| 1104 | return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint); | 1420 | return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint); |
| 1105 | } | 1421 | } |
| 1106 | 1422 | ||
| 1107 | std::string ILogicalShiftRight(Operation operation) { | 1423 | Expression ILogicalShiftRight(Operation operation) { |
| 1108 | const std::string op_a = VisitOperand(operation, 0, Type::Uint); | 1424 | const std::string op_a = VisitOperand(operation, 0).AsUint(); |
| 1109 | const std::string op_b = VisitOperand(operation, 1, Type::Uint); | 1425 | const std::string op_b = VisitOperand(operation, 1).AsUint(); |
| 1110 | const std::string op_str = fmt::format("int({} >> {})", op_a, op_b); | 1426 | std::string op_str = fmt::format("int({} >> {})", op_a, op_b); |
| 1111 | 1427 | ||
| 1112 | return ApplyPrecise(operation, BitwiseCastResult(op_str, Type::Int)); | 1428 | return ApplyPrecise(operation, std::move(op_str), Type::Int); |
| 1113 | } | 1429 | } |
| 1114 | 1430 | ||
| 1115 | std::string IArithmeticShiftRight(Operation operation) { | 1431 | Expression IArithmeticShiftRight(Operation operation) { |
| 1116 | return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint); | 1432 | return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint); |
| 1117 | } | 1433 | } |
| 1118 | 1434 | ||
| 1119 | template <Type type> | 1435 | template <Type type> |
| 1120 | std::string BitwiseAnd(Operation operation) { | 1436 | Expression BitwiseAnd(Operation operation) { |
| 1121 | return GenerateBinaryInfix(operation, "&", type, type, type); | 1437 | return GenerateBinaryInfix(operation, "&", type, type, type); |
| 1122 | } | 1438 | } |
| 1123 | 1439 | ||
| 1124 | template <Type type> | 1440 | template <Type type> |
| 1125 | std::string BitwiseOr(Operation operation) { | 1441 | Expression BitwiseOr(Operation operation) { |
| 1126 | return GenerateBinaryInfix(operation, "|", type, type, type); | 1442 | return GenerateBinaryInfix(operation, "|", type, type, type); |
| 1127 | } | 1443 | } |
| 1128 | 1444 | ||
| 1129 | template <Type type> | 1445 | template <Type type> |
| 1130 | std::string BitwiseXor(Operation operation) { | 1446 | Expression BitwiseXor(Operation operation) { |
| 1131 | return GenerateBinaryInfix(operation, "^", type, type, type); | 1447 | return GenerateBinaryInfix(operation, "^", type, type, type); |
| 1132 | } | 1448 | } |
| 1133 | 1449 | ||
| 1134 | template <Type type> | 1450 | template <Type type> |
| 1135 | std::string BitwiseNot(Operation operation) { | 1451 | Expression BitwiseNot(Operation operation) { |
| 1136 | return GenerateUnary(operation, "~", type, type, false); | 1452 | return GenerateUnary(operation, "~", type, type); |
| 1137 | } | 1453 | } |
| 1138 | 1454 | ||
| 1139 | std::string UCastFloat(Operation operation) { | 1455 | Expression UCastFloat(Operation operation) { |
| 1140 | return GenerateUnary(operation, "uint", Type::Uint, Type::Float, false); | 1456 | return GenerateUnary(operation, "uint", Type::Uint, Type::Float); |
| 1141 | } | 1457 | } |
| 1142 | 1458 | ||
| 1143 | std::string UCastSigned(Operation operation) { | 1459 | Expression UCastSigned(Operation operation) { |
| 1144 | return GenerateUnary(operation, "uint", Type::Uint, Type::Int, false); | 1460 | return GenerateUnary(operation, "uint", Type::Uint, Type::Int); |
| 1145 | } | 1461 | } |
| 1146 | 1462 | ||
| 1147 | std::string UShiftRight(Operation operation) { | 1463 | Expression UShiftRight(Operation operation) { |
| 1148 | return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint); | 1464 | return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint); |
| 1149 | } | 1465 | } |
| 1150 | 1466 | ||
| 1151 | template <Type type> | 1467 | template <Type type> |
| 1152 | std::string BitfieldInsert(Operation operation) { | 1468 | Expression BitfieldInsert(Operation operation) { |
| 1153 | return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int, | 1469 | return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int, |
| 1154 | Type::Int); | 1470 | Type::Int); |
| 1155 | } | 1471 | } |
| 1156 | 1472 | ||
| 1157 | template <Type type> | 1473 | template <Type type> |
| 1158 | std::string BitfieldExtract(Operation operation) { | 1474 | Expression BitfieldExtract(Operation operation) { |
| 1159 | return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int); | 1475 | return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int); |
| 1160 | } | 1476 | } |
| 1161 | 1477 | ||
| 1162 | template <Type type> | 1478 | template <Type type> |
| 1163 | std::string BitCount(Operation operation) { | 1479 | Expression BitCount(Operation operation) { |
| 1164 | return GenerateUnary(operation, "bitCount", type, type, false); | 1480 | return GenerateUnary(operation, "bitCount", type, type); |
| 1165 | } | 1481 | } |
| 1166 | 1482 | ||
| 1167 | std::string HNegate(Operation operation) { | 1483 | Expression HNegate(Operation operation) { |
| 1168 | const auto GetNegate = [&](std::size_t index) { | 1484 | const auto GetNegate = [&](std::size_t index) { |
| 1169 | return VisitOperand(operation, index, Type::Bool) + " ? -1 : 1"; | 1485 | return VisitOperand(operation, index).AsBool() + " ? -1 : 1"; |
| 1170 | }; | 1486 | }; |
| 1171 | const std::string value = | 1487 | return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(), |
| 1172 | fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0, Type::HalfFloat), | 1488 | GetNegate(1), GetNegate(2)), |
| 1173 | GetNegate(1), GetNegate(2)); | 1489 | Type::HalfFloat}; |
| 1174 | return BitwiseCastResult(value, Type::HalfFloat); | 1490 | } |
| 1175 | } | 1491 | |
| 1176 | 1492 | Expression HClamp(Operation operation) { | |
| 1177 | std::string HClamp(Operation operation) { | 1493 | const std::string value = VisitOperand(operation, 0).AsHalfFloat(); |
| 1178 | const std::string value = VisitOperand(operation, 0, Type::HalfFloat); | 1494 | const std::string min = VisitOperand(operation, 1).AsFloat(); |
| 1179 | const std::string min = VisitOperand(operation, 1, Type::Float); | 1495 | const std::string max = VisitOperand(operation, 2).AsFloat(); |
| 1180 | const std::string max = VisitOperand(operation, 2, Type::Float); | 1496 | std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); |
| 1181 | const std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); | 1497 | |
| 1182 | 1498 | return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat); | |
| 1183 | return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); | 1499 | } |
| 1184 | } | 1500 | |
| 1185 | 1501 | Expression HCastFloat(Operation operation) { | |
| 1186 | std::string HUnpack(Operation operation) { | 1502 | return {fmt::format("vec2({})", VisitOperand(operation, 0).AsFloat()), Type::HalfFloat}; |
| 1187 | const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; | ||
| 1188 | const auto value = [&]() -> std::string { | ||
| 1189 | switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { | ||
| 1190 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1191 | return operand; | ||
| 1192 | case Tegra::Shader::HalfType::F32: | ||
| 1193 | return fmt::format("vec2(fromHalf2({}))", operand); | ||
| 1194 | case Tegra::Shader::HalfType::H0_H0: | ||
| 1195 | return fmt::format("vec2({}[0])", operand); | ||
| 1196 | case Tegra::Shader::HalfType::H1_H1: | ||
| 1197 | return fmt::format("vec2({}[1])", operand); | ||
| 1198 | } | ||
| 1199 | UNREACHABLE(); | ||
| 1200 | return "0"; | ||
| 1201 | }(); | ||
| 1202 | return fmt::format("fromHalf2({})", value); | ||
| 1203 | } | 1503 | } |
| 1204 | 1504 | ||
| 1205 | std::string HMergeF32(Operation operation) { | 1505 | Expression HUnpack(Operation operation) { |
| 1206 | return fmt::format("float(toHalf2({})[0])", Visit(operation[0])); | 1506 | Expression operand = VisitOperand(operation, 0); |
| 1507 | switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { | ||
| 1508 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1509 | return operand; | ||
| 1510 | case Tegra::Shader::HalfType::F32: | ||
| 1511 | return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat}; | ||
| 1512 | case Tegra::Shader::HalfType::H0_H0: | ||
| 1513 | return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat}; | ||
| 1514 | case Tegra::Shader::HalfType::H1_H1: | ||
| 1515 | return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat}; | ||
| 1516 | } | ||
| 1517 | } | ||
| 1518 | |||
| 1519 | Expression HMergeF32(Operation operation) { | ||
| 1520 | return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; | ||
| 1207 | } | 1521 | } |
| 1208 | 1522 | ||
| 1209 | std::string HMergeH0(Operation operation) { | 1523 | Expression HMergeH0(Operation operation) { |
| 1210 | return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[1]), | 1524 | std::string dest = VisitOperand(operation, 0).AsUint(); |
| 1211 | Visit(operation[0])); | 1525 | std::string src = VisitOperand(operation, 1).AsUint(); |
| 1526 | return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", src, dest), Type::Uint}; | ||
| 1212 | } | 1527 | } |
| 1213 | 1528 | ||
| 1214 | std::string HMergeH1(Operation operation) { | 1529 | Expression HMergeH1(Operation operation) { |
| 1215 | return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[0]), | 1530 | std::string dest = VisitOperand(operation, 0).AsUint(); |
| 1216 | Visit(operation[1])); | 1531 | std::string src = VisitOperand(operation, 1).AsUint(); |
| 1532 | return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", dest, src), Type::Uint}; | ||
| 1217 | } | 1533 | } |
| 1218 | 1534 | ||
| 1219 | std::string HPack2(Operation operation) { | 1535 | Expression HPack2(Operation operation) { |
| 1220 | return fmt::format("utof(packHalf2x16(vec2({}, {})))", Visit(operation[0]), | 1536 | return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(), |
| 1221 | Visit(operation[1])); | 1537 | VisitOperand(operation, 1).AsFloat()), |
| 1538 | Type::HalfFloat}; | ||
| 1222 | } | 1539 | } |
| 1223 | 1540 | ||
| 1224 | template <Type type> | 1541 | template <Type type> |
| 1225 | std::string LogicalLessThan(Operation operation) { | 1542 | Expression LogicalLessThan(Operation operation) { |
| 1226 | return GenerateBinaryInfix(operation, "<", Type::Bool, type, type); | 1543 | return GenerateBinaryInfix(operation, "<", Type::Bool, type, type); |
| 1227 | } | 1544 | } |
| 1228 | 1545 | ||
| 1229 | template <Type type> | 1546 | template <Type type> |
| 1230 | std::string LogicalEqual(Operation operation) { | 1547 | Expression LogicalEqual(Operation operation) { |
| 1231 | return GenerateBinaryInfix(operation, "==", Type::Bool, type, type); | 1548 | return GenerateBinaryInfix(operation, "==", Type::Bool, type, type); |
| 1232 | } | 1549 | } |
| 1233 | 1550 | ||
| 1234 | template <Type type> | 1551 | template <Type type> |
| 1235 | std::string LogicalLessEqual(Operation operation) { | 1552 | Expression LogicalLessEqual(Operation operation) { |
| 1236 | return GenerateBinaryInfix(operation, "<=", Type::Bool, type, type); | 1553 | return GenerateBinaryInfix(operation, "<=", Type::Bool, type, type); |
| 1237 | } | 1554 | } |
| 1238 | 1555 | ||
| 1239 | template <Type type> | 1556 | template <Type type> |
| 1240 | std::string LogicalGreaterThan(Operation operation) { | 1557 | Expression LogicalGreaterThan(Operation operation) { |
| 1241 | return GenerateBinaryInfix(operation, ">", Type::Bool, type, type); | 1558 | return GenerateBinaryInfix(operation, ">", Type::Bool, type, type); |
| 1242 | } | 1559 | } |
| 1243 | 1560 | ||
| 1244 | template <Type type> | 1561 | template <Type type> |
| 1245 | std::string LogicalNotEqual(Operation operation) { | 1562 | Expression LogicalNotEqual(Operation operation) { |
| 1246 | return GenerateBinaryInfix(operation, "!=", Type::Bool, type, type); | 1563 | return GenerateBinaryInfix(operation, "!=", Type::Bool, type, type); |
| 1247 | } | 1564 | } |
| 1248 | 1565 | ||
| 1249 | template <Type type> | 1566 | template <Type type> |
| 1250 | std::string LogicalGreaterEqual(Operation operation) { | 1567 | Expression LogicalGreaterEqual(Operation operation) { |
| 1251 | return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type); | 1568 | return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type); |
| 1252 | } | 1569 | } |
| 1253 | 1570 | ||
| 1254 | std::string LogicalFIsNan(Operation operation) { | 1571 | Expression LogicalFIsNan(Operation operation) { |
| 1255 | return GenerateUnary(operation, "isnan", Type::Bool, Type::Float, false); | 1572 | return GenerateUnary(operation, "isnan", Type::Bool, Type::Float); |
| 1256 | } | 1573 | } |
| 1257 | 1574 | ||
| 1258 | std::string LogicalAssign(Operation operation) { | 1575 | Expression LogicalAssign(Operation operation) { |
| 1259 | const Node& dest = operation[0]; | 1576 | const Node& dest = operation[0]; |
| 1260 | const Node& src = operation[1]; | 1577 | const Node& src = operation[1]; |
| 1261 | 1578 | ||
| @@ -1276,82 +1593,80 @@ private: | |||
| 1276 | target = GetInternalFlag(flag->GetFlag()); | 1593 | target = GetInternalFlag(flag->GetFlag()); |
| 1277 | } | 1594 | } |
| 1278 | 1595 | ||
| 1279 | code.AddLine("{} = {};", target, Visit(src)); | 1596 | code.AddLine("{} = {};", target, Visit(src).AsBool()); |
| 1280 | return {}; | 1597 | return {}; |
| 1281 | } | 1598 | } |
| 1282 | 1599 | ||
| 1283 | std::string LogicalAnd(Operation operation) { | 1600 | Expression LogicalAnd(Operation operation) { |
| 1284 | return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool); | 1601 | return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool); |
| 1285 | } | 1602 | } |
| 1286 | 1603 | ||
| 1287 | std::string LogicalOr(Operation operation) { | 1604 | Expression LogicalOr(Operation operation) { |
| 1288 | return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool); | 1605 | return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool); |
| 1289 | } | 1606 | } |
| 1290 | 1607 | ||
| 1291 | std::string LogicalXor(Operation operation) { | 1608 | Expression LogicalXor(Operation operation) { |
| 1292 | return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool); | 1609 | return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool); |
| 1293 | } | 1610 | } |
| 1294 | 1611 | ||
| 1295 | std::string LogicalNegate(Operation operation) { | 1612 | Expression LogicalNegate(Operation operation) { |
| 1296 | return GenerateUnary(operation, "!", Type::Bool, Type::Bool, false); | 1613 | return GenerateUnary(operation, "!", Type::Bool, Type::Bool); |
| 1297 | } | 1614 | } |
| 1298 | 1615 | ||
| 1299 | std::string LogicalPick2(Operation operation) { | 1616 | Expression LogicalPick2(Operation operation) { |
| 1300 | const std::string pair = VisitOperand(operation, 0, Type::Bool2); | 1617 | return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(), |
| 1301 | return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); | 1618 | VisitOperand(operation, 1).AsUint()), |
| 1619 | Type::Bool}; | ||
| 1302 | } | 1620 | } |
| 1303 | 1621 | ||
| 1304 | std::string LogicalAll2(Operation operation) { | 1622 | Expression LogicalAnd2(Operation operation) { |
| 1305 | return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); | 1623 | return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); |
| 1306 | } | 1624 | } |
| 1307 | 1625 | ||
| 1308 | std::string LogicalAny2(Operation operation) { | ||
| 1309 | return GenerateUnary(operation, "any", Type::Bool, Type::Bool2); | ||
| 1310 | } | ||
| 1311 | |||
| 1312 | template <bool with_nan> | 1626 | template <bool with_nan> |
| 1313 | std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { | 1627 | Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) { |
| 1314 | const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, | 1628 | Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2, |
| 1315 | Type::HalfFloat, Type::HalfFloat)}; | 1629 | Type::HalfFloat, Type::HalfFloat); |
| 1316 | if constexpr (!with_nan) { | 1630 | if constexpr (!with_nan) { |
| 1317 | return comparison; | 1631 | return comparison; |
| 1318 | } | 1632 | } |
| 1319 | return fmt::format("halfFloatNanComparison({}, {}, {})", comparison, | 1633 | return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(), |
| 1320 | VisitOperand(operation, 0, Type::HalfFloat), | 1634 | VisitOperand(operation, 0).AsHalfFloat(), |
| 1321 | VisitOperand(operation, 1, Type::HalfFloat)); | 1635 | VisitOperand(operation, 1).AsHalfFloat()), |
| 1636 | Type::Bool2}; | ||
| 1322 | } | 1637 | } |
| 1323 | 1638 | ||
| 1324 | template <bool with_nan> | 1639 | template <bool with_nan> |
| 1325 | std::string Logical2HLessThan(Operation operation) { | 1640 | Expression Logical2HLessThan(Operation operation) { |
| 1326 | return GenerateHalfComparison<with_nan>(operation, "lessThan"); | 1641 | return GenerateHalfComparison<with_nan>(operation, "lessThan"); |
| 1327 | } | 1642 | } |
| 1328 | 1643 | ||
| 1329 | template <bool with_nan> | 1644 | template <bool with_nan> |
| 1330 | std::string Logical2HEqual(Operation operation) { | 1645 | Expression Logical2HEqual(Operation operation) { |
| 1331 | return GenerateHalfComparison<with_nan>(operation, "equal"); | 1646 | return GenerateHalfComparison<with_nan>(operation, "equal"); |
| 1332 | } | 1647 | } |
| 1333 | 1648 | ||
| 1334 | template <bool with_nan> | 1649 | template <bool with_nan> |
| 1335 | std::string Logical2HLessEqual(Operation operation) { | 1650 | Expression Logical2HLessEqual(Operation operation) { |
| 1336 | return GenerateHalfComparison<with_nan>(operation, "lessThanEqual"); | 1651 | return GenerateHalfComparison<with_nan>(operation, "lessThanEqual"); |
| 1337 | } | 1652 | } |
| 1338 | 1653 | ||
| 1339 | template <bool with_nan> | 1654 | template <bool with_nan> |
| 1340 | std::string Logical2HGreaterThan(Operation operation) { | 1655 | Expression Logical2HGreaterThan(Operation operation) { |
| 1341 | return GenerateHalfComparison<with_nan>(operation, "greaterThan"); | 1656 | return GenerateHalfComparison<with_nan>(operation, "greaterThan"); |
| 1342 | } | 1657 | } |
| 1343 | 1658 | ||
| 1344 | template <bool with_nan> | 1659 | template <bool with_nan> |
| 1345 | std::string Logical2HNotEqual(Operation operation) { | 1660 | Expression Logical2HNotEqual(Operation operation) { |
| 1346 | return GenerateHalfComparison<with_nan>(operation, "notEqual"); | 1661 | return GenerateHalfComparison<with_nan>(operation, "notEqual"); |
| 1347 | } | 1662 | } |
| 1348 | 1663 | ||
| 1349 | template <bool with_nan> | 1664 | template <bool with_nan> |
| 1350 | std::string Logical2HGreaterEqual(Operation operation) { | 1665 | Expression Logical2HGreaterEqual(Operation operation) { |
| 1351 | return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual"); | 1666 | return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual"); |
| 1352 | } | 1667 | } |
| 1353 | 1668 | ||
| 1354 | std::string Texture(Operation operation) { | 1669 | Expression Texture(Operation operation) { |
| 1355 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1670 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1356 | ASSERT(meta); | 1671 | ASSERT(meta); |
| 1357 | 1672 | ||
| @@ -1360,10 +1675,10 @@ private: | |||
| 1360 | if (meta->sampler.IsShadow()) { | 1675 | if (meta->sampler.IsShadow()) { |
| 1361 | expr = "vec4(" + expr + ')'; | 1676 | expr = "vec4(" + expr + ')'; |
| 1362 | } | 1677 | } |
| 1363 | return expr + GetSwizzle(meta->element); | 1678 | return {expr + GetSwizzle(meta->element), Type::Float}; |
| 1364 | } | 1679 | } |
| 1365 | 1680 | ||
| 1366 | std::string TextureLod(Operation operation) { | 1681 | Expression TextureLod(Operation operation) { |
| 1367 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1682 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1368 | ASSERT(meta); | 1683 | ASSERT(meta); |
| 1369 | 1684 | ||
| @@ -1372,54 +1687,54 @@ private: | |||
| 1372 | if (meta->sampler.IsShadow()) { | 1687 | if (meta->sampler.IsShadow()) { |
| 1373 | expr = "vec4(" + expr + ')'; | 1688 | expr = "vec4(" + expr + ')'; |
| 1374 | } | 1689 | } |
| 1375 | return expr + GetSwizzle(meta->element); | 1690 | return {expr + GetSwizzle(meta->element), Type::Float}; |
| 1376 | } | 1691 | } |
| 1377 | 1692 | ||
| 1378 | std::string TextureGather(Operation operation) { | 1693 | Expression TextureGather(Operation operation) { |
| 1379 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1694 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1380 | ASSERT(meta); | 1695 | ASSERT(meta); |
| 1381 | 1696 | ||
| 1382 | const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; | 1697 | const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; |
| 1383 | return GenerateTexture(operation, "Gather", | 1698 | return {GenerateTexture(operation, "Gather", |
| 1384 | {TextureArgument{type, meta->component}, TextureAoffi{}}) + | 1699 | {TextureArgument{type, meta->component}, TextureAoffi{}}) + |
| 1385 | GetSwizzle(meta->element); | 1700 | GetSwizzle(meta->element), |
| 1701 | Type::Float}; | ||
| 1386 | } | 1702 | } |
| 1387 | 1703 | ||
| 1388 | std::string TextureQueryDimensions(Operation operation) { | 1704 | Expression TextureQueryDimensions(Operation operation) { |
| 1389 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1705 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1390 | ASSERT(meta); | 1706 | ASSERT(meta); |
| 1391 | 1707 | ||
| 1392 | const std::string sampler = GetSampler(meta->sampler); | 1708 | const std::string sampler = GetSampler(meta->sampler); |
| 1393 | const std::string lod = VisitOperand(operation, 0, Type::Int); | 1709 | const std::string lod = VisitOperand(operation, 0).AsInt(); |
| 1394 | 1710 | ||
| 1395 | switch (meta->element) { | 1711 | switch (meta->element) { |
| 1396 | case 0: | 1712 | case 0: |
| 1397 | case 1: | 1713 | case 1: |
| 1398 | return fmt::format("itof(int(textureSize({}, {}){}))", sampler, lod, | 1714 | return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)), |
| 1399 | GetSwizzle(meta->element)); | 1715 | Type::Int}; |
| 1400 | case 2: | ||
| 1401 | return "0"; | ||
| 1402 | case 3: | 1716 | case 3: |
| 1403 | return fmt::format("itof(textureQueryLevels({}))", sampler); | 1717 | return {fmt::format("textureQueryLevels({})", sampler), Type::Int}; |
| 1404 | } | 1718 | } |
| 1405 | UNREACHABLE(); | 1719 | UNREACHABLE(); |
| 1406 | return "0"; | 1720 | return {"0", Type::Int}; |
| 1407 | } | 1721 | } |
| 1408 | 1722 | ||
| 1409 | std::string TextureQueryLod(Operation operation) { | 1723 | Expression TextureQueryLod(Operation operation) { |
| 1410 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1724 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1411 | ASSERT(meta); | 1725 | ASSERT(meta); |
| 1412 | 1726 | ||
| 1413 | if (meta->element < 2) { | 1727 | if (meta->element < 2) { |
| 1414 | return fmt::format("itof(int(({} * vec2(256)){}))", | 1728 | return {fmt::format("int(({} * vec2(256)){})", |
| 1415 | GenerateTexture(operation, "QueryLod", {}), | 1729 | GenerateTexture(operation, "QueryLod", {}), |
| 1416 | GetSwizzle(meta->element)); | 1730 | GetSwizzle(meta->element)), |
| 1731 | Type::Int}; | ||
| 1417 | } | 1732 | } |
| 1418 | return "0"; | 1733 | return {"0", Type::Int}; |
| 1419 | } | 1734 | } |
| 1420 | 1735 | ||
| 1421 | std::string TexelFetch(Operation operation) { | 1736 | Expression TexelFetch(Operation operation) { |
| 1422 | constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"}; | 1737 | constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"}; |
| 1423 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1738 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1424 | ASSERT(meta); | 1739 | ASSERT(meta); |
| 1425 | UNIMPLEMENTED_IF(meta->sampler.IsArray()); | 1740 | UNIMPLEMENTED_IF(meta->sampler.IsArray()); |
| @@ -1432,60 +1747,117 @@ private: | |||
| 1432 | expr += constructors.at(operation.GetOperandsCount() - 1); | 1747 | expr += constructors.at(operation.GetOperandsCount() - 1); |
| 1433 | expr += '('; | 1748 | expr += '('; |
| 1434 | for (std::size_t i = 0; i < count; ++i) { | 1749 | for (std::size_t i = 0; i < count; ++i) { |
| 1435 | expr += VisitOperand(operation, i, Type::Int); | 1750 | expr += VisitOperand(operation, i).AsInt(); |
| 1436 | const std::size_t next = i + 1; | 1751 | const std::size_t next = i + 1; |
| 1437 | if (next == count) | 1752 | if (next == count) |
| 1438 | expr += ')'; | 1753 | expr += ')'; |
| 1439 | else if (next < count) | 1754 | else if (next < count) |
| 1440 | expr += ", "; | 1755 | expr += ", "; |
| 1441 | } | 1756 | } |
| 1757 | |||
| 1758 | // Store a copy of the expression without the lod to be used with texture buffers | ||
| 1759 | std::string expr_buffer = expr; | ||
| 1760 | |||
| 1442 | if (meta->lod) { | 1761 | if (meta->lod) { |
| 1443 | expr += ", "; | 1762 | expr += ", "; |
| 1444 | expr += CastOperand(Visit(meta->lod), Type::Int); | 1763 | expr += Visit(meta->lod).AsInt(); |
| 1445 | } | 1764 | } |
| 1446 | expr += ')'; | 1765 | expr += ')'; |
| 1766 | expr += GetSwizzle(meta->element); | ||
| 1767 | |||
| 1768 | expr_buffer += ')'; | ||
| 1769 | expr_buffer += GetSwizzle(meta->element); | ||
| 1447 | 1770 | ||
| 1448 | return expr + GetSwizzle(meta->element); | 1771 | const std::string tmp{code.GenerateTemporary()}; |
| 1772 | EmitIfdefIsBuffer(meta->sampler); | ||
| 1773 | code.AddLine("float {} = {};", tmp, expr_buffer); | ||
| 1774 | code.AddLine("#else"); | ||
| 1775 | code.AddLine("float {} = {};", tmp, expr); | ||
| 1776 | code.AddLine("#endif"); | ||
| 1777 | |||
| 1778 | return {tmp, Type::Float}; | ||
| 1779 | } | ||
| 1780 | |||
| 1781 | Expression ImageStore(Operation operation) { | ||
| 1782 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 1783 | code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), | ||
| 1784 | BuildIntegerCoordinates(operation), BuildImageValues(operation)); | ||
| 1785 | return {}; | ||
| 1786 | } | ||
| 1787 | |||
| 1788 | Expression AtomicImageAdd(Operation operation) { | ||
| 1789 | return AtomicImage(operation, "imageAtomicAdd"); | ||
| 1790 | } | ||
| 1791 | |||
| 1792 | Expression AtomicImageMin(Operation operation) { | ||
| 1793 | return AtomicImage(operation, "imageAtomicMin"); | ||
| 1794 | } | ||
| 1795 | |||
| 1796 | Expression AtomicImageMax(Operation operation) { | ||
| 1797 | return AtomicImage(operation, "imageAtomicMax"); | ||
| 1798 | } | ||
| 1799 | Expression AtomicImageAnd(Operation operation) { | ||
| 1800 | return AtomicImage(operation, "imageAtomicAnd"); | ||
| 1449 | } | 1801 | } |
| 1450 | 1802 | ||
| 1451 | std::string Branch(Operation operation) { | 1803 | Expression AtomicImageOr(Operation operation) { |
| 1804 | return AtomicImage(operation, "imageAtomicOr"); | ||
| 1805 | } | ||
| 1806 | |||
| 1807 | Expression AtomicImageXor(Operation operation) { | ||
| 1808 | return AtomicImage(operation, "imageAtomicXor"); | ||
| 1809 | } | ||
| 1810 | |||
| 1811 | Expression AtomicImageExchange(Operation operation) { | ||
| 1812 | return AtomicImage(operation, "imageAtomicExchange"); | ||
| 1813 | } | ||
| 1814 | |||
| 1815 | Expression Branch(Operation operation) { | ||
| 1452 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | 1816 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); |
| 1453 | UNIMPLEMENTED_IF(!target); | 1817 | UNIMPLEMENTED_IF(!target); |
| 1454 | 1818 | ||
| 1455 | code.AddLine("jmp_to = 0x{:x}u;", target->GetValue()); | 1819 | code.AddLine("jmp_to = 0x{:X}U;", target->GetValue()); |
| 1456 | code.AddLine("break;"); | 1820 | code.AddLine("break;"); |
| 1457 | return {}; | 1821 | return {}; |
| 1458 | } | 1822 | } |
| 1459 | 1823 | ||
| 1460 | std::string PushFlowStack(Operation operation) { | 1824 | Expression BranchIndirect(Operation operation) { |
| 1825 | const std::string op_a = VisitOperand(operation, 0).AsUint(); | ||
| 1826 | |||
| 1827 | code.AddLine("jmp_to = {};", op_a); | ||
| 1828 | code.AddLine("break;"); | ||
| 1829 | return {}; | ||
| 1830 | } | ||
| 1831 | |||
| 1832 | Expression PushFlowStack(Operation operation) { | ||
| 1461 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | 1833 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); |
| 1462 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | 1834 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); |
| 1463 | UNIMPLEMENTED_IF(!target); | 1835 | UNIMPLEMENTED_IF(!target); |
| 1464 | 1836 | ||
| 1465 | code.AddLine("{}[{}++] = 0x{:x}u;", FlowStackName(stack), FlowStackTopName(stack), | 1837 | code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack), |
| 1466 | target->GetValue()); | 1838 | target->GetValue()); |
| 1467 | return {}; | 1839 | return {}; |
| 1468 | } | 1840 | } |
| 1469 | 1841 | ||
| 1470 | std::string PopFlowStack(Operation operation) { | 1842 | Expression PopFlowStack(Operation operation) { |
| 1471 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | 1843 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); |
| 1472 | code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); | 1844 | code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); |
| 1473 | code.AddLine("break;"); | 1845 | code.AddLine("break;"); |
| 1474 | return {}; | 1846 | return {}; |
| 1475 | } | 1847 | } |
| 1476 | 1848 | ||
| 1477 | std::string Exit(Operation operation) { | 1849 | Expression Exit(Operation operation) { |
| 1478 | if (stage != ShaderStage::Fragment) { | 1850 | if (stage != ProgramType::Fragment) { |
| 1479 | code.AddLine("return;"); | 1851 | code.AddLine("return;"); |
| 1480 | return {}; | 1852 | return {}; |
| 1481 | } | 1853 | } |
| 1482 | const auto& used_registers = ir.GetRegisters(); | 1854 | const auto& used_registers = ir.GetRegisters(); |
| 1483 | const auto SafeGetRegister = [&](u32 reg) -> std::string { | 1855 | const auto SafeGetRegister = [&](u32 reg) -> Expression { |
| 1484 | // TODO(Rodrigo): Replace with contains once C++20 releases | 1856 | // TODO(Rodrigo): Replace with contains once C++20 releases |
| 1485 | if (used_registers.find(reg) != used_registers.end()) { | 1857 | if (used_registers.find(reg) != used_registers.end()) { |
| 1486 | return GetRegister(reg); | 1858 | return {GetRegister(reg), Type::Float}; |
| 1487 | } | 1859 | } |
| 1488 | return "0.0f"; | 1860 | return {"0.0f", Type::Float}; |
| 1489 | }; | 1861 | }; |
| 1490 | 1862 | ||
| 1491 | UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); | 1863 | UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); |
| @@ -1498,7 +1870,7 @@ private: | |||
| 1498 | for (u32 component = 0; component < 4; ++component) { | 1870 | for (u32 component = 0; component < 4; ++component) { |
| 1499 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | 1871 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { |
| 1500 | code.AddLine("FragColor{}[{}] = {};", render_target, component, | 1872 | code.AddLine("FragColor{}[{}] = {};", render_target, component, |
| 1501 | SafeGetRegister(current_reg)); | 1873 | SafeGetRegister(current_reg).AsFloat()); |
| 1502 | ++current_reg; | 1874 | ++current_reg; |
| 1503 | } | 1875 | } |
| 1504 | } | 1876 | } |
| @@ -1507,14 +1879,14 @@ private: | |||
| 1507 | if (header.ps.omap.depth) { | 1879 | if (header.ps.omap.depth) { |
| 1508 | // The depth output is always 2 registers after the last color output, and current_reg | 1880 | // The depth output is always 2 registers after the last color output, and current_reg |
| 1509 | // already contains one past the last color register. | 1881 | // already contains one past the last color register. |
| 1510 | code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1)); | 1882 | code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat()); |
| 1511 | } | 1883 | } |
| 1512 | 1884 | ||
| 1513 | code.AddLine("return;"); | 1885 | code.AddLine("return;"); |
| 1514 | return {}; | 1886 | return {}; |
| 1515 | } | 1887 | } |
| 1516 | 1888 | ||
| 1517 | std::string Discard(Operation operation) { | 1889 | Expression Discard(Operation operation) { |
| 1518 | // Enclose "discard" in a conditional, so that GLSL compilation does not complain | 1890 | // Enclose "discard" in a conditional, so that GLSL compilation does not complain |
| 1519 | // about unexecuted instructions that may follow this. | 1891 | // about unexecuted instructions that may follow this. |
| 1520 | code.AddLine("if (true) {{"); | 1892 | code.AddLine("if (true) {{"); |
| @@ -1525,8 +1897,8 @@ private: | |||
| 1525 | return {}; | 1897 | return {}; |
| 1526 | } | 1898 | } |
| 1527 | 1899 | ||
| 1528 | std::string EmitVertex(Operation operation) { | 1900 | Expression EmitVertex(Operation operation) { |
| 1529 | ASSERT_MSG(stage == ShaderStage::Geometry, | 1901 | ASSERT_MSG(stage == ProgramType::Geometry, |
| 1530 | "EmitVertex is expected to be used in a geometry shader."); | 1902 | "EmitVertex is expected to be used in a geometry shader."); |
| 1531 | 1903 | ||
| 1532 | // If a geometry shader is attached, it will always flip (it's the last stage before | 1904 | // If a geometry shader is attached, it will always flip (it's the last stage before |
| @@ -1536,30 +1908,72 @@ private: | |||
| 1536 | return {}; | 1908 | return {}; |
| 1537 | } | 1909 | } |
| 1538 | 1910 | ||
| 1539 | std::string EndPrimitive(Operation operation) { | 1911 | Expression EndPrimitive(Operation operation) { |
| 1540 | ASSERT_MSG(stage == ShaderStage::Geometry, | 1912 | ASSERT_MSG(stage == ProgramType::Geometry, |
| 1541 | "EndPrimitive is expected to be used in a geometry shader."); | 1913 | "EndPrimitive is expected to be used in a geometry shader."); |
| 1542 | 1914 | ||
| 1543 | code.AddLine("EndPrimitive();"); | 1915 | code.AddLine("EndPrimitive();"); |
| 1544 | return {}; | 1916 | return {}; |
| 1545 | } | 1917 | } |
| 1546 | 1918 | ||
| 1547 | std::string YNegate(Operation operation) { | 1919 | Expression YNegate(Operation operation) { |
| 1548 | // Config pack's third value is Y_NEGATE's state. | 1920 | // Config pack's third value is Y_NEGATE's state. |
| 1549 | return "uintBitsToFloat(config_pack[2])"; | 1921 | return {"config_pack[2]", Type::Uint}; |
| 1550 | } | 1922 | } |
| 1551 | 1923 | ||
| 1552 | template <u32 element> | 1924 | template <u32 element> |
| 1553 | std::string LocalInvocationId(Operation) { | 1925 | Expression LocalInvocationId(Operation) { |
| 1554 | return "utof(gl_LocalInvocationID"s + GetSwizzle(element) + ')'; | 1926 | return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint}; |
| 1555 | } | 1927 | } |
| 1556 | 1928 | ||
| 1557 | template <u32 element> | 1929 | template <u32 element> |
| 1558 | std::string WorkGroupId(Operation) { | 1930 | Expression WorkGroupId(Operation) { |
| 1559 | return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; | 1931 | return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint}; |
| 1932 | } | ||
| 1933 | |||
| 1934 | Expression BallotThread(Operation operation) { | ||
| 1935 | const std::string value = VisitOperand(operation, 0).AsBool(); | ||
| 1936 | if (!device.HasWarpIntrinsics()) { | ||
| 1937 | LOG_ERROR(Render_OpenGL, | ||
| 1938 | "Nvidia warp intrinsics are not available and its required by a shader"); | ||
| 1939 | // Stub on non-Nvidia devices by simulating all threads voting the same as the active | ||
| 1940 | // one. | ||
| 1941 | return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; | ||
| 1942 | } | ||
| 1943 | return {fmt::format("ballotThreadNV({})", value), Type::Uint}; | ||
| 1944 | } | ||
| 1945 | |||
| 1946 | Expression Vote(Operation operation, const char* func) { | ||
| 1947 | const std::string value = VisitOperand(operation, 0).AsBool(); | ||
| 1948 | if (!device.HasWarpIntrinsics()) { | ||
| 1949 | LOG_ERROR(Render_OpenGL, | ||
| 1950 | "Nvidia vote intrinsics are not available and its required by a shader"); | ||
| 1951 | // Stub with a warp size of one. | ||
| 1952 | return {value, Type::Bool}; | ||
| 1953 | } | ||
| 1954 | return {fmt::format("{}({})", func, value), Type::Bool}; | ||
| 1955 | } | ||
| 1956 | |||
| 1957 | Expression VoteAll(Operation operation) { | ||
| 1958 | return Vote(operation, "allThreadsNV"); | ||
| 1560 | } | 1959 | } |
| 1561 | 1960 | ||
| 1562 | static constexpr OperationDecompilersArray operation_decompilers = { | 1961 | Expression VoteAny(Operation operation) { |
| 1962 | return Vote(operation, "anyThreadNV"); | ||
| 1963 | } | ||
| 1964 | |||
| 1965 | Expression VoteEqual(Operation operation) { | ||
| 1966 | if (!device.HasWarpIntrinsics()) { | ||
| 1967 | LOG_ERROR(Render_OpenGL, | ||
| 1968 | "Nvidia vote intrinsics are not available and its required by a shader"); | ||
| 1969 | // We must return true here since a stub for a theoretical warp size of 1 will always | ||
| 1970 | // return an equal result for all its votes. | ||
| 1971 | return {"true", Type::Bool}; | ||
| 1972 | } | ||
| 1973 | return Vote(operation, "allThreadsEqualNV"); | ||
| 1974 | } | ||
| 1975 | |||
| 1976 | static constexpr std::array operation_decompilers = { | ||
| 1563 | &GLSLDecompiler::Assign, | 1977 | &GLSLDecompiler::Assign, |
| 1564 | 1978 | ||
| 1565 | &GLSLDecompiler::Select, | 1979 | &GLSLDecompiler::Select, |
| @@ -1571,6 +1985,8 @@ private: | |||
| 1571 | &GLSLDecompiler::Negate<Type::Float>, | 1985 | &GLSLDecompiler::Negate<Type::Float>, |
| 1572 | &GLSLDecompiler::Absolute<Type::Float>, | 1986 | &GLSLDecompiler::Absolute<Type::Float>, |
| 1573 | &GLSLDecompiler::FClamp, | 1987 | &GLSLDecompiler::FClamp, |
| 1988 | &GLSLDecompiler::FCastHalf0, | ||
| 1989 | &GLSLDecompiler::FCastHalf1, | ||
| 1574 | &GLSLDecompiler::Min<Type::Float>, | 1990 | &GLSLDecompiler::Min<Type::Float>, |
| 1575 | &GLSLDecompiler::Max<Type::Float>, | 1991 | &GLSLDecompiler::Max<Type::Float>, |
| 1576 | &GLSLDecompiler::FCos, | 1992 | &GLSLDecompiler::FCos, |
| @@ -1631,6 +2047,7 @@ private: | |||
| 1631 | &GLSLDecompiler::Absolute<Type::HalfFloat>, | 2047 | &GLSLDecompiler::Absolute<Type::HalfFloat>, |
| 1632 | &GLSLDecompiler::HNegate, | 2048 | &GLSLDecompiler::HNegate, |
| 1633 | &GLSLDecompiler::HClamp, | 2049 | &GLSLDecompiler::HClamp, |
| 2050 | &GLSLDecompiler::HCastFloat, | ||
| 1634 | &GLSLDecompiler::HUnpack, | 2051 | &GLSLDecompiler::HUnpack, |
| 1635 | &GLSLDecompiler::HMergeF32, | 2052 | &GLSLDecompiler::HMergeF32, |
| 1636 | &GLSLDecompiler::HMergeH0, | 2053 | &GLSLDecompiler::HMergeH0, |
| @@ -1643,8 +2060,7 @@ private: | |||
| 1643 | &GLSLDecompiler::LogicalXor, | 2060 | &GLSLDecompiler::LogicalXor, |
| 1644 | &GLSLDecompiler::LogicalNegate, | 2061 | &GLSLDecompiler::LogicalNegate, |
| 1645 | &GLSLDecompiler::LogicalPick2, | 2062 | &GLSLDecompiler::LogicalPick2, |
| 1646 | &GLSLDecompiler::LogicalAll2, | 2063 | &GLSLDecompiler::LogicalAnd2, |
| 1647 | &GLSLDecompiler::LogicalAny2, | ||
| 1648 | 2064 | ||
| 1649 | &GLSLDecompiler::LogicalLessThan<Type::Float>, | 2065 | &GLSLDecompiler::LogicalLessThan<Type::Float>, |
| 1650 | &GLSLDecompiler::LogicalEqual<Type::Float>, | 2066 | &GLSLDecompiler::LogicalEqual<Type::Float>, |
| @@ -1688,7 +2104,17 @@ private: | |||
| 1688 | &GLSLDecompiler::TextureQueryLod, | 2104 | &GLSLDecompiler::TextureQueryLod, |
| 1689 | &GLSLDecompiler::TexelFetch, | 2105 | &GLSLDecompiler::TexelFetch, |
| 1690 | 2106 | ||
| 2107 | &GLSLDecompiler::ImageStore, | ||
| 2108 | &GLSLDecompiler::AtomicImageAdd, | ||
| 2109 | &GLSLDecompiler::AtomicImageMin, | ||
| 2110 | &GLSLDecompiler::AtomicImageMax, | ||
| 2111 | &GLSLDecompiler::AtomicImageAnd, | ||
| 2112 | &GLSLDecompiler::AtomicImageOr, | ||
| 2113 | &GLSLDecompiler::AtomicImageXor, | ||
| 2114 | &GLSLDecompiler::AtomicImageExchange, | ||
| 2115 | |||
| 1691 | &GLSLDecompiler::Branch, | 2116 | &GLSLDecompiler::Branch, |
| 2117 | &GLSLDecompiler::BranchIndirect, | ||
| 1692 | &GLSLDecompiler::PushFlowStack, | 2118 | &GLSLDecompiler::PushFlowStack, |
| 1693 | &GLSLDecompiler::PopFlowStack, | 2119 | &GLSLDecompiler::PopFlowStack, |
| 1694 | &GLSLDecompiler::Exit, | 2120 | &GLSLDecompiler::Exit, |
| @@ -1704,7 +2130,13 @@ private: | |||
| 1704 | &GLSLDecompiler::WorkGroupId<0>, | 2130 | &GLSLDecompiler::WorkGroupId<0>, |
| 1705 | &GLSLDecompiler::WorkGroupId<1>, | 2131 | &GLSLDecompiler::WorkGroupId<1>, |
| 1706 | &GLSLDecompiler::WorkGroupId<2>, | 2132 | &GLSLDecompiler::WorkGroupId<2>, |
| 2133 | |||
| 2134 | &GLSLDecompiler::BallotThread, | ||
| 2135 | &GLSLDecompiler::VoteAll, | ||
| 2136 | &GLSLDecompiler::VoteAny, | ||
| 2137 | &GLSLDecompiler::VoteEqual, | ||
| 1707 | }; | 2138 | }; |
| 2139 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | ||
| 1708 | 2140 | ||
| 1709 | std::string GetRegister(u32 index) const { | 2141 | std::string GetRegister(u32 index) const { |
| 1710 | return GetDeclarationWithSuffix(index, "gpr"); | 2142 | return GetDeclarationWithSuffix(index, "gpr"); |
| @@ -1744,8 +2176,8 @@ private: | |||
| 1744 | } | 2176 | } |
| 1745 | 2177 | ||
| 1746 | std::string GetInternalFlag(InternalFlag flag) const { | 2178 | std::string GetInternalFlag(InternalFlag flag) const { |
| 1747 | constexpr std::array<const char*, 4> InternalFlagNames = {"zero_flag", "sign_flag", | 2179 | constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", |
| 1748 | "carry_flag", "overflow_flag"}; | 2180 | "overflow_flag"}; |
| 1749 | const auto index = static_cast<u32>(flag); | 2181 | const auto index = static_cast<u32>(flag); |
| 1750 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); | 2182 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); |
| 1751 | 2183 | ||
| @@ -1756,12 +2188,20 @@ private: | |||
| 1756 | return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); | 2188 | return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); |
| 1757 | } | 2189 | } |
| 1758 | 2190 | ||
| 2191 | std::string GetImage(const Image& image) const { | ||
| 2192 | return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image"); | ||
| 2193 | } | ||
| 2194 | |||
| 2195 | void EmitIfdefIsBuffer(const Sampler& sampler) { | ||
| 2196 | code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex()); | ||
| 2197 | } | ||
| 2198 | |||
| 1759 | std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { | 2199 | std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { |
| 1760 | return fmt::format("{}_{}_{}", name, index, suffix); | 2200 | return fmt::format("{}_{}_{}", name, index, suffix); |
| 1761 | } | 2201 | } |
| 1762 | 2202 | ||
| 1763 | u32 GetNumPhysicalInputAttributes() const { | 2203 | u32 GetNumPhysicalInputAttributes() const { |
| 1764 | return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); | 2204 | return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); |
| 1765 | } | 2205 | } |
| 1766 | 2206 | ||
| 1767 | u32 GetNumPhysicalAttributes() const { | 2207 | u32 GetNumPhysicalAttributes() const { |
| @@ -1774,7 +2214,7 @@ private: | |||
| 1774 | 2214 | ||
| 1775 | const Device& device; | 2215 | const Device& device; |
| 1776 | const ShaderIR& ir; | 2216 | const ShaderIR& ir; |
| 1777 | const ShaderStage stage; | 2217 | const ProgramType stage; |
| 1778 | const std::string suffix; | 2218 | const std::string suffix; |
| 1779 | const Header header; | 2219 | const Header header; |
| 1780 | 2220 | ||
| @@ -1785,27 +2225,19 @@ private: | |||
| 1785 | 2225 | ||
| 1786 | std::string GetCommonDeclarations() { | 2226 | std::string GetCommonDeclarations() { |
| 1787 | return fmt::format( | 2227 | return fmt::format( |
| 1788 | "#define MAX_CONSTBUFFER_ELEMENTS {}\n" | ||
| 1789 | "#define ftoi floatBitsToInt\n" | 2228 | "#define ftoi floatBitsToInt\n" |
| 1790 | "#define ftou floatBitsToUint\n" | 2229 | "#define ftou floatBitsToUint\n" |
| 1791 | "#define itof intBitsToFloat\n" | 2230 | "#define itof intBitsToFloat\n" |
| 1792 | "#define utof uintBitsToFloat\n\n" | 2231 | "#define utof uintBitsToFloat\n\n" |
| 1793 | "float fromHalf2(vec2 pair) {{\n" | 2232 | "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n" |
| 1794 | " return utof(packHalf2x16(pair));\n" | ||
| 1795 | "}}\n\n" | ||
| 1796 | "vec2 toHalf2(float value) {{\n" | ||
| 1797 | " return unpackHalf2x16(ftou(value));\n" | ||
| 1798 | "}}\n\n" | ||
| 1799 | "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n" | ||
| 1800 | " bvec2 is_nan1 = isnan(pair1);\n" | 2233 | " bvec2 is_nan1 = isnan(pair1);\n" |
| 1801 | " bvec2 is_nan2 = isnan(pair2);\n" | 2234 | " bvec2 is_nan2 = isnan(pair2);\n" |
| 1802 | " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " | 2235 | " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " |
| 1803 | "is_nan2.y);\n" | 2236 | "is_nan2.y);\n" |
| 1804 | "}}\n", | 2237 | "}}\n\n"); |
| 1805 | MAX_CONSTBUFFER_ELEMENTS); | ||
| 1806 | } | 2238 | } |
| 1807 | 2239 | ||
| 1808 | ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage, | 2240 | ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, |
| 1809 | const std::string& suffix) { | 2241 | const std::string& suffix) { |
| 1810 | GLSLDecompiler decompiler(device, ir, stage, suffix); | 2242 | GLSLDecompiler decompiler(device, ir, stage, suffix); |
| 1811 | decompiler.Decompile(); | 2243 | decompiler.Decompile(); |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index c1569e737..2ea02f5bf 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -12,14 +12,26 @@ | |||
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/shader/shader_ir.h" | 13 | #include "video_core/shader/shader_ir.h" |
| 14 | 14 | ||
| 15 | namespace OpenGL { | ||
| 16 | class Device; | ||
| 17 | } | ||
| 18 | |||
| 19 | namespace VideoCommon::Shader { | 15 | namespace VideoCommon::Shader { |
| 20 | class ShaderIR; | 16 | class ShaderIR; |
| 21 | } | 17 | } |
| 22 | 18 | ||
| 19 | namespace OpenGL { | ||
| 20 | |||
| 21 | class Device; | ||
| 22 | |||
| 23 | enum class ProgramType : u32 { | ||
| 24 | VertexA = 0, | ||
| 25 | VertexB = 1, | ||
| 26 | TessellationControl = 2, | ||
| 27 | TessellationEval = 3, | ||
| 28 | Geometry = 4, | ||
| 29 | Fragment = 5, | ||
| 30 | Compute = 6 | ||
| 31 | }; | ||
| 32 | |||
| 33 | } // namespace OpenGL | ||
| 34 | |||
| 23 | namespace OpenGL::GLShader { | 35 | namespace OpenGL::GLShader { |
| 24 | 36 | ||
| 25 | struct ShaderEntries; | 37 | struct ShaderEntries; |
| @@ -27,6 +39,7 @@ struct ShaderEntries; | |||
| 27 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 39 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 28 | using ProgramResult = std::pair<std::string, ShaderEntries>; | 40 | using ProgramResult = std::pair<std::string, ShaderEntries>; |
| 29 | using SamplerEntry = VideoCommon::Shader::Sampler; | 41 | using SamplerEntry = VideoCommon::Shader::Sampler; |
| 42 | using ImageEntry = VideoCommon::Shader::Image; | ||
| 30 | 43 | ||
| 31 | class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { | 44 | class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { |
| 32 | public: | 45 | public: |
| @@ -74,14 +87,16 @@ struct ShaderEntries { | |||
| 74 | std::vector<ConstBufferEntry> const_buffers; | 87 | std::vector<ConstBufferEntry> const_buffers; |
| 75 | std::vector<SamplerEntry> samplers; | 88 | std::vector<SamplerEntry> samplers; |
| 76 | std::vector<SamplerEntry> bindless_samplers; | 89 | std::vector<SamplerEntry> bindless_samplers; |
| 90 | std::vector<ImageEntry> images; | ||
| 77 | std::vector<GlobalMemoryEntry> global_memory_entries; | 91 | std::vector<GlobalMemoryEntry> global_memory_entries; |
| 78 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 92 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 93 | bool shader_viewport_layer_array{}; | ||
| 79 | std::size_t shader_length{}; | 94 | std::size_t shader_length{}; |
| 80 | }; | 95 | }; |
| 81 | 96 | ||
| 82 | std::string GetCommonDeclarations(); | 97 | std::string GetCommonDeclarations(); |
| 83 | 98 | ||
| 84 | ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 99 | ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 85 | Maxwell::ShaderStage stage, const std::string& suffix); | 100 | ProgramType stage, const std::string& suffix); |
| 86 | 101 | ||
| 87 | } // namespace OpenGL::GLShader | 102 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index ee4a45ca2..f141c4e3b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -34,11 +34,11 @@ enum class PrecompiledEntryKind : u32 { | |||
| 34 | Dump, | 34 | Dump, |
| 35 | }; | 35 | }; |
| 36 | 36 | ||
| 37 | constexpr u32 NativeVersion = 1; | 37 | constexpr u32 NativeVersion = 4; |
| 38 | 38 | ||
| 39 | // Making sure sizes doesn't change by accident | 39 | // Making sure sizes doesn't change by accident |
| 40 | static_assert(sizeof(BaseBindings) == 12); | 40 | static_assert(sizeof(BaseBindings) == 16); |
| 41 | static_assert(sizeof(ShaderDiskCacheUsage) == 24); | 41 | static_assert(sizeof(ShaderDiskCacheUsage) == 40); |
| 42 | 42 | ||
| 43 | namespace { | 43 | namespace { |
| 44 | 44 | ||
| @@ -51,7 +51,7 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { | |||
| 51 | 51 | ||
| 52 | } // namespace | 52 | } // namespace |
| 53 | 53 | ||
| 54 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, | 54 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |
| 55 | u32 program_code_size, u32 program_code_size_b, | 55 | u32 program_code_size, u32 program_code_size_b, |
| 56 | ProgramCode program_code, ProgramCode program_code_b) | 56 | ProgramCode program_code, ProgramCode program_code_b) |
| 57 | : unique_identifier{unique_identifier}, program_type{program_type}, | 57 | : unique_identifier{unique_identifier}, program_type{program_type}, |
| @@ -332,11 +332,37 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn | |||
| 332 | static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless); | 332 | static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless); |
| 333 | } | 333 | } |
| 334 | 334 | ||
| 335 | u32 images_count{}; | ||
| 336 | if (!LoadObjectFromPrecompiled(images_count)) { | ||
| 337 | return {}; | ||
| 338 | } | ||
| 339 | for (u32 i = 0; i < images_count; ++i) { | ||
| 340 | u64 offset{}; | ||
| 341 | u64 index{}; | ||
| 342 | u32 type{}; | ||
| 343 | u8 is_bindless{}; | ||
| 344 | u8 is_written{}; | ||
| 345 | u8 is_read{}; | ||
| 346 | u8 is_size_known{}; | ||
| 347 | u32 size{}; | ||
| 348 | if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || | ||
| 349 | !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || | ||
| 350 | !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) || | ||
| 351 | !LoadObjectFromPrecompiled(is_size_known) || !LoadObjectFromPrecompiled(size)) { | ||
| 352 | return {}; | ||
| 353 | } | ||
| 354 | entry.entries.images.emplace_back( | ||
| 355 | static_cast<std::size_t>(offset), static_cast<std::size_t>(index), | ||
| 356 | static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0, | ||
| 357 | is_read != 0, | ||
| 358 | is_size_known ? std::make_optional(static_cast<Tegra::Shader::ImageAtomicSize>(size)) | ||
| 359 | : std::nullopt); | ||
| 360 | } | ||
| 361 | |||
| 335 | u32 global_memory_count{}; | 362 | u32 global_memory_count{}; |
| 336 | if (!LoadObjectFromPrecompiled(global_memory_count)) { | 363 | if (!LoadObjectFromPrecompiled(global_memory_count)) { |
| 337 | return {}; | 364 | return {}; |
| 338 | } | 365 | } |
| 339 | |||
| 340 | for (u32 i = 0; i < global_memory_count; ++i) { | 366 | for (u32 i = 0; i < global_memory_count; ++i) { |
| 341 | u32 cbuf_index{}; | 367 | u32 cbuf_index{}; |
| 342 | u32 cbuf_offset{}; | 368 | u32 cbuf_offset{}; |
| @@ -356,11 +382,16 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn | |||
| 356 | } | 382 | } |
| 357 | } | 383 | } |
| 358 | 384 | ||
| 385 | bool shader_viewport_layer_array{}; | ||
| 386 | if (!LoadObjectFromPrecompiled(shader_viewport_layer_array)) { | ||
| 387 | return {}; | ||
| 388 | } | ||
| 389 | entry.entries.shader_viewport_layer_array = shader_viewport_layer_array; | ||
| 390 | |||
| 359 | u64 shader_length{}; | 391 | u64 shader_length{}; |
| 360 | if (!LoadObjectFromPrecompiled(shader_length)) { | 392 | if (!LoadObjectFromPrecompiled(shader_length)) { |
| 361 | return {}; | 393 | return {}; |
| 362 | } | 394 | } |
| 363 | |||
| 364 | entry.entries.shader_length = static_cast<std::size_t>(shader_length); | 395 | entry.entries.shader_length = static_cast<std::size_t>(shader_length); |
| 365 | 396 | ||
| 366 | return entry; | 397 | return entry; |
| @@ -400,6 +431,22 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: | |||
| 400 | } | 431 | } |
| 401 | } | 432 | } |
| 402 | 433 | ||
| 434 | if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) { | ||
| 435 | return false; | ||
| 436 | } | ||
| 437 | for (const auto& image : entries.images) { | ||
| 438 | const u32 size = image.IsSizeKnown() ? static_cast<u32>(image.GetSize()) : 0U; | ||
| 439 | if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) || | ||
| 440 | !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) || | ||
| 441 | !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) || | ||
| 442 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) || | ||
| 443 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) || | ||
| 444 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) || | ||
| 445 | !SaveObjectToPrecompiled(image.IsSizeKnown()) || !SaveObjectToPrecompiled(size)) { | ||
| 446 | return false; | ||
| 447 | } | ||
| 448 | } | ||
| 449 | |||
| 403 | if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) { | 450 | if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) { |
| 404 | return false; | 451 | return false; |
| 405 | } | 452 | } |
| @@ -417,6 +464,10 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: | |||
| 417 | } | 464 | } |
| 418 | } | 465 | } |
| 419 | 466 | ||
| 467 | if (!SaveObjectToPrecompiled(entries.shader_viewport_layer_array)) { | ||
| 468 | return false; | ||
| 469 | } | ||
| 470 | |||
| 420 | if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { | 471 | if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { |
| 421 | return false; | 472 | return false; |
| 422 | } | 473 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index ecd72ba58..cc8bbd61e 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <bitset> | ||
| 7 | #include <optional> | 8 | #include <optional> |
| 8 | #include <string> | 9 | #include <string> |
| 9 | #include <tuple> | 10 | #include <tuple> |
| @@ -17,7 +18,6 @@ | |||
| 17 | #include "common/assert.h" | 18 | #include "common/assert.h" |
| 18 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 19 | #include "core/file_sys/vfs_vector.h" | 20 | #include "core/file_sys/vfs_vector.h" |
| 20 | #include "video_core/engines/maxwell_3d.h" | ||
| 21 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 21 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 22 | 22 | ||
| 23 | namespace Core { | 23 | namespace Core { |
| @@ -30,22 +30,23 @@ class IOFile; | |||
| 30 | 30 | ||
| 31 | namespace OpenGL { | 31 | namespace OpenGL { |
| 32 | 32 | ||
| 33 | using ProgramCode = std::vector<u64>; | ||
| 34 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 35 | |||
| 36 | struct ShaderDiskCacheUsage; | 33 | struct ShaderDiskCacheUsage; |
| 37 | struct ShaderDiskCacheDump; | 34 | struct ShaderDiskCacheDump; |
| 38 | 35 | ||
| 36 | using ProgramCode = std::vector<u64>; | ||
| 39 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; | 37 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; |
| 38 | using TextureBufferUsage = std::bitset<64>; | ||
| 40 | 39 | ||
| 41 | /// Allocated bindings used by an OpenGL shader program | 40 | /// Allocated bindings used by an OpenGL shader program |
| 42 | struct BaseBindings { | 41 | struct BaseBindings { |
| 43 | u32 cbuf{}; | 42 | u32 cbuf{}; |
| 44 | u32 gmem{}; | 43 | u32 gmem{}; |
| 45 | u32 sampler{}; | 44 | u32 sampler{}; |
| 45 | u32 image{}; | ||
| 46 | 46 | ||
| 47 | bool operator==(const BaseBindings& rhs) const { | 47 | bool operator==(const BaseBindings& rhs) const { |
| 48 | return std::tie(cbuf, gmem, sampler) == std::tie(rhs.cbuf, rhs.gmem, rhs.sampler); | 48 | return std::tie(cbuf, gmem, sampler, image) == |
| 49 | std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image); | ||
| 49 | } | 50 | } |
| 50 | 51 | ||
| 51 | bool operator!=(const BaseBindings& rhs) const { | 52 | bool operator!=(const BaseBindings& rhs) const { |
| @@ -53,15 +54,29 @@ struct BaseBindings { | |||
| 53 | } | 54 | } |
| 54 | }; | 55 | }; |
| 55 | 56 | ||
| 56 | /// Describes how a shader is used | 57 | /// Describes the different variants a single program can be compiled. |
| 58 | struct ProgramVariant { | ||
| 59 | BaseBindings base_bindings; | ||
| 60 | GLenum primitive_mode{}; | ||
| 61 | TextureBufferUsage texture_buffer_usage{}; | ||
| 62 | |||
| 63 | bool operator==(const ProgramVariant& rhs) const { | ||
| 64 | return std::tie(base_bindings, primitive_mode, texture_buffer_usage) == | ||
| 65 | std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.texture_buffer_usage); | ||
| 66 | } | ||
| 67 | |||
| 68 | bool operator!=(const ProgramVariant& rhs) const { | ||
| 69 | return !operator==(rhs); | ||
| 70 | } | ||
| 71 | }; | ||
| 72 | |||
| 73 | /// Describes how a shader is used. | ||
| 57 | struct ShaderDiskCacheUsage { | 74 | struct ShaderDiskCacheUsage { |
| 58 | u64 unique_identifier{}; | 75 | u64 unique_identifier{}; |
| 59 | BaseBindings bindings; | 76 | ProgramVariant variant; |
| 60 | GLenum primitive{}; | ||
| 61 | 77 | ||
| 62 | bool operator==(const ShaderDiskCacheUsage& rhs) const { | 78 | bool operator==(const ShaderDiskCacheUsage& rhs) const { |
| 63 | return std::tie(unique_identifier, bindings, primitive) == | 79 | return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant); |
| 64 | std::tie(rhs.unique_identifier, rhs.bindings, rhs.primitive); | ||
| 65 | } | 80 | } |
| 66 | 81 | ||
| 67 | bool operator!=(const ShaderDiskCacheUsage& rhs) const { | 82 | bool operator!=(const ShaderDiskCacheUsage& rhs) const { |
| @@ -76,7 +91,19 @@ namespace std { | |||
| 76 | template <> | 91 | template <> |
| 77 | struct hash<OpenGL::BaseBindings> { | 92 | struct hash<OpenGL::BaseBindings> { |
| 78 | std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept { | 93 | std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept { |
| 79 | return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16; | 94 | return static_cast<std::size_t>(bindings.cbuf) ^ |
| 95 | (static_cast<std::size_t>(bindings.gmem) << 8) ^ | ||
| 96 | (static_cast<std::size_t>(bindings.sampler) << 16) ^ | ||
| 97 | (static_cast<std::size_t>(bindings.image) << 24); | ||
| 98 | } | ||
| 99 | }; | ||
| 100 | |||
| 101 | template <> | ||
| 102 | struct hash<OpenGL::ProgramVariant> { | ||
| 103 | std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { | ||
| 104 | return std::hash<OpenGL::BaseBindings>()(variant.base_bindings) ^ | ||
| 105 | std::hash<OpenGL::TextureBufferUsage>()(variant.texture_buffer_usage) ^ | ||
| 106 | (static_cast<std::size_t>(variant.primitive_mode) << 6); | ||
| 80 | } | 107 | } |
| 81 | }; | 108 | }; |
| 82 | 109 | ||
| @@ -84,7 +111,7 @@ template <> | |||
| 84 | struct hash<OpenGL::ShaderDiskCacheUsage> { | 111 | struct hash<OpenGL::ShaderDiskCacheUsage> { |
| 85 | std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { | 112 | std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { |
| 86 | return static_cast<std::size_t>(usage.unique_identifier) ^ | 113 | return static_cast<std::size_t>(usage.unique_identifier) ^ |
| 87 | std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16; | 114 | std::hash<OpenGL::ProgramVariant>()(usage.variant); |
| 88 | } | 115 | } |
| 89 | }; | 116 | }; |
| 90 | 117 | ||
| @@ -95,7 +122,7 @@ namespace OpenGL { | |||
| 95 | /// Describes a shader how it's used by the guest GPU | 122 | /// Describes a shader how it's used by the guest GPU |
| 96 | class ShaderDiskCacheRaw { | 123 | class ShaderDiskCacheRaw { |
| 97 | public: | 124 | public: |
| 98 | explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, | 125 | explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |
| 99 | u32 program_code_size, u32 program_code_size_b, | 126 | u32 program_code_size, u32 program_code_size_b, |
| 100 | ProgramCode program_code, ProgramCode program_code_b); | 127 | ProgramCode program_code, ProgramCode program_code_b); |
| 101 | ShaderDiskCacheRaw(); | 128 | ShaderDiskCacheRaw(); |
| @@ -110,30 +137,13 @@ public: | |||
| 110 | } | 137 | } |
| 111 | 138 | ||
| 112 | bool HasProgramA() const { | 139 | bool HasProgramA() const { |
| 113 | return program_type == Maxwell::ShaderProgram::VertexA; | 140 | return program_type == ProgramType::VertexA; |
| 114 | } | 141 | } |
| 115 | 142 | ||
| 116 | Maxwell::ShaderProgram GetProgramType() const { | 143 | ProgramType GetProgramType() const { |
| 117 | return program_type; | 144 | return program_type; |
| 118 | } | 145 | } |
| 119 | 146 | ||
| 120 | Maxwell::ShaderStage GetProgramStage() const { | ||
| 121 | switch (program_type) { | ||
| 122 | case Maxwell::ShaderProgram::VertexA: | ||
| 123 | case Maxwell::ShaderProgram::VertexB: | ||
| 124 | return Maxwell::ShaderStage::Vertex; | ||
| 125 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 126 | return Maxwell::ShaderStage::TesselationControl; | ||
| 127 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 128 | return Maxwell::ShaderStage::TesselationEval; | ||
| 129 | case Maxwell::ShaderProgram::Geometry: | ||
| 130 | return Maxwell::ShaderStage::Geometry; | ||
| 131 | case Maxwell::ShaderProgram::Fragment: | ||
| 132 | return Maxwell::ShaderStage::Fragment; | ||
| 133 | } | ||
| 134 | UNREACHABLE(); | ||
| 135 | } | ||
| 136 | |||
| 137 | const ProgramCode& GetProgramCode() const { | 147 | const ProgramCode& GetProgramCode() const { |
| 138 | return program_code; | 148 | return program_code; |
| 139 | } | 149 | } |
| @@ -144,7 +154,7 @@ public: | |||
| 144 | 154 | ||
| 145 | private: | 155 | private: |
| 146 | u64 unique_identifier{}; | 156 | u64 unique_identifier{}; |
| 147 | Maxwell::ShaderProgram program_type{}; | 157 | ProgramType program_type{}; |
| 148 | u32 program_code_size{}; | 158 | u32 program_code_size{}; |
| 149 | u32 program_code_size_b{}; | 159 | u32 program_code_size_b{}; |
| 150 | 160 | ||
| @@ -275,26 +285,17 @@ private: | |||
| 275 | return LoadArrayFromPrecompiled(&object, 1); | 285 | return LoadArrayFromPrecompiled(&object, 1); |
| 276 | } | 286 | } |
| 277 | 287 | ||
| 278 | bool LoadObjectFromPrecompiled(bool& object) { | ||
| 279 | u8 value; | ||
| 280 | const bool read_ok = LoadArrayFromPrecompiled(&value, 1); | ||
| 281 | if (!read_ok) { | ||
| 282 | return false; | ||
| 283 | } | ||
| 284 | |||
| 285 | object = value != 0; | ||
| 286 | return true; | ||
| 287 | } | ||
| 288 | |||
| 289 | // Core system | ||
| 290 | Core::System& system; | 288 | Core::System& system; |
| 291 | // Stored transferable shaders | 289 | |
| 292 | std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; | 290 | // Stores whole precompiled cache which will be read from or saved to the precompiled chache |
| 293 | // Stores whole precompiled cache which will be read from/saved to the precompiled cache file | 291 | // file |
| 294 | FileSys::VectorVfsFile precompiled_cache_virtual_file; | 292 | FileSys::VectorVfsFile precompiled_cache_virtual_file; |
| 295 | // Stores the current offset of the precompiled cache file for IO purposes | 293 | // Stores the current offset of the precompiled cache file for IO purposes |
| 296 | std::size_t precompiled_cache_virtual_file_offset = 0; | 294 | std::size_t precompiled_cache_virtual_file_offset = 0; |
| 297 | 295 | ||
| 296 | // Stored transferable shaders | ||
| 297 | std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; | ||
| 298 | |||
| 298 | // The cache has been loaded at boot | 299 | // The cache has been loaded at boot |
| 299 | bool tried_to_load{}; | 300 | bool tried_to_load{}; |
| 300 | }; | 301 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 9148629ec..3a8d9e1da 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -14,7 +14,8 @@ using Tegra::Engines::Maxwell3D; | |||
| 14 | using VideoCommon::Shader::ProgramCode; | 14 | using VideoCommon::Shader::ProgramCode; |
| 15 | using VideoCommon::Shader::ShaderIR; | 15 | using VideoCommon::Shader::ShaderIR; |
| 16 | 16 | ||
| 17 | static constexpr u32 PROGRAM_OFFSET{10}; | 17 | static constexpr u32 PROGRAM_OFFSET = 10; |
| 18 | static constexpr u32 COMPUTE_OFFSET = 0; | ||
| 18 | 19 | ||
| 19 | ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { | 20 | ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { |
| 20 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 21 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| @@ -29,17 +30,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | |||
| 29 | }; | 30 | }; |
| 30 | 31 | ||
| 31 | )"; | 32 | )"; |
| 32 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | ||
| 33 | ProgramResult program = | ||
| 34 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); | ||
| 35 | 33 | ||
| 34 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | ||
| 35 | const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; | ||
| 36 | ProgramResult program = Decompile(device, program_ir, stage, "vertex"); | ||
| 36 | out += program.first; | 37 | out += program.first; |
| 37 | 38 | ||
| 38 | if (setup.IsDualProgram()) { | 39 | if (setup.IsDualProgram()) { |
| 39 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); | 40 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); |
| 40 | ProgramResult program_b = | 41 | ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); |
| 41 | Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); | ||
| 42 | |||
| 43 | out += program_b.first; | 42 | out += program_b.first; |
| 44 | } | 43 | } |
| 45 | 44 | ||
| @@ -80,9 +79,9 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | |||
| 80 | }; | 79 | }; |
| 81 | 80 | ||
| 82 | )"; | 81 | )"; |
| 83 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | 82 | |
| 84 | ProgramResult program = | 83 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); |
| 85 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); | 84 | ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); |
| 86 | out += program.first; | 85 | out += program.first; |
| 87 | 86 | ||
| 88 | out += R"( | 87 | out += R"( |
| @@ -115,10 +114,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | |||
| 115 | }; | 114 | }; |
| 116 | 115 | ||
| 117 | )"; | 116 | )"; |
| 118 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | 117 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); |
| 119 | ProgramResult program = | 118 | ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); |
| 120 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); | ||
| 121 | |||
| 122 | out += program.first; | 119 | out += program.first; |
| 123 | 120 | ||
| 124 | out += R"( | 121 | out += R"( |
| @@ -130,4 +127,22 @@ void main() { | |||
| 130 | return {std::move(out), std::move(program.second)}; | 127 | return {std::move(out), std::move(program.second)}; |
| 131 | } | 128 | } |
| 132 | 129 | ||
| 130 | ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) { | ||
| 131 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||
| 132 | |||
| 133 | std::string out = "// Shader Unique Id: CS" + id + "\n\n"; | ||
| 134 | out += GetCommonDeclarations(); | ||
| 135 | |||
| 136 | const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a); | ||
| 137 | ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); | ||
| 138 | out += program.first; | ||
| 139 | |||
| 140 | out += R"( | ||
| 141 | void main() { | ||
| 142 | execute_compute(); | ||
| 143 | } | ||
| 144 | )"; | ||
| 145 | return {std::move(out), std::move(program.second)}; | ||
| 146 | } | ||
| 147 | |||
| 133 | } // namespace OpenGL::GLShader | 148 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 0536c8a03..3833e88ab 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -27,6 +27,8 @@ struct ShaderSetup { | |||
| 27 | ProgramCode code; | 27 | ProgramCode code; |
| 28 | ProgramCode code_b; // Used for dual vertex shaders | 28 | ProgramCode code_b; // Used for dual vertex shaders |
| 29 | u64 unique_identifier; | 29 | u64 unique_identifier; |
| 30 | std::size_t size_a; | ||
| 31 | std::size_t size_b; | ||
| 30 | } program; | 32 | } program; |
| 31 | 33 | ||
| 32 | /// Used in scenarios where we have a dual vertex shaders | 34 | /// Used in scenarios where we have a dual vertex shaders |
| @@ -52,4 +54,7 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se | |||
| 52 | /// Generates the GLSL fragment shader program source code for the given FS program | 54 | /// Generates the GLSL fragment shader program source code for the given FS program |
| 53 | ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); | 55 | ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); |
| 54 | 56 | ||
| 57 | /// Generates the GLSL compute shader program source code for the given CS program | ||
| 58 | ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup); | ||
| 59 | |||
| 55 | } // namespace OpenGL::GLShader | 60 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 5f3fe067e..9e74eda0d 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp | |||
| @@ -10,21 +10,25 @@ | |||
| 10 | 10 | ||
| 11 | namespace OpenGL::GLShader { | 11 | namespace OpenGL::GLShader { |
| 12 | 12 | ||
| 13 | GLuint LoadShader(const char* source, GLenum type) { | 13 | namespace { |
| 14 | const char* debug_type; | 14 | const char* GetStageDebugName(GLenum type) { |
| 15 | switch (type) { | 15 | switch (type) { |
| 16 | case GL_VERTEX_SHADER: | 16 | case GL_VERTEX_SHADER: |
| 17 | debug_type = "vertex"; | 17 | return "vertex"; |
| 18 | break; | ||
| 19 | case GL_GEOMETRY_SHADER: | 18 | case GL_GEOMETRY_SHADER: |
| 20 | debug_type = "geometry"; | 19 | return "geometry"; |
| 21 | break; | ||
| 22 | case GL_FRAGMENT_SHADER: | 20 | case GL_FRAGMENT_SHADER: |
| 23 | debug_type = "fragment"; | 21 | return "fragment"; |
| 24 | break; | 22 | case GL_COMPUTE_SHADER: |
| 25 | default: | 23 | return "compute"; |
| 26 | UNREACHABLE(); | ||
| 27 | } | 24 | } |
| 25 | UNIMPLEMENTED(); | ||
| 26 | return "unknown"; | ||
| 27 | } | ||
| 28 | } // Anonymous namespace | ||
| 29 | |||
| 30 | GLuint LoadShader(const char* source, GLenum type) { | ||
| 31 | const char* debug_type = GetStageDebugName(type); | ||
| 28 | const GLuint shader_id = glCreateShader(type); | 32 | const GLuint shader_id = glCreateShader(type); |
| 29 | glShaderSource(shader_id, 1, &source, nullptr); | 33 | glShaderSource(shader_id, 1, &source, nullptr); |
| 30 | LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); | 34 | LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); |
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index d86e137ac..6eabf4fac 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -6,8 +6,11 @@ | |||
| 6 | #include <glad/glad.h> | 6 | #include <glad/glad.h> |
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 9 | #include "common/microprofile.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_state.h" | 10 | #include "video_core/renderer_opengl/gl_state.h" |
| 10 | 11 | ||
| 12 | MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128)); | ||
| 13 | |||
| 11 | namespace OpenGL { | 14 | namespace OpenGL { |
| 12 | 15 | ||
| 13 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 16 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| @@ -31,6 +34,25 @@ bool UpdateTie(T1 current_value, const T2 new_value) { | |||
| 31 | return changed; | 34 | return changed; |
| 32 | } | 35 | } |
| 33 | 36 | ||
| 37 | template <typename T> | ||
| 38 | std::optional<std::pair<GLuint, GLsizei>> UpdateArray(T& current_values, const T& new_values) { | ||
| 39 | std::optional<std::size_t> first; | ||
| 40 | std::size_t last; | ||
| 41 | for (std::size_t i = 0; i < std::size(current_values); ++i) { | ||
| 42 | if (!UpdateValue(current_values[i], new_values[i])) { | ||
| 43 | continue; | ||
| 44 | } | ||
| 45 | if (!first) { | ||
| 46 | first = i; | ||
| 47 | } | ||
| 48 | last = i; | ||
| 49 | } | ||
| 50 | if (!first) { | ||
| 51 | return std::nullopt; | ||
| 52 | } | ||
| 53 | return std::make_pair(static_cast<GLuint>(*first), static_cast<GLsizei>(last - *first + 1)); | ||
| 54 | } | ||
| 55 | |||
| 34 | void Enable(GLenum cap, bool enable) { | 56 | void Enable(GLenum cap, bool enable) { |
| 35 | if (enable) { | 57 | if (enable) { |
| 36 | glEnable(cap); | 58 | glEnable(cap); |
| @@ -131,10 +153,6 @@ OpenGLState::OpenGLState() { | |||
| 131 | logic_op.enabled = false; | 153 | logic_op.enabled = false; |
| 132 | logic_op.operation = GL_COPY; | 154 | logic_op.operation = GL_COPY; |
| 133 | 155 | ||
| 134 | for (auto& texture_unit : texture_units) { | ||
| 135 | texture_unit.Reset(); | ||
| 136 | } | ||
| 137 | |||
| 138 | draw.read_framebuffer = 0; | 156 | draw.read_framebuffer = 0; |
| 139 | draw.draw_framebuffer = 0; | 157 | draw.draw_framebuffer = 0; |
| 140 | draw.vertex_array = 0; | 158 | draw.vertex_array = 0; |
| @@ -162,6 +180,25 @@ OpenGLState::OpenGLState() { | |||
| 162 | alpha_test.ref = 0.0f; | 180 | alpha_test.ref = 0.0f; |
| 163 | } | 181 | } |
| 164 | 182 | ||
| 183 | void OpenGLState::SetDefaultViewports() { | ||
| 184 | for (auto& item : viewports) { | ||
| 185 | item.x = 0; | ||
| 186 | item.y = 0; | ||
| 187 | item.width = 0; | ||
| 188 | item.height = 0; | ||
| 189 | item.depth_range_near = 0.0f; | ||
| 190 | item.depth_range_far = 1.0f; | ||
| 191 | item.scissor.enabled = false; | ||
| 192 | item.scissor.x = 0; | ||
| 193 | item.scissor.y = 0; | ||
| 194 | item.scissor.width = 0; | ||
| 195 | item.scissor.height = 0; | ||
| 196 | } | ||
| 197 | |||
| 198 | depth_clamp.far_plane = false; | ||
| 199 | depth_clamp.near_plane = false; | ||
| 200 | } | ||
| 201 | |||
| 165 | void OpenGLState::ApplyDefaultState() { | 202 | void OpenGLState::ApplyDefaultState() { |
| 166 | glEnable(GL_BLEND); | 203 | glEnable(GL_BLEND); |
| 167 | glDisable(GL_FRAMEBUFFER_SRGB); | 204 | glDisable(GL_FRAMEBUFFER_SRGB); |
| @@ -474,56 +511,25 @@ void OpenGLState::ApplyAlphaTest() const { | |||
| 474 | } | 511 | } |
| 475 | 512 | ||
| 476 | void OpenGLState::ApplyTextures() const { | 513 | void OpenGLState::ApplyTextures() const { |
| 477 | bool has_delta{}; | 514 | if (const auto update = UpdateArray(cur_state.textures, textures)) { |
| 478 | std::size_t first{}; | 515 | glBindTextures(update->first, update->second, textures.data() + update->first); |
| 479 | std::size_t last{}; | ||
| 480 | std::array<GLuint, Maxwell::NumTextureSamplers> textures; | ||
| 481 | |||
| 482 | for (std::size_t i = 0; i < std::size(texture_units); ++i) { | ||
| 483 | const auto& texture_unit = texture_units[i]; | ||
| 484 | auto& cur_state_texture_unit = cur_state.texture_units[i]; | ||
| 485 | textures[i] = texture_unit.texture; | ||
| 486 | if (cur_state_texture_unit.texture == textures[i]) { | ||
| 487 | continue; | ||
| 488 | } | ||
| 489 | cur_state_texture_unit.texture = textures[i]; | ||
| 490 | if (!has_delta) { | ||
| 491 | first = i; | ||
| 492 | has_delta = true; | ||
| 493 | } | ||
| 494 | last = i; | ||
| 495 | } | ||
| 496 | if (has_delta) { | ||
| 497 | glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), | ||
| 498 | textures.data() + first); | ||
| 499 | } | 516 | } |
| 500 | } | 517 | } |
| 501 | 518 | ||
| 502 | void OpenGLState::ApplySamplers() const { | 519 | void OpenGLState::ApplySamplers() const { |
| 503 | bool has_delta{}; | 520 | if (const auto update = UpdateArray(cur_state.samplers, samplers)) { |
| 504 | std::size_t first{}; | 521 | glBindSamplers(update->first, update->second, samplers.data() + update->first); |
| 505 | std::size_t last{}; | ||
| 506 | std::array<GLuint, Maxwell::NumTextureSamplers> samplers; | ||
| 507 | |||
| 508 | for (std::size_t i = 0; i < std::size(samplers); ++i) { | ||
| 509 | samplers[i] = texture_units[i].sampler; | ||
| 510 | if (cur_state.texture_units[i].sampler == texture_units[i].sampler) { | ||
| 511 | continue; | ||
| 512 | } | ||
| 513 | cur_state.texture_units[i].sampler = texture_units[i].sampler; | ||
| 514 | if (!has_delta) { | ||
| 515 | first = i; | ||
| 516 | has_delta = true; | ||
| 517 | } | ||
| 518 | last = i; | ||
| 519 | } | 522 | } |
| 520 | if (has_delta) { | 523 | } |
| 521 | glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), | 524 | |
| 522 | samplers.data() + first); | 525 | void OpenGLState::ApplyImages() const { |
| 526 | if (const auto update = UpdateArray(cur_state.images, images)) { | ||
| 527 | glBindImageTextures(update->first, update->second, images.data() + update->first); | ||
| 523 | } | 528 | } |
| 524 | } | 529 | } |
| 525 | 530 | ||
| 526 | void OpenGLState::Apply() const { | 531 | void OpenGLState::Apply() { |
| 532 | MICROPROFILE_SCOPE(OpenGL_State); | ||
| 527 | ApplyFramebufferState(); | 533 | ApplyFramebufferState(); |
| 528 | ApplyVertexArrayState(); | 534 | ApplyVertexArrayState(); |
| 529 | ApplyShaderProgram(); | 535 | ApplyShaderProgram(); |
| @@ -532,19 +538,32 @@ void OpenGLState::Apply() const { | |||
| 532 | ApplyPointSize(); | 538 | ApplyPointSize(); |
| 533 | ApplyFragmentColorClamp(); | 539 | ApplyFragmentColorClamp(); |
| 534 | ApplyMultisample(); | 540 | ApplyMultisample(); |
| 541 | if (dirty.color_mask) { | ||
| 542 | ApplyColorMask(); | ||
| 543 | dirty.color_mask = false; | ||
| 544 | } | ||
| 535 | ApplyDepthClamp(); | 545 | ApplyDepthClamp(); |
| 536 | ApplyColorMask(); | ||
| 537 | ApplyViewport(); | 546 | ApplyViewport(); |
| 538 | ApplyStencilTest(); | 547 | if (dirty.stencil_state) { |
| 548 | ApplyStencilTest(); | ||
| 549 | dirty.stencil_state = false; | ||
| 550 | } | ||
| 539 | ApplySRgb(); | 551 | ApplySRgb(); |
| 540 | ApplyCulling(); | 552 | ApplyCulling(); |
| 541 | ApplyDepth(); | 553 | ApplyDepth(); |
| 542 | ApplyPrimitiveRestart(); | 554 | ApplyPrimitiveRestart(); |
| 543 | ApplyBlending(); | 555 | if (dirty.blend_state) { |
| 556 | ApplyBlending(); | ||
| 557 | dirty.blend_state = false; | ||
| 558 | } | ||
| 544 | ApplyLogicOp(); | 559 | ApplyLogicOp(); |
| 545 | ApplyTextures(); | 560 | ApplyTextures(); |
| 546 | ApplySamplers(); | 561 | ApplySamplers(); |
| 547 | ApplyPolygonOffset(); | 562 | ApplyImages(); |
| 563 | if (dirty.polygon_offset) { | ||
| 564 | ApplyPolygonOffset(); | ||
| 565 | dirty.polygon_offset = false; | ||
| 566 | } | ||
| 548 | ApplyAlphaTest(); | 567 | ApplyAlphaTest(); |
| 549 | } | 568 | } |
| 550 | 569 | ||
| @@ -571,18 +590,18 @@ void OpenGLState::EmulateViewportWithScissor() { | |||
| 571 | } | 590 | } |
| 572 | 591 | ||
| 573 | OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { | 592 | OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { |
| 574 | for (auto& unit : texture_units) { | 593 | for (auto& texture : textures) { |
| 575 | if (unit.texture == handle) { | 594 | if (texture == handle) { |
| 576 | unit.Unbind(); | 595 | texture = 0; |
| 577 | } | 596 | } |
| 578 | } | 597 | } |
| 579 | return *this; | 598 | return *this; |
| 580 | } | 599 | } |
| 581 | 600 | ||
| 582 | OpenGLState& OpenGLState::ResetSampler(GLuint handle) { | 601 | OpenGLState& OpenGLState::ResetSampler(GLuint handle) { |
| 583 | for (auto& unit : texture_units) { | 602 | for (auto& sampler : samplers) { |
| 584 | if (unit.sampler == handle) { | 603 | if (sampler == handle) { |
| 585 | unit.sampler = 0; | 604 | sampler = 0; |
| 586 | } | 605 | } |
| 587 | } | 606 | } |
| 588 | return *this; | 607 | return *this; |
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index b0140495d..949b13051 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -118,21 +118,9 @@ public: | |||
| 118 | GLenum operation; | 118 | GLenum operation; |
| 119 | } logic_op; | 119 | } logic_op; |
| 120 | 120 | ||
| 121 | // 3 texture units - one for each that is used in PICA fragment shader emulation | 121 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures{}; |
| 122 | struct TextureUnit { | 122 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers{}; |
| 123 | GLuint texture; // GL_TEXTURE_BINDING_2D | 123 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images{}; |
| 124 | GLuint sampler; // GL_SAMPLER_BINDING | ||
| 125 | |||
| 126 | void Unbind() { | ||
| 127 | texture = 0; | ||
| 128 | } | ||
| 129 | |||
| 130 | void Reset() { | ||
| 131 | Unbind(); | ||
| 132 | sampler = 0; | ||
| 133 | } | ||
| 134 | }; | ||
| 135 | std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units; | ||
| 136 | 124 | ||
| 137 | struct { | 125 | struct { |
| 138 | GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING | 126 | GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING |
| @@ -195,8 +183,9 @@ public: | |||
| 195 | s_rgb_used = false; | 183 | s_rgb_used = false; |
| 196 | } | 184 | } |
| 197 | 185 | ||
| 186 | void SetDefaultViewports(); | ||
| 198 | /// Apply this state as the current OpenGL state | 187 | /// Apply this state as the current OpenGL state |
| 199 | void Apply() const; | 188 | void Apply(); |
| 200 | 189 | ||
| 201 | void ApplyFramebufferState() const; | 190 | void ApplyFramebufferState() const; |
| 202 | void ApplyVertexArrayState() const; | 191 | void ApplyVertexArrayState() const; |
| @@ -219,6 +208,7 @@ public: | |||
| 219 | void ApplyLogicOp() const; | 208 | void ApplyLogicOp() const; |
| 220 | void ApplyTextures() const; | 209 | void ApplyTextures() const; |
| 221 | void ApplySamplers() const; | 210 | void ApplySamplers() const; |
| 211 | void ApplyImages() const; | ||
| 222 | void ApplyDepthClamp() const; | 212 | void ApplyDepthClamp() const; |
| 223 | void ApplyPolygonOffset() const; | 213 | void ApplyPolygonOffset() const; |
| 224 | void ApplyAlphaTest() const; | 214 | void ApplyAlphaTest() const; |
| @@ -237,11 +227,41 @@ public: | |||
| 237 | /// Viewport does not affects glClearBuffer so emulate viewport using scissor test | 227 | /// Viewport does not affects glClearBuffer so emulate viewport using scissor test |
| 238 | void EmulateViewportWithScissor(); | 228 | void EmulateViewportWithScissor(); |
| 239 | 229 | ||
| 230 | void MarkDirtyBlendState() { | ||
| 231 | dirty.blend_state = true; | ||
| 232 | } | ||
| 233 | |||
| 234 | void MarkDirtyStencilState() { | ||
| 235 | dirty.stencil_state = true; | ||
| 236 | } | ||
| 237 | |||
| 238 | void MarkDirtyPolygonOffset() { | ||
| 239 | dirty.polygon_offset = true; | ||
| 240 | } | ||
| 241 | |||
| 242 | void MarkDirtyColorMask() { | ||
| 243 | dirty.color_mask = true; | ||
| 244 | } | ||
| 245 | |||
| 246 | void AllDirty() { | ||
| 247 | dirty.blend_state = true; | ||
| 248 | dirty.stencil_state = true; | ||
| 249 | dirty.polygon_offset = true; | ||
| 250 | dirty.color_mask = true; | ||
| 251 | } | ||
| 252 | |||
| 240 | private: | 253 | private: |
| 241 | static OpenGLState cur_state; | 254 | static OpenGLState cur_state; |
| 242 | 255 | ||
| 243 | // Workaround for sRGB problems caused by QT not supporting srgb output | 256 | // Workaround for sRGB problems caused by QT not supporting srgb output |
| 244 | static bool s_rgb_used; | 257 | static bool s_rgb_used; |
| 258 | struct { | ||
| 259 | bool blend_state; | ||
| 260 | bool stencil_state; | ||
| 261 | bool viewport_state; | ||
| 262 | bool polygon_offset; | ||
| 263 | bool color_mask; | ||
| 264 | } dirty{}; | ||
| 245 | }; | 265 | }; |
| 246 | 266 | ||
| 247 | } // namespace OpenGL | 267 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index d0b14b3f6..35ba334e4 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp | |||
| @@ -15,7 +15,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", | |||
| 15 | 15 | ||
| 16 | namespace OpenGL { | 16 | namespace OpenGL { |
| 17 | 17 | ||
| 18 | OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent) | 18 | OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent, |
| 19 | bool use_persistent) | ||
| 19 | : buffer_size(size) { | 20 | : buffer_size(size) { |
| 20 | gl_buffer.Create(); | 21 | gl_buffer.Create(); |
| 21 | 22 | ||
| @@ -29,7 +30,7 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p | |||
| 29 | allocate_size *= 2; | 30 | allocate_size *= 2; |
| 30 | } | 31 | } |
| 31 | 32 | ||
| 32 | if (GLAD_GL_ARB_buffer_storage) { | 33 | if (use_persistent) { |
| 33 | persistent = true; | 34 | persistent = true; |
| 34 | coherent = prefer_coherent; | 35 | coherent = prefer_coherent; |
| 35 | const GLbitfield flags = | 36 | const GLbitfield flags = |
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 3d18ecb4d..f8383cbd4 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h | |||
| @@ -13,7 +13,8 @@ namespace OpenGL { | |||
| 13 | 13 | ||
| 14 | class OGLStreamBuffer : private NonCopyable { | 14 | class OGLStreamBuffer : private NonCopyable { |
| 15 | public: | 15 | public: |
| 16 | explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false); | 16 | explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false, |
| 17 | bool use_persistent = true); | ||
| 17 | ~OGLStreamBuffer(); | 18 | ~OGLStreamBuffer(); |
| 18 | 19 | ||
| 19 | GLuint GetHandle() const; | 20 | GLuint GetHandle() const; |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp new file mode 100644 index 000000000..4f135fe03 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -0,0 +1,624 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/bit_util.h" | ||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "common/microprofile.h" | ||
| 9 | #include "common/scope_exit.h" | ||
| 10 | #include "core/core.h" | ||
| 11 | #include "video_core/morton.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_state.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 15 | #include "video_core/renderer_opengl/utils.h" | ||
| 16 | #include "video_core/texture_cache/surface_base.h" | ||
| 17 | #include "video_core/texture_cache/texture_cache.h" | ||
| 18 | #include "video_core/textures/convert.h" | ||
| 19 | #include "video_core/textures/texture.h" | ||
| 20 | |||
| 21 | namespace OpenGL { | ||
| 22 | |||
| 23 | using Tegra::Texture::SwizzleSource; | ||
| 24 | using VideoCore::MortonSwizzleMode; | ||
| 25 | |||
| 26 | using VideoCore::Surface::ComponentType; | ||
| 27 | using VideoCore::Surface::PixelFormat; | ||
| 28 | using VideoCore::Surface::SurfaceCompression; | ||
| 29 | using VideoCore::Surface::SurfaceTarget; | ||
| 30 | using VideoCore::Surface::SurfaceType; | ||
| 31 | |||
| 32 | MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); | ||
| 33 | MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); | ||
| 34 | MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", | ||
| 35 | MP_RGB(128, 192, 128)); | ||
| 36 | |||
| 37 | namespace { | ||
| 38 | |||
| 39 | struct FormatTuple { | ||
| 40 | GLint internal_format; | ||
| 41 | GLenum format; | ||
| 42 | GLenum type; | ||
| 43 | ComponentType component_type; | ||
| 44 | bool compressed; | ||
| 45 | }; | ||
| 46 | |||
| 47 | constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ | ||
| 48 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U | ||
| 49 | {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S | ||
| 50 | {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI | ||
| 51 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U | ||
| 52 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm, | ||
| 53 | false}, // A2B10G10R10U | ||
| 54 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U | ||
| 55 | {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U | ||
| 56 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI | ||
| 57 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F | ||
| 58 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U | ||
| 59 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI | ||
| 60 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, | ||
| 61 | false}, // R11FG11FB10F | ||
| 62 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI | ||
| 63 | {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 64 | true}, // DXT1 | ||
| 65 | {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 66 | true}, // DXT23 | ||
| 67 | {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 68 | true}, // DXT45 | ||
| 69 | {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 | ||
| 70 | {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 71 | true}, // DXN2UNORM | ||
| 72 | {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM | ||
| 73 | {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 74 | true}, // BC7U | ||
| 75 | {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, | ||
| 76 | true}, // BC6H_UF16 | ||
| 77 | {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, | ||
| 78 | true}, // BC6H_SF16 | ||
| 79 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 | ||
| 80 | {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 | ||
| 81 | {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F | ||
| 82 | {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F | ||
| 83 | {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F | ||
| 84 | {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F | ||
| 85 | {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U | ||
| 86 | {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S | ||
| 87 | {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI | ||
| 88 | {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I | ||
| 89 | {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16 | ||
| 90 | {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F | ||
| 91 | {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI | ||
| 92 | {GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I | ||
| 93 | {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S | ||
| 94 | {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F | ||
| 95 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, | ||
| 96 | false}, // RGBA8_SRGB | ||
| 97 | {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U | ||
| 98 | {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S | ||
| 99 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI | ||
| 100 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI | ||
| 101 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8 | ||
| 102 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5 | ||
| 103 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4 | ||
| 104 | {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 | ||
| 105 | // Compressed sRGB formats | ||
| 106 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 107 | true}, // DXT1_SRGB | ||
| 108 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 109 | true}, // DXT23_SRGB | ||
| 110 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 111 | true}, // DXT45_SRGB | ||
| 112 | {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 113 | true}, // BC7U_SRGB | ||
| 114 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB | ||
| 115 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB | ||
| 116 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB | ||
| 117 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB | ||
| 118 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5 | ||
| 119 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB | ||
| 120 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8 | ||
| 121 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB | ||
| 122 | |||
| 123 | // Depth formats | ||
| 124 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F | ||
| 125 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm, | ||
| 126 | false}, // Z16 | ||
| 127 | |||
| 128 | // DepthStencil formats | ||
| 129 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, | ||
| 130 | false}, // Z24S8 | ||
| 131 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, | ||
| 132 | false}, // S8Z24 | ||
| 133 | {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, | ||
| 134 | ComponentType::Float, false}, // Z32FS8 | ||
| 135 | }}; | ||
| 136 | |||
| 137 | const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { | ||
| 138 | ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); | ||
| 139 | const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]}; | ||
| 140 | return format; | ||
| 141 | } | ||
| 142 | |||
| 143 | GLenum GetTextureTarget(const SurfaceTarget& target) { | ||
| 144 | switch (target) { | ||
| 145 | case SurfaceTarget::TextureBuffer: | ||
| 146 | return GL_TEXTURE_BUFFER; | ||
| 147 | case SurfaceTarget::Texture1D: | ||
| 148 | return GL_TEXTURE_1D; | ||
| 149 | case SurfaceTarget::Texture2D: | ||
| 150 | return GL_TEXTURE_2D; | ||
| 151 | case SurfaceTarget::Texture3D: | ||
| 152 | return GL_TEXTURE_3D; | ||
| 153 | case SurfaceTarget::Texture1DArray: | ||
| 154 | return GL_TEXTURE_1D_ARRAY; | ||
| 155 | case SurfaceTarget::Texture2DArray: | ||
| 156 | return GL_TEXTURE_2D_ARRAY; | ||
| 157 | case SurfaceTarget::TextureCubemap: | ||
| 158 | return GL_TEXTURE_CUBE_MAP; | ||
| 159 | case SurfaceTarget::TextureCubeArray: | ||
| 160 | return GL_TEXTURE_CUBE_MAP_ARRAY; | ||
| 161 | } | ||
| 162 | UNREACHABLE(); | ||
| 163 | return {}; | ||
| 164 | } | ||
| 165 | |||
| 166 | GLint GetSwizzleSource(SwizzleSource source) { | ||
| 167 | switch (source) { | ||
| 168 | case SwizzleSource::Zero: | ||
| 169 | return GL_ZERO; | ||
| 170 | case SwizzleSource::R: | ||
| 171 | return GL_RED; | ||
| 172 | case SwizzleSource::G: | ||
| 173 | return GL_GREEN; | ||
| 174 | case SwizzleSource::B: | ||
| 175 | return GL_BLUE; | ||
| 176 | case SwizzleSource::A: | ||
| 177 | return GL_ALPHA; | ||
| 178 | case SwizzleSource::OneInt: | ||
| 179 | case SwizzleSource::OneFloat: | ||
| 180 | return GL_ONE; | ||
| 181 | } | ||
| 182 | UNREACHABLE(); | ||
| 183 | return GL_NONE; | ||
| 184 | } | ||
| 185 | |||
| 186 | void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { | ||
| 187 | if (params.IsBuffer()) { | ||
| 188 | return; | ||
| 189 | } | ||
| 190 | glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | ||
| 191 | glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | ||
| 192 | glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); | ||
| 193 | glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); | ||
| 194 | glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1); | ||
| 195 | if (params.num_levels == 1) { | ||
| 196 | glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); | ||
| 197 | } | ||
| 198 | } | ||
| 199 | |||
| 200 | OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format, | ||
| 201 | OGLBuffer& texture_buffer) { | ||
| 202 | OGLTexture texture; | ||
| 203 | texture.Create(target); | ||
| 204 | |||
| 205 | switch (params.target) { | ||
| 206 | case SurfaceTarget::Texture1D: | ||
| 207 | glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width); | ||
| 208 | break; | ||
| 209 | case SurfaceTarget::TextureBuffer: | ||
| 210 | texture_buffer.Create(); | ||
| 211 | glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), | ||
| 212 | nullptr, GL_DYNAMIC_STORAGE_BIT); | ||
| 213 | glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); | ||
| 214 | break; | ||
| 215 | case SurfaceTarget::Texture2D: | ||
| 216 | case SurfaceTarget::TextureCubemap: | ||
| 217 | glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, | ||
| 218 | params.height); | ||
| 219 | break; | ||
| 220 | case SurfaceTarget::Texture3D: | ||
| 221 | case SurfaceTarget::Texture2DArray: | ||
| 222 | case SurfaceTarget::TextureCubeArray: | ||
| 223 | glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width, | ||
| 224 | params.height, params.depth); | ||
| 225 | break; | ||
| 226 | default: | ||
| 227 | UNREACHABLE(); | ||
| 228 | } | ||
| 229 | |||
| 230 | ApplyTextureDefaults(params, texture.handle); | ||
| 231 | |||
| 232 | return texture; | ||
| 233 | } | ||
| 234 | |||
| 235 | } // Anonymous namespace | ||
| 236 | |||
// Builds a host surface: resolves the GL format for the guest pixel format,
// allocates the backing texture storage, and creates the "main" view that
// covers the whole surface.
CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params)
    : VideoCommon::SurfaceBase<View>(gpu_addr, params) {
    // Cache the format tuple members; they are reused on every upload/download.
    const auto& tuple{GetFormatTuple(params.pixel_format, params.component_type)};
    internal_format = tuple.internal_format;
    format = tuple.format;
    type = tuple.type;
    is_compressed = tuple.compressed;
    target = GetTextureTarget(params.target);
    texture = CreateTexture(params, target, internal_format, texture_buffer);
    DecorateSurfaceName();
    // The main view spans all layers (for layered targets) and all mip levels.
    // It is a proxy view: it aliases the surface's own texture object.
    main_view = CreateViewInner(
        ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels),
        true);
}
| 251 | |||
// Defaulted out of line; OGLTexture/OGLBuffer members release GL objects.
CachedSurface::~CachedSurface() = default;
| 253 | |||
// Reads every emulated mip level of the texture back into `staging_buffer`,
// laid out at the host mipmap offsets. Touches global GL pack state, which is
// restored on exit.
void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
    MICROPROFILE_SCOPE(OpenGL_Texture_Download);

    // Reset the pack row length when leaving, even on early exit.
    SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });

    for (u32 level = 0; level < params.emulated_levels; ++level) {
        // Pack state must be set per level before the corresponding read-back.
        glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
        glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
        const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
        if (is_compressed) {
            glGetCompressedTextureImage(texture.handle, level,
                                        static_cast<GLsizei>(params.GetHostMipmapSize(level)),
                                        staging_buffer.data() + mip_offset);
        } else {
            glGetTextureImage(texture.handle, level, format, type,
                              static_cast<GLsizei>(params.GetHostMipmapSize(level)),
                              staging_buffer.data() + mip_offset);
        }
    }
}
| 274 | |||
| 275 | void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { | ||
| 276 | MICROPROFILE_SCOPE(OpenGL_Texture_Upload); | ||
| 277 | SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); | ||
| 278 | for (u32 level = 0; level < params.emulated_levels; ++level) { | ||
| 279 | UploadTextureMipmap(level, staging_buffer); | ||
| 280 | } | ||
| 281 | } | ||
| 282 | |||
// Uploads a single mip level from `staging_buffer` into the host texture,
// dispatching on the surface target and on whether the format is compressed.
// Sets global GL unpack state; the caller (UploadTexture) restores it.
void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) {
    glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));

    auto compression_type = params.GetCompressionType();

    // Converted formats use a different staging layout than raw host data.
    const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
                                       ? params.GetConvertedMipmapOffset(level)
                                       : params.GetHostMipmapLevelOffset(level);
    const u8* buffer{staging_buffer.data() + mip_offset};
    if (is_compressed) {
        const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};
        switch (params.target) {
        case SurfaceTarget::Texture2D:
            glCompressedTextureSubImage2D(texture.handle, level, 0, 0,
                                          static_cast<GLsizei>(params.GetMipWidth(level)),
                                          static_cast<GLsizei>(params.GetMipHeight(level)),
                                          internal_format, image_size, buffer);
            break;
        case SurfaceTarget::Texture3D:
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
            glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0,
                                          static_cast<GLsizei>(params.GetMipWidth(level)),
                                          static_cast<GLsizei>(params.GetMipHeight(level)),
                                          static_cast<GLsizei>(params.GetMipDepth(level)),
                                          internal_format, image_size, buffer);
            break;
        case SurfaceTarget::TextureCubemap: {
            // Cubemaps are uploaded one face at a time, advancing through the
            // staging buffer by the per-layer host size.
            const std::size_t layer_size{params.GetHostLayerSize(level)};
            for (std::size_t face = 0; face < params.depth; ++face) {
                glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
                                              static_cast<GLsizei>(params.GetMipWidth(level)),
                                              static_cast<GLsizei>(params.GetMipHeight(level)), 1,
                                              internal_format, static_cast<GLsizei>(layer_size),
                                              buffer);
                buffer += layer_size;
            }
            break;
        }
        default:
            UNREACHABLE();
        }
    } else {
        switch (params.target) {
        case SurfaceTarget::Texture1D:
            glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type,
                                buffer);
            break;
        case SurfaceTarget::TextureBuffer:
            // Buffer textures have a single level; data goes to the buffer object.
            ASSERT(level == 0);
            glNamedBufferSubData(texture_buffer.handle, 0,
                                 params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer);
            break;
        case SurfaceTarget::Texture1DArray:
        case SurfaceTarget::Texture2D:
            glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level),
                                params.GetMipHeight(level), format, type, buffer);
            break;
        case SurfaceTarget::Texture3D:
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
            glTextureSubImage3D(
                texture.handle, level, 0, 0, 0, static_cast<GLsizei>(params.GetMipWidth(level)),
                static_cast<GLsizei>(params.GetMipHeight(level)),
                static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer);
            break;
        case SurfaceTarget::TextureCubemap:
            // Same face-by-face walk as the compressed cubemap path above.
            for (std::size_t face = 0; face < params.depth; ++face) {
                glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
                                    params.GetMipWidth(level), params.GetMipHeight(level), 1,
                                    format, type, buffer);
                buffer += params.GetHostLayerSize(level);
            }
            break;
        default:
            UNREACHABLE();
        }
    }
}
| 363 | |||
// Labels the GL texture object with the guest address and target for debuggers.
void CachedSurface::DecorateSurfaceName() {
    LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName());
}
| 367 | |||
// Labels the GL texture view with the guest address and a caller-built prefix.
// NOTE(review): `prefix` is taken by value and then passed on; whether a
// std::move would help depends on LabelGLObject's signature — confirm there.
void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) {
    LabelGLObject(GL_TEXTURE, texture_view.handle, gpu_addr, prefix);
}
| 371 | |||
// Public view-creation entry point; non-proxy views get a real GL texture view.
View CachedSurface::CreateView(const ViewParams& view_key) {
    return CreateViewInner(view_key, false);
}
| 375 | |||
| 376 | View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) { | ||
| 377 | auto view = std::make_shared<CachedSurfaceView>(*this, view_key, is_proxy); | ||
| 378 | views[view_key] = view; | ||
| 379 | if (!is_proxy) | ||
| 380 | view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); | ||
| 381 | return view; | ||
| 382 | } | ||
| 383 | |||
// Constructs a view over `surface`. Proxy views reuse the surface's texture
// handle directly; non-proxy views allocate a real GL texture view.
CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
                                     const bool is_proxy)
    : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} {
    target = GetTextureTarget(params.target);
    if (!is_proxy) {
        texture_view = CreateTextureView();
    }
    // Start from the identity swizzle so ApplySwizzle can skip no-op updates.
    swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A);
}
| 393 | |||
// Defaulted out of line; the OGLTextureView member releases the GL object.
CachedSurfaceView::~CachedSurfaceView() = default;
| 395 | |||
// Attaches this view to the framebuffer bound at `target` (read or draw).
// Only single-layer, single-level views can be framebuffer attachments.
void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
    ASSERT(params.num_layers == 1 && params.num_levels == 1);

    const auto& owner_params = surface.GetSurfaceParams();

    // Dispatch on the owning surface's target: plain 1D/2D textures attach
    // whole, layered targets attach one layer via glFramebufferTextureLayer.
    switch (owner_params.target) {
    case SurfaceTarget::Texture1D:
        glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(),
                               params.base_level);
        break;
    case SurfaceTarget::Texture2D:
        glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(),
                               params.base_level);
        break;
    case SurfaceTarget::Texture1DArray:
    case SurfaceTarget::Texture2DArray:
    case SurfaceTarget::TextureCubemap:
    case SurfaceTarget::TextureCubeArray:
        glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level,
                                  params.base_layer);
        break;
    default:
        UNIMPLEMENTED();
    }
}
| 421 | |||
| 422 | void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source, | ||
| 423 | SwizzleSource z_source, SwizzleSource w_source) { | ||
| 424 | u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); | ||
| 425 | if (new_swizzle == swizzle) | ||
| 426 | return; | ||
| 427 | swizzle = new_swizzle; | ||
| 428 | const std::array<GLint, 4> gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), | ||
| 429 | GetSwizzleSource(z_source), | ||
| 430 | GetSwizzleSource(w_source)}; | ||
| 431 | const GLuint handle = GetTexture(); | ||
| 432 | glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); | ||
| 433 | } | ||
| 434 | |||
// Creates the GL texture view for this (non-proxy) view, aliasing the owning
// surface's storage over the view's layer/level range.
OGLTextureView CachedSurfaceView::CreateTextureView() const {
    const auto& owner_params = surface.GetSurfaceParams();
    OGLTextureView texture_view;
    texture_view.Create();

    const GLuint handle{texture_view.handle};
    // The view reuses the owner's format; only the layer/level window differs.
    const FormatTuple& tuple{
        GetFormatTuple(owner_params.pixel_format, owner_params.component_type)};

    glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level,
                  params.num_levels, params.base_layer, params.num_layers);

    ApplyTextureDefaults(owner_params, handle);

    return texture_view;
}
| 451 | |||
// Constructs the OpenGL texture cache and the scratch framebuffers used by
// ImageBlit. NOTE(review): `device` is currently unused here — confirm whether
// it is reserved for future use or can be dropped from the interface.
TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
                                       VideoCore::RasterizerInterface& rasterizer,
                                       const Device& device)
    : TextureCacheBase{system, rasterizer} {
    src_framebuffer.Create();
    dst_framebuffer.Create();
}
| 459 | |||
// Defaulted out of line; framebuffers and cached PBOs release their GL objects.
TextureCacheOpenGL::~TextureCacheOpenGL() = default;
| 461 | |||
// Factory hook for the common texture cache: builds the GL-backed surface type.
Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
    return std::make_shared<CachedSurface>(gpu_addr, params);
}
| 465 | |||
// Copies a subregion between two surfaces with glCopyImageSubData.
// NOTE(review): when the surface types differ the copy is silently skipped
// ("a fallback is needed") — callers get no signal that nothing was copied.
void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
                                   const VideoCommon::CopyParams& copy_params) {
    const auto& src_params = src_surface->GetSurfaceParams();
    const auto& dst_params = dst_surface->GetSurfaceParams();
    if (src_params.type != dst_params.type) {
        // A fallback is needed
        return;
    }
    const auto src_handle = src_surface->GetTexture();
    const auto src_target = src_surface->GetTarget();
    const auto dst_handle = dst_surface->GetTexture();
    const auto dst_target = dst_surface->GetTarget();
    glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x,
                       copy_params.source_y, copy_params.source_z, dst_handle, dst_target,
                       copy_params.dest_level, copy_params.dest_x, copy_params.dest_y,
                       copy_params.dest_z, copy_params.width, copy_params.height,
                       copy_params.depth);
}
| 484 | |||
// Blits between two views through the cache's scratch framebuffers, selecting
// color/depth/stencil attachments from the source surface type. The previous
// global GL state is saved and restored around the blit.
void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
                                   const Tegra::Engines::Fermi2D::Config& copy_config) {
    const auto& src_params{src_view->GetSurfaceParams()};
    const auto& dst_params{dst_view->GetSurfaceParams()};

    // Restore whatever state the rasterizer had once the blit is done.
    OpenGLState prev_state{OpenGLState::GetCurState()};
    SCOPE_EXIT({
        prev_state.AllDirty();
        prev_state.Apply();
    });

    // Bind the scratch read/draw framebuffers for the duration of the blit.
    OpenGLState state;
    state.draw.read_framebuffer = src_framebuffer.handle;
    state.draw.draw_framebuffer = dst_framebuffer.handle;
    state.AllDirty();
    state.Apply();

    u32 buffers{};

    UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
    UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);

    // For each surface type, attach the views to the matching attachment point
    // and explicitly detach the attachment points that are not used, so stale
    // attachments from a previous blit cannot leak into this one.
    if (src_params.type == SurfaceType::ColorTexture) {
        src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER);
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
                               0);

        dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
                               0);

        buffers = GL_COLOR_BUFFER_BIT;
    } else if (src_params.type == SurfaceType::Depth) {
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
        src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER);
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);

        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
        dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);

        buffers = GL_DEPTH_BUFFER_BIT;
    } else if (src_params.type == SurfaceType::DepthStencil) {
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
        src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER);

        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
        dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);

        buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
    }

    const Common::Rectangle<u32>& src_rect = copy_config.src_rect;
    const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect;
    const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;

    // Linear filtering is only valid for color blits; depth/stencil must use
    // nearest.
    glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
                      dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
                      is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST);
}
| 545 | |||
// Copies one surface to another of a possibly different format by round-
// tripping the texels through a pixel buffer object: pack (read back) from the
// source, then unpack (upload) into the destination, letting GL reinterpret
// the data. Only single-level surfaces are supported.
void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
    MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
    const auto& src_params = src_surface->GetSurfaceParams();
    const auto& dst_params = dst_surface->GetSurfaceParams();
    UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);

    const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
    const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);

    const std::size_t source_size = src_surface->GetHostSizeInBytes();
    const std::size_t dest_size = dst_surface->GetHostSizeInBytes();

    // The PBO must be able to hold whichever side is larger.
    const std::size_t buffer_size = std::max(source_size, dest_size);

    GLuint copy_pbo_handle = FetchPBO(buffer_size);

    // Pack phase: read the source texture into the bound PBO (data pointer is
    // nullptr, i.e. an offset into the PBO).
    glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);

    if (source_format.compressed) {
        glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size),
                                    nullptr);
    } else {
        glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type,
                          static_cast<GLsizei>(source_size), nullptr);
    }
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

    // Unpack phase: upload from the same PBO into the destination texture.
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);

    const GLsizei width = static_cast<GLsizei>(dst_params.width);
    const GLsizei height = static_cast<GLsizei>(dst_params.height);
    const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
    if (dest_format.compressed) {
        LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
        UNREACHABLE();
    } else {
        switch (dst_params.target) {
        case SurfaceTarget::Texture1D:
            glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format,
                                dest_format.type, nullptr);
            break;
        case SurfaceTarget::Texture2D:
            glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height,
                                dest_format.format, dest_format.type, nullptr);
            break;
        case SurfaceTarget::Texture3D:
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
            glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
                                dest_format.format, dest_format.type, nullptr);
            break;
        // NOTE(review): identical to the 3D/array case above; could share the
        // same case labels.
        case SurfaceTarget::TextureCubemap:
            glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
                                dest_format.format, dest_format.type, nullptr);
            break;
        default:
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(dst_params.target));
            UNREACHABLE();
        }
    }
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

    // Make the texture write visible to subsequent reads of the destination.
    glTextureBarrier();
}
| 611 | |||
| 612 | GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { | ||
| 613 | ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; }); | ||
| 614 | const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size)); | ||
| 615 | OGLBuffer& cp = copy_pbo_cache[l2]; | ||
| 616 | if (cp.handle == 0) { | ||
| 617 | const std::size_t ceil_size = 1ULL << l2; | ||
| 618 | cp.Create(); | ||
| 619 | cp.MakeStreamCopy(ceil_size); | ||
| 620 | } | ||
| 621 | return cp.handle; | ||
| 622 | } | ||
| 623 | |||
| 624 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h new file mode 100644 index 000000000..8e13ab38b --- /dev/null +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -0,0 +1,147 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <functional> | ||
| 9 | #include <memory> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include <glad/glad.h> | ||
| 15 | |||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "video_core/engines/shader_bytecode.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 20 | #include "video_core/texture_cache/texture_cache.h" | ||
| 21 | |||
| 22 | namespace OpenGL { | ||
| 23 | |||
// Pull the common (backend-agnostic) surface parameter types into this
// namespace for brevity.
using VideoCommon::SurfaceParams;
using VideoCommon::ViewParams;

class CachedSurfaceView;
class CachedSurface;
class TextureCacheOpenGL;

// Shared-ownership handles used throughout the cache.
using Surface = std::shared_ptr<CachedSurface>;
using View = std::shared_ptr<CachedSurfaceView>;
// The common texture cache specialized for the OpenGL surface/view types.
using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
| 34 | |||
/// An OpenGL-backed surface: owns the GL texture storage for a guest surface
/// and serves uploads/downloads between guest memory staging buffers and the
/// host texture.
class CachedSurface final : public VideoCommon::SurfaceBase<View> {
    friend CachedSurfaceView;

public:
    explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params);
    ~CachedSurface();

    /// Uploads all emulated mip levels from the staging buffer to the texture.
    void UploadTexture(const std::vector<u8>& staging_buffer) override;
    /// Reads all emulated mip levels back into the staging buffer.
    void DownloadTexture(std::vector<u8>& staging_buffer) override;

    /// GL texture target (GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP, ...).
    GLenum GetTarget() const {
        return target;
    }

    /// Raw GL texture handle.
    GLuint GetTexture() const {
        return texture.handle;
    }

protected:
    void DecorateSurfaceName() override;

    View CreateView(const ViewParams& view_key) override;
    /// Like CreateView, but `is_proxy` views alias the surface's own texture.
    View CreateViewInner(const ViewParams& view_key, bool is_proxy);

private:
    void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer);

    // Format tuple resolved from the guest pixel format at construction.
    GLenum internal_format{};
    GLenum format{};
    GLenum type{};
    bool is_compressed{};
    GLenum target{};
    // Running counter used to generate unique debug labels for views.
    u32 view_count{};

    OGLTexture texture;
    // Backing buffer object, only used for TextureBuffer targets.
    OGLBuffer texture_buffer;
};
| 72 | |||
/// A view over a CachedSurface (a layer/level window, possibly with its own
/// target and swizzle). Proxy views reuse the surface's texture handle; other
/// views own a GL texture view object.
class CachedSurfaceView final : public VideoCommon::ViewBase {
public:
    explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
    ~CachedSurfaceView();

    /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
    void Attach(GLenum attachment, GLenum target) const;

    /// Applies a per-channel swizzle, skipping the GL call if it is unchanged.
    void ApplySwizzle(Tegra::Texture::SwizzleSource x_source,
                      Tegra::Texture::SwizzleSource y_source,
                      Tegra::Texture::SwizzleSource z_source,
                      Tegra::Texture::SwizzleSource w_source);

    void DecorateViewName(GPUVAddr gpu_addr, std::string prefix);

    /// Forwards modification tracking to the owning surface.
    void MarkAsModified(u64 tick) {
        surface.MarkAsModified(true, tick);
    }

    /// Returns the surface's texture for proxy views, else the view's handle.
    GLuint GetTexture() const {
        if (is_proxy) {
            return surface.GetTexture();
        }
        return texture_view.handle;
    }

    const SurfaceParams& GetSurfaceParams() const {
        return surface.GetSurfaceParams();
    }

private:
    // Packs the four swizzle sources into one u32 (x in the high byte) so a
    // changed swizzle can be detected with a single comparison.
    u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
                      Tegra::Texture::SwizzleSource y_source,
                      Tegra::Texture::SwizzleSource z_source,
                      Tegra::Texture::SwizzleSource w_source) const {
        return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
               (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
    }

    OGLTextureView CreateTextureView() const;

    CachedSurface& surface;
    GLenum target{};

    OGLTextureView texture_view;
    // Last swizzle applied, in EncodeSwizzle packing.
    u32 swizzle{};
    bool is_proxy{};
};
| 121 | |||
/// OpenGL implementation of the common texture cache: creates GL-backed
/// surfaces and implements copies/blits between them.
class TextureCacheOpenGL final : public TextureCacheBase {
public:
    explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                                const Device& device);
    ~TextureCacheOpenGL();

protected:
    Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;

    /// Same-type surface copy via glCopyImageSubData.
    void ImageCopy(Surface& src_surface, Surface& dst_surface,
                   const VideoCommon::CopyParams& copy_params) override;

    /// Framebuffer blit between two views (color, depth or depth-stencil).
    void ImageBlit(View& src_view, View& dst_view,
                   const Tegra::Engines::Fermi2D::Config& copy_config) override;

    /// Format-reinterpreting copy through a pixel buffer object.
    void BufferCopy(Surface& src_surface, Surface& dst_surface) override;

private:
    /// Returns a pooled PBO of at least `buffer_size` bytes.
    GLuint FetchPBO(std::size_t buffer_size);

    // Scratch framebuffers used by ImageBlit.
    OGLFramebuffer src_framebuffer;
    OGLFramebuffer dst_framebuffer;
    // PBO pool keyed by power-of-two size class (log2 of the byte size).
    std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
};
| 146 | |||
| 147 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index aafd6f31b..839178152 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -101,21 +101,19 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst | |||
| 101 | 101 | ||
| 102 | RendererOpenGL::~RendererOpenGL() = default; | 102 | RendererOpenGL::~RendererOpenGL() = default; |
| 103 | 103 | ||
| 104 | /// Swap buffers (render frame) | 104 | void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| 105 | void RendererOpenGL::SwapBuffers( | ||
| 106 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | ||
| 107 | |||
| 108 | system.GetPerfStats().EndSystemFrame(); | 105 | system.GetPerfStats().EndSystemFrame(); |
| 109 | 106 | ||
| 110 | // Maintain the rasterizer's state as a priority | 107 | // Maintain the rasterizer's state as a priority |
| 111 | OpenGLState prev_state = OpenGLState::GetCurState(); | 108 | OpenGLState prev_state = OpenGLState::GetCurState(); |
| 109 | state.AllDirty(); | ||
| 112 | state.Apply(); | 110 | state.Apply(); |
| 113 | 111 | ||
| 114 | if (framebuffer) { | 112 | if (framebuffer) { |
| 115 | // If framebuffer is provided, reload it from memory to a texture | 113 | // If framebuffer is provided, reload it from memory to a texture |
| 116 | if (screen_info.texture.width != (GLsizei)framebuffer->get().width || | 114 | if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) || |
| 117 | screen_info.texture.height != (GLsizei)framebuffer->get().height || | 115 | screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) || |
| 118 | screen_info.texture.pixel_format != framebuffer->get().pixel_format) { | 116 | screen_info.texture.pixel_format != framebuffer->pixel_format) { |
| 119 | // Reallocate texture if the framebuffer size has changed. | 117 | // Reallocate texture if the framebuffer size has changed. |
| 120 | // This is expected to not happen very often and hence should not be a | 118 | // This is expected to not happen very often and hence should not be a |
| 121 | // performance problem. | 119 | // performance problem. |
| @@ -130,6 +128,8 @@ void RendererOpenGL::SwapBuffers( | |||
| 130 | 128 | ||
| 131 | DrawScreen(render_window.GetFramebufferLayout()); | 129 | DrawScreen(render_window.GetFramebufferLayout()); |
| 132 | 130 | ||
| 131 | rasterizer->TickFrame(); | ||
| 132 | |||
| 133 | render_window.SwapBuffers(); | 133 | render_window.SwapBuffers(); |
| 134 | } | 134 | } |
| 135 | 135 | ||
| @@ -139,6 +139,7 @@ void RendererOpenGL::SwapBuffers( | |||
| 139 | system.GetPerfStats().BeginSystemFrame(); | 139 | system.GetPerfStats().BeginSystemFrame(); |
| 140 | 140 | ||
| 141 | // Restore the rasterizer state | 141 | // Restore the rasterizer state |
| 142 | prev_state.AllDirty(); | ||
| 142 | prev_state.Apply(); | 143 | prev_state.Apply(); |
| 143 | } | 144 | } |
| 144 | 145 | ||
| @@ -146,43 +147,43 @@ void RendererOpenGL::SwapBuffers( | |||
| 146 | * Loads framebuffer from emulated memory into the active OpenGL texture. | 147 | * Loads framebuffer from emulated memory into the active OpenGL texture. |
| 147 | */ | 148 | */ |
| 148 | void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { | 149 | void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { |
| 149 | const u32 bytes_per_pixel{Tegra::FramebufferConfig::BytesPerPixel(framebuffer.pixel_format)}; | ||
| 150 | const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; | ||
| 151 | const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; | ||
| 152 | |||
| 153 | // Framebuffer orientation handling | 150 | // Framebuffer orientation handling |
| 154 | framebuffer_transform_flags = framebuffer.transform_flags; | 151 | framebuffer_transform_flags = framebuffer.transform_flags; |
| 155 | framebuffer_crop_rect = framebuffer.crop_rect; | 152 | framebuffer_crop_rect = framebuffer.crop_rect; |
| 156 | 153 | ||
| 157 | // Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default | 154 | const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; |
| 158 | // only allows rows to have a memory alignement of 4. | 155 | if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { |
| 159 | ASSERT(framebuffer.stride % 4 == 0); | 156 | return; |
| 160 | 157 | } | |
| 161 | if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { | ||
| 162 | // Reset the screen info's display texture to its own permanent texture | ||
| 163 | screen_info.display_texture = screen_info.texture.resource.handle; | ||
| 164 | |||
| 165 | rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes); | ||
| 166 | |||
| 167 | constexpr u32 linear_bpp = 4; | ||
| 168 | VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear, | ||
| 169 | framebuffer.width, framebuffer.height, bytes_per_pixel, | ||
| 170 | linear_bpp, Memory::GetPointer(framebuffer_addr), | ||
| 171 | gl_framebuffer_data.data()); | ||
| 172 | |||
| 173 | glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); | ||
| 174 | 158 | ||
| 175 | // Update existing texture | 159 | // Reset the screen info's display texture to its own permanent texture |
| 176 | // TODO: Test what happens on hardware when you change the framebuffer dimensions so that | 160 | screen_info.display_texture = screen_info.texture.resource.handle; |
| 177 | // they differ from the LCD resolution. | ||
| 178 | // TODO: Applications could theoretically crash yuzu here by specifying too large | ||
| 179 | // framebuffer sizes. We should make sure that this cannot happen. | ||
| 180 | glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width, | ||
| 181 | framebuffer.height, screen_info.texture.gl_format, | ||
| 182 | screen_info.texture.gl_type, gl_framebuffer_data.data()); | ||
| 183 | 161 | ||
| 184 | glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); | 162 | const auto pixel_format{ |
| 185 | } | 163 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; |
| 164 | const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; | ||
| 165 | const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; | ||
| 166 | const auto host_ptr{Memory::GetPointer(framebuffer_addr)}; | ||
| 167 | rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes); | ||
| 168 | |||
| 169 | // TODO(Rodrigo): Read this from HLE | ||
| 170 | constexpr u32 block_height_log2 = 4; | ||
| 171 | VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, | ||
| 172 | framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, | ||
| 173 | gl_framebuffer_data.data(), host_ptr); | ||
| 174 | |||
| 175 | glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); | ||
| 176 | |||
| 177 | // Update existing texture | ||
| 178 | // TODO: Test what happens on hardware when you change the framebuffer dimensions so that | ||
| 179 | // they differ from the LCD resolution. | ||
| 180 | // TODO: Applications could theoretically crash yuzu here by specifying too large | ||
| 181 | // framebuffer sizes. We should make sure that this cannot happen. | ||
| 182 | glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width, | ||
| 183 | framebuffer.height, screen_info.texture.gl_format, | ||
| 184 | screen_info.texture.gl_type, gl_framebuffer_data.data()); | ||
| 185 | |||
| 186 | glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); | ||
| 186 | } | 187 | } |
| 187 | 188 | ||
| 188 | /** | 189 | /** |
| @@ -205,6 +206,7 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 205 | // Link shaders and get variable locations | 206 | // Link shaders and get variable locations |
| 206 | shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); | 207 | shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); |
| 207 | state.draw.shader_program = shader.handle; | 208 | state.draw.shader_program = shader.handle; |
| 209 | state.AllDirty(); | ||
| 208 | state.Apply(); | 210 | state.Apply(); |
| 209 | uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); | 211 | uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); |
| 210 | uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); | 212 | uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); |
| @@ -262,7 +264,6 @@ void RendererOpenGL::CreateRasterizer() { | |||
| 262 | if (rasterizer) { | 264 | if (rasterizer) { |
| 263 | return; | 265 | return; |
| 264 | } | 266 | } |
| 265 | // Initialize sRGB Usage | ||
| 266 | OpenGLState::ClearsRGBUsed(); | 267 | OpenGLState::ClearsRGBUsed(); |
| 267 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); | 268 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); |
| 268 | } | 269 | } |
| @@ -273,22 +274,29 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, | |||
| 273 | texture.height = framebuffer.height; | 274 | texture.height = framebuffer.height; |
| 274 | texture.pixel_format = framebuffer.pixel_format; | 275 | texture.pixel_format = framebuffer.pixel_format; |
| 275 | 276 | ||
| 277 | const auto pixel_format{ | ||
| 278 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; | ||
| 279 | const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; | ||
| 280 | gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel); | ||
| 281 | |||
| 276 | GLint internal_format; | 282 | GLint internal_format; |
| 277 | switch (framebuffer.pixel_format) { | 283 | switch (framebuffer.pixel_format) { |
| 278 | case Tegra::FramebufferConfig::PixelFormat::ABGR8: | 284 | case Tegra::FramebufferConfig::PixelFormat::ABGR8: |
| 279 | internal_format = GL_RGBA8; | 285 | internal_format = GL_RGBA8; |
| 280 | texture.gl_format = GL_RGBA; | 286 | texture.gl_format = GL_RGBA; |
| 281 | texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; | 287 | texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; |
| 282 | gl_framebuffer_data.resize(texture.width * texture.height * 4); | 288 | break; |
| 289 | case Tegra::FramebufferConfig::PixelFormat::RGB565: | ||
| 290 | internal_format = GL_RGB565; | ||
| 291 | texture.gl_format = GL_RGB; | ||
| 292 | texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; | ||
| 283 | break; | 293 | break; |
| 284 | default: | 294 | default: |
| 285 | internal_format = GL_RGBA8; | 295 | internal_format = GL_RGBA8; |
| 286 | texture.gl_format = GL_RGBA; | 296 | texture.gl_format = GL_RGBA; |
| 287 | texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; | 297 | texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; |
| 288 | gl_framebuffer_data.resize(texture.width * texture.height * 4); | 298 | UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", |
| 289 | LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer pixel format: {}", | 299 | static_cast<u32>(framebuffer.pixel_format)); |
| 290 | static_cast<u32>(framebuffer.pixel_format)); | ||
| 291 | UNREACHABLE(); | ||
| 292 | } | 300 | } |
| 293 | 301 | ||
| 294 | texture.resource.Release(); | 302 | texture.resource.Release(); |
| @@ -334,16 +342,18 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, | |||
| 334 | ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v), | 342 | ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v), |
| 335 | }}; | 343 | }}; |
| 336 | 344 | ||
| 337 | state.texture_units[0].texture = screen_info.display_texture; | 345 | state.textures[0] = screen_info.display_texture; |
| 338 | // Workaround brigthness problems in SMO by enabling sRGB in the final output | 346 | // Workaround brigthness problems in SMO by enabling sRGB in the final output |
| 339 | // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 | 347 | // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 |
| 340 | state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); | 348 | state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); |
| 349 | state.AllDirty(); | ||
| 341 | state.Apply(); | 350 | state.Apply(); |
| 342 | glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); | 351 | glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); |
| 343 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | 352 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); |
| 344 | // Restore default state | 353 | // Restore default state |
| 345 | state.framebuffer_srgb.enabled = false; | 354 | state.framebuffer_srgb.enabled = false; |
| 346 | state.texture_units[0].texture = 0; | 355 | state.textures[0] = 0; |
| 356 | state.AllDirty(); | ||
| 347 | state.Apply(); | 357 | state.Apply(); |
| 348 | // Clear sRGB state for the next frame | 358 | // Clear sRGB state for the next frame |
| 349 | OpenGLState::ClearsRGBUsed(); | 359 | OpenGLState::ClearsRGBUsed(); |
| @@ -388,6 +398,7 @@ void RendererOpenGL::CaptureScreenshot() { | |||
| 388 | GLuint old_read_fb = state.draw.read_framebuffer; | 398 | GLuint old_read_fb = state.draw.read_framebuffer; |
| 389 | GLuint old_draw_fb = state.draw.draw_framebuffer; | 399 | GLuint old_draw_fb = state.draw.draw_framebuffer; |
| 390 | state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; | 400 | state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; |
| 401 | state.AllDirty(); | ||
| 391 | state.Apply(); | 402 | state.Apply(); |
| 392 | 403 | ||
| 393 | Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; | 404 | Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; |
| @@ -407,6 +418,7 @@ void RendererOpenGL::CaptureScreenshot() { | |||
| 407 | screenshot_framebuffer.Release(); | 418 | screenshot_framebuffer.Release(); |
| 408 | state.draw.read_framebuffer = old_read_fb; | 419 | state.draw.read_framebuffer = old_read_fb; |
| 409 | state.draw.draw_framebuffer = old_draw_fb; | 420 | state.draw.draw_framebuffer = old_draw_fb; |
| 421 | state.AllDirty(); | ||
| 410 | state.Apply(); | 422 | state.Apply(); |
| 411 | glDeleteRenderbuffers(1, &renderbuffer); | 423 | glDeleteRenderbuffers(1, &renderbuffer); |
| 412 | 424 | ||
| @@ -471,7 +483,6 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum | |||
| 471 | } | 483 | } |
| 472 | } | 484 | } |
| 473 | 485 | ||
| 474 | /// Initialize the renderer | ||
| 475 | bool RendererOpenGL::Init() { | 486 | bool RendererOpenGL::Init() { |
| 476 | Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window}; | 487 | Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window}; |
| 477 | 488 | ||
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 4aebf2321..9bd086368 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -43,14 +43,13 @@ struct ScreenInfo { | |||
| 43 | TextureInfo texture; | 43 | TextureInfo texture; |
| 44 | }; | 44 | }; |
| 45 | 45 | ||
| 46 | class RendererOpenGL : public VideoCore::RendererBase { | 46 | class RendererOpenGL final : public VideoCore::RendererBase { |
| 47 | public: | 47 | public: |
| 48 | explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); | 48 | explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); |
| 49 | ~RendererOpenGL() override; | 49 | ~RendererOpenGL() override; |
| 50 | 50 | ||
| 51 | /// Swap buffers (render frame) | 51 | /// Swap buffers (render frame) |
| 52 | void SwapBuffers( | 52 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; |
| 53 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; | ||
| 54 | 53 | ||
| 55 | /// Initialize the renderer | 54 | /// Initialize the renderer |
| 56 | bool Init() override; | 55 | bool Init() override; |
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index f23fc9f9d..c504a2c1a 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp | |||
| @@ -5,35 +5,75 @@ | |||
| 5 | #include <string> | 5 | #include <string> |
| 6 | #include <fmt/format.h> | 6 | #include <fmt/format.h> |
| 7 | #include <glad/glad.h> | 7 | #include <glad/glad.h> |
| 8 | |||
| 8 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "common/scope_exit.h" | ||
| 10 | #include "video_core/renderer_opengl/utils.h" | 12 | #include "video_core/renderer_opengl/utils.h" |
| 11 | 13 | ||
| 12 | namespace OpenGL { | 14 | namespace OpenGL { |
| 13 | 15 | ||
| 16 | VertexArrayPushBuffer::VertexArrayPushBuffer() = default; | ||
| 17 | |||
| 18 | VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; | ||
| 19 | |||
| 20 | void VertexArrayPushBuffer::Setup(GLuint vao_) { | ||
| 21 | vao = vao_; | ||
| 22 | index_buffer = nullptr; | ||
| 23 | vertex_buffers.clear(); | ||
| 24 | } | ||
| 25 | |||
| 26 | void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) { | ||
| 27 | index_buffer = buffer; | ||
| 28 | } | ||
| 29 | |||
| 30 | void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer, | ||
| 31 | GLintptr offset, GLsizei stride) { | ||
| 32 | vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride}); | ||
| 33 | } | ||
| 34 | |||
| 35 | void VertexArrayPushBuffer::Bind() { | ||
| 36 | if (index_buffer) { | ||
| 37 | glVertexArrayElementBuffer(vao, *index_buffer); | ||
| 38 | } | ||
| 39 | |||
| 40 | // TODO(Rodrigo): Find a way to ARB_multi_bind this | ||
| 41 | for (const auto& entry : vertex_buffers) { | ||
| 42 | glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset, | ||
| 43 | entry.stride); | ||
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 14 | BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} | 47 | BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} |
| 15 | 48 | ||
| 16 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; | 49 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; |
| 17 | 50 | ||
| 18 | void BindBuffersRangePushBuffer::Setup(GLuint first_) { | 51 | void BindBuffersRangePushBuffer::Setup(GLuint first_) { |
| 19 | first = first_; | 52 | first = first_; |
| 20 | buffers.clear(); | 53 | buffer_pointers.clear(); |
| 21 | offsets.clear(); | 54 | offsets.clear(); |
| 22 | sizes.clear(); | 55 | sizes.clear(); |
| 23 | } | 56 | } |
| 24 | 57 | ||
| 25 | void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) { | 58 | void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) { |
| 26 | buffers.push_back(buffer); | 59 | buffer_pointers.push_back(buffer); |
| 27 | offsets.push_back(offset); | 60 | offsets.push_back(offset); |
| 28 | sizes.push_back(size); | 61 | sizes.push_back(size); |
| 29 | } | 62 | } |
| 30 | 63 | ||
| 31 | void BindBuffersRangePushBuffer::Bind() const { | 64 | void BindBuffersRangePushBuffer::Bind() { |
| 32 | const std::size_t count{buffers.size()}; | 65 | // Ensure sizes are valid. |
| 66 | const std::size_t count{buffer_pointers.size()}; | ||
| 33 | DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); | 67 | DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); |
| 34 | if (count == 0) { | 68 | if (count == 0) { |
| 35 | return; | 69 | return; |
| 36 | } | 70 | } |
| 71 | |||
| 72 | // Dereference buffers. | ||
| 73 | buffers.resize(count); | ||
| 74 | std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(), | ||
| 75 | [](const GLuint* pointer) { return *pointer; }); | ||
| 76 | |||
| 37 | glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), | 77 | glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), |
| 38 | sizes.data()); | 78 | sizes.data()); |
| 39 | } | 79 | } |
| @@ -63,4 +103,4 @@ void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_vie | |||
| 63 | glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str())); | 103 | glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str())); |
| 64 | } | 104 | } |
| 65 | 105 | ||
| 66 | } // namespace OpenGL \ No newline at end of file | 106 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index b3e9fc499..6c2b45546 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h | |||
| @@ -11,20 +11,49 @@ | |||
| 11 | 11 | ||
| 12 | namespace OpenGL { | 12 | namespace OpenGL { |
| 13 | 13 | ||
| 14 | class BindBuffersRangePushBuffer { | 14 | class VertexArrayPushBuffer final { |
| 15 | public: | 15 | public: |
| 16 | BindBuffersRangePushBuffer(GLenum target); | 16 | explicit VertexArrayPushBuffer(); |
| 17 | ~VertexArrayPushBuffer(); | ||
| 18 | |||
| 19 | void Setup(GLuint vao_); | ||
| 20 | |||
| 21 | void SetIndexBuffer(const GLuint* buffer); | ||
| 22 | |||
| 23 | void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset, | ||
| 24 | GLsizei stride); | ||
| 25 | |||
| 26 | void Bind(); | ||
| 27 | |||
| 28 | private: | ||
| 29 | struct Entry { | ||
| 30 | GLuint binding_index{}; | ||
| 31 | const GLuint* buffer{}; | ||
| 32 | GLintptr offset{}; | ||
| 33 | GLsizei stride{}; | ||
| 34 | }; | ||
| 35 | |||
| 36 | GLuint vao{}; | ||
| 37 | const GLuint* index_buffer{}; | ||
| 38 | std::vector<Entry> vertex_buffers; | ||
| 39 | }; | ||
| 40 | |||
| 41 | class BindBuffersRangePushBuffer final { | ||
| 42 | public: | ||
| 43 | explicit BindBuffersRangePushBuffer(GLenum target); | ||
| 17 | ~BindBuffersRangePushBuffer(); | 44 | ~BindBuffersRangePushBuffer(); |
| 18 | 45 | ||
| 19 | void Setup(GLuint first_); | 46 | void Setup(GLuint first_); |
| 20 | 47 | ||
| 21 | void Push(GLuint buffer, GLintptr offset, GLsizeiptr size); | 48 | void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size); |
| 22 | 49 | ||
| 23 | void Bind() const; | 50 | void Bind(); |
| 24 | 51 | ||
| 25 | private: | 52 | private: |
| 26 | GLenum target; | 53 | GLenum target{}; |
| 27 | GLuint first; | 54 | GLuint first{}; |
| 55 | std::vector<const GLuint*> buffer_pointers; | ||
| 56 | |||
| 28 | std::vector<GLuint> buffers; | 57 | std::vector<GLuint> buffers; |
| 29 | std::vector<GLintptr> offsets; | 58 | std::vector<GLintptr> offsets; |
| 30 | std::vector<GLsizeiptr> sizes; | 59 | std::vector<GLsizeiptr> sizes; |
| @@ -32,4 +61,4 @@ private: | |||
| 32 | 61 | ||
| 33 | void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); | 62 | void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); |
| 34 | 63 | ||
| 35 | } // namespace OpenGL \ No newline at end of file | 64 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 02a9f5ecb..d2e9f4031 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -109,8 +109,8 @@ void VKBufferCache::Reserve(std::size_t max_size) { | |||
| 109 | } | 109 | } |
| 110 | } | 110 | } |
| 111 | 111 | ||
| 112 | VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) { | 112 | void VKBufferCache::Send() { |
| 113 | return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base); | 113 | stream_buffer->Send(buffer_offset - buffer_offset_base); |
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | void VKBufferCache::AlignBuffer(std::size_t alignment) { | 116 | void VKBufferCache::AlignBuffer(std::size_t alignment) { |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3edf460df..49f13bcdc 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -77,7 +77,7 @@ public: | |||
| 77 | void Reserve(std::size_t max_size); | 77 | void Reserve(std::size_t max_size); |
| 78 | 78 | ||
| 79 | /// Ensures that the set data is sent to the device. | 79 | /// Ensures that the set data is sent to the device. |
| 80 | [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx); | 80 | void Send(); |
| 81 | 81 | ||
| 82 | /// Returns the buffer cache handle. | 82 | /// Returns the buffer cache handle. |
| 83 | vk::Buffer GetBuffer() const { | 83 | vk::Buffer GetBuffer() const { |
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h index 771b05c73..1f73b716b 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.h +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h | |||
| @@ -4,9 +4,6 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/renderer_vulkan/declarations.h" | 7 | #include "video_core/renderer_vulkan/declarations.h" |
| 11 | #include "video_core/sampler_cache.h" | 8 | #include "video_core/sampler_cache.h" |
| 12 | #include "video_core/textures/texture.h" | 9 | #include "video_core/textures/texture.h" |
| @@ -21,9 +18,9 @@ public: | |||
| 21 | ~VKSamplerCache(); | 18 | ~VKSamplerCache(); |
| 22 | 19 | ||
| 23 | protected: | 20 | protected: |
| 24 | UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; | 21 | UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; |
| 25 | 22 | ||
| 26 | vk::Sampler ToSamplerType(const UniqueSampler& sampler) const; | 23 | vk::Sampler ToSamplerType(const UniqueSampler& sampler) const override; |
| 27 | 24 | ||
| 28 | private: | 25 | private: |
| 29 | const VKDevice& device; | 26 | const VKDevice& device; |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index f1fea1871..0f8116458 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -19,23 +19,19 @@ VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_man | |||
| 19 | 19 | ||
| 20 | VKScheduler::~VKScheduler() = default; | 20 | VKScheduler::~VKScheduler() = default; |
| 21 | 21 | ||
| 22 | VKExecutionContext VKScheduler::GetExecutionContext() const { | 22 | void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) { |
| 23 | return VKExecutionContext(current_fence, current_cmdbuf); | ||
| 24 | } | ||
| 25 | |||
| 26 | VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) { | ||
| 27 | SubmitExecution(semaphore); | 23 | SubmitExecution(semaphore); |
| 28 | current_fence->Release(); | 24 | if (release_fence) |
| 25 | current_fence->Release(); | ||
| 29 | AllocateNewContext(); | 26 | AllocateNewContext(); |
| 30 | return GetExecutionContext(); | ||
| 31 | } | 27 | } |
| 32 | 28 | ||
| 33 | VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) { | 29 | void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) { |
| 34 | SubmitExecution(semaphore); | 30 | SubmitExecution(semaphore); |
| 35 | current_fence->Wait(); | 31 | current_fence->Wait(); |
| 36 | current_fence->Release(); | 32 | if (release_fence) |
| 33 | current_fence->Release(); | ||
| 37 | AllocateNewContext(); | 34 | AllocateNewContext(); |
| 38 | return GetExecutionContext(); | ||
| 39 | } | 35 | } |
| 40 | 36 | ||
| 41 | void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { | 37 | void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index cfaf5376f..0e5b49c7f 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -10,10 +10,43 @@ | |||
| 10 | namespace Vulkan { | 10 | namespace Vulkan { |
| 11 | 11 | ||
| 12 | class VKDevice; | 12 | class VKDevice; |
| 13 | class VKExecutionContext; | ||
| 14 | class VKFence; | 13 | class VKFence; |
| 15 | class VKResourceManager; | 14 | class VKResourceManager; |
| 16 | 15 | ||
| 16 | class VKFenceView { | ||
| 17 | public: | ||
| 18 | VKFenceView() = default; | ||
| 19 | VKFenceView(VKFence* const& fence) : fence{fence} {} | ||
| 20 | |||
| 21 | VKFence* operator->() const noexcept { | ||
| 22 | return fence; | ||
| 23 | } | ||
| 24 | |||
| 25 | operator VKFence&() const noexcept { | ||
| 26 | return *fence; | ||
| 27 | } | ||
| 28 | |||
| 29 | private: | ||
| 30 | VKFence* const& fence; | ||
| 31 | }; | ||
| 32 | |||
| 33 | class VKCommandBufferView { | ||
| 34 | public: | ||
| 35 | VKCommandBufferView() = default; | ||
| 36 | VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {} | ||
| 37 | |||
| 38 | const vk::CommandBuffer* operator->() const noexcept { | ||
| 39 | return &cmdbuf; | ||
| 40 | } | ||
| 41 | |||
| 42 | operator vk::CommandBuffer() const noexcept { | ||
| 43 | return cmdbuf; | ||
| 44 | } | ||
| 45 | |||
| 46 | private: | ||
| 47 | const vk::CommandBuffer& cmdbuf; | ||
| 48 | }; | ||
| 49 | |||
| 17 | /// The scheduler abstracts command buffer and fence management with an interface that's able to do | 50 | /// The scheduler abstracts command buffer and fence management with an interface that's able to do |
| 18 | /// OpenGL-like operations on Vulkan command buffers. | 51 | /// OpenGL-like operations on Vulkan command buffers. |
| 19 | class VKScheduler { | 52 | class VKScheduler { |
| @@ -21,16 +54,21 @@ public: | |||
| 21 | explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); | 54 | explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); |
| 22 | ~VKScheduler(); | 55 | ~VKScheduler(); |
| 23 | 56 | ||
| 24 | /// Gets the current execution context. | 57 | /// Gets a reference to the current fence. |
| 25 | [[nodiscard]] VKExecutionContext GetExecutionContext() const; | 58 | VKFenceView GetFence() const { |
| 59 | return current_fence; | ||
| 60 | } | ||
| 61 | |||
| 62 | /// Gets a reference to the current command buffer. | ||
| 63 | VKCommandBufferView GetCommandBuffer() const { | ||
| 64 | return current_cmdbuf; | ||
| 65 | } | ||
| 26 | 66 | ||
| 27 | /// Sends the current execution context to the GPU. It invalidates the current execution context | 67 | /// Sends the current execution context to the GPU. |
| 28 | /// and returns a new one. | 68 | void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr); |
| 29 | VKExecutionContext Flush(vk::Semaphore semaphore = nullptr); | ||
| 30 | 69 | ||
| 31 | /// Sends the current execution context to the GPU and waits for it to complete. It invalidates | 70 | /// Sends the current execution context to the GPU and waits for it to complete. |
| 32 | /// the current execution context and returns a new one. | 71 | void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr); |
| 33 | VKExecutionContext Finish(vk::Semaphore semaphore = nullptr); | ||
| 34 | 72 | ||
| 35 | private: | 73 | private: |
| 36 | void SubmitExecution(vk::Semaphore semaphore); | 74 | void SubmitExecution(vk::Semaphore semaphore); |
| @@ -44,26 +82,4 @@ private: | |||
| 44 | VKFence* next_fence = nullptr; | 82 | VKFence* next_fence = nullptr; |
| 45 | }; | 83 | }; |
| 46 | 84 | ||
| 47 | class VKExecutionContext { | ||
| 48 | friend class VKScheduler; | ||
| 49 | |||
| 50 | public: | ||
| 51 | VKExecutionContext() = default; | ||
| 52 | |||
| 53 | VKFence& GetFence() const { | ||
| 54 | return *fence; | ||
| 55 | } | ||
| 56 | |||
| 57 | vk::CommandBuffer GetCommandBuffer() const { | ||
| 58 | return cmdbuf; | ||
| 59 | } | ||
| 60 | |||
| 61 | private: | ||
| 62 | explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf) | ||
| 63 | : fence{fence}, cmdbuf{cmdbuf} {} | ||
| 64 | |||
| 65 | VKFence* fence{}; | ||
| 66 | vk::CommandBuffer cmdbuf; | ||
| 67 | }; | ||
| 68 | |||
| 69 | } // namespace Vulkan | 85 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 33ad9764a..7675fc7b3 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -205,10 +205,6 @@ public: | |||
| 205 | } | 205 | } |
| 206 | 206 | ||
| 207 | private: | 207 | private: |
| 208 | using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation); | ||
| 209 | using OperationDecompilersArray = | ||
| 210 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; | ||
| 211 | |||
| 212 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); | 208 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); |
| 213 | 209 | ||
| 214 | void AllocateBindings() { | 210 | void AllocateBindings() { |
| @@ -430,20 +426,17 @@ private: | |||
| 430 | instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input, | 426 | instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input, |
| 431 | t_in_uint, "instance_index"); | 427 | t_in_uint, "instance_index"); |
| 432 | 428 | ||
| 433 | bool is_point_size_declared = false; | ||
| 434 | bool is_clip_distances_declared = false; | 429 | bool is_clip_distances_declared = false; |
| 435 | for (const auto index : ir.GetOutputAttributes()) { | 430 | for (const auto index : ir.GetOutputAttributes()) { |
| 436 | if (index == Attribute::Index::PointSize) { | 431 | if (index == Attribute::Index::ClipDistances0123 || |
| 437 | is_point_size_declared = true; | 432 | index == Attribute::Index::ClipDistances4567) { |
| 438 | } else if (index == Attribute::Index::ClipDistances0123 || | ||
| 439 | index == Attribute::Index::ClipDistances4567) { | ||
| 440 | is_clip_distances_declared = true; | 433 | is_clip_distances_declared = true; |
| 441 | } | 434 | } |
| 442 | } | 435 | } |
| 443 | 436 | ||
| 444 | std::vector<Id> members; | 437 | std::vector<Id> members; |
| 445 | members.push_back(t_float4); | 438 | members.push_back(t_float4); |
| 446 | if (is_point_size_declared) { | 439 | if (ir.UsesPointSize()) { |
| 447 | members.push_back(t_float); | 440 | members.push_back(t_float); |
| 448 | } | 441 | } |
| 449 | if (is_clip_distances_declared) { | 442 | if (is_clip_distances_declared) { |
| @@ -466,7 +459,7 @@ private: | |||
| 466 | 459 | ||
| 467 | position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true); | 460 | position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true); |
| 468 | point_size_index = | 461 | point_size_index = |
| 469 | MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", is_point_size_declared); | 462 | MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", ir.UsesPointSize()); |
| 470 | clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances", | 463 | clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances", |
| 471 | is_clip_distances_declared); | 464 | is_clip_distances_declared); |
| 472 | 465 | ||
| @@ -712,7 +705,8 @@ private: | |||
| 712 | case Attribute::Index::Position: | 705 | case Attribute::Index::Position: |
| 713 | return AccessElement(t_out_float, per_vertex, position_index, | 706 | return AccessElement(t_out_float, per_vertex, position_index, |
| 714 | abuf->GetElement()); | 707 | abuf->GetElement()); |
| 715 | case Attribute::Index::PointSize: | 708 | case Attribute::Index::LayerViewportPointSize: |
| 709 | UNIMPLEMENTED_IF(abuf->GetElement() != 3); | ||
| 716 | return AccessElement(t_out_float, per_vertex, point_size_index); | 710 | return AccessElement(t_out_float, per_vertex, point_size_index); |
| 717 | case Attribute::Index::ClipDistances0123: | 711 | case Attribute::Index::ClipDistances0123: |
| 718 | return AccessElement(t_out_float, per_vertex, clip_distances_index, | 712 | return AccessElement(t_out_float, per_vertex, clip_distances_index, |
| @@ -741,6 +735,16 @@ private: | |||
| 741 | return {}; | 735 | return {}; |
| 742 | } | 736 | } |
| 743 | 737 | ||
| 738 | Id FCastHalf0(Operation operation) { | ||
| 739 | UNIMPLEMENTED(); | ||
| 740 | return {}; | ||
| 741 | } | ||
| 742 | |||
| 743 | Id FCastHalf1(Operation operation) { | ||
| 744 | UNIMPLEMENTED(); | ||
| 745 | return {}; | ||
| 746 | } | ||
| 747 | |||
| 744 | Id HNegate(Operation operation) { | 748 | Id HNegate(Operation operation) { |
| 745 | UNIMPLEMENTED(); | 749 | UNIMPLEMENTED(); |
| 746 | return {}; | 750 | return {}; |
| @@ -751,6 +755,11 @@ private: | |||
| 751 | return {}; | 755 | return {}; |
| 752 | } | 756 | } |
| 753 | 757 | ||
| 758 | Id HCastFloat(Operation operation) { | ||
| 759 | UNIMPLEMENTED(); | ||
| 760 | return {}; | ||
| 761 | } | ||
| 762 | |||
| 754 | Id HUnpack(Operation operation) { | 763 | Id HUnpack(Operation operation) { |
| 755 | UNIMPLEMENTED(); | 764 | UNIMPLEMENTED(); |
| 756 | return {}; | 765 | return {}; |
| @@ -806,12 +815,7 @@ private: | |||
| 806 | return {}; | 815 | return {}; |
| 807 | } | 816 | } |
| 808 | 817 | ||
| 809 | Id LogicalAll2(Operation operation) { | 818 | Id LogicalAnd2(Operation operation) { |
| 810 | UNIMPLEMENTED(); | ||
| 811 | return {}; | ||
| 812 | } | ||
| 813 | |||
| 814 | Id LogicalAny2(Operation operation) { | ||
| 815 | UNIMPLEMENTED(); | 819 | UNIMPLEMENTED(); |
| 816 | return {}; | 820 | return {}; |
| 817 | } | 821 | } |
| @@ -935,6 +939,46 @@ private: | |||
| 935 | return {}; | 939 | return {}; |
| 936 | } | 940 | } |
| 937 | 941 | ||
| 942 | Id ImageStore(Operation operation) { | ||
| 943 | UNIMPLEMENTED(); | ||
| 944 | return {}; | ||
| 945 | } | ||
| 946 | |||
| 947 | Id AtomicImageAdd(Operation operation) { | ||
| 948 | UNIMPLEMENTED(); | ||
| 949 | return {}; | ||
| 950 | } | ||
| 951 | |||
| 952 | Id AtomicImageMin(Operation operation) { | ||
| 953 | UNIMPLEMENTED(); | ||
| 954 | return {}; | ||
| 955 | } | ||
| 956 | |||
| 957 | Id AtomicImageMax(Operation operation) { | ||
| 958 | UNIMPLEMENTED(); | ||
| 959 | return {}; | ||
| 960 | } | ||
| 961 | |||
| 962 | Id AtomicImageAnd(Operation operation) { | ||
| 963 | UNIMPLEMENTED(); | ||
| 964 | return {}; | ||
| 965 | } | ||
| 966 | |||
| 967 | Id AtomicImageOr(Operation operation) { | ||
| 968 | UNIMPLEMENTED(); | ||
| 969 | return {}; | ||
| 970 | } | ||
| 971 | |||
| 972 | Id AtomicImageXor(Operation operation) { | ||
| 973 | UNIMPLEMENTED(); | ||
| 974 | return {}; | ||
| 975 | } | ||
| 976 | |||
| 977 | Id AtomicImageExchange(Operation operation) { | ||
| 978 | UNIMPLEMENTED(); | ||
| 979 | return {}; | ||
| 980 | } | ||
| 981 | |||
| 938 | Id Branch(Operation operation) { | 982 | Id Branch(Operation operation) { |
| 939 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | 983 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); |
| 940 | UNIMPLEMENTED_IF(!target); | 984 | UNIMPLEMENTED_IF(!target); |
| @@ -944,6 +988,14 @@ private: | |||
| 944 | return {}; | 988 | return {}; |
| 945 | } | 989 | } |
| 946 | 990 | ||
| 991 | Id BranchIndirect(Operation operation) { | ||
| 992 | const Id op_a = VisitOperand<Type::Uint>(operation, 0); | ||
| 993 | |||
| 994 | Emit(OpStore(jmp_to, op_a)); | ||
| 995 | BranchingOp([&]() { Emit(OpBranch(continue_label)); }); | ||
| 996 | return {}; | ||
| 997 | } | ||
| 998 | |||
| 947 | Id PushFlowStack(Operation operation) { | 999 | Id PushFlowStack(Operation operation) { |
| 948 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | 1000 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); |
| 949 | ASSERT(target); | 1001 | ASSERT(target); |
| @@ -1055,6 +1107,26 @@ private: | |||
| 1055 | return {}; | 1107 | return {}; |
| 1056 | } | 1108 | } |
| 1057 | 1109 | ||
| 1110 | Id BallotThread(Operation) { | ||
| 1111 | UNIMPLEMENTED(); | ||
| 1112 | return {}; | ||
| 1113 | } | ||
| 1114 | |||
| 1115 | Id VoteAll(Operation) { | ||
| 1116 | UNIMPLEMENTED(); | ||
| 1117 | return {}; | ||
| 1118 | } | ||
| 1119 | |||
| 1120 | Id VoteAny(Operation) { | ||
| 1121 | UNIMPLEMENTED(); | ||
| 1122 | return {}; | ||
| 1123 | } | ||
| 1124 | |||
| 1125 | Id VoteEqual(Operation) { | ||
| 1126 | UNIMPLEMENTED(); | ||
| 1127 | return {}; | ||
| 1128 | } | ||
| 1129 | |||
| 1058 | Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, | 1130 | Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, |
| 1059 | const std::string& name) { | 1131 | const std::string& name) { |
| 1060 | const Id id = OpVariable(type, storage); | 1132 | const Id id = OpVariable(type, storage); |
| @@ -1195,7 +1267,7 @@ private: | |||
| 1195 | return {}; | 1267 | return {}; |
| 1196 | } | 1268 | } |
| 1197 | 1269 | ||
| 1198 | static constexpr OperationDecompilersArray operation_decompilers = { | 1270 | static constexpr std::array operation_decompilers = { |
| 1199 | &SPIRVDecompiler::Assign, | 1271 | &SPIRVDecompiler::Assign, |
| 1200 | 1272 | ||
| 1201 | &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, | 1273 | &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, |
| @@ -1208,6 +1280,8 @@ private: | |||
| 1208 | &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, | 1280 | &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, |
| 1209 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, | 1281 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, |
| 1210 | &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, | 1282 | &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, |
| 1283 | &SPIRVDecompiler::FCastHalf0, | ||
| 1284 | &SPIRVDecompiler::FCastHalf1, | ||
| 1211 | &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, | 1285 | &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, |
| 1212 | &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, | 1286 | &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, |
| 1213 | &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, | 1287 | &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, |
| @@ -1268,6 +1342,7 @@ private: | |||
| 1268 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, | 1342 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, |
| 1269 | &SPIRVDecompiler::HNegate, | 1343 | &SPIRVDecompiler::HNegate, |
| 1270 | &SPIRVDecompiler::HClamp, | 1344 | &SPIRVDecompiler::HClamp, |
| 1345 | &SPIRVDecompiler::HCastFloat, | ||
| 1271 | &SPIRVDecompiler::HUnpack, | 1346 | &SPIRVDecompiler::HUnpack, |
| 1272 | &SPIRVDecompiler::HMergeF32, | 1347 | &SPIRVDecompiler::HMergeF32, |
| 1273 | &SPIRVDecompiler::HMergeH0, | 1348 | &SPIRVDecompiler::HMergeH0, |
| @@ -1280,8 +1355,7 @@ private: | |||
| 1280 | &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, | 1355 | &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, |
| 1281 | &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, | 1356 | &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, |
| 1282 | &SPIRVDecompiler::LogicalPick2, | 1357 | &SPIRVDecompiler::LogicalPick2, |
| 1283 | &SPIRVDecompiler::LogicalAll2, | 1358 | &SPIRVDecompiler::LogicalAnd2, |
| 1284 | &SPIRVDecompiler::LogicalAny2, | ||
| 1285 | 1359 | ||
| 1286 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, | 1360 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, |
| 1287 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, | 1361 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, |
| @@ -1326,7 +1400,17 @@ private: | |||
| 1326 | &SPIRVDecompiler::TextureQueryLod, | 1400 | &SPIRVDecompiler::TextureQueryLod, |
| 1327 | &SPIRVDecompiler::TexelFetch, | 1401 | &SPIRVDecompiler::TexelFetch, |
| 1328 | 1402 | ||
| 1403 | &SPIRVDecompiler::ImageStore, | ||
| 1404 | &SPIRVDecompiler::AtomicImageAdd, | ||
| 1405 | &SPIRVDecompiler::AtomicImageMin, | ||
| 1406 | &SPIRVDecompiler::AtomicImageMax, | ||
| 1407 | &SPIRVDecompiler::AtomicImageAnd, | ||
| 1408 | &SPIRVDecompiler::AtomicImageOr, | ||
| 1409 | &SPIRVDecompiler::AtomicImageXor, | ||
| 1410 | &SPIRVDecompiler::AtomicImageExchange, | ||
| 1411 | |||
| 1329 | &SPIRVDecompiler::Branch, | 1412 | &SPIRVDecompiler::Branch, |
| 1413 | &SPIRVDecompiler::BranchIndirect, | ||
| 1330 | &SPIRVDecompiler::PushFlowStack, | 1414 | &SPIRVDecompiler::PushFlowStack, |
| 1331 | &SPIRVDecompiler::PopFlowStack, | 1415 | &SPIRVDecompiler::PopFlowStack, |
| 1332 | &SPIRVDecompiler::Exit, | 1416 | &SPIRVDecompiler::Exit, |
| @@ -1342,7 +1426,13 @@ private: | |||
| 1342 | &SPIRVDecompiler::WorkGroupId<0>, | 1426 | &SPIRVDecompiler::WorkGroupId<0>, |
| 1343 | &SPIRVDecompiler::WorkGroupId<1>, | 1427 | &SPIRVDecompiler::WorkGroupId<1>, |
| 1344 | &SPIRVDecompiler::WorkGroupId<2>, | 1428 | &SPIRVDecompiler::WorkGroupId<2>, |
| 1429 | |||
| 1430 | &SPIRVDecompiler::BallotThread, | ||
| 1431 | &SPIRVDecompiler::VoteAll, | ||
| 1432 | &SPIRVDecompiler::VoteAny, | ||
| 1433 | &SPIRVDecompiler::VoteEqual, | ||
| 1345 | }; | 1434 | }; |
| 1435 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | ||
| 1346 | 1436 | ||
| 1347 | const VKDevice& device; | 1437 | const VKDevice& device; |
| 1348 | const ShaderIR& ir; | 1438 | const ShaderIR& ir; |
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 58ffa42f2..62f1427f5 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp | |||
| @@ -46,12 +46,12 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) { | |||
| 46 | return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; | 46 | return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) { | 49 | void VKStreamBuffer::Send(u64 size) { |
| 50 | ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); | 50 | ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); |
| 51 | 51 | ||
| 52 | if (invalidation_mark) { | 52 | if (invalidation_mark) { |
| 53 | // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. | 53 | // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. |
| 54 | exctx = scheduler.Flush(); | 54 | scheduler.Flush(); |
| 55 | std::for_each(watches.begin(), watches.begin() + *invalidation_mark, | 55 | std::for_each(watches.begin(), watches.begin() + *invalidation_mark, |
| 56 | [&](auto& resource) { resource->Wait(); }); | 56 | [&](auto& resource) { resource->Wait(); }); |
| 57 | invalidation_mark = std::nullopt; | 57 | invalidation_mark = std::nullopt; |
| @@ -62,11 +62,9 @@ VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) { | |||
| 62 | ReserveWatches(WATCHES_RESERVE_CHUNK); | 62 | ReserveWatches(WATCHES_RESERVE_CHUNK); |
| 63 | } | 63 | } |
| 64 | // Add a watch for this allocation. | 64 | // Add a watch for this allocation. |
| 65 | watches[used_watches++]->Watch(exctx.GetFence()); | 65 | watches[used_watches++]->Watch(scheduler.GetFence()); |
| 66 | 66 | ||
| 67 | offset += size; | 67 | offset += size; |
| 68 | |||
| 69 | return exctx; | ||
| 70 | } | 68 | } |
| 71 | 69 | ||
| 72 | void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { | 70 | void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { |
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 69d036ccd..842e54162 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h | |||
| @@ -37,7 +37,7 @@ public: | |||
| 37 | std::tuple<u8*, u64, bool> Reserve(u64 size); | 37 | std::tuple<u8*, u64, bool> Reserve(u64 size); |
| 38 | 38 | ||
| 39 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. | 39 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. |
| 40 | [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size); | 40 | void Send(u64 size); |
| 41 | 41 | ||
| 42 | vk::Buffer GetBuffer() const { | 42 | vk::Buffer GetBuffer() const { |
| 43 | return *buffer; | 43 | return *buffer; |
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp new file mode 100644 index 000000000..ec3a76690 --- /dev/null +++ b/src/video_core/shader/control_flow.cpp | |||
| @@ -0,0 +1,481 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <list> | ||
| 6 | #include <map> | ||
| 7 | #include <stack> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include <unordered_set> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/shader/control_flow.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | |||
| 17 | namespace VideoCommon::Shader { | ||
| 18 | namespace { | ||
| 19 | using Tegra::Shader::Instruction; | ||
| 20 | using Tegra::Shader::OpCode; | ||
| 21 | |||
| 22 | constexpr s32 unassigned_branch = -2; | ||
| 23 | |||
| 24 | struct Query { | ||
| 25 | u32 address{}; | ||
| 26 | std::stack<u32> ssy_stack{}; | ||
| 27 | std::stack<u32> pbk_stack{}; | ||
| 28 | }; | ||
| 29 | |||
| 30 | struct BlockStack { | ||
| 31 | BlockStack() = default; | ||
| 32 | explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} | ||
| 33 | std::stack<u32> ssy_stack{}; | ||
| 34 | std::stack<u32> pbk_stack{}; | ||
| 35 | }; | ||
| 36 | |||
| 37 | struct BlockBranchInfo { | ||
| 38 | Condition condition{}; | ||
| 39 | s32 address{exit_branch}; | ||
| 40 | bool kill{}; | ||
| 41 | bool is_sync{}; | ||
| 42 | bool is_brk{}; | ||
| 43 | bool ignore{}; | ||
| 44 | }; | ||
| 45 | |||
| 46 | struct BlockInfo { | ||
| 47 | u32 start{}; | ||
| 48 | u32 end{}; | ||
| 49 | bool visited{}; | ||
| 50 | BlockBranchInfo branch{}; | ||
| 51 | |||
| 52 | bool IsInside(const u32 address) const { | ||
| 53 | return start <= address && address <= end; | ||
| 54 | } | ||
| 55 | }; | ||
| 56 | |||
| 57 | struct CFGRebuildState { | ||
| 58 | explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, | ||
| 59 | const u32 start) | ||
| 60 | : start{start}, program_code{program_code}, program_size{program_size} {} | ||
| 61 | |||
| 62 | u32 start{}; | ||
| 63 | std::vector<BlockInfo> block_info{}; | ||
| 64 | std::list<u32> inspect_queries{}; | ||
| 65 | std::list<Query> queries{}; | ||
| 66 | std::unordered_map<u32, u32> registered{}; | ||
| 67 | std::unordered_set<u32> labels{}; | ||
| 68 | std::map<u32, u32> ssy_labels{}; | ||
| 69 | std::map<u32, u32> pbk_labels{}; | ||
| 70 | std::unordered_map<u32, BlockStack> stacks{}; | ||
| 71 | const ProgramCode& program_code; | ||
| 72 | const std::size_t program_size; | ||
| 73 | }; | ||
| 74 | |||
| 75 | enum class BlockCollision : u32 { None, Found, Inside }; | ||
| 76 | |||
| 77 | std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) { | ||
| 78 | const auto& blocks = state.block_info; | ||
| 79 | for (u32 index = 0; index < blocks.size(); index++) { | ||
| 80 | if (blocks[index].start == address) { | ||
| 81 | return {BlockCollision::Found, index}; | ||
| 82 | } | ||
| 83 | if (blocks[index].IsInside(address)) { | ||
| 84 | return {BlockCollision::Inside, index}; | ||
| 85 | } | ||
| 86 | } | ||
| 87 | return {BlockCollision::None, 0xFFFFFFFF}; | ||
| 88 | } | ||
| 89 | |||
| 90 | struct ParseInfo { | ||
| 91 | BlockBranchInfo branch_info{}; | ||
| 92 | u32 end_address{}; | ||
| 93 | }; | ||
| 94 | |||
| 95 | BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { | ||
| 96 | auto& it = state.block_info.emplace_back(); | ||
| 97 | it.start = start; | ||
| 98 | it.end = end; | ||
| 99 | const u32 index = static_cast<u32>(state.block_info.size() - 1); | ||
| 100 | state.registered.insert({start, index}); | ||
| 101 | return it; | ||
| 102 | } | ||
| 103 | |||
| 104 | Pred GetPredicate(u32 index, bool negated) { | ||
| 105 | return static_cast<Pred>(index + (negated ? 8 : 0)); | ||
| 106 | } | ||
| 107 | |||
| 108 | /** | ||
| 109 | * Returns whether the instruction at the specified offset is a 'sched' instruction. | ||
| 110 | * Sched instructions always appear before a sequence of 3 instructions. | ||
| 111 | */ | ||
| 112 | constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | ||
| 113 | constexpr u32 SchedPeriod = 4; | ||
| 114 | u32 absolute_offset = offset - main_offset; | ||
| 115 | |||
| 116 | return (absolute_offset % SchedPeriod) == 0; | ||
| 117 | } | ||
| 118 | |||
| 119 | enum class ParseResult : u32 { | ||
| 120 | ControlCaught, | ||
| 121 | BlockEnd, | ||
| 122 | AbnormalFlow, | ||
| 123 | }; | ||
| 124 | |||
| 125 | std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { | ||
| 126 | u32 offset = static_cast<u32>(address); | ||
| 127 | const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction)); | ||
| 128 | ParseInfo parse_info{}; | ||
| 129 | |||
| 130 | const auto insert_label = [](CFGRebuildState& state, u32 address) { | ||
| 131 | const auto pair = state.labels.emplace(address); | ||
| 132 | if (pair.second) { | ||
| 133 | state.inspect_queries.push_back(address); | ||
| 134 | } | ||
| 135 | }; | ||
| 136 | |||
| 137 | while (true) { | ||
| 138 | if (offset >= end_address) { | ||
| 139 | // ASSERT_OR_EXECUTE can't be used, as it ignores the break | ||
| 140 | ASSERT_MSG(false, "Shader passed the current limit!"); | ||
| 141 | parse_info.branch_info.address = exit_branch; | ||
| 142 | parse_info.branch_info.ignore = false; | ||
| 143 | break; | ||
| 144 | } | ||
| 145 | if (state.registered.count(offset) != 0) { | ||
| 146 | parse_info.branch_info.address = offset; | ||
| 147 | parse_info.branch_info.ignore = true; | ||
| 148 | break; | ||
| 149 | } | ||
| 150 | if (IsSchedInstruction(offset, state.start)) { | ||
| 151 | offset++; | ||
| 152 | continue; | ||
| 153 | } | ||
| 154 | const Instruction instr = {state.program_code[offset]}; | ||
| 155 | const auto opcode = OpCode::Decode(instr); | ||
| 156 | if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { | ||
| 157 | offset++; | ||
| 158 | continue; | ||
| 159 | } | ||
| 160 | |||
| 161 | switch (opcode->get().GetId()) { | ||
| 162 | case OpCode::Id::EXIT: { | ||
| 163 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 164 | parse_info.branch_info.condition.predicate = | ||
| 165 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 166 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 167 | offset++; | ||
| 168 | continue; | ||
| 169 | } | ||
| 170 | const ConditionCode cc = instr.flow_condition_code; | ||
| 171 | parse_info.branch_info.condition.cc = cc; | ||
| 172 | if (cc == ConditionCode::F) { | ||
| 173 | offset++; | ||
| 174 | continue; | ||
| 175 | } | ||
| 176 | parse_info.branch_info.address = exit_branch; | ||
| 177 | parse_info.branch_info.kill = false; | ||
| 178 | parse_info.branch_info.is_sync = false; | ||
| 179 | parse_info.branch_info.is_brk = false; | ||
| 180 | parse_info.branch_info.ignore = false; | ||
| 181 | parse_info.end_address = offset; | ||
| 182 | |||
| 183 | return {ParseResult::ControlCaught, parse_info}; | ||
| 184 | } | ||
| 185 | case OpCode::Id::BRA: { | ||
| 186 | if (instr.bra.constant_buffer != 0) { | ||
| 187 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 188 | } | ||
| 189 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 190 | parse_info.branch_info.condition.predicate = | ||
| 191 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 192 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 193 | offset++; | ||
| 194 | continue; | ||
| 195 | } | ||
| 196 | const ConditionCode cc = instr.flow_condition_code; | ||
| 197 | parse_info.branch_info.condition.cc = cc; | ||
| 198 | if (cc == ConditionCode::F) { | ||
| 199 | offset++; | ||
| 200 | continue; | ||
| 201 | } | ||
| 202 | const u32 branch_offset = offset + instr.bra.GetBranchTarget(); | ||
| 203 | if (branch_offset == 0) { | ||
| 204 | parse_info.branch_info.address = exit_branch; | ||
| 205 | } else { | ||
| 206 | parse_info.branch_info.address = branch_offset; | ||
| 207 | } | ||
| 208 | insert_label(state, branch_offset); | ||
| 209 | parse_info.branch_info.kill = false; | ||
| 210 | parse_info.branch_info.is_sync = false; | ||
| 211 | parse_info.branch_info.is_brk = false; | ||
| 212 | parse_info.branch_info.ignore = false; | ||
| 213 | parse_info.end_address = offset; | ||
| 214 | |||
| 215 | return {ParseResult::ControlCaught, parse_info}; | ||
| 216 | } | ||
| 217 | case OpCode::Id::SYNC: { | ||
| 218 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 219 | parse_info.branch_info.condition.predicate = | ||
| 220 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 221 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 222 | offset++; | ||
| 223 | continue; | ||
| 224 | } | ||
| 225 | const ConditionCode cc = instr.flow_condition_code; | ||
| 226 | parse_info.branch_info.condition.cc = cc; | ||
| 227 | if (cc == ConditionCode::F) { | ||
| 228 | offset++; | ||
| 229 | continue; | ||
| 230 | } | ||
| 231 | parse_info.branch_info.address = unassigned_branch; | ||
| 232 | parse_info.branch_info.kill = false; | ||
| 233 | parse_info.branch_info.is_sync = true; | ||
| 234 | parse_info.branch_info.is_brk = false; | ||
| 235 | parse_info.branch_info.ignore = false; | ||
| 236 | parse_info.end_address = offset; | ||
| 237 | |||
| 238 | return {ParseResult::ControlCaught, parse_info}; | ||
| 239 | } | ||
| 240 | case OpCode::Id::BRK: { | ||
| 241 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 242 | parse_info.branch_info.condition.predicate = | ||
| 243 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 244 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 245 | offset++; | ||
| 246 | continue; | ||
| 247 | } | ||
| 248 | const ConditionCode cc = instr.flow_condition_code; | ||
| 249 | parse_info.branch_info.condition.cc = cc; | ||
| 250 | if (cc == ConditionCode::F) { | ||
| 251 | offset++; | ||
| 252 | continue; | ||
| 253 | } | ||
| 254 | parse_info.branch_info.address = unassigned_branch; | ||
| 255 | parse_info.branch_info.kill = false; | ||
| 256 | parse_info.branch_info.is_sync = false; | ||
| 257 | parse_info.branch_info.is_brk = true; | ||
| 258 | parse_info.branch_info.ignore = false; | ||
| 259 | parse_info.end_address = offset; | ||
| 260 | |||
| 261 | return {ParseResult::ControlCaught, parse_info}; | ||
| 262 | } | ||
| 263 | case OpCode::Id::KIL: { | ||
| 264 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 265 | parse_info.branch_info.condition.predicate = | ||
| 266 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 267 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 268 | offset++; | ||
| 269 | continue; | ||
| 270 | } | ||
| 271 | const ConditionCode cc = instr.flow_condition_code; | ||
| 272 | parse_info.branch_info.condition.cc = cc; | ||
| 273 | if (cc == ConditionCode::F) { | ||
| 274 | offset++; | ||
| 275 | continue; | ||
| 276 | } | ||
| 277 | parse_info.branch_info.address = exit_branch; | ||
| 278 | parse_info.branch_info.kill = true; | ||
| 279 | parse_info.branch_info.is_sync = false; | ||
| 280 | parse_info.branch_info.is_brk = false; | ||
| 281 | parse_info.branch_info.ignore = false; | ||
| 282 | parse_info.end_address = offset; | ||
| 283 | |||
| 284 | return {ParseResult::ControlCaught, parse_info}; | ||
| 285 | } | ||
| 286 | case OpCode::Id::SSY: { | ||
| 287 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 288 | insert_label(state, target); | ||
| 289 | state.ssy_labels.emplace(offset, target); | ||
| 290 | break; | ||
| 291 | } | ||
| 292 | case OpCode::Id::PBK: { | ||
| 293 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 294 | insert_label(state, target); | ||
| 295 | state.pbk_labels.emplace(offset, target); | ||
| 296 | break; | ||
| 297 | } | ||
| 298 | case OpCode::Id::BRX: { | ||
| 299 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 300 | } | ||
| 301 | default: | ||
| 302 | break; | ||
| 303 | } | ||
| 304 | |||
| 305 | offset++; | ||
| 306 | } | ||
| 307 | parse_info.branch_info.kill = false; | ||
| 308 | parse_info.branch_info.is_sync = false; | ||
| 309 | parse_info.branch_info.is_brk = false; | ||
| 310 | parse_info.end_address = offset - 1; | ||
| 311 | return {ParseResult::BlockEnd, parse_info}; | ||
| 312 | } | ||
| 313 | |||
| 314 | bool TryInspectAddress(CFGRebuildState& state) { | ||
| 315 | if (state.inspect_queries.empty()) { | ||
| 316 | return false; | ||
| 317 | } | ||
| 318 | |||
| 319 | const u32 address = state.inspect_queries.front(); | ||
| 320 | state.inspect_queries.pop_front(); | ||
| 321 | const auto [result, block_index] = TryGetBlock(state, address); | ||
| 322 | switch (result) { | ||
| 323 | case BlockCollision::Found: { | ||
| 324 | return true; | ||
| 325 | } | ||
| 326 | case BlockCollision::Inside: { | ||
| 327 | // This case is the tricky one: | ||
| 328 | // We need to Split the block in 2 sepparate blocks | ||
| 329 | const u32 end = state.block_info[block_index].end; | ||
| 330 | BlockInfo& new_block = CreateBlockInfo(state, address, end); | ||
| 331 | BlockInfo& current_block = state.block_info[block_index]; | ||
| 332 | current_block.end = address - 1; | ||
| 333 | new_block.branch = current_block.branch; | ||
| 334 | BlockBranchInfo forward_branch{}; | ||
| 335 | forward_branch.address = address; | ||
| 336 | forward_branch.ignore = true; | ||
| 337 | current_block.branch = forward_branch; | ||
| 338 | return true; | ||
| 339 | } | ||
| 340 | default: | ||
| 341 | break; | ||
| 342 | } | ||
| 343 | const auto [parse_result, parse_info] = ParseCode(state, address); | ||
| 344 | if (parse_result == ParseResult::AbnormalFlow) { | ||
| 345 | // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction | ||
| 346 | return false; | ||
| 347 | } | ||
| 348 | |||
| 349 | BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); | ||
| 350 | block_info.branch = parse_info.branch_info; | ||
| 351 | if (parse_info.branch_info.condition.IsUnconditional()) { | ||
| 352 | return true; | ||
| 353 | } | ||
| 354 | |||
| 355 | const u32 fallthrough_address = parse_info.end_address + 1; | ||
| 356 | state.inspect_queries.push_front(fallthrough_address); | ||
| 357 | return true; | ||
| 358 | } | ||
| 359 | |||
| 360 | bool TryQuery(CFGRebuildState& state) { | ||
| 361 | const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels, | ||
| 362 | BlockInfo& block) { | ||
| 363 | auto gather_start = labels.lower_bound(block.start); | ||
| 364 | const auto gather_end = labels.upper_bound(block.end); | ||
| 365 | while (gather_start != gather_end) { | ||
| 366 | cc.push(gather_start->second); | ||
| 367 | ++gather_start; | ||
| 368 | } | ||
| 369 | }; | ||
| 370 | if (state.queries.empty()) { | ||
| 371 | return false; | ||
| 372 | } | ||
| 373 | |||
| 374 | Query& q = state.queries.front(); | ||
| 375 | const u32 block_index = state.registered[q.address]; | ||
| 376 | BlockInfo& block = state.block_info[block_index]; | ||
| 377 | // If the block is visited, check if the stacks match, else gather the ssy/pbk | ||
| 378 | // labels into the current stack and look if the branch at the end of the block | ||
| 379 | // consumes a label. Schedule new queries accordingly | ||
| 380 | if (block.visited) { | ||
| 381 | BlockStack& stack = state.stacks[q.address]; | ||
| 382 | const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && | ||
| 383 | (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); | ||
| 384 | state.queries.pop_front(); | ||
| 385 | return all_okay; | ||
| 386 | } | ||
| 387 | block.visited = true; | ||
| 388 | state.stacks.insert_or_assign(q.address, BlockStack{q}); | ||
| 389 | |||
| 390 | Query q2(q); | ||
| 391 | state.queries.pop_front(); | ||
| 392 | gather_labels(q2.ssy_stack, state.ssy_labels, block); | ||
| 393 | gather_labels(q2.pbk_stack, state.pbk_labels, block); | ||
| 394 | if (!block.branch.condition.IsUnconditional()) { | ||
| 395 | q2.address = block.end + 1; | ||
| 396 | state.queries.push_back(q2); | ||
| 397 | } | ||
| 398 | |||
| 399 | Query conditional_query{q2}; | ||
| 400 | if (block.branch.is_sync) { | ||
| 401 | if (block.branch.address == unassigned_branch) { | ||
| 402 | block.branch.address = conditional_query.ssy_stack.top(); | ||
| 403 | } | ||
| 404 | conditional_query.ssy_stack.pop(); | ||
| 405 | } | ||
| 406 | if (block.branch.is_brk) { | ||
| 407 | if (block.branch.address == unassigned_branch) { | ||
| 408 | block.branch.address = conditional_query.pbk_stack.top(); | ||
| 409 | } | ||
| 410 | conditional_query.pbk_stack.pop(); | ||
| 411 | } | ||
| 412 | conditional_query.address = block.branch.address; | ||
| 413 | state.queries.push_back(std::move(conditional_query)); | ||
| 414 | return true; | ||
| 415 | } | ||
| 416 | } // Anonymous namespace | ||
| 417 | |||
| 418 | std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, | ||
| 419 | std::size_t program_size, u32 start_address) { | ||
| 420 | CFGRebuildState state{program_code, program_size, start_address}; | ||
| 421 | |||
| 422 | // Inspect Code and generate blocks | ||
| 423 | state.labels.clear(); | ||
| 424 | state.labels.emplace(start_address); | ||
| 425 | state.inspect_queries.push_back(state.start); | ||
| 426 | while (!state.inspect_queries.empty()) { | ||
| 427 | if (!TryInspectAddress(state)) { | ||
| 428 | return {}; | ||
| 429 | } | ||
| 430 | } | ||
| 431 | |||
| 432 | // Decompile Stacks | ||
| 433 | state.queries.push_back(Query{state.start, {}, {}}); | ||
| 434 | bool decompiled = true; | ||
| 435 | while (!state.queries.empty()) { | ||
| 436 | if (!TryQuery(state)) { | ||
| 437 | decompiled = false; | ||
| 438 | break; | ||
| 439 | } | ||
| 440 | } | ||
| 441 | |||
| 442 | // Sort and organize results | ||
| 443 | std::sort(state.block_info.begin(), state.block_info.end(), | ||
| 444 | [](const BlockInfo& a, const BlockInfo& b) { return a.start < b.start; }); | ||
| 445 | ShaderCharacteristics result_out{}; | ||
| 446 | result_out.decompilable = decompiled; | ||
| 447 | result_out.start = start_address; | ||
| 448 | result_out.end = start_address; | ||
| 449 | for (const auto& block : state.block_info) { | ||
| 450 | ShaderBlock new_block{}; | ||
| 451 | new_block.start = block.start; | ||
| 452 | new_block.end = block.end; | ||
| 453 | new_block.ignore_branch = block.branch.ignore; | ||
| 454 | if (!new_block.ignore_branch) { | ||
| 455 | new_block.branch.cond = block.branch.condition; | ||
| 456 | new_block.branch.kills = block.branch.kill; | ||
| 457 | new_block.branch.address = block.branch.address; | ||
| 458 | } | ||
| 459 | result_out.end = std::max(result_out.end, block.end); | ||
| 460 | result_out.blocks.push_back(new_block); | ||
| 461 | } | ||
| 462 | if (result_out.decompilable) { | ||
| 463 | result_out.labels = std::move(state.labels); | ||
| 464 | return {std::move(result_out)}; | ||
| 465 | } | ||
| 466 | |||
| 467 | // If it's not decompilable, merge the unlabelled blocks together | ||
| 468 | auto back = result_out.blocks.begin(); | ||
| 469 | auto next = std::next(back); | ||
| 470 | while (next != result_out.blocks.end()) { | ||
| 471 | if (state.labels.count(next->start) == 0 && next->start == back->end + 1) { | ||
| 472 | back->end = next->end; | ||
| 473 | next = result_out.blocks.erase(next); | ||
| 474 | continue; | ||
| 475 | } | ||
| 476 | back = next; | ||
| 477 | ++next; | ||
| 478 | } | ||
| 479 | return {std::move(result_out)}; | ||
| 480 | } | ||
| 481 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h new file mode 100644 index 000000000..b0a5e4f8c --- /dev/null +++ b/src/video_core/shader/control_flow.h | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <list> | ||
| 8 | #include <optional> | ||
| 9 | #include <unordered_set> | ||
| 10 | |||
| 11 | #include "video_core/engines/shader_bytecode.h" | ||
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | using Tegra::Shader::ConditionCode; | ||
| 17 | using Tegra::Shader::Pred; | ||
| 18 | |||
| 19 | constexpr s32 exit_branch = -1; | ||
| 20 | |||
| 21 | struct Condition { | ||
| 22 | Pred predicate{Pred::UnusedIndex}; | ||
| 23 | ConditionCode cc{ConditionCode::T}; | ||
| 24 | |||
| 25 | bool IsUnconditional() const { | ||
| 26 | return predicate == Pred::UnusedIndex && cc == ConditionCode::T; | ||
| 27 | } | ||
| 28 | |||
| 29 | bool operator==(const Condition& other) const { | ||
| 30 | return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); | ||
| 31 | } | ||
| 32 | |||
| 33 | bool operator!=(const Condition& other) const { | ||
| 34 | return !operator==(other); | ||
| 35 | } | ||
| 36 | }; | ||
| 37 | |||
| 38 | struct ShaderBlock { | ||
| 39 | struct Branch { | ||
| 40 | Condition cond{}; | ||
| 41 | bool kills{}; | ||
| 42 | s32 address{}; | ||
| 43 | |||
| 44 | bool operator==(const Branch& b) const { | ||
| 45 | return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); | ||
| 46 | } | ||
| 47 | |||
| 48 | bool operator!=(const Branch& b) const { | ||
| 49 | return !operator==(b); | ||
| 50 | } | ||
| 51 | }; | ||
| 52 | |||
| 53 | u32 start{}; | ||
| 54 | u32 end{}; | ||
| 55 | bool ignore_branch{}; | ||
| 56 | Branch branch{}; | ||
| 57 | |||
| 58 | bool operator==(const ShaderBlock& sb) const { | ||
| 59 | return std::tie(start, end, ignore_branch, branch) == | ||
| 60 | std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); | ||
| 61 | } | ||
| 62 | |||
| 63 | bool operator!=(const ShaderBlock& sb) const { | ||
| 64 | return !operator==(sb); | ||
| 65 | } | ||
| 66 | }; | ||
| 67 | |||
| 68 | struct ShaderCharacteristics { | ||
| 69 | std::list<ShaderBlock> blocks{}; | ||
| 70 | bool decompilable{}; | ||
| 71 | u32 start{}; | ||
| 72 | u32 end{}; | ||
| 73 | std::unordered_set<u32> labels{}; | ||
| 74 | }; | ||
| 75 | |||
| 76 | std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, | ||
| 77 | std::size_t program_size, u32 start_address); | ||
| 78 | |||
| 79 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index a0554c97e..47a9fd961 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/engines/shader_header.h" | 13 | #include "video_core/engines/shader_header.h" |
| 14 | #include "video_core/shader/control_flow.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | 15 | #include "video_core/shader/node_helper.h" |
| 15 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 16 | 17 | ||
| @@ -21,20 +22,6 @@ using Tegra::Shader::OpCode; | |||
| 21 | 22 | ||
| 22 | namespace { | 23 | namespace { |
| 23 | 24 | ||
| 24 | /// Merges exit method of two parallel branches. | ||
| 25 | constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { | ||
| 26 | if (a == ExitMethod::Undetermined) { | ||
| 27 | return b; | ||
| 28 | } | ||
| 29 | if (b == ExitMethod::Undetermined) { | ||
| 30 | return a; | ||
| 31 | } | ||
| 32 | if (a == b) { | ||
| 33 | return a; | ||
| 34 | } | ||
| 35 | return ExitMethod::Conditional; | ||
| 36 | } | ||
| 37 | |||
| 38 | /** | 25 | /** |
| 39 | * Returns whether the instruction at the specified offset is a 'sched' instruction. | 26 | * Returns whether the instruction at the specified offset is a 'sched' instruction. |
| 40 | * Sched instructions always appear before a sequence of 3 instructions. | 27 | * Sched instructions always appear before a sequence of 3 instructions. |
| @@ -51,85 +38,104 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||
| 51 | void ShaderIR::Decode() { | 38 | void ShaderIR::Decode() { |
| 52 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | 39 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
| 53 | 40 | ||
| 54 | std::set<u32> labels; | 41 | disable_flow_stack = false; |
| 55 | const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); | 42 | const auto info = ScanFlow(program_code, program_size, main_offset); |
| 56 | if (exit_method != ExitMethod::AlwaysEnd) { | 43 | if (info) { |
| 57 | UNREACHABLE_MSG("Program does not always end"); | 44 | const auto& shader_info = *info; |
| 58 | } | 45 | coverage_begin = shader_info.start; |
| 59 | 46 | coverage_end = shader_info.end; | |
| 60 | if (labels.empty()) { | 47 | if (shader_info.decompilable) { |
| 61 | basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); | 48 | disable_flow_stack = true; |
| 49 | const auto insert_block = [this](NodeBlock& nodes, u32 label) { | ||
| 50 | if (label == static_cast<u32>(exit_branch)) { | ||
| 51 | return; | ||
| 52 | } | ||
| 53 | basic_blocks.insert({label, nodes}); | ||
| 54 | }; | ||
| 55 | const auto& blocks = shader_info.blocks; | ||
| 56 | NodeBlock current_block; | ||
| 57 | u32 current_label = static_cast<u32>(exit_branch); | ||
| 58 | for (auto& block : blocks) { | ||
| 59 | if (shader_info.labels.count(block.start) != 0) { | ||
| 60 | insert_block(current_block, current_label); | ||
| 61 | current_block.clear(); | ||
| 62 | current_label = block.start; | ||
| 63 | } | ||
| 64 | if (!block.ignore_branch) { | ||
| 65 | DecodeRangeInner(current_block, block.start, block.end); | ||
| 66 | InsertControlFlow(current_block, block); | ||
| 67 | } else { | ||
| 68 | DecodeRangeInner(current_block, block.start, block.end + 1); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | insert_block(current_block, current_label); | ||
| 72 | return; | ||
| 73 | } | ||
| 74 | LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method"); | ||
| 75 | // we can't decompile it, fallback to standard method | ||
| 76 | for (const auto& block : shader_info.blocks) { | ||
| 77 | basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); | ||
| 78 | } | ||
| 62 | return; | 79 | return; |
| 63 | } | 80 | } |
| 81 | LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling"); | ||
| 82 | |||
| 83 | // Now we need to deal with an undecompilable shader. We need to brute force | ||
| 84 | // a shader that captures every position. | ||
| 85 | coverage_begin = main_offset; | ||
| 86 | const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); | ||
| 87 | coverage_end = shader_end; | ||
| 88 | for (u32 label = main_offset; label < shader_end; label++) { | ||
| 89 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); | ||
| 90 | } | ||
| 91 | } | ||
| 64 | 92 | ||
| 65 | labels.insert(main_offset); | 93 | NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { |
| 66 | 94 | NodeBlock basic_block; | |
| 67 | for (const u32 label : labels) { | 95 | DecodeRangeInner(basic_block, begin, end); |
| 68 | const auto next_it = labels.lower_bound(label + 1); | 96 | return basic_block; |
| 69 | const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; | 97 | } |
| 70 | 98 | ||
| 71 | basic_blocks.insert({label, DecodeRange(label, next_label)}); | 99 | void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { |
| 100 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 101 | pc = DecodeInstr(bb, pc); | ||
| 72 | } | 102 | } |
| 73 | } | 103 | } |
| 74 | 104 | ||
| 75 | ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { | 105 | void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { |
| 76 | const auto [iter, inserted] = | 106 | const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { |
| 77 | exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); | 107 | Node result = n; |
| 78 | ExitMethod& exit_method = iter->second; | 108 | if (cond.cc != ConditionCode::T) { |
| 79 | if (!inserted) | 109 | result = Conditional(GetConditionCode(cond.cc), {result}); |
| 80 | return exit_method; | ||
| 81 | |||
| 82 | for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) { | ||
| 83 | coverage_begin = std::min(coverage_begin, offset); | ||
| 84 | coverage_end = std::max(coverage_end, offset + 1); | ||
| 85 | |||
| 86 | const Instruction instr = {program_code[offset]}; | ||
| 87 | const auto opcode = OpCode::Decode(instr); | ||
| 88 | if (!opcode) | ||
| 89 | continue; | ||
| 90 | switch (opcode->get().GetId()) { | ||
| 91 | case OpCode::Id::EXIT: { | ||
| 92 | // The EXIT instruction can be predicated, which means that the shader can conditionally | ||
| 93 | // end on this instruction. We have to consider the case where the condition is not met | ||
| 94 | // and check the exit method of that other basic block. | ||
| 95 | using Tegra::Shader::Pred; | ||
| 96 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 97 | return exit_method = ExitMethod::AlwaysEnd; | ||
| 98 | } else { | ||
| 99 | const ExitMethod not_met = Scan(offset + 1, end, labels); | ||
| 100 | return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met); | ||
| 101 | } | ||
| 102 | } | 110 | } |
| 103 | case OpCode::Id::BRA: { | 111 | if (cond.predicate != Pred::UnusedIndex) { |
| 104 | const u32 target = offset + instr.bra.GetBranchTarget(); | 112 | u32 pred = static_cast<u32>(cond.predicate); |
| 105 | labels.insert(target); | 113 | const bool is_neg = pred > 7; |
| 106 | const ExitMethod no_jmp = Scan(offset + 1, end, labels); | 114 | if (is_neg) { |
| 107 | const ExitMethod jmp = Scan(target, end, labels); | 115 | pred -= 8; |
| 108 | return exit_method = ParallelExit(no_jmp, jmp); | 116 | } |
| 109 | } | 117 | result = Conditional(GetPredicate(pred, is_neg), {result}); |
| 110 | case OpCode::Id::SSY: | ||
| 111 | case OpCode::Id::PBK: { | ||
| 112 | // The SSY and PBK use a similar encoding as the BRA instruction. | ||
| 113 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 114 | "Constant buffer branching is not supported"); | ||
| 115 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 116 | labels.insert(target); | ||
| 117 | // Continue scanning for an exit method. | ||
| 118 | break; | ||
| 119 | } | 118 | } |
| 120 | default: | 119 | return result; |
| 121 | break; | 120 | }; |
| 121 | if (block.branch.address < 0) { | ||
| 122 | if (block.branch.kills) { | ||
| 123 | Node n = Operation(OperationCode::Discard); | ||
| 124 | n = apply_conditions(block.branch.cond, n); | ||
| 125 | bb.push_back(n); | ||
| 126 | global_code.push_back(n); | ||
| 127 | return; | ||
| 122 | } | 128 | } |
| 129 | Node n = Operation(OperationCode::Exit); | ||
| 130 | n = apply_conditions(block.branch.cond, n); | ||
| 131 | bb.push_back(n); | ||
| 132 | global_code.push_back(n); | ||
| 133 | return; | ||
| 123 | } | 134 | } |
| 124 | return exit_method = ExitMethod::AlwaysReturn; | 135 | Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); |
| 125 | } | 136 | n = apply_conditions(block.branch.cond, n); |
| 126 | 137 | bb.push_back(n); | |
| 127 | NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { | 138 | global_code.push_back(n); |
| 128 | NodeBlock basic_block; | ||
| 129 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 130 | pc = DecodeInstr(basic_block, pc); | ||
| 131 | } | ||
| 132 | return basic_block; | ||
| 133 | } | 139 | } |
| 134 | 140 | ||
| 135 | u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | 141 | u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { |
| @@ -140,15 +146,18 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 140 | 146 | ||
| 141 | const Instruction instr = {program_code[pc]}; | 147 | const Instruction instr = {program_code[pc]}; |
| 142 | const auto opcode = OpCode::Decode(instr); | 148 | const auto opcode = OpCode::Decode(instr); |
| 149 | const u32 nv_address = ConvertAddressToNvidiaSpace(pc); | ||
| 143 | 150 | ||
| 144 | // Decoding failure | 151 | // Decoding failure |
| 145 | if (!opcode) { | 152 | if (!opcode) { |
| 146 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); | 153 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); |
| 154 | bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", | ||
| 155 | nv_address, instr.value))); | ||
| 147 | return pc + 1; | 156 | return pc + 1; |
| 148 | } | 157 | } |
| 149 | 158 | ||
| 150 | bb.push_back( | 159 | bb.push_back(Comment( |
| 151 | Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value))); | 160 | fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); |
| 152 | 161 | ||
| 153 | using Tegra::Shader::Pred; | 162 | using Tegra::Shader::Pred; |
| 154 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, | 163 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, |
| @@ -167,8 +176,10 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 167 | {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, | 176 | {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, |
| 168 | {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, | 177 | {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, |
| 169 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, | 178 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, |
| 179 | {OpCode::Type::Warp, &ShaderIR::DecodeWarp}, | ||
| 170 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, | 180 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, |
| 171 | {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, | 181 | {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, |
| 182 | {OpCode::Type::Image, &ShaderIR::DecodeImage}, | ||
| 172 | {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, | 183 | {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, |
| 173 | {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, | 184 | {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, |
| 174 | {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, | 185 | {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, |
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 87d8fecaa..1473c282a 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp | |||
| @@ -42,11 +42,14 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { | |||
| 42 | case OpCode::Id::FMUL_R: | 42 | case OpCode::Id::FMUL_R: |
| 43 | case OpCode::Id::FMUL_IMM: { | 43 | case OpCode::Id::FMUL_IMM: { |
| 44 | // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. | 44 | // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. |
| 45 | UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", | 45 | if (instr.fmul.tab5cb8_2 != 0) { |
| 46 | instr.fmul.tab5cb8_2.Value()); | 46 | LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", |
| 47 | UNIMPLEMENTED_IF_MSG( | 47 | instr.fmul.tab5cb8_2.Value()); |
| 48 | instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", | 48 | } |
| 49 | instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default | 49 | if (instr.fmul.tab5c68_0 != 1) { |
| 50 | LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", | ||
| 51 | instr.fmul.tab5c68_0.Value()); | ||
| 52 | } | ||
| 50 | 53 | ||
| 51 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); | 54 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); |
| 52 | 55 | ||
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 7bcf38f23..6466fc011 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp | |||
| @@ -23,7 +23,9 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { | |||
| 23 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); | 23 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); |
| 24 | } | 24 | } |
| 25 | } else { | 25 | } else { |
| 26 | UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); | 26 | if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) { |
| 27 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); | ||
| 28 | } | ||
| 27 | } | 29 | } |
| 28 | 30 | ||
| 29 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); | 31 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); |
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 4221f0c58..32facd6ba 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp | |||
| @@ -14,6 +14,12 @@ using Tegra::Shader::Instruction; | |||
| 14 | using Tegra::Shader::OpCode; | 14 | using Tegra::Shader::OpCode; |
| 15 | using Tegra::Shader::Register; | 15 | using Tegra::Shader::Register; |
| 16 | 16 | ||
| 17 | namespace { | ||
| 18 | constexpr OperationCode GetFloatSelector(u64 selector) { | ||
| 19 | return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; | ||
| 20 | } | ||
| 21 | } // Anonymous namespace | ||
| 22 | |||
| 17 | u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | 23 | u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { |
| 18 | const Instruction instr = {program_code[pc]}; | 24 | const Instruction instr = {program_code[pc]}; |
| 19 | const auto opcode = OpCode::Decode(instr); | 25 | const auto opcode = OpCode::Decode(instr); |
| @@ -22,7 +28,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 22 | case OpCode::Id::I2I_R: | 28 | case OpCode::Id::I2I_R: |
| 23 | case OpCode::Id::I2I_C: | 29 | case OpCode::Id::I2I_C: |
| 24 | case OpCode::Id::I2I_IMM: { | 30 | case OpCode::Id::I2I_IMM: { |
| 25 | UNIMPLEMENTED_IF(instr.conversion.selector); | 31 | UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); |
| 26 | UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); | 32 | UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); |
| 27 | UNIMPLEMENTED_IF(instr.alu.saturate_d); | 33 | UNIMPLEMENTED_IF(instr.alu.saturate_d); |
| 28 | 34 | ||
| @@ -57,8 +63,8 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 57 | case OpCode::Id::I2F_R: | 63 | case OpCode::Id::I2F_R: |
| 58 | case OpCode::Id::I2F_C: | 64 | case OpCode::Id::I2F_C: |
| 59 | case OpCode::Id::I2F_IMM: { | 65 | case OpCode::Id::I2F_IMM: { |
| 60 | UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); | 66 | UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); |
| 61 | UNIMPLEMENTED_IF(instr.conversion.selector); | 67 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); |
| 62 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 68 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 63 | "Condition codes generation in I2F is not implemented"); | 69 | "Condition codes generation in I2F is not implemented"); |
| 64 | 70 | ||
| @@ -82,14 +88,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 82 | value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); | 88 | value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); |
| 83 | 89 | ||
| 84 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | 90 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |
| 91 | |||
| 92 | if (instr.conversion.dst_size == Register::Size::Short) { | ||
| 93 | value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||
| 94 | } | ||
| 95 | |||
| 85 | SetRegister(bb, instr.gpr0, value); | 96 | SetRegister(bb, instr.gpr0, value); |
| 86 | break; | 97 | break; |
| 87 | } | 98 | } |
| 88 | case OpCode::Id::F2F_R: | 99 | case OpCode::Id::F2F_R: |
| 89 | case OpCode::Id::F2F_C: | 100 | case OpCode::Id::F2F_C: |
| 90 | case OpCode::Id::F2F_IMM: { | 101 | case OpCode::Id::F2F_IMM: { |
| 91 | UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word); | 102 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); |
| 92 | UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word); | 103 | UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); |
| 93 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 104 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 94 | "Condition codes generation in F2F is not implemented"); | 105 | "Condition codes generation in F2F is not implemented"); |
| 95 | 106 | ||
| @@ -107,6 +118,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 107 | } | 118 | } |
| 108 | }(); | 119 | }(); |
| 109 | 120 | ||
| 121 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 122 | value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, | ||
| 123 | std::move(value)); | ||
| 124 | } else { | ||
| 125 | ASSERT(instr.conversion.float_src.selector == 0); | ||
| 126 | } | ||
| 127 | |||
| 110 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | 128 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); |
| 111 | 129 | ||
| 112 | value = [&]() { | 130 | value = [&]() { |
| @@ -124,19 +142,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 124 | default: | 142 | default: |
| 125 | UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", | 143 | UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", |
| 126 | static_cast<u32>(instr.conversion.f2f.rounding.Value())); | 144 | static_cast<u32>(instr.conversion.f2f.rounding.Value())); |
| 127 | return Immediate(0); | 145 | return value; |
| 128 | } | 146 | } |
| 129 | }(); | 147 | }(); |
| 130 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | 148 | value = GetSaturatedFloat(value, instr.alu.saturate_d); |
| 131 | 149 | ||
| 132 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | 150 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |
| 151 | |||
| 152 | if (instr.conversion.dst_size == Register::Size::Short) { | ||
| 153 | value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||
| 154 | } | ||
| 155 | |||
| 133 | SetRegister(bb, instr.gpr0, value); | 156 | SetRegister(bb, instr.gpr0, value); |
| 134 | break; | 157 | break; |
| 135 | } | 158 | } |
| 136 | case OpCode::Id::F2I_R: | 159 | case OpCode::Id::F2I_R: |
| 137 | case OpCode::Id::F2I_C: | 160 | case OpCode::Id::F2I_C: |
| 138 | case OpCode::Id::F2I_IMM: { | 161 | case OpCode::Id::F2I_IMM: { |
| 139 | UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); | 162 | UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); |
| 140 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 163 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 141 | "Condition codes generation in F2I is not implemented"); | 164 | "Condition codes generation in F2I is not implemented"); |
| 142 | Node value = [&]() { | 165 | Node value = [&]() { |
| @@ -153,6 +176,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 153 | } | 176 | } |
| 154 | }(); | 177 | }(); |
| 155 | 178 | ||
| 179 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 180 | value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, | ||
| 181 | std::move(value)); | ||
| 182 | } else { | ||
| 183 | ASSERT(instr.conversion.float_src.selector == 0); | ||
| 184 | } | ||
| 185 | |||
| 156 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | 186 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); |
| 157 | 187 | ||
| 158 | value = [&]() { | 188 | value = [&]() { |
diff --git a/src/video_core/shader/decode/decode_integer_set.cpp b/src/video_core/shader/decode/decode_integer_set.cpp deleted file mode 100644 index e69de29bb..000000000 --- a/src/video_core/shader/decode/decode_integer_set.cpp +++ /dev/null | |||
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index 29be25ca3..ca2f39e8d 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp | |||
| @@ -18,10 +18,12 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { | |||
| 18 | const auto opcode = OpCode::Decode(instr); | 18 | const auto opcode = OpCode::Decode(instr); |
| 19 | 19 | ||
| 20 | UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); | 20 | UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); |
| 21 | UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", | 21 | if (instr.ffma.tab5980_0 != 1) { |
| 22 | instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO | 22 | LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); |
| 23 | UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", | 23 | } |
| 24 | instr.ffma.tab5980_1.Value()); | 24 | if (instr.ffma.tab5980_1 != 0) { |
| 25 | LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); | ||
| 26 | } | ||
| 25 | 27 | ||
| 26 | const Node op_a = GetRegister(instr.gpr8); | 28 | const Node op_a = GetRegister(instr.gpr8); |
| 27 | 29 | ||
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index f5013e44a..5614e8a0d 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp | |||
| @@ -15,7 +15,6 @@ using Tegra::Shader::OpCode; | |||
| 15 | 15 | ||
| 16 | u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { | 16 | u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { |
| 17 | const Instruction instr = {program_code[pc]}; | 17 | const Instruction instr = {program_code[pc]}; |
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | 18 | ||
| 20 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, | 19 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, |
| 21 | instr.fset.neg_a != 0); | 20 | instr.fset.neg_a != 0); |
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp index 2323052b0..200c2c983 100644 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ b/src/video_core/shader/decode/float_set_predicate.cpp | |||
| @@ -16,10 +16,9 @@ using Tegra::Shader::Pred; | |||
| 16 | 16 | ||
| 17 | u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { | 17 | u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { |
| 18 | const Instruction instr = {program_code[pc]}; | 18 | const Instruction instr = {program_code[pc]}; |
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | 19 | ||
| 21 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, | 20 | Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, |
| 22 | instr.fsetp.neg_a != 0); | 21 | instr.fsetp.neg_a != 0); |
| 23 | Node op_b = [&]() { | 22 | Node op_b = [&]() { |
| 24 | if (instr.is_b_imm) { | 23 | if (instr.is_b_imm) { |
| 25 | return GetImmediate19(instr); | 24 | return GetImmediate19(instr); |
| @@ -29,12 +28,13 @@ u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { | |||
| 29 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |
| 30 | } | 29 | } |
| 31 | }(); | 30 | }(); |
| 32 | op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false); | 31 | op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b); |
| 33 | 32 | ||
| 34 | // We can't use the constant predicate as destination. | 33 | // We can't use the constant predicate as destination. |
| 35 | ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | 34 | ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); |
| 36 | 35 | ||
| 37 | const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b); | 36 | const Node predicate = |
| 37 | GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b)); | ||
| 38 | const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); | 38 | const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); |
| 39 | 39 | ||
| 40 | const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); | 40 | const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); |
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index d59d15bd8..840694527 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp | |||
| @@ -18,43 +18,55 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { | |||
| 18 | const Instruction instr = {program_code[pc]}; | 18 | const Instruction instr = {program_code[pc]}; |
| 19 | const auto opcode = OpCode::Decode(instr); | 19 | const auto opcode = OpCode::Decode(instr); |
| 20 | 20 | ||
| 21 | UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); | 21 | DEBUG_ASSERT(instr.hsetp2.ftz == 0); |
| 22 | 22 | ||
| 23 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); | 23 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); |
| 24 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); | 24 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); |
| 25 | 25 | ||
| 26 | Node op_b = [&]() { | 26 | Tegra::Shader::PredCondition cond{}; |
| 27 | switch (opcode->get().GetId()) { | 27 | bool h_and{}; |
| 28 | case OpCode::Id::HSETP2_R: | 28 | Node op_b{}; |
| 29 | return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, | 29 | switch (opcode->get().GetId()) { |
| 30 | instr.hsetp2.negate_b); | 30 | case OpCode::Id::HSETP2_C: |
| 31 | default: | 31 | cond = instr.hsetp2.cbuf_and_imm.cond; |
| 32 | UNREACHABLE(); | 32 | h_and = instr.hsetp2.cbuf_and_imm.h_and; |
| 33 | return Immediate(0); | 33 | op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), |
| 34 | } | 34 | instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); |
| 35 | }(); | 35 | break; |
| 36 | op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); | 36 | case OpCode::Id::HSETP2_IMM: |
| 37 | 37 | cond = instr.hsetp2.cbuf_and_imm.cond; | |
| 38 | // We can't use the constant predicate as destination. | 38 | h_and = instr.hsetp2.cbuf_and_imm.h_and; |
| 39 | ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); | 39 | op_b = UnpackHalfImmediate(instr, true); |
| 40 | 40 | break; | |
| 41 | const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); | 41 | case OpCode::Id::HSETP2_R: |
| 42 | cond = instr.hsetp2.reg.cond; | ||
| 43 | h_and = instr.hsetp2.reg.h_and; | ||
| 44 | op_b = | ||
| 45 | GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b), | ||
| 46 | instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b); | ||
| 47 | break; | ||
| 48 | default: | ||
| 49 | UNREACHABLE(); | ||
| 50 | op_b = Immediate(0); | ||
| 51 | } | ||
| 42 | 52 | ||
| 43 | const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); | 53 | const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); |
| 44 | const OperationCode pair_combiner = | 54 | const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); |
| 45 | instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; | ||
| 46 | |||
| 47 | const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b); | ||
| 48 | const Node first_pred = Operation(pair_combiner, comparison); | ||
| 49 | 55 | ||
| 50 | // Set the primary predicate to the result of Predicate OP SecondPredicate | 56 | const auto Write = [&](u64 dest, Node src) { |
| 51 | const Node value = Operation(combiner, first_pred, second_pred); | 57 | SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); |
| 52 | SetPredicate(bb, instr.hsetp2.pred3, value); | 58 | }; |
| 53 | 59 | ||
| 54 | if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | 60 | const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); |
| 55 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled | 61 | const u64 first = instr.hsetp2.pred3; |
| 56 | const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred); | 62 | const u64 second = instr.hsetp2.pred0; |
| 57 | SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred)); | 63 | if (h_and) { |
| 64 | Node joined = Operation(OperationCode::LogicalAnd2, comparison); | ||
| 65 | Write(first, joined); | ||
| 66 | Write(second, Operation(OperationCode::LogicalNegate, std::move(joined))); | ||
| 67 | } else { | ||
| 68 | Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U))); | ||
| 69 | Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U))); | ||
| 58 | } | 70 | } |
| 59 | 71 | ||
| 60 | return pc; | 72 | return pc; |
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index c3bcf1ae9..5b44cb79c 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp | |||
| @@ -22,9 +22,9 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { | |||
| 22 | const auto opcode = OpCode::Decode(instr); | 22 | const auto opcode = OpCode::Decode(instr); |
| 23 | 23 | ||
| 24 | if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { | 24 | if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { |
| 25 | UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None); | 25 | DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); |
| 26 | } else { | 26 | } else { |
| 27 | UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None); | 27 | DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); |
| 28 | } | 28 | } |
| 29 | 29 | ||
| 30 | constexpr auto identity = HalfType::H0_H1; | 30 | constexpr auto identity = HalfType::H0_H1; |
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp new file mode 100644 index 000000000..d54fb88c9 --- /dev/null +++ b/src/video_core/shader/decode/image.cpp | |||
| @@ -0,0 +1,164 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <vector> | ||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/engines/shader_bytecode.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | |||
| 17 | namespace VideoCommon::Shader { | ||
| 18 | |||
| 19 | using Tegra::Shader::Instruction; | ||
| 20 | using Tegra::Shader::OpCode; | ||
| 21 | |||
| 22 | namespace { | ||
| 23 | std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { | ||
| 24 | switch (image_type) { | ||
| 25 | case Tegra::Shader::ImageType::Texture1D: | ||
| 26 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 27 | return 1; | ||
| 28 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 29 | case Tegra::Shader::ImageType::Texture2D: | ||
| 30 | return 2; | ||
| 31 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 32 | case Tegra::Shader::ImageType::Texture3D: | ||
| 33 | return 3; | ||
| 34 | } | ||
| 35 | UNREACHABLE(); | ||
| 36 | return 1; | ||
| 37 | } | ||
| 38 | } // Anonymous namespace | ||
| 39 | |||
| 40 | u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | ||
| 41 | const Instruction instr = {program_code[pc]}; | ||
| 42 | const auto opcode = OpCode::Decode(instr); | ||
| 43 | |||
| 44 | switch (opcode->get().GetId()) { | ||
| 45 | case OpCode::Id::SUST: { | ||
| 46 | UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P); | ||
| 47 | UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); | ||
| 48 | UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store | ||
| 49 | |||
| 50 | std::vector<Node> values; | ||
| 51 | constexpr std::size_t hardcoded_size{4}; | ||
| 52 | for (std::size_t i = 0; i < hardcoded_size; ++i) { | ||
| 53 | values.push_back(GetRegister(instr.gpr0.Value() + i)); | ||
| 54 | } | ||
| 55 | |||
| 56 | std::vector<Node> coords; | ||
| 57 | const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; | ||
| 58 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 59 | coords.push_back(GetRegister(instr.gpr8.Value() + i)); | ||
| 60 | } | ||
| 61 | |||
| 62 | const auto type{instr.sust.image_type}; | ||
| 63 | auto& image{instr.sust.is_immediate ? GetImage(instr.image, type) | ||
| 64 | : GetBindlessImage(instr.gpr39, type)}; | ||
| 65 | image.MarkWrite(); | ||
| 66 | |||
| 67 | MetaImage meta{image, values}; | ||
| 68 | bb.push_back(Operation(OperationCode::ImageStore, meta, std::move(coords))); | ||
| 69 | break; | ||
| 70 | } | ||
| 71 | case OpCode::Id::SUATOM: { | ||
| 72 | UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); | ||
| 73 | |||
| 74 | Node value = GetRegister(instr.gpr0); | ||
| 75 | |||
| 76 | std::vector<Node> coords; | ||
| 77 | const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; | ||
| 78 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 79 | coords.push_back(GetRegister(instr.gpr8.Value() + i)); | ||
| 80 | } | ||
| 81 | |||
| 82 | const OperationCode operation_code = [instr] { | ||
| 83 | switch (instr.suatom_d.operation) { | ||
| 84 | case Tegra::Shader::ImageAtomicOperation::Add: | ||
| 85 | return OperationCode::AtomicImageAdd; | ||
| 86 | case Tegra::Shader::ImageAtomicOperation::Min: | ||
| 87 | return OperationCode::AtomicImageMin; | ||
| 88 | case Tegra::Shader::ImageAtomicOperation::Max: | ||
| 89 | return OperationCode::AtomicImageMax; | ||
| 90 | case Tegra::Shader::ImageAtomicOperation::And: | ||
| 91 | return OperationCode::AtomicImageAnd; | ||
| 92 | case Tegra::Shader::ImageAtomicOperation::Or: | ||
| 93 | return OperationCode::AtomicImageOr; | ||
| 94 | case Tegra::Shader::ImageAtomicOperation::Xor: | ||
| 95 | return OperationCode::AtomicImageXor; | ||
| 96 | case Tegra::Shader::ImageAtomicOperation::Exch: | ||
| 97 | return OperationCode::AtomicImageExchange; | ||
| 98 | default: | ||
| 99 | UNIMPLEMENTED_MSG("Unimplemented operation={}", | ||
| 100 | static_cast<u32>(instr.suatom_d.operation.Value())); | ||
| 101 | return OperationCode::AtomicImageAdd; | ||
| 102 | } | ||
| 103 | }(); | ||
| 104 | |||
| 105 | const auto& image{GetImage(instr.image, instr.suatom_d.image_type, instr.suatom_d.size)}; | ||
| 106 | MetaImage meta{image, {std::move(value)}}; | ||
| 107 | SetRegister(bb, instr.gpr0, Operation(operation_code, meta, std::move(coords))); | ||
| 108 | break; | ||
| 109 | } | ||
| 110 | default: | ||
| 111 | UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName()); | ||
| 112 | } | ||
| 113 | |||
| 114 | return pc; | ||
| 115 | } | ||
| 116 | |||
| 117 | Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type, | ||
| 118 | std::optional<Tegra::Shader::ImageAtomicSize> size) { | ||
| 119 | const auto offset{static_cast<std::size_t>(image.index.Value())}; | ||
| 120 | if (const auto image = TryUseExistingImage(offset, type, size)) { | ||
| 121 | return *image; | ||
| 122 | } | ||
| 123 | |||
| 124 | const std::size_t next_index{used_images.size()}; | ||
| 125 | return used_images.emplace(offset, Image{offset, next_index, type, size}).first->second; | ||
| 126 | } | ||
| 127 | |||
| 128 | Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type, | ||
| 129 | std::optional<Tegra::Shader::ImageAtomicSize> size) { | ||
| 130 | const Node image_register{GetRegister(reg)}; | ||
| 131 | const auto [base_image, cbuf_index, cbuf_offset]{ | ||
| 132 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; | ||
| 133 | const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; | ||
| 134 | |||
| 135 | if (const auto image = TryUseExistingImage(cbuf_key, type, size)) { | ||
| 136 | return *image; | ||
| 137 | } | ||
| 138 | |||
| 139 | const std::size_t next_index{used_images.size()}; | ||
| 140 | return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type, size}) | ||
| 141 | .first->second; | ||
| 142 | } | ||
| 143 | |||
| 144 | Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type, | ||
| 145 | std::optional<Tegra::Shader::ImageAtomicSize> size) { | ||
| 146 | auto it = used_images.find(offset); | ||
| 147 | if (it == used_images.end()) { | ||
| 148 | return nullptr; | ||
| 149 | } | ||
| 150 | auto& image = it->second; | ||
| 151 | ASSERT(image.GetType() == type); | ||
| 152 | |||
| 153 | if (size) { | ||
| 154 | // We know the size, if it's known it has to be the same as before, otherwise we can set it. | ||
| 155 | if (image.IsSizeKnown()) { | ||
| 156 | ASSERT(image.GetSize() == size); | ||
| 157 | } else { | ||
| 158 | image.SetSize(*size); | ||
| 159 | } | ||
| 160 | } | ||
| 161 | return ℑ | ||
| 162 | } | ||
| 163 | |||
| 164 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp index 46e3d5905..59809bcd8 100644 --- a/src/video_core/shader/decode/integer_set.cpp +++ b/src/video_core/shader/decode/integer_set.cpp | |||
| @@ -14,7 +14,6 @@ using Tegra::Shader::OpCode; | |||
| 14 | 14 | ||
| 15 | u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { | 15 | u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { |
| 16 | const Instruction instr = {program_code[pc]}; | 16 | const Instruction instr = {program_code[pc]}; |
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | 17 | ||
| 19 | const Node op_a = GetRegister(instr.gpr8); | 18 | const Node op_a = GetRegister(instr.gpr8); |
| 20 | const Node op_b = [&]() { | 19 | const Node op_b = [&]() { |
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp index dd20775d7..25e48fef8 100644 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ b/src/video_core/shader/decode/integer_set_predicate.cpp | |||
| @@ -16,7 +16,6 @@ using Tegra::Shader::Pred; | |||
| 16 | 16 | ||
| 17 | u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { | 17 | u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { |
| 18 | const Instruction instr = {program_code[pc]}; | 18 | const Instruction instr = {program_code[pc]}; |
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | 19 | ||
| 21 | const Node op_a = GetRegister(instr.gpr8); | 20 | const Node op_a = GetRegister(instr.gpr8); |
| 22 | 21 | ||
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 80fc0ccfc..ed108bea8 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -95,10 +95,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 95 | const Node op_b = | 95 | const Node op_b = |
| 96 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); | 96 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); |
| 97 | 97 | ||
| 98 | SetTemporal(bb, 0, op_a); | 98 | SetTemporary(bb, 0, op_a); |
| 99 | SetTemporal(bb, 1, op_b); | 99 | SetTemporary(bb, 1, op_b); |
| 100 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | 100 | SetRegister(bb, instr.gpr0, GetTemporary(0)); |
| 101 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); | 101 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); |
| 102 | break; | 102 | break; |
| 103 | } | 103 | } |
| 104 | default: | 104 | default: |
| @@ -136,9 +136,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 136 | } | 136 | } |
| 137 | }(); | 137 | }(); |
| 138 | for (u32 i = 0; i < count; ++i) | 138 | for (u32 i = 0; i < count; ++i) |
| 139 | SetTemporal(bb, i, GetLmem(i * 4)); | 139 | SetTemporary(bb, i, GetLmem(i * 4)); |
| 140 | for (u32 i = 0; i < count; ++i) | 140 | for (u32 i = 0; i < count; ++i) |
| 141 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 141 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 142 | break; | 142 | break; |
| 143 | } | 143 | } |
| 144 | default: | 144 | default: |
| @@ -172,10 +172,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 172 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | 172 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); |
| 173 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 173 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 174 | 174 | ||
| 175 | SetTemporal(bb, i, gmem); | 175 | SetTemporary(bb, i, gmem); |
| 176 | } | 176 | } |
| 177 | for (u32 i = 0; i < count; ++i) { | 177 | for (u32 i = 0; i < count; ++i) { |
| 178 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 178 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 179 | } | 179 | } |
| 180 | break; | 180 | break; |
| 181 | } | 181 | } |
| @@ -253,11 +253,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 253 | TrackAndGetGlobalMemory(bb, instr, true); | 253 | TrackAndGetGlobalMemory(bb, instr, true); |
| 254 | 254 | ||
| 255 | // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} | 255 | // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} |
| 256 | SetTemporal(bb, 0, real_address_base); | 256 | SetTemporary(bb, 0, real_address_base); |
| 257 | 257 | ||
| 258 | const u32 count = GetUniformTypeElementsCount(type); | 258 | const u32 count = GetUniformTypeElementsCount(type); |
| 259 | for (u32 i = 0; i < count; ++i) { | 259 | for (u32 i = 0; i < count; ++i) { |
| 260 | SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); | 260 | SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); |
| 261 | } | 261 | } |
| 262 | for (u32 i = 0; i < count; ++i) { | 262 | for (u32 i = 0; i < count; ++i) { |
| 263 | const Node it_offset = Immediate(i * 4); | 263 | const Node it_offset = Immediate(i * 4); |
| @@ -265,7 +265,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 265 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | 265 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); |
| 266 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 266 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 267 | 267 | ||
| 268 | bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); | 268 | bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1))); |
| 269 | } | 269 | } |
| 270 | break; | 270 | break; |
| 271 | } | 271 | } |
| @@ -297,18 +297,13 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB | |||
| 297 | const auto addr_register{GetRegister(instr.gmem.gpr)}; | 297 | const auto addr_register{GetRegister(instr.gmem.gpr)}; |
| 298 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; | 298 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; |
| 299 | 299 | ||
| 300 | const Node base_address{ | 300 | const auto [base_address, index, offset] = |
| 301 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; | 301 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); |
| 302 | const auto cbuf = std::get_if<CbufNode>(&*base_address); | 302 | ASSERT(base_address != nullptr); |
| 303 | ASSERT(cbuf != nullptr); | ||
| 304 | const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); | ||
| 305 | ASSERT(cbuf_offset_imm != nullptr); | ||
| 306 | const auto cbuf_offset = cbuf_offset_imm->GetValue(); | ||
| 307 | 303 | ||
| 308 | bb.push_back( | 304 | bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); |
| 309 | Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); | ||
| 310 | 305 | ||
| 311 | const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; | 306 | const GlobalMemoryBase descriptor{index, offset}; |
| 312 | const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); | 307 | const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); |
| 313 | auto& usage = entry->second; | 308 | auto& usage = entry->second; |
| 314 | if (is_write) { | 309 | if (is_write) { |
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d46a8ab82..d46e0f823 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -22,6 +22,12 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 22 | const auto opcode = OpCode::Decode(instr); | 22 | const auto opcode = OpCode::Decode(instr); |
| 23 | 23 | ||
| 24 | switch (opcode->get().GetId()) { | 24 | switch (opcode->get().GetId()) { |
| 25 | case OpCode::Id::NOP: { | ||
| 26 | UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); | ||
| 27 | UNIMPLEMENTED_IF(instr.nop.trigger != 0); | ||
| 28 | // With the previous preconditions, this instruction is a no-operation. | ||
| 29 | break; | ||
| 30 | } | ||
| 25 | case OpCode::Id::EXIT: { | 31 | case OpCode::Id::EXIT: { |
| 26 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | 32 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 27 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}", | 33 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}", |
| @@ -68,6 +74,13 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 68 | case SystemVariable::InvocationInfo: | 74 | case SystemVariable::InvocationInfo: |
| 69 | LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); | 75 | LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); |
| 70 | return Immediate(0u); | 76 | return Immediate(0u); |
| 77 | case SystemVariable::Tid: { | ||
| 78 | Node value = Immediate(0); | ||
| 79 | value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9); | ||
| 80 | value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdY), 16, 9); | ||
| 81 | value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdZ), 26, 5); | ||
| 82 | return value; | ||
| 83 | } | ||
| 71 | case SystemVariable::TidX: | 84 | case SystemVariable::TidX: |
| 72 | return Operation(OperationCode::LocalInvocationIdX); | 85 | return Operation(OperationCode::LocalInvocationIdX); |
| 73 | case SystemVariable::TidY: | 86 | case SystemVariable::TidY: |
| @@ -91,11 +104,46 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 91 | break; | 104 | break; |
| 92 | } | 105 | } |
| 93 | case OpCode::Id::BRA: { | 106 | case OpCode::Id::BRA: { |
| 94 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | 107 | Node branch; |
| 95 | "BRA with constant buffers are not implemented"); | 108 | if (instr.bra.constant_buffer == 0) { |
| 109 | const u32 target = pc + instr.bra.GetBranchTarget(); | ||
| 110 | branch = Operation(OperationCode::Branch, Immediate(target)); | ||
| 111 | } else { | ||
| 112 | const u32 target = pc + 1; | ||
| 113 | const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); | ||
| 114 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 115 | PRECISE, op_a, Immediate(3)); | ||
| 116 | const Node operand = | ||
| 117 | Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 118 | branch = Operation(OperationCode::BranchIndirect, operand); | ||
| 119 | } | ||
| 96 | 120 | ||
| 97 | const u32 target = pc + instr.bra.GetBranchTarget(); | 121 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 98 | const Node branch = Operation(OperationCode::Branch, Immediate(target)); | 122 | if (cc != Tegra::Shader::ConditionCode::T) { |
| 123 | bb.push_back(Conditional(GetConditionCode(cc), {branch})); | ||
| 124 | } else { | ||
| 125 | bb.push_back(branch); | ||
| 126 | } | ||
| 127 | break; | ||
| 128 | } | ||
| 129 | case OpCode::Id::BRX: { | ||
| 130 | Node operand; | ||
| 131 | if (instr.brx.constant_buffer != 0) { | ||
| 132 | const s32 target = pc + 1; | ||
| 133 | const Node index = GetRegister(instr.gpr8); | ||
| 134 | const Node op_a = | ||
| 135 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); | ||
| 136 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 137 | PRECISE, op_a, Immediate(3)); | ||
| 138 | operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 139 | } else { | ||
| 140 | const s32 target = pc + instr.brx.GetBranchExtend(); | ||
| 141 | const Node op_a = GetRegister(instr.gpr8); | ||
| 142 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 143 | PRECISE, op_a, Immediate(3)); | ||
| 144 | operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 145 | } | ||
| 146 | const Node branch = Operation(OperationCode::BranchIndirect, operand); | ||
| 99 | 147 | ||
| 100 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | 148 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 101 | if (cc != Tegra::Shader::ConditionCode::T) { | 149 | if (cc != Tegra::Shader::ConditionCode::T) { |
| @@ -109,6 +157,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 109 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | 157 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |
| 110 | "Constant buffer flow is not supported"); | 158 | "Constant buffer flow is not supported"); |
| 111 | 159 | ||
| 160 | if (disable_flow_stack) { | ||
| 161 | break; | ||
| 162 | } | ||
| 163 | |||
| 112 | // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. | 164 | // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. |
| 113 | const u32 target = pc + instr.bra.GetBranchTarget(); | 165 | const u32 target = pc + instr.bra.GetBranchTarget(); |
| 114 | bb.push_back( | 166 | bb.push_back( |
| @@ -119,6 +171,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 119 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | 171 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |
| 120 | "Constant buffer PBK is not supported"); | 172 | "Constant buffer PBK is not supported"); |
| 121 | 173 | ||
| 174 | if (disable_flow_stack) { | ||
| 175 | break; | ||
| 176 | } | ||
| 177 | |||
| 122 | // PBK pushes to a stack the address where BRK will jump to. | 178 | // PBK pushes to a stack the address where BRK will jump to. |
| 123 | const u32 target = pc + instr.bra.GetBranchTarget(); | 179 | const u32 target = pc + instr.bra.GetBranchTarget(); |
| 124 | bb.push_back( | 180 | bb.push_back( |
| @@ -130,6 +186,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 130 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", | 186 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", |
| 131 | static_cast<u32>(cc)); | 187 | static_cast<u32>(cc)); |
| 132 | 188 | ||
| 189 | if (disable_flow_stack) { | ||
| 190 | break; | ||
| 191 | } | ||
| 192 | |||
| 133 | // The SYNC opcode jumps to the address previously set by the SSY opcode | 193 | // The SYNC opcode jumps to the address previously set by the SSY opcode |
| 134 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); | 194 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); |
| 135 | break; | 195 | break; |
| @@ -138,6 +198,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 138 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | 198 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 139 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", | 199 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", |
| 140 | static_cast<u32>(cc)); | 200 | static_cast<u32>(cc)); |
| 201 | if (disable_flow_stack) { | ||
| 202 | break; | ||
| 203 | } | ||
| 141 | 204 | ||
| 142 | // The BRK opcode jumps to the address previously set by the PBK opcode | 205 | // The BRK opcode jumps to the address previously set by the PBK opcode |
| 143 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); | 206 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); |
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index febbfeb50..84dbc50fe 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp | |||
| @@ -15,7 +15,6 @@ using Tegra::Shader::OpCode; | |||
| 15 | 15 | ||
| 16 | u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { | 16 | u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { |
| 17 | const Instruction instr = {program_code[pc]}; | 17 | const Instruction instr = {program_code[pc]}; |
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | 18 | ||
| 20 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 19 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 21 | "Condition codes generation in PSET is not implemented"); | 20 | "Condition codes generation in PSET is not implemented"); |
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 2ac16eeb0..f6ee68a54 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp | |||
| @@ -17,8 +17,8 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { | |||
| 17 | const Instruction instr = {program_code[pc]}; | 17 | const Instruction instr = {program_code[pc]}; |
| 18 | const auto opcode = OpCode::Decode(instr); | 18 | const auto opcode = OpCode::Decode(instr); |
| 19 | 19 | ||
| 20 | const Node op_a = GetRegister(instr.gpr8); | 20 | Node op_a = GetRegister(instr.gpr8); |
| 21 | const Node op_b = [&]() { | 21 | Node op_b = [&]() { |
| 22 | if (instr.is_b_imm) { | 22 | if (instr.is_b_imm) { |
| 23 | return Immediate(instr.alu.GetSignedImm20_20()); | 23 | return Immediate(instr.alu.GetSignedImm20_20()); |
| 24 | } else if (instr.is_b_gpr) { | 24 | } else if (instr.is_b_gpr) { |
| @@ -32,16 +32,23 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { | |||
| 32 | case OpCode::Id::SHR_C: | 32 | case OpCode::Id::SHR_C: |
| 33 | case OpCode::Id::SHR_R: | 33 | case OpCode::Id::SHR_R: |
| 34 | case OpCode::Id::SHR_IMM: { | 34 | case OpCode::Id::SHR_IMM: { |
| 35 | const Node value = SignedOperation(OperationCode::IArithmeticShiftRight, | 35 | if (instr.shr.wrap) { |
| 36 | instr.shift.is_signed, PRECISE, op_a, op_b); | 36 | op_b = Operation(OperationCode::UBitwiseAnd, std::move(op_b), Immediate(0x1f)); |
| 37 | } else { | ||
| 38 | op_b = Operation(OperationCode::IMax, std::move(op_b), Immediate(0)); | ||
| 39 | op_b = Operation(OperationCode::IMin, std::move(op_b), Immediate(31)); | ||
| 40 | } | ||
| 41 | |||
| 42 | Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, | ||
| 43 | std::move(op_a), std::move(op_b)); | ||
| 37 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | 44 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |
| 38 | SetRegister(bb, instr.gpr0, value); | 45 | SetRegister(bb, instr.gpr0, std::move(value)); |
| 39 | break; | 46 | break; |
| 40 | } | 47 | } |
| 41 | case OpCode::Id::SHL_C: | 48 | case OpCode::Id::SHL_C: |
| 42 | case OpCode::Id::SHL_R: | 49 | case OpCode::Id::SHL_R: |
| 43 | case OpCode::Id::SHL_IMM: { | 50 | case OpCode::Id::SHL_IMM: { |
| 44 | const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b); | 51 | const Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); |
| 45 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | 52 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |
| 46 | SetRegister(bb, instr.gpr0, value); | 53 | SetRegister(bb, instr.gpr0, value); |
| 47 | break; | 54 | break; |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 4a356dbd4..0b934a069 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -181,10 +181,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 181 | const Node value = | 181 | const Node value = |
| 182 | Operation(OperationCode::TextureQueryDimensions, meta, | 182 | Operation(OperationCode::TextureQueryDimensions, meta, |
| 183 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); | 183 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); |
| 184 | SetTemporal(bb, indexer++, value); | 184 | SetTemporary(bb, indexer++, value); |
| 185 | } | 185 | } |
| 186 | for (u32 i = 0; i < indexer; ++i) { | 186 | for (u32 i = 0; i < indexer; ++i) { |
| 187 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 187 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 188 | } | 188 | } |
| 189 | break; | 189 | break; |
| 190 | } | 190 | } |
| @@ -238,13 +238,25 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 238 | auto params = coords; | 238 | auto params = coords; |
| 239 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; | 239 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; |
| 240 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | 240 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); |
| 241 | SetTemporal(bb, indexer++, value); | 241 | SetTemporary(bb, indexer++, value); |
| 242 | } | 242 | } |
| 243 | for (u32 i = 0; i < indexer; ++i) { | 243 | for (u32 i = 0; i < indexer; ++i) { |
| 244 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 244 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 245 | } | 245 | } |
| 246 | break; | 246 | break; |
| 247 | } | 247 | } |
| 248 | case OpCode::Id::TLD: { | ||
| 249 | UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented"); | ||
| 250 | UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); | ||
| 251 | UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); | ||
| 252 | |||
| 253 | if (instr.tld.nodep_flag) { | ||
| 254 | LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete"); | ||
| 255 | } | ||
| 256 | |||
| 257 | WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); | ||
| 258 | break; | ||
| 259 | } | ||
| 248 | case OpCode::Id::TLDS: { | 260 | case OpCode::Id::TLDS: { |
| 249 | const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; | 261 | const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; |
| 250 | const bool is_array{instr.tlds.IsArrayTexture()}; | 262 | const bool is_array{instr.tlds.IsArrayTexture()}; |
| @@ -257,7 +269,13 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 257 | LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); | 269 | LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); |
| 258 | } | 270 | } |
| 259 | 271 | ||
| 260 | WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); | 272 | const Node4 components = GetTldsCode(instr, texture_type, is_array); |
| 273 | |||
| 274 | if (instr.tlds.fp32_flag) { | ||
| 275 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 276 | } else { | ||
| 277 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 278 | } | ||
| 261 | break; | 279 | break; |
| 262 | } | 280 | } |
| 263 | default: | 281 | default: |
| @@ -290,13 +308,9 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu | |||
| 290 | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, | 308 | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, |
| 291 | bool is_array, bool is_shadow) { | 309 | bool is_array, bool is_shadow) { |
| 292 | const Node sampler_register = GetRegister(reg); | 310 | const Node sampler_register = GetRegister(reg); |
| 293 | const Node base_sampler = | 311 | const auto [base_sampler, cbuf_index, cbuf_offset] = |
| 294 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); | 312 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); |
| 295 | const auto cbuf = std::get_if<CbufNode>(&*base_sampler); | 313 | ASSERT(base_sampler != nullptr); |
| 296 | const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); | ||
| 297 | ASSERT(cbuf_offset_imm != nullptr); | ||
| 298 | const auto cbuf_offset = cbuf_offset_imm->GetValue(); | ||
| 299 | const auto cbuf_index = cbuf->GetIndex(); | ||
| 300 | const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); | 314 | const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); |
| 301 | 315 | ||
| 302 | // If this sampler has already been used, return the existing mapping. | 316 | // If this sampler has already been used, return the existing mapping. |
| @@ -322,11 +336,11 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const | |||
| 322 | // Skip disabled components | 336 | // Skip disabled components |
| 323 | continue; | 337 | continue; |
| 324 | } | 338 | } |
| 325 | SetTemporal(bb, dest_elem++, components[elem]); | 339 | SetTemporary(bb, dest_elem++, components[elem]); |
| 326 | } | 340 | } |
| 327 | // After writing values in temporals, move them to the real registers | 341 | // After writing values in temporals, move them to the real registers |
| 328 | for (u32 i = 0; i < dest_elem; ++i) { | 342 | for (u32 i = 0; i < dest_elem; ++i) { |
| 329 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 343 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 330 | } | 344 | } |
| 331 | } | 345 | } |
| 332 | 346 | ||
| @@ -339,17 +353,17 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, | |||
| 339 | for (u32 component = 0; component < 4; ++component) { | 353 | for (u32 component = 0; component < 4; ++component) { |
| 340 | if (!instr.texs.IsComponentEnabled(component)) | 354 | if (!instr.texs.IsComponentEnabled(component)) |
| 341 | continue; | 355 | continue; |
| 342 | SetTemporal(bb, dest_elem++, components[component]); | 356 | SetTemporary(bb, dest_elem++, components[component]); |
| 343 | } | 357 | } |
| 344 | 358 | ||
| 345 | for (u32 i = 0; i < dest_elem; ++i) { | 359 | for (u32 i = 0; i < dest_elem; ++i) { |
| 346 | if (i < 2) { | 360 | if (i < 2) { |
| 347 | // Write the first two swizzle components to gpr0 and gpr0+1 | 361 | // Write the first two swizzle components to gpr0 and gpr0+1 |
| 348 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); | 362 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); |
| 349 | } else { | 363 | } else { |
| 350 | ASSERT(instr.texs.HasTwoDestinations()); | 364 | ASSERT(instr.texs.HasTwoDestinations()); |
| 351 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | 365 | // Write the rest of the swizzle components to gpr28 and gpr28+1 |
| 352 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); | 366 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); |
| 353 | } | 367 | } |
| 354 | } | 368 | } |
| 355 | } | 369 | } |
| @@ -377,11 +391,11 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | |||
| 377 | return; | 391 | return; |
| 378 | } | 392 | } |
| 379 | 393 | ||
| 380 | SetTemporal(bb, 0, first_value); | 394 | SetTemporary(bb, 0, first_value); |
| 381 | SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | 395 | SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); |
| 382 | 396 | ||
| 383 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | 397 | SetRegister(bb, instr.gpr0, GetTemporary(0)); |
| 384 | SetRegister(bb, instr.gpr28, GetTemporal(1)); | 398 | SetRegister(bb, instr.gpr28, GetTemporary(1)); |
| 385 | } | 399 | } |
| 386 | 400 | ||
| 387 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | 401 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, |
| @@ -575,6 +589,39 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 575 | return values; | 589 | return values; |
| 576 | } | 590 | } |
| 577 | 591 | ||
| 592 | Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { | ||
| 593 | const auto texture_type{instr.tld.texture_type}; | ||
| 594 | const bool is_array{instr.tld.is_array}; | ||
| 595 | const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; | ||
| 596 | const std::size_t coord_count{GetCoordCount(texture_type)}; | ||
| 597 | |||
| 598 | u64 gpr8_cursor{instr.gpr8.Value()}; | ||
| 599 | const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr}; | ||
| 600 | |||
| 601 | std::vector<Node> coords; | ||
| 602 | coords.reserve(coord_count); | ||
| 603 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 604 | coords.push_back(GetRegister(gpr8_cursor++)); | ||
| 605 | } | ||
| 606 | |||
| 607 | u64 gpr20_cursor{instr.gpr20.Value()}; | ||
| 608 | // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 609 | const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)}; | ||
| 610 | // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 611 | // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 612 | |||
| 613 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 614 | |||
| 615 | Node4 values; | ||
| 616 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 617 | auto coords_copy = coords; | ||
| 618 | MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, element}; | ||
| 619 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||
| 620 | } | ||
| 621 | |||
| 622 | return values; | ||
| 623 | } | ||
| 624 | |||
| 578 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | 625 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { |
| 579 | const std::size_t type_coord_count = GetCoordCount(texture_type); | 626 | const std::size_t type_coord_count = GetCoordCount(texture_type); |
| 580 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | 627 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; |
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp new file mode 100644 index 000000000..04ca74f46 --- /dev/null +++ b/src/video_core/shader/decode/warp.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | using Tegra::Shader::VoteOperation; | ||
| 17 | |||
| 18 | namespace { | ||
| 19 | OperationCode GetOperationCode(VoteOperation vote_op) { | ||
| 20 | switch (vote_op) { | ||
| 21 | case VoteOperation::All: | ||
| 22 | return OperationCode::VoteAll; | ||
| 23 | case VoteOperation::Any: | ||
| 24 | return OperationCode::VoteAny; | ||
| 25 | case VoteOperation::Eq: | ||
| 26 | return OperationCode::VoteEqual; | ||
| 27 | default: | ||
| 28 | UNREACHABLE_MSG("Invalid vote operation={}", static_cast<u64>(vote_op)); | ||
| 29 | return OperationCode::VoteAll; | ||
| 30 | } | ||
| 31 | } | ||
| 32 | } // Anonymous namespace | ||
| 33 | |||
| 34 | u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | ||
| 35 | const Instruction instr = {program_code[pc]}; | ||
| 36 | const auto opcode = OpCode::Decode(instr); | ||
| 37 | |||
| 38 | switch (opcode->get().GetId()) { | ||
| 39 | case OpCode::Id::VOTE: { | ||
| 40 | const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); | ||
| 41 | const Node active = Operation(OperationCode::BallotThread, value); | ||
| 42 | const Node vote = Operation(GetOperationCode(instr.vote.operation), value); | ||
| 43 | SetRegister(bb, instr.gpr0, active); | ||
| 44 | SetPredicate(bb, instr.vote.dest_pred, vote); | ||
| 45 | break; | ||
| 46 | } | ||
| 47 | default: | ||
| 48 | UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); | ||
| 49 | break; | ||
| 50 | } | ||
| 51 | |||
| 52 | return pc; | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 93dee77d1..206961909 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp | |||
| @@ -73,8 +73,8 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | |||
| 73 | if (is_psl) { | 73 | if (is_psl) { |
| 74 | product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); | 74 | product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); |
| 75 | } | 75 | } |
| 76 | SetTemporal(bb, 0, product); | 76 | SetTemporary(bb, 0, product); |
| 77 | product = GetTemporal(0); | 77 | product = GetTemporary(0); |
| 78 | 78 | ||
| 79 | const Node original_c = op_c; | 79 | const Node original_c = op_c; |
| 80 | const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error | 80 | const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error |
| @@ -98,13 +98,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | |||
| 98 | } | 98 | } |
| 99 | }(); | 99 | }(); |
| 100 | 100 | ||
| 101 | SetTemporal(bb, 1, op_c); | 101 | SetTemporary(bb, 1, op_c); |
| 102 | op_c = GetTemporal(1); | 102 | op_c = GetTemporary(1); |
| 103 | 103 | ||
| 104 | // TODO(Rodrigo): Use an appropiate sign for this operation | 104 | // TODO(Rodrigo): Use an appropiate sign for this operation |
| 105 | Node sum = Operation(OperationCode::IAdd, product, op_c); | 105 | Node sum = Operation(OperationCode::IAdd, product, op_c); |
| 106 | SetTemporal(bb, 2, sum); | 106 | SetTemporary(bb, 2, sum); |
| 107 | sum = GetTemporal(2); | 107 | sum = GetTemporary(2); |
| 108 | if (is_merge) { | 108 | if (is_merge) { |
| 109 | const Node a = BitfieldExtract(sum, 0, 16); | 109 | const Node a = BitfieldExtract(sum, 0, 16); |
| 110 | const Node b = | 110 | const Node b = |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 3cfb911bb..b47b201cf 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <memory> | 9 | #include <memory> |
| 10 | #include <optional> | ||
| 10 | #include <string> | 11 | #include <string> |
| 11 | #include <tuple> | 12 | #include <tuple> |
| 12 | #include <utility> | 13 | #include <utility> |
| @@ -30,6 +31,8 @@ enum class OperationCode { | |||
| 30 | FNegate, /// (MetaArithmetic, float a) -> float | 31 | FNegate, /// (MetaArithmetic, float a) -> float |
| 31 | FAbsolute, /// (MetaArithmetic, float a) -> float | 32 | FAbsolute, /// (MetaArithmetic, float a) -> float |
| 32 | FClamp, /// (MetaArithmetic, float value, float min, float max) -> float | 33 | FClamp, /// (MetaArithmetic, float value, float min, float max) -> float |
| 34 | FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float | ||
| 35 | FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float | ||
| 33 | FMin, /// (MetaArithmetic, float a, float b) -> float | 36 | FMin, /// (MetaArithmetic, float a, float b) -> float |
| 34 | FMax, /// (MetaArithmetic, float a, float b) -> float | 37 | FMax, /// (MetaArithmetic, float a, float b) -> float |
| 35 | FCos, /// (MetaArithmetic, float a) -> float | 38 | FCos, /// (MetaArithmetic, float a) -> float |
| @@ -83,17 +86,18 @@ enum class OperationCode { | |||
| 83 | UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint | 86 | UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint |
| 84 | UBitCount, /// (MetaArithmetic, uint) -> uint | 87 | UBitCount, /// (MetaArithmetic, uint) -> uint |
| 85 | 88 | ||
| 86 | HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | 89 | HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 |
| 87 | HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | 90 | HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 |
| 88 | HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 | 91 | HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 |
| 89 | HAbsolute, /// (f16vec2 a) -> f16vec2 | 92 | HAbsolute, /// (f16vec2 a) -> f16vec2 |
| 90 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 | 93 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 |
| 91 | HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 | 94 | HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 |
| 92 | HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 | 95 | HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 |
| 93 | HMergeF32, /// (f16vec2 src) -> float | 96 | HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 |
| 94 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | 97 | HMergeF32, /// (f16vec2 src) -> float |
| 95 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | 98 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 |
| 96 | HPack2, /// (float a, float b) -> f16vec2 | 99 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 |
| 100 | HPack2, /// (float a, float b) -> f16vec2 | ||
| 97 | 101 | ||
| 98 | LogicalAssign, /// (bool& dst, bool src) -> void | 102 | LogicalAssign, /// (bool& dst, bool src) -> void |
| 99 | LogicalAnd, /// (bool a, bool b) -> bool | 103 | LogicalAnd, /// (bool a, bool b) -> bool |
| @@ -101,8 +105,7 @@ enum class OperationCode { | |||
| 101 | LogicalXor, /// (bool a, bool b) -> bool | 105 | LogicalXor, /// (bool a, bool b) -> bool |
| 102 | LogicalNegate, /// (bool a) -> bool | 106 | LogicalNegate, /// (bool a) -> bool |
| 103 | LogicalPick2, /// (bool2 pair, uint index) -> bool | 107 | LogicalPick2, /// (bool2 pair, uint index) -> bool |
| 104 | LogicalAll2, /// (bool2 a) -> bool | 108 | LogicalAnd2, /// (bool2 a) -> bool |
| 105 | LogicalAny2, /// (bool2 a) -> bool | ||
| 106 | 109 | ||
| 107 | LogicalFLessThan, /// (float a, float b) -> bool | 110 | LogicalFLessThan, /// (float a, float b) -> bool |
| 108 | LogicalFEqual, /// (float a, float b) -> bool | 111 | LogicalFEqual, /// (float a, float b) -> bool |
| @@ -146,11 +149,21 @@ enum class OperationCode { | |||
| 146 | TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 | 149 | TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 |
| 147 | TexelFetch, /// (MetaTexture, int[N], int) -> float4 | 150 | TexelFetch, /// (MetaTexture, int[N], int) -> float4 |
| 148 | 151 | ||
| 149 | Branch, /// (uint branch_target) -> void | 152 | ImageStore, /// (MetaImage, int[N] values) -> void |
| 150 | PushFlowStack, /// (uint branch_target) -> void | 153 | AtomicImageAdd, /// (MetaImage, int[N] coords) -> void |
| 151 | PopFlowStack, /// () -> void | 154 | AtomicImageMin, /// (MetaImage, int[N] coords) -> void |
| 152 | Exit, /// () -> void | 155 | AtomicImageMax, /// (MetaImage, int[N] coords) -> void |
| 153 | Discard, /// () -> void | 156 | AtomicImageAnd, /// (MetaImage, int[N] coords) -> void |
| 157 | AtomicImageOr, /// (MetaImage, int[N] coords) -> void | ||
| 158 | AtomicImageXor, /// (MetaImage, int[N] coords) -> void | ||
| 159 | AtomicImageExchange, /// (MetaImage, int[N] coords) -> void | ||
| 160 | |||
| 161 | Branch, /// (uint branch_target) -> void | ||
| 162 | BranchIndirect, /// (uint branch_target) -> void | ||
| 163 | PushFlowStack, /// (uint branch_target) -> void | ||
| 164 | PopFlowStack, /// () -> void | ||
| 165 | Exit, /// () -> void | ||
| 166 | Discard, /// () -> void | ||
| 154 | 167 | ||
| 155 | EmitVertex, /// () -> void | 168 | EmitVertex, /// () -> void |
| 156 | EndPrimitive, /// () -> void | 169 | EndPrimitive, /// () -> void |
| @@ -163,6 +176,11 @@ enum class OperationCode { | |||
| 163 | WorkGroupIdY, /// () -> uint | 176 | WorkGroupIdY, /// () -> uint |
| 164 | WorkGroupIdZ, /// () -> uint | 177 | WorkGroupIdZ, /// () -> uint |
| 165 | 178 | ||
| 179 | BallotThread, /// (bool) -> uint | ||
| 180 | VoteAll, /// (bool) -> bool | ||
| 181 | VoteAny, /// (bool) -> bool | ||
| 182 | VoteEqual, /// (bool) -> bool | ||
| 183 | |||
| 166 | Amount, | 184 | Amount, |
| 167 | }; | 185 | }; |
| 168 | 186 | ||
| @@ -263,6 +281,87 @@ private: | |||
| 263 | bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. | 281 | bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. |
| 264 | }; | 282 | }; |
| 265 | 283 | ||
| 284 | class Image final { | ||
| 285 | public: | ||
| 286 | constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, | ||
| 287 | std::optional<Tegra::Shader::ImageAtomicSize> size) | ||
| 288 | : offset{offset}, index{index}, type{type}, is_bindless{false}, size{size} {} | ||
| 289 | |||
| 290 | constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, | ||
| 291 | Tegra::Shader::ImageType type, | ||
| 292 | std::optional<Tegra::Shader::ImageAtomicSize> size) | ||
| 293 | : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, | ||
| 294 | is_bindless{true}, size{size} {} | ||
| 295 | |||
| 296 | constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, | ||
| 297 | bool is_bindless, bool is_written, bool is_read, | ||
| 298 | std::optional<Tegra::Shader::ImageAtomicSize> size) | ||
| 299 | : offset{offset}, index{index}, type{type}, is_bindless{is_bindless}, | ||
| 300 | is_written{is_written}, is_read{is_read}, size{size} {} | ||
| 301 | |||
| 302 | void MarkWrite() { | ||
| 303 | is_written = true; | ||
| 304 | } | ||
| 305 | |||
| 306 | void MarkRead() { | ||
| 307 | is_read = true; | ||
| 308 | } | ||
| 309 | |||
| 310 | void SetSize(Tegra::Shader::ImageAtomicSize size_) { | ||
| 311 | size = size_; | ||
| 312 | } | ||
| 313 | |||
| 314 | constexpr std::size_t GetOffset() const { | ||
| 315 | return offset; | ||
| 316 | } | ||
| 317 | |||
| 318 | constexpr std::size_t GetIndex() const { | ||
| 319 | return index; | ||
| 320 | } | ||
| 321 | |||
| 322 | constexpr Tegra::Shader::ImageType GetType() const { | ||
| 323 | return type; | ||
| 324 | } | ||
| 325 | |||
| 326 | constexpr bool IsBindless() const { | ||
| 327 | return is_bindless; | ||
| 328 | } | ||
| 329 | |||
| 330 | constexpr bool IsWritten() const { | ||
| 331 | return is_written; | ||
| 332 | } | ||
| 333 | |||
| 334 | constexpr bool IsRead() const { | ||
| 335 | return is_read; | ||
| 336 | } | ||
| 337 | |||
| 338 | constexpr std::pair<u32, u32> GetBindlessCBuf() const { | ||
| 339 | return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; | ||
| 340 | } | ||
| 341 | |||
| 342 | constexpr bool IsSizeKnown() const { | ||
| 343 | return size.has_value(); | ||
| 344 | } | ||
| 345 | |||
| 346 | constexpr Tegra::Shader::ImageAtomicSize GetSize() const { | ||
| 347 | return size.value(); | ||
| 348 | } | ||
| 349 | |||
| 350 | constexpr bool operator<(const Image& rhs) const { | ||
| 351 | return std::tie(offset, index, type, size, is_bindless) < | ||
| 352 | std::tie(rhs.offset, rhs.index, rhs.type, rhs.size, rhs.is_bindless); | ||
| 353 | } | ||
| 354 | |||
| 355 | private: | ||
| 356 | u64 offset{}; | ||
| 357 | std::size_t index{}; | ||
| 358 | Tegra::Shader::ImageType type{}; | ||
| 359 | bool is_bindless{}; | ||
| 360 | bool is_written{}; | ||
| 361 | bool is_read{}; | ||
| 362 | std::optional<Tegra::Shader::ImageAtomicSize> size{}; | ||
| 363 | }; | ||
| 364 | |||
| 266 | struct GlobalMemoryBase { | 365 | struct GlobalMemoryBase { |
| 267 | u32 cbuf_index{}; | 366 | u32 cbuf_index{}; |
| 268 | u32 cbuf_offset{}; | 367 | u32 cbuf_offset{}; |
| @@ -289,8 +388,14 @@ struct MetaTexture { | |||
| 289 | u32 element{}; | 388 | u32 element{}; |
| 290 | }; | 389 | }; |
| 291 | 390 | ||
| 391 | struct MetaImage { | ||
| 392 | const Image& image; | ||
| 393 | std::vector<Node> values; | ||
| 394 | }; | ||
| 395 | |||
| 292 | /// Parameters that modify an operation but are not part of any particular operand | 396 | /// Parameters that modify an operation but are not part of any particular operand |
| 293 | using Meta = std::variant<MetaArithmetic, MetaTexture, MetaStackClass, Tegra::Shader::HalfType>; | 397 | using Meta = |
| 398 | std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>; | ||
| 294 | 399 | ||
| 295 | /// Holds any kind of operation that can be done in the IR | 400 | /// Holds any kind of operation that can be done in the IR |
| 296 | class OperationNode final { | 401 | class OperationNode final { |
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp index 6fccbbba3..b3dcd291c 100644 --- a/src/video_core/shader/node_helper.cpp +++ b/src/video_core/shader/node_helper.cpp | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | namespace VideoCommon::Shader { | 12 | namespace VideoCommon::Shader { |
| 13 | 13 | ||
| 14 | Node Conditional(Node condition, std::vector<Node> code) { | 14 | Node Conditional(Node condition, std::vector<Node> code) { |
| 15 | return MakeNode<ConditionalNode>(condition, std::move(code)); | 15 | return MakeNode<ConditionalNode>(std::move(condition), std::move(code)); |
| 16 | } | 16 | } |
| 17 | 17 | ||
| 18 | Node Comment(std::string text) { | 18 | Node Comment(std::string text) { |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 11b545cca..1e5c7f660 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -22,8 +22,8 @@ using Tegra::Shader::PredCondition; | |||
| 22 | using Tegra::Shader::PredOperation; | 22 | using Tegra::Shader::PredOperation; |
| 23 | using Tegra::Shader::Register; | 23 | using Tegra::Shader::Register; |
| 24 | 24 | ||
| 25 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset) | 25 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size) |
| 26 | : program_code{program_code}, main_offset{main_offset} { | 26 | : program_code{program_code}, main_offset{main_offset}, program_size{size} { |
| 27 | Decode(); | 27 | Decode(); |
| 28 | } | 28 | } |
| 29 | 29 | ||
| @@ -61,8 +61,17 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { | |||
| 61 | const auto [entry, is_new] = used_cbufs.try_emplace(index); | 61 | const auto [entry, is_new] = used_cbufs.try_emplace(index); |
| 62 | entry->second.MarkAsUsedIndirect(); | 62 | entry->second.MarkAsUsedIndirect(); |
| 63 | 63 | ||
| 64 | const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); | 64 | Node final_offset = [&] { |
| 65 | return MakeNode<CbufNode>(index, final_offset); | 65 | // Attempt to inline constant buffer without a variable offset. This is done to allow |
| 66 | // tracking LDC calls. | ||
| 67 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 68 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 69 | return Immediate(offset); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset)); | ||
| 73 | }(); | ||
| 74 | return MakeNode<CbufNode>(index, std::move(final_offset)); | ||
| 66 | } | 75 | } |
| 67 | 76 | ||
| 68 | Node ShaderIR::GetPredicate(u64 pred_, bool negated) { | 77 | Node ShaderIR::GetPredicate(u64 pred_, bool negated) { |
| @@ -80,7 +89,7 @@ Node ShaderIR::GetPredicate(bool immediate) { | |||
| 80 | 89 | ||
| 81 | Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { | 90 | Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { |
| 82 | used_input_attributes.emplace(index); | 91 | used_input_attributes.emplace(index); |
| 83 | return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); | 92 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); |
| 84 | } | 93 | } |
| 85 | 94 | ||
| 86 | Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { | 95 | Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { |
| @@ -89,6 +98,22 @@ Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_addres | |||
| 89 | } | 98 | } |
| 90 | 99 | ||
| 91 | Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { | 100 | Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { |
| 101 | if (index == Attribute::Index::LayerViewportPointSize) { | ||
| 102 | switch (element) { | ||
| 103 | case 0: | ||
| 104 | UNIMPLEMENTED(); | ||
| 105 | break; | ||
| 106 | case 1: | ||
| 107 | uses_layer = true; | ||
| 108 | break; | ||
| 109 | case 2: | ||
| 110 | uses_viewport_index = true; | ||
| 111 | break; | ||
| 112 | case 3: | ||
| 113 | uses_point_size = true; | ||
| 114 | break; | ||
| 115 | } | ||
| 116 | } | ||
| 92 | if (index == Attribute::Index::ClipDistances0123 || | 117 | if (index == Attribute::Index::ClipDistances0123 || |
| 93 | index == Attribute::Index::ClipDistances4567) { | 118 | index == Attribute::Index::ClipDistances4567) { |
| 94 | const auto clip_index = | 119 | const auto clip_index = |
| @@ -97,7 +122,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff | |||
| 97 | } | 122 | } |
| 98 | used_output_attributes.insert(index); | 123 | used_output_attributes.insert(index); |
| 99 | 124 | ||
| 100 | return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); | 125 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); |
| 101 | } | 126 | } |
| 102 | 127 | ||
| 103 | Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { | 128 | Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { |
| @@ -109,19 +134,19 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { | |||
| 109 | } | 134 | } |
| 110 | 135 | ||
| 111 | Node ShaderIR::GetLocalMemory(Node address) { | 136 | Node ShaderIR::GetLocalMemory(Node address) { |
| 112 | return MakeNode<LmemNode>(address); | 137 | return MakeNode<LmemNode>(std::move(address)); |
| 113 | } | 138 | } |
| 114 | 139 | ||
| 115 | Node ShaderIR::GetTemporal(u32 id) { | 140 | Node ShaderIR::GetTemporary(u32 id) { |
| 116 | return GetRegister(Register::ZeroIndex + 1 + id); | 141 | return GetRegister(Register::ZeroIndex + 1 + id); |
| 117 | } | 142 | } |
| 118 | 143 | ||
| 119 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { | 144 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { |
| 120 | if (absolute) { | 145 | if (absolute) { |
| 121 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, value); | 146 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value)); |
| 122 | } | 147 | } |
| 123 | if (negate) { | 148 | if (negate) { |
| 124 | value = Operation(OperationCode::FNegate, NO_PRECISE, value); | 149 | value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value)); |
| 125 | } | 150 | } |
| 126 | return value; | 151 | return value; |
| 127 | } | 152 | } |
| @@ -130,24 +155,26 @@ Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { | |||
| 130 | if (!saturate) { | 155 | if (!saturate) { |
| 131 | return value; | 156 | return value; |
| 132 | } | 157 | } |
| 133 | const Node positive_zero = Immediate(std::copysignf(0, 1)); | 158 | |
| 134 | const Node positive_one = Immediate(1.0f); | 159 | Node positive_zero = Immediate(std::copysignf(0, 1)); |
| 135 | return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one); | 160 | Node positive_one = Immediate(1.0f); |
| 161 | return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero), | ||
| 162 | std::move(positive_one)); | ||
| 136 | } | 163 | } |
| 137 | 164 | ||
| 138 | Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) { | 165 | Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) { |
| 139 | switch (size) { | 166 | switch (size) { |
| 140 | case Register::Size::Byte: | 167 | case Register::Size::Byte: |
| 141 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, | 168 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, |
| 142 | Immediate(24)); | 169 | std::move(value), Immediate(24)); |
| 143 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, | 170 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, |
| 144 | Immediate(24)); | 171 | std::move(value), Immediate(24)); |
| 145 | return value; | 172 | return value; |
| 146 | case Register::Size::Short: | 173 | case Register::Size::Short: |
| 147 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, | 174 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, |
| 148 | Immediate(16)); | 175 | std::move(value), Immediate(16)); |
| 149 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, | 176 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, |
| 150 | Immediate(16)); | 177 | std::move(value), Immediate(16)); |
| 151 | case Register::Size::Word: | 178 | case Register::Size::Word: |
| 152 | // Default - do nothing | 179 | // Default - do nothing |
| 153 | return value; | 180 | return value; |
| @@ -163,27 +190,29 @@ Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, b | |||
| 163 | return value; | 190 | return value; |
| 164 | } | 191 | } |
| 165 | if (absolute) { | 192 | if (absolute) { |
| 166 | value = Operation(OperationCode::IAbsolute, NO_PRECISE, value); | 193 | value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value)); |
| 167 | } | 194 | } |
| 168 | if (negate) { | 195 | if (negate) { |
| 169 | value = Operation(OperationCode::INegate, NO_PRECISE, value); | 196 | value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value)); |
| 170 | } | 197 | } |
| 171 | return value; | 198 | return value; |
| 172 | } | 199 | } |
| 173 | 200 | ||
| 174 | Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { | 201 | Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { |
| 175 | const Node value = Immediate(instr.half_imm.PackImmediates()); | 202 | Node value = Immediate(instr.half_imm.PackImmediates()); |
| 176 | if (!has_negation) { | 203 | if (!has_negation) { |
| 177 | return value; | 204 | return value; |
| 178 | } | 205 | } |
| 179 | const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); | ||
| 180 | const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); | ||
| 181 | 206 | ||
| 182 | return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate); | 207 | Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); |
| 208 | Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); | ||
| 209 | |||
| 210 | return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate), | ||
| 211 | std::move(second_negate)); | ||
| 183 | } | 212 | } |
| 184 | 213 | ||
| 185 | Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { | 214 | Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { |
| 186 | return Operation(OperationCode::HUnpack, type, value); | 215 | return Operation(OperationCode::HUnpack, type, std::move(value)); |
| 187 | } | 216 | } |
| 188 | 217 | ||
| 189 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | 218 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { |
| @@ -191,11 +220,11 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | |||
| 191 | case Tegra::Shader::HalfMerge::H0_H1: | 220 | case Tegra::Shader::HalfMerge::H0_H1: |
| 192 | return src; | 221 | return src; |
| 193 | case Tegra::Shader::HalfMerge::F32: | 222 | case Tegra::Shader::HalfMerge::F32: |
| 194 | return Operation(OperationCode::HMergeF32, src); | 223 | return Operation(OperationCode::HMergeF32, std::move(src)); |
| 195 | case Tegra::Shader::HalfMerge::Mrg_H0: | 224 | case Tegra::Shader::HalfMerge::Mrg_H0: |
| 196 | return Operation(OperationCode::HMergeH0, dest, src); | 225 | return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src)); |
| 197 | case Tegra::Shader::HalfMerge::Mrg_H1: | 226 | case Tegra::Shader::HalfMerge::Mrg_H1: |
| 198 | return Operation(OperationCode::HMergeH1, dest, src); | 227 | return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src)); |
| 199 | } | 228 | } |
| 200 | UNREACHABLE(); | 229 | UNREACHABLE(); |
| 201 | return src; | 230 | return src; |
| @@ -203,10 +232,10 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | |||
| 203 | 232 | ||
| 204 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { | 233 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { |
| 205 | if (absolute) { | 234 | if (absolute) { |
| 206 | value = Operation(OperationCode::HAbsolute, NO_PRECISE, value); | 235 | value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value)); |
| 207 | } | 236 | } |
| 208 | if (negate) { | 237 | if (negate) { |
| 209 | value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true), | 238 | value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true), |
| 210 | GetPredicate(true)); | 239 | GetPredicate(true)); |
| 211 | } | 240 | } |
| 212 | return value; | 241 | return value; |
| @@ -216,9 +245,11 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { | |||
| 216 | if (!saturate) { | 245 | if (!saturate) { |
| 217 | return value; | 246 | return value; |
| 218 | } | 247 | } |
| 219 | const Node positive_zero = Immediate(std::copysignf(0, 1)); | 248 | |
| 220 | const Node positive_one = Immediate(1.0f); | 249 | Node positive_zero = Immediate(std::copysignf(0, 1)); |
| 221 | return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one); | 250 | Node positive_one = Immediate(1.0f); |
| 251 | return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero), | ||
| 252 | std::move(positive_one)); | ||
| 222 | } | 253 | } |
| 223 | 254 | ||
| 224 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { | 255 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { |
| @@ -246,7 +277,6 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N | |||
| 246 | condition == PredCondition::LessEqualWithNan || | 277 | condition == PredCondition::LessEqualWithNan || |
| 247 | condition == PredCondition::GreaterThanWithNan || | 278 | condition == PredCondition::GreaterThanWithNan || |
| 248 | condition == PredCondition::GreaterEqualWithNan) { | 279 | condition == PredCondition::GreaterEqualWithNan) { |
| 249 | |||
| 250 | predicate = Operation(OperationCode::LogicalOr, predicate, | 280 | predicate = Operation(OperationCode::LogicalOr, predicate, |
| 251 | Operation(OperationCode::LogicalFIsNan, op_a)); | 281 | Operation(OperationCode::LogicalFIsNan, op_a)); |
| 252 | predicate = Operation(OperationCode::LogicalOr, predicate, | 282 | predicate = Operation(OperationCode::LogicalOr, predicate, |
| @@ -275,7 +305,8 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si | |||
| 275 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | 305 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), |
| 276 | "Unknown predicate comparison operation"); | 306 | "Unknown predicate comparison operation"); |
| 277 | 307 | ||
| 278 | Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b); | 308 | Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), |
| 309 | std::move(op_b)); | ||
| 279 | 310 | ||
| 280 | UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || | 311 | UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || |
| 281 | condition == PredCondition::NotEqualWithNan || | 312 | condition == PredCondition::NotEqualWithNan || |
| @@ -305,9 +336,7 @@ Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition | |||
| 305 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | 336 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), |
| 306 | "Unknown predicate comparison operation"); | 337 | "Unknown predicate comparison operation"); |
| 307 | 338 | ||
| 308 | const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); | 339 | return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); |
| 309 | |||
| 310 | return predicate; | ||
| 311 | } | 340 | } |
| 312 | 341 | ||
| 313 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { | 342 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { |
| @@ -333,31 +362,32 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) { | |||
| 333 | } | 362 | } |
| 334 | 363 | ||
| 335 | void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { | 364 | void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { |
| 336 | bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src)); | 365 | bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src))); |
| 337 | } | 366 | } |
| 338 | 367 | ||
| 339 | void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { | 368 | void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { |
| 340 | bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src)); | 369 | bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src))); |
| 341 | } | 370 | } |
| 342 | 371 | ||
| 343 | void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { | 372 | void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { |
| 344 | bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value)); | 373 | bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value))); |
| 345 | } | 374 | } |
| 346 | 375 | ||
| 347 | void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { | 376 | void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { |
| 348 | bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); | 377 | bb.push_back( |
| 378 | Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); | ||
| 349 | } | 379 | } |
| 350 | 380 | ||
| 351 | void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) { | 381 | void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { |
| 352 | SetRegister(bb, Register::ZeroIndex + 1 + id, value); | 382 | SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); |
| 353 | } | 383 | } |
| 354 | 384 | ||
| 355 | void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { | 385 | void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { |
| 356 | if (!sets_cc) { | 386 | if (!sets_cc) { |
| 357 | return; | 387 | return; |
| 358 | } | 388 | } |
| 359 | const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); | 389 | Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f)); |
| 360 | SetInternalFlag(bb, InternalFlag::Zero, zerop); | 390 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); |
| 361 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | 391 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); |
| 362 | } | 392 | } |
| 363 | 393 | ||
| @@ -365,13 +395,18 @@ void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_ | |||
| 365 | if (!sets_cc) { | 395 | if (!sets_cc) { |
| 366 | return; | 396 | return; |
| 367 | } | 397 | } |
| 368 | const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0)); | 398 | Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); |
| 369 | SetInternalFlag(bb, InternalFlag::Zero, zerop); | 399 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); |
| 370 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | 400 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); |
| 371 | } | 401 | } |
| 372 | 402 | ||
| 373 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { | 403 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { |
| 374 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), | 404 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value), |
| 405 | Immediate(offset), Immediate(bits)); | ||
| 406 | } | ||
| 407 | |||
| 408 | Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { | ||
| 409 | return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset), | ||
| 375 | Immediate(bits)); | 410 | Immediate(bits)); |
| 376 | } | 411 | } |
| 377 | 412 | ||
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index edcf2288e..62816bd56 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -5,13 +5,10 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstring> | ||
| 9 | #include <map> | 8 | #include <map> |
| 10 | #include <optional> | 9 | #include <optional> |
| 11 | #include <set> | 10 | #include <set> |
| 12 | #include <string> | ||
| 13 | #include <tuple> | 11 | #include <tuple> |
| 14 | #include <variant> | ||
| 15 | #include <vector> | 12 | #include <vector> |
| 16 | 13 | ||
| 17 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| @@ -22,18 +19,12 @@ | |||
| 22 | 19 | ||
| 23 | namespace VideoCommon::Shader { | 20 | namespace VideoCommon::Shader { |
| 24 | 21 | ||
| 22 | struct ShaderBlock; | ||
| 23 | |||
| 25 | using ProgramCode = std::vector<u64>; | 24 | using ProgramCode = std::vector<u64>; |
| 26 | 25 | ||
| 27 | constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; | 26 | constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; |
| 28 | 27 | ||
| 29 | /// Describes the behaviour of code path of a given entry point and a return point. | ||
| 30 | enum class ExitMethod { | ||
| 31 | Undetermined, ///< Internal value. Only occur when analyzing JMP loop. | ||
| 32 | AlwaysReturn, ///< All code paths reach the return point. | ||
| 33 | Conditional, ///< Code path reaches the return point or an END instruction conditionally. | ||
| 34 | AlwaysEnd, ///< All code paths reach a END instruction. | ||
| 35 | }; | ||
| 36 | |||
| 37 | class ConstBuffer { | 28 | class ConstBuffer { |
| 38 | public: | 29 | public: |
| 39 | explicit ConstBuffer(u32 max_offset, bool is_indirect) | 30 | explicit ConstBuffer(u32 max_offset, bool is_indirect) |
| @@ -73,7 +64,7 @@ struct GlobalMemoryUsage { | |||
| 73 | 64 | ||
| 74 | class ShaderIR final { | 65 | class ShaderIR final { |
| 75 | public: | 66 | public: |
| 76 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset); | 67 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size); |
| 77 | ~ShaderIR(); | 68 | ~ShaderIR(); |
| 78 | 69 | ||
| 79 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { | 70 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { |
| @@ -104,6 +95,10 @@ public: | |||
| 104 | return used_samplers; | 95 | return used_samplers; |
| 105 | } | 96 | } |
| 106 | 97 | ||
| 98 | const std::map<u64, Image>& GetImages() const { | ||
| 99 | return used_images; | ||
| 100 | } | ||
| 101 | |||
| 107 | const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances() | 102 | const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances() |
| 108 | const { | 103 | const { |
| 109 | return used_clip_distances; | 104 | return used_clip_distances; |
| @@ -117,6 +112,18 @@ public: | |||
| 117 | return static_cast<std::size_t>(coverage_end * sizeof(u64)); | 112 | return static_cast<std::size_t>(coverage_end * sizeof(u64)); |
| 118 | } | 113 | } |
| 119 | 114 | ||
| 115 | bool UsesLayer() const { | ||
| 116 | return uses_layer; | ||
| 117 | } | ||
| 118 | |||
| 119 | bool UsesViewportIndex() const { | ||
| 120 | return uses_viewport_index; | ||
| 121 | } | ||
| 122 | |||
| 123 | bool UsesPointSize() const { | ||
| 124 | return uses_point_size; | ||
| 125 | } | ||
| 126 | |||
| 120 | bool HasPhysicalAttributes() const { | 127 | bool HasPhysicalAttributes() const { |
| 121 | return uses_physical_attributes; | 128 | return uses_physical_attributes; |
| 122 | } | 129 | } |
| @@ -125,12 +132,20 @@ public: | |||
| 125 | return header; | 132 | return header; |
| 126 | } | 133 | } |
| 127 | 134 | ||
| 135 | bool IsFlowStackDisabled() const { | ||
| 136 | return disable_flow_stack; | ||
| 137 | } | ||
| 138 | |||
| 139 | u32 ConvertAddressToNvidiaSpace(const u32 address) const { | ||
| 140 | return (address - main_offset) * sizeof(Tegra::Shader::Instruction); | ||
| 141 | } | ||
| 142 | |||
| 128 | private: | 143 | private: |
| 129 | void Decode(); | 144 | void Decode(); |
| 130 | 145 | ||
| 131 | ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels); | ||
| 132 | |||
| 133 | NodeBlock DecodeRange(u32 begin, u32 end); | 146 | NodeBlock DecodeRange(u32 begin, u32 end); |
| 147 | void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); | ||
| 148 | void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); | ||
| 134 | 149 | ||
| 135 | /** | 150 | /** |
| 136 | * Decodes a single instruction from Tegra to IR. | 151 | * Decodes a single instruction from Tegra to IR. |
| @@ -152,8 +167,10 @@ private: | |||
| 152 | u32 DecodeFfma(NodeBlock& bb, u32 pc); | 167 | u32 DecodeFfma(NodeBlock& bb, u32 pc); |
| 153 | u32 DecodeHfma2(NodeBlock& bb, u32 pc); | 168 | u32 DecodeHfma2(NodeBlock& bb, u32 pc); |
| 154 | u32 DecodeConversion(NodeBlock& bb, u32 pc); | 169 | u32 DecodeConversion(NodeBlock& bb, u32 pc); |
| 170 | u32 DecodeWarp(NodeBlock& bb, u32 pc); | ||
| 155 | u32 DecodeMemory(NodeBlock& bb, u32 pc); | 171 | u32 DecodeMemory(NodeBlock& bb, u32 pc); |
| 156 | u32 DecodeTexture(NodeBlock& bb, u32 pc); | 172 | u32 DecodeTexture(NodeBlock& bb, u32 pc); |
| 173 | u32 DecodeImage(NodeBlock& bb, u32 pc); | ||
| 157 | u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); | 174 | u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); |
| 158 | u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); | 175 | u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); |
| 159 | u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); | 176 | u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); |
| @@ -191,8 +208,8 @@ private: | |||
| 191 | Node GetInternalFlag(InternalFlag flag, bool negated = false); | 208 | Node GetInternalFlag(InternalFlag flag, bool negated = false); |
| 192 | /// Generates a node representing a local memory address | 209 | /// Generates a node representing a local memory address |
| 193 | Node GetLocalMemory(Node address); | 210 | Node GetLocalMemory(Node address); |
| 194 | /// Generates a temporal, internally it uses a post-RZ register | 211 | /// Generates a temporary, internally it uses a post-RZ register |
| 195 | Node GetTemporal(u32 id); | 212 | Node GetTemporary(u32 id); |
| 196 | 213 | ||
| 197 | /// Sets a register. src value must be a number-evaluated node. | 214 | /// Sets a register. src value must be a number-evaluated node. |
| 198 | void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); | 215 | void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); |
| @@ -202,8 +219,8 @@ private: | |||
| 202 | void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); | 219 | void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); |
| 203 | /// Sets a local memory address. address and value must be a number-evaluated node | 220 | /// Sets a local memory address. address and value must be a number-evaluated node |
| 204 | void SetLocalMemory(NodeBlock& bb, Node address, Node value); | 221 | void SetLocalMemory(NodeBlock& bb, Node address, Node value); |
| 205 | /// Sets a temporal. Internally it uses a post-RZ register | 222 | /// Sets a temporary. Internally it uses a post-RZ register |
| 206 | void SetTemporal(NodeBlock& bb, u32 id, Node value); | 223 | void SetTemporary(NodeBlock& bb, u32 id, Node value); |
| 207 | 224 | ||
| 208 | /// Sets internal flags from a float | 225 | /// Sets internal flags from a float |
| 209 | void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); | 226 | void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); |
| @@ -254,9 +271,24 @@ private: | |||
| 254 | Tegra::Shader::TextureType type, bool is_array, | 271 | Tegra::Shader::TextureType type, bool is_array, |
| 255 | bool is_shadow); | 272 | bool is_shadow); |
| 256 | 273 | ||
| 274 | /// Accesses an image. | ||
| 275 | Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type, | ||
| 276 | std::optional<Tegra::Shader::ImageAtomicSize> size = {}); | ||
| 277 | |||
| 278 | /// Access a bindless image sampler. | ||
| 279 | Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type, | ||
| 280 | std::optional<Tegra::Shader::ImageAtomicSize> size = {}); | ||
| 281 | |||
| 282 | /// Tries to access an existing image, updating it's state as needed | ||
| 283 | Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type, | ||
| 284 | std::optional<Tegra::Shader::ImageAtomicSize> size); | ||
| 285 | |||
| 257 | /// Extracts a sequence of bits from a node | 286 | /// Extracts a sequence of bits from a node |
| 258 | Node BitfieldExtract(Node value, u32 offset, u32 bits); | 287 | Node BitfieldExtract(Node value, u32 offset, u32 bits); |
| 259 | 288 | ||
| 289 | /// Inserts a sequence of bits from a node | ||
| 290 | Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); | ||
| 291 | |||
| 260 | void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | 292 | void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, |
| 261 | const Node4& components); | 293 | const Node4& components); |
| 262 | 294 | ||
| @@ -277,6 +309,8 @@ private: | |||
| 277 | Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 309 | Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 278 | bool depth_compare, bool is_array, bool is_aoffi); | 310 | bool depth_compare, bool is_array, bool is_aoffi); |
| 279 | 311 | ||
| 312 | Node4 GetTldCode(Tegra::Shader::Instruction instr); | ||
| 313 | |||
| 280 | Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 314 | Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 281 | bool is_array); | 315 | bool is_array); |
| 282 | 316 | ||
| @@ -301,7 +335,7 @@ private: | |||
| 301 | void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, | 335 | void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, |
| 302 | Node op_c, Node imm_lut, bool sets_cc); | 336 | Node op_c, Node imm_lut, bool sets_cc); |
| 303 | 337 | ||
| 304 | Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; | 338 | std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; |
| 305 | 339 | ||
| 306 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; | 340 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; |
| 307 | 341 | ||
| @@ -313,10 +347,11 @@ private: | |||
| 313 | 347 | ||
| 314 | const ProgramCode& program_code; | 348 | const ProgramCode& program_code; |
| 315 | const u32 main_offset; | 349 | const u32 main_offset; |
| 350 | const std::size_t program_size; | ||
| 351 | bool disable_flow_stack{}; | ||
| 316 | 352 | ||
| 317 | u32 coverage_begin{}; | 353 | u32 coverage_begin{}; |
| 318 | u32 coverage_end{}; | 354 | u32 coverage_end{}; |
| 319 | std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; | ||
| 320 | 355 | ||
| 321 | std::map<u32, NodeBlock> basic_blocks; | 356 | std::map<u32, NodeBlock> basic_blocks; |
| 322 | NodeBlock global_code; | 357 | NodeBlock global_code; |
| @@ -327,8 +362,12 @@ private: | |||
| 327 | std::set<Tegra::Shader::Attribute::Index> used_output_attributes; | 362 | std::set<Tegra::Shader::Attribute::Index> used_output_attributes; |
| 328 | std::map<u32, ConstBuffer> used_cbufs; | 363 | std::map<u32, ConstBuffer> used_cbufs; |
| 329 | std::set<Sampler> used_samplers; | 364 | std::set<Sampler> used_samplers; |
| 365 | std::map<u64, Image> used_images; | ||
| 330 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; | 366 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; |
| 331 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; | 367 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; |
| 368 | bool uses_layer{}; | ||
| 369 | bool uses_viewport_index{}; | ||
| 370 | bool uses_point_size{}; | ||
| 332 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes | 371 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes |
| 333 | 372 | ||
| 334 | Tegra::Shader::Header header; | 373 | Tegra::Shader::Header header; |
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index fc957d980..55f5949e4 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -15,56 +15,63 @@ namespace { | |||
| 15 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | 15 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, |
| 16 | OperationCode operation_code) { | 16 | OperationCode operation_code) { |
| 17 | for (; cursor >= 0; --cursor) { | 17 | for (; cursor >= 0; --cursor) { |
| 18 | const Node node = code.at(cursor); | 18 | Node node = code.at(cursor); |
| 19 | |||
| 19 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | 20 | if (const auto operation = std::get_if<OperationNode>(&*node)) { |
| 20 | if (operation->GetCode() == operation_code) { | 21 | if (operation->GetCode() == operation_code) { |
| 21 | return {node, cursor}; | 22 | return {std::move(node), cursor}; |
| 22 | } | 23 | } |
| 23 | } | 24 | } |
| 25 | |||
| 24 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | 26 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { |
| 25 | const auto& conditional_code = conditional->GetCode(); | 27 | const auto& conditional_code = conditional->GetCode(); |
| 26 | const auto [found, internal_cursor] = FindOperation( | 28 | auto [found, internal_cursor] = FindOperation( |
| 27 | conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); | 29 | conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); |
| 28 | if (found) { | 30 | if (found) { |
| 29 | return {found, cursor}; | 31 | return {std::move(found), cursor}; |
| 30 | } | 32 | } |
| 31 | } | 33 | } |
| 32 | } | 34 | } |
| 33 | return {}; | 35 | return {}; |
| 34 | } | 36 | } |
| 35 | } // namespace | 37 | } // Anonymous namespace |
| 36 | 38 | ||
| 37 | Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { | 39 | std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, |
| 40 | s64 cursor) const { | ||
| 38 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | 41 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { |
| 39 | // Cbuf found, but it has to be immediate | 42 | // Constant buffer found, test if it's an immediate |
| 40 | return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; | 43 | const auto offset = cbuf->GetOffset(); |
| 44 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 45 | return {tracked, cbuf->GetIndex(), immediate->GetValue()}; | ||
| 46 | } | ||
| 47 | return {}; | ||
| 41 | } | 48 | } |
| 42 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { | 49 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { |
| 43 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | 50 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { |
| 44 | return nullptr; | 51 | return {}; |
| 45 | } | 52 | } |
| 46 | // Reduce the cursor in one to avoid infinite loops when the instruction sets the same | 53 | // Reduce the cursor in one to avoid infinite loops when the instruction sets the same |
| 47 | // register that it uses as operand | 54 | // register that it uses as operand |
| 48 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | 55 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); |
| 49 | if (!source) { | 56 | if (!source) { |
| 50 | return nullptr; | 57 | return {}; |
| 51 | } | 58 | } |
| 52 | return TrackCbuf(source, code, new_cursor); | 59 | return TrackCbuf(source, code, new_cursor); |
| 53 | } | 60 | } |
| 54 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { | 61 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { |
| 55 | for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { | 62 | for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { |
| 56 | if (const auto found = TrackCbuf((*operation)[i], code, cursor)) { | 63 | if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) { |
| 57 | // Cbuf found in operand | 64 | // Cbuf found in operand. |
| 58 | return found; | 65 | return found; |
| 59 | } | 66 | } |
| 60 | } | 67 | } |
| 61 | return nullptr; | 68 | return {}; |
| 62 | } | 69 | } |
| 63 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { | 70 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { |
| 64 | const auto& conditional_code = conditional->GetCode(); | 71 | const auto& conditional_code = conditional->GetCode(); |
| 65 | return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); | 72 | return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); |
| 66 | } | 73 | } |
| 67 | return nullptr; | 74 | return {}; |
| 68 | } | 75 | } |
| 69 | 76 | ||
| 70 | std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { | 77 | std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { |
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 6384fa8d2..4ceb219be 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -12,6 +12,8 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t | |||
| 12 | switch (texture_type) { | 12 | switch (texture_type) { |
| 13 | case Tegra::Texture::TextureType::Texture1D: | 13 | case Tegra::Texture::TextureType::Texture1D: |
| 14 | return SurfaceTarget::Texture1D; | 14 | return SurfaceTarget::Texture1D; |
| 15 | case Tegra::Texture::TextureType::Texture1DBuffer: | ||
| 16 | return SurfaceTarget::TextureBuffer; | ||
| 15 | case Tegra::Texture::TextureType::Texture2D: | 17 | case Tegra::Texture::TextureType::Texture2D: |
| 16 | case Tegra::Texture::TextureType::Texture2DNoMipmap: | 18 | case Tegra::Texture::TextureType::Texture2DNoMipmap: |
| 17 | return SurfaceTarget::Texture2D; | 19 | return SurfaceTarget::Texture2D; |
| @@ -35,6 +37,7 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t | |||
| 35 | bool SurfaceTargetIsLayered(SurfaceTarget target) { | 37 | bool SurfaceTargetIsLayered(SurfaceTarget target) { |
| 36 | switch (target) { | 38 | switch (target) { |
| 37 | case SurfaceTarget::Texture1D: | 39 | case SurfaceTarget::Texture1D: |
| 40 | case SurfaceTarget::TextureBuffer: | ||
| 38 | case SurfaceTarget::Texture2D: | 41 | case SurfaceTarget::Texture2D: |
| 39 | case SurfaceTarget::Texture3D: | 42 | case SurfaceTarget::Texture3D: |
| 40 | return false; | 43 | return false; |
| @@ -53,6 +56,7 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) { | |||
| 53 | bool SurfaceTargetIsArray(SurfaceTarget target) { | 56 | bool SurfaceTargetIsArray(SurfaceTarget target) { |
| 54 | switch (target) { | 57 | switch (target) { |
| 55 | case SurfaceTarget::Texture1D: | 58 | case SurfaceTarget::Texture1D: |
| 59 | case SurfaceTarget::TextureBuffer: | ||
| 56 | case SurfaceTarget::Texture2D: | 60 | case SurfaceTarget::Texture2D: |
| 57 | case SurfaceTarget::Texture3D: | 61 | case SurfaceTarget::Texture3D: |
| 58 | case SurfaceTarget::TextureCubemap: | 62 | case SurfaceTarget::TextureCubemap: |
| @@ -304,8 +308,8 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, | |||
| 304 | return PixelFormat::Z32F; | 308 | return PixelFormat::Z32F; |
| 305 | case Tegra::Texture::TextureFormat::Z16: | 309 | case Tegra::Texture::TextureFormat::Z16: |
| 306 | return PixelFormat::Z16; | 310 | return PixelFormat::Z16; |
| 307 | case Tegra::Texture::TextureFormat::Z24S8: | 311 | case Tegra::Texture::TextureFormat::S8Z24: |
| 308 | return PixelFormat::Z24S8; | 312 | return PixelFormat::S8Z24; |
| 309 | case Tegra::Texture::TextureFormat::ZF32_X24S8: | 313 | case Tegra::Texture::TextureFormat::ZF32_X24S8: |
| 310 | return PixelFormat::Z32FS8; | 314 | return PixelFormat::Z32FS8; |
| 311 | case Tegra::Texture::TextureFormat::DXT1: | 315 | case Tegra::Texture::TextureFormat::DXT1: |
| @@ -441,11 +445,12 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat | |||
| 441 | switch (format) { | 445 | switch (format) { |
| 442 | case Tegra::FramebufferConfig::PixelFormat::ABGR8: | 446 | case Tegra::FramebufferConfig::PixelFormat::ABGR8: |
| 443 | return PixelFormat::ABGR8U; | 447 | return PixelFormat::ABGR8U; |
| 448 | case Tegra::FramebufferConfig::PixelFormat::RGB565: | ||
| 449 | return PixelFormat::B5G6R5U; | ||
| 444 | case Tegra::FramebufferConfig::PixelFormat::BGRA8: | 450 | case Tegra::FramebufferConfig::PixelFormat::BGRA8: |
| 445 | return PixelFormat::BGRA8; | 451 | return PixelFormat::BGRA8; |
| 446 | default: | 452 | default: |
| 447 | LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); | 453 | UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format)); |
| 448 | UNREACHABLE(); | ||
| 449 | return PixelFormat::ABGR8U; | 454 | return PixelFormat::ABGR8U; |
| 450 | } | 455 | } |
| 451 | } | 456 | } |
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index b783e4b27..83f31c12c 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -114,6 +114,7 @@ enum class SurfaceType { | |||
| 114 | 114 | ||
| 115 | enum class SurfaceTarget { | 115 | enum class SurfaceTarget { |
| 116 | Texture1D, | 116 | Texture1D, |
| 117 | TextureBuffer, | ||
| 117 | Texture2D, | 118 | Texture2D, |
| 118 | Texture3D, | 119 | Texture3D, |
| 119 | Texture1DArray, | 120 | Texture1DArray, |
| @@ -122,71 +123,71 @@ enum class SurfaceTarget { | |||
| 122 | TextureCubeArray, | 123 | TextureCubeArray, |
| 123 | }; | 124 | }; |
| 124 | 125 | ||
| 125 | constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ | 126 | constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ |
| 126 | 1, // ABGR8U | 127 | 0, // ABGR8U |
| 127 | 1, // ABGR8S | 128 | 0, // ABGR8S |
| 128 | 1, // ABGR8UI | 129 | 0, // ABGR8UI |
| 129 | 1, // B5G6R5U | 130 | 0, // B5G6R5U |
| 130 | 1, // A2B10G10R10U | 131 | 0, // A2B10G10R10U |
| 131 | 1, // A1B5G5R5U | 132 | 0, // A1B5G5R5U |
| 132 | 1, // R8U | 133 | 0, // R8U |
| 133 | 1, // R8UI | 134 | 0, // R8UI |
| 134 | 1, // RGBA16F | 135 | 0, // RGBA16F |
| 135 | 1, // RGBA16U | 136 | 0, // RGBA16U |
| 136 | 1, // RGBA16UI | 137 | 0, // RGBA16UI |
| 137 | 1, // R11FG11FB10F | 138 | 0, // R11FG11FB10F |
| 138 | 1, // RGBA32UI | 139 | 0, // RGBA32UI |
| 139 | 4, // DXT1 | 140 | 2, // DXT1 |
| 140 | 4, // DXT23 | 141 | 2, // DXT23 |
| 141 | 4, // DXT45 | 142 | 2, // DXT45 |
| 142 | 4, // DXN1 | 143 | 2, // DXN1 |
| 143 | 4, // DXN2UNORM | 144 | 2, // DXN2UNORM |
| 144 | 4, // DXN2SNORM | 145 | 2, // DXN2SNORM |
| 145 | 4, // BC7U | 146 | 2, // BC7U |
| 146 | 4, // BC6H_UF16 | 147 | 2, // BC6H_UF16 |
| 147 | 4, // BC6H_SF16 | 148 | 2, // BC6H_SF16 |
| 148 | 4, // ASTC_2D_4X4 | 149 | 2, // ASTC_2D_4X4 |
| 149 | 1, // BGRA8 | 150 | 0, // BGRA8 |
| 150 | 1, // RGBA32F | 151 | 0, // RGBA32F |
| 151 | 1, // RG32F | 152 | 0, // RG32F |
| 152 | 1, // R32F | 153 | 0, // R32F |
| 153 | 1, // R16F | 154 | 0, // R16F |
| 154 | 1, // R16U | 155 | 0, // R16U |
| 155 | 1, // R16S | 156 | 0, // R16S |
| 156 | 1, // R16UI | 157 | 0, // R16UI |
| 157 | 1, // R16I | 158 | 0, // R16I |
| 158 | 1, // RG16 | 159 | 0, // RG16 |
| 159 | 1, // RG16F | 160 | 0, // RG16F |
| 160 | 1, // RG16UI | 161 | 0, // RG16UI |
| 161 | 1, // RG16I | 162 | 0, // RG16I |
| 162 | 1, // RG16S | 163 | 0, // RG16S |
| 163 | 1, // RGB32F | 164 | 0, // RGB32F |
| 164 | 1, // RGBA8_SRGB | 165 | 0, // RGBA8_SRGB |
| 165 | 1, // RG8U | 166 | 0, // RG8U |
| 166 | 1, // RG8S | 167 | 0, // RG8S |
| 167 | 1, // RG32UI | 168 | 0, // RG32UI |
| 168 | 1, // R32UI | 169 | 0, // R32UI |
| 169 | 4, // ASTC_2D_8X8 | 170 | 2, // ASTC_2D_8X8 |
| 170 | 4, // ASTC_2D_8X5 | 171 | 2, // ASTC_2D_8X5 |
| 171 | 4, // ASTC_2D_5X4 | 172 | 2, // ASTC_2D_5X4 |
| 172 | 1, // BGRA8_SRGB | 173 | 0, // BGRA8_SRGB |
| 173 | 4, // DXT1_SRGB | 174 | 2, // DXT1_SRGB |
| 174 | 4, // DXT23_SRGB | 175 | 2, // DXT23_SRGB |
| 175 | 4, // DXT45_SRGB | 176 | 2, // DXT45_SRGB |
| 176 | 4, // BC7U_SRGB | 177 | 2, // BC7U_SRGB |
| 177 | 4, // ASTC_2D_4X4_SRGB | 178 | 2, // ASTC_2D_4X4_SRGB |
| 178 | 4, // ASTC_2D_8X8_SRGB | 179 | 2, // ASTC_2D_8X8_SRGB |
| 179 | 4, // ASTC_2D_8X5_SRGB | 180 | 2, // ASTC_2D_8X5_SRGB |
| 180 | 4, // ASTC_2D_5X4_SRGB | 181 | 2, // ASTC_2D_5X4_SRGB |
| 181 | 4, // ASTC_2D_5X5 | 182 | 2, // ASTC_2D_5X5 |
| 182 | 4, // ASTC_2D_5X5_SRGB | 183 | 2, // ASTC_2D_5X5_SRGB |
| 183 | 4, // ASTC_2D_10X8 | 184 | 2, // ASTC_2D_10X8 |
| 184 | 4, // ASTC_2D_10X8_SRGB | 185 | 2, // ASTC_2D_10X8_SRGB |
| 185 | 1, // Z32F | 186 | 0, // Z32F |
| 186 | 1, // Z16 | 187 | 0, // Z16 |
| 187 | 1, // Z24S8 | 188 | 0, // Z24S8 |
| 188 | 1, // S8Z24 | 189 | 0, // S8Z24 |
| 189 | 1, // Z32FS8 | 190 | 0, // Z32FS8 |
| 190 | }}; | 191 | }}; |
| 191 | 192 | ||
| 192 | /** | 193 | /** |
| @@ -195,12 +196,14 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ | |||
| 195 | * compressed image. This is used for maintaining proper surface sizes for compressed | 196 | * compressed image. This is used for maintaining proper surface sizes for compressed |
| 196 | * texture formats. | 197 | * texture formats. |
| 197 | */ | 198 | */ |
| 198 | static constexpr u32 GetCompressionFactor(PixelFormat format) { | 199 | inline constexpr u32 GetCompressionFactorShift(PixelFormat format) { |
| 199 | if (format == PixelFormat::Invalid) | 200 | DEBUG_ASSERT(format != PixelFormat::Invalid); |
| 200 | return 0; | 201 | DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_factor_shift_table.size()); |
| 202 | return compression_factor_shift_table[static_cast<std::size_t>(format)]; | ||
| 203 | } | ||
| 201 | 204 | ||
| 202 | ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size()); | 205 | inline constexpr u32 GetCompressionFactor(PixelFormat format) { |
| 203 | return compression_factor_table[static_cast<std::size_t>(format)]; | 206 | return 1U << GetCompressionFactorShift(format); |
| 204 | } | 207 | } |
| 205 | 208 | ||
| 206 | constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ | 209 | constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ |
| @@ -436,6 +439,88 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { | |||
| 436 | return GetFormatBpp(pixel_format) / CHAR_BIT; | 439 | return GetFormatBpp(pixel_format) / CHAR_BIT; |
| 437 | } | 440 | } |
| 438 | 441 | ||
| 442 | enum class SurfaceCompression { | ||
| 443 | None, // Not compressed | ||
| 444 | Compressed, // Texture is compressed | ||
| 445 | Converted, // Texture is converted before upload or after download | ||
| 446 | Rearranged, // Texture is swizzled before upload or after download | ||
| 447 | }; | ||
| 448 | |||
| 449 | constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table = {{ | ||
| 450 | SurfaceCompression::None, // ABGR8U | ||
| 451 | SurfaceCompression::None, // ABGR8S | ||
| 452 | SurfaceCompression::None, // ABGR8UI | ||
| 453 | SurfaceCompression::None, // B5G6R5U | ||
| 454 | SurfaceCompression::None, // A2B10G10R10U | ||
| 455 | SurfaceCompression::None, // A1B5G5R5U | ||
| 456 | SurfaceCompression::None, // R8U | ||
| 457 | SurfaceCompression::None, // R8UI | ||
| 458 | SurfaceCompression::None, // RGBA16F | ||
| 459 | SurfaceCompression::None, // RGBA16U | ||
| 460 | SurfaceCompression::None, // RGBA16UI | ||
| 461 | SurfaceCompression::None, // R11FG11FB10F | ||
| 462 | SurfaceCompression::None, // RGBA32UI | ||
| 463 | SurfaceCompression::Compressed, // DXT1 | ||
| 464 | SurfaceCompression::Compressed, // DXT23 | ||
| 465 | SurfaceCompression::Compressed, // DXT45 | ||
| 466 | SurfaceCompression::Compressed, // DXN1 | ||
| 467 | SurfaceCompression::Compressed, // DXN2UNORM | ||
| 468 | SurfaceCompression::Compressed, // DXN2SNORM | ||
| 469 | SurfaceCompression::Compressed, // BC7U | ||
| 470 | SurfaceCompression::Compressed, // BC6H_UF16 | ||
| 471 | SurfaceCompression::Compressed, // BC6H_SF16 | ||
| 472 | SurfaceCompression::Converted, // ASTC_2D_4X4 | ||
| 473 | SurfaceCompression::None, // BGRA8 | ||
| 474 | SurfaceCompression::None, // RGBA32F | ||
| 475 | SurfaceCompression::None, // RG32F | ||
| 476 | SurfaceCompression::None, // R32F | ||
| 477 | SurfaceCompression::None, // R16F | ||
| 478 | SurfaceCompression::None, // R16U | ||
| 479 | SurfaceCompression::None, // R16S | ||
| 480 | SurfaceCompression::None, // R16UI | ||
| 481 | SurfaceCompression::None, // R16I | ||
| 482 | SurfaceCompression::None, // RG16 | ||
| 483 | SurfaceCompression::None, // RG16F | ||
| 484 | SurfaceCompression::None, // RG16UI | ||
| 485 | SurfaceCompression::None, // RG16I | ||
| 486 | SurfaceCompression::None, // RG16S | ||
| 487 | SurfaceCompression::None, // RGB32F | ||
| 488 | SurfaceCompression::None, // RGBA8_SRGB | ||
| 489 | SurfaceCompression::None, // RG8U | ||
| 490 | SurfaceCompression::None, // RG8S | ||
| 491 | SurfaceCompression::None, // RG32UI | ||
| 492 | SurfaceCompression::None, // R32UI | ||
| 493 | SurfaceCompression::Converted, // ASTC_2D_8X8 | ||
| 494 | SurfaceCompression::Converted, // ASTC_2D_8X5 | ||
| 495 | SurfaceCompression::Converted, // ASTC_2D_5X4 | ||
| 496 | SurfaceCompression::None, // BGRA8_SRGB | ||
| 497 | SurfaceCompression::Compressed, // DXT1_SRGB | ||
| 498 | SurfaceCompression::Compressed, // DXT23_SRGB | ||
| 499 | SurfaceCompression::Compressed, // DXT45_SRGB | ||
| 500 | SurfaceCompression::Compressed, // BC7U_SRGB | ||
| 501 | SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB | ||
| 502 | SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB | ||
| 503 | SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB | ||
| 504 | SurfaceCompression::Converted, // ASTC_2D_5X4_SRGB | ||
| 505 | SurfaceCompression::Converted, // ASTC_2D_5X5 | ||
| 506 | SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB | ||
| 507 | SurfaceCompression::Converted, // ASTC_2D_10X8 | ||
| 508 | SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB | ||
| 509 | SurfaceCompression::None, // Z32F | ||
| 510 | SurfaceCompression::None, // Z16 | ||
| 511 | SurfaceCompression::None, // Z24S8 | ||
| 512 | SurfaceCompression::Rearranged, // S8Z24 | ||
| 513 | SurfaceCompression::None, // Z32FS8 | ||
| 514 | }}; | ||
| 515 | |||
| 516 | constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) { | ||
| 517 | if (format == PixelFormat::Invalid) { | ||
| 518 | return SurfaceCompression::None; | ||
| 519 | } | ||
| 520 | DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_type_table.size()); | ||
| 521 | return compression_type_table[static_cast<std::size_t>(format)]; | ||
| 522 | } | ||
| 523 | |||
| 439 | SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); | 524 | SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); |
| 440 | 525 | ||
| 441 | bool SurfaceTargetIsLayered(SurfaceTarget target); | 526 | bool SurfaceTargetIsLayered(SurfaceTarget target); |
diff --git a/src/video_core/texture_cache.cpp b/src/video_core/texture_cache.cpp deleted file mode 100644 index e96eba7cc..000000000 --- a/src/video_core/texture_cache.cpp +++ /dev/null | |||
| @@ -1,386 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/alignment.h" | ||
| 6 | #include "common/assert.h" | ||
| 7 | #include "common/cityhash.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "core/core.h" | ||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/texture_cache.h" | ||
| 12 | #include "video_core/textures/decoders.h" | ||
| 13 | #include "video_core/textures/texture.h" | ||
| 14 | |||
| 15 | namespace VideoCommon { | ||
| 16 | |||
| 17 | using VideoCore::Surface::SurfaceTarget; | ||
| 18 | |||
| 19 | using VideoCore::Surface::ComponentTypeFromDepthFormat; | ||
| 20 | using VideoCore::Surface::ComponentTypeFromRenderTarget; | ||
| 21 | using VideoCore::Surface::ComponentTypeFromTexture; | ||
| 22 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 23 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 24 | using VideoCore::Surface::PixelFormatFromTextureFormat; | ||
| 25 | using VideoCore::Surface::SurfaceTargetFromTextureType; | ||
| 26 | |||
| 27 | constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { | ||
| 28 | return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); | ||
| 29 | } | ||
| 30 | |||
| 31 | SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, | ||
| 32 | const Tegra::Texture::FullTextureInfo& config) { | ||
| 33 | SurfaceParams params; | ||
| 34 | params.is_tiled = config.tic.IsTiled(); | ||
| 35 | params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, | ||
| 36 | params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, | ||
| 37 | params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, | ||
| 38 | params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1; | ||
| 39 | params.pixel_format = | ||
| 40 | PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), false); | ||
| 41 | params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); | ||
| 42 | params.type = GetFormatType(params.pixel_format); | ||
| 43 | params.target = SurfaceTargetFromTextureType(config.tic.texture_type); | ||
| 44 | params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); | ||
| 45 | params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); | ||
| 46 | params.depth = config.tic.Depth(); | ||
| 47 | if (params.target == SurfaceTarget::TextureCubemap || | ||
| 48 | params.target == SurfaceTarget::TextureCubeArray) { | ||
| 49 | params.depth *= 6; | ||
| 50 | } | ||
| 51 | params.pitch = params.is_tiled ? 0 : config.tic.Pitch(); | ||
| 52 | params.unaligned_height = config.tic.Height(); | ||
| 53 | params.num_levels = config.tic.max_mip_level + 1; | ||
| 54 | |||
| 55 | params.CalculateCachedValues(); | ||
| 56 | return params; | ||
| 57 | } | ||
| 58 | |||
| 59 | SurfaceParams SurfaceParams::CreateForDepthBuffer( | ||
| 60 | Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, | ||
| 61 | u32 block_width, u32 block_height, u32 block_depth, | ||
| 62 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { | ||
| 63 | SurfaceParams params; | ||
| 64 | params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | ||
| 65 | params.block_width = 1 << std::min(block_width, 5U); | ||
| 66 | params.block_height = 1 << std::min(block_height, 5U); | ||
| 67 | params.block_depth = 1 << std::min(block_depth, 5U); | ||
| 68 | params.tile_width_spacing = 1; | ||
| 69 | params.pixel_format = PixelFormatFromDepthFormat(format); | ||
| 70 | params.component_type = ComponentTypeFromDepthFormat(format); | ||
| 71 | params.type = GetFormatType(params.pixel_format); | ||
| 72 | params.width = zeta_width; | ||
| 73 | params.height = zeta_height; | ||
| 74 | params.unaligned_height = zeta_height; | ||
| 75 | params.target = SurfaceTarget::Texture2D; | ||
| 76 | params.depth = 1; | ||
| 77 | params.num_levels = 1; | ||
| 78 | |||
| 79 | params.CalculateCachedValues(); | ||
| 80 | return params; | ||
| 81 | } | ||
| 82 | |||
| 83 | SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) { | ||
| 84 | const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; | ||
| 85 | SurfaceParams params; | ||
| 86 | params.is_tiled = | ||
| 87 | config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | ||
| 88 | params.block_width = 1 << config.memory_layout.block_width; | ||
| 89 | params.block_height = 1 << config.memory_layout.block_height; | ||
| 90 | params.block_depth = 1 << config.memory_layout.block_depth; | ||
| 91 | params.tile_width_spacing = 1; | ||
| 92 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 93 | params.component_type = ComponentTypeFromRenderTarget(config.format); | ||
| 94 | params.type = GetFormatType(params.pixel_format); | ||
| 95 | if (params.is_tiled) { | ||
| 96 | params.width = config.width; | ||
| 97 | } else { | ||
| 98 | const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT; | ||
| 99 | params.pitch = config.width; | ||
| 100 | params.width = params.pitch / bpp; | ||
| 101 | } | ||
| 102 | params.height = config.height; | ||
| 103 | params.depth = 1; | ||
| 104 | params.unaligned_height = config.height; | ||
| 105 | params.target = SurfaceTarget::Texture2D; | ||
| 106 | params.num_levels = 1; | ||
| 107 | |||
| 108 | params.CalculateCachedValues(); | ||
| 109 | return params; | ||
| 110 | } | ||
| 111 | |||
| 112 | SurfaceParams SurfaceParams::CreateForFermiCopySurface( | ||
| 113 | const Tegra::Engines::Fermi2D::Regs::Surface& config) { | ||
| 114 | SurfaceParams params{}; | ||
| 115 | params.is_tiled = !config.linear; | ||
| 116 | params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, | ||
| 117 | params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, | ||
| 118 | params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, | ||
| 119 | params.tile_width_spacing = 1; | ||
| 120 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 121 | params.component_type = ComponentTypeFromRenderTarget(config.format); | ||
| 122 | params.type = GetFormatType(params.pixel_format); | ||
| 123 | params.width = config.width; | ||
| 124 | params.height = config.height; | ||
| 125 | params.unaligned_height = config.height; | ||
| 126 | // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters | ||
| 127 | params.target = SurfaceTarget::Texture2D; | ||
| 128 | params.depth = 1; | ||
| 129 | params.num_levels = 1; | ||
| 130 | |||
| 131 | params.CalculateCachedValues(); | ||
| 132 | return params; | ||
| 133 | } | ||
| 134 | |||
| 135 | u32 SurfaceParams::GetMipWidth(u32 level) const { | ||
| 136 | return std::max(1U, width >> level); | ||
| 137 | } | ||
| 138 | |||
| 139 | u32 SurfaceParams::GetMipHeight(u32 level) const { | ||
| 140 | return std::max(1U, height >> level); | ||
| 141 | } | ||
| 142 | |||
| 143 | u32 SurfaceParams::GetMipDepth(u32 level) const { | ||
| 144 | return IsLayered() ? depth : std::max(1U, depth >> level); | ||
| 145 | } | ||
| 146 | |||
| 147 | bool SurfaceParams::IsLayered() const { | ||
| 148 | switch (target) { | ||
| 149 | case SurfaceTarget::Texture1DArray: | ||
| 150 | case SurfaceTarget::Texture2DArray: | ||
| 151 | case SurfaceTarget::TextureCubeArray: | ||
| 152 | case SurfaceTarget::TextureCubemap: | ||
| 153 | return true; | ||
| 154 | default: | ||
| 155 | return false; | ||
| 156 | } | ||
| 157 | } | ||
| 158 | |||
| 159 | u32 SurfaceParams::GetMipBlockHeight(u32 level) const { | ||
| 160 | // Auto block resizing algorithm from: | ||
| 161 | // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c | ||
| 162 | if (level == 0) { | ||
| 163 | return block_height; | ||
| 164 | } | ||
| 165 | const u32 height{GetMipHeight(level)}; | ||
| 166 | const u32 default_block_height{GetDefaultBlockHeight(pixel_format)}; | ||
| 167 | const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; | ||
| 168 | u32 block_height = 16; | ||
| 169 | while (block_height > 1 && blocks_in_y <= block_height * 4) { | ||
| 170 | block_height >>= 1; | ||
| 171 | } | ||
| 172 | return block_height; | ||
| 173 | } | ||
| 174 | |||
| 175 | u32 SurfaceParams::GetMipBlockDepth(u32 level) const { | ||
| 176 | if (level == 0) | ||
| 177 | return block_depth; | ||
| 178 | if (target != SurfaceTarget::Texture3D) | ||
| 179 | return 1; | ||
| 180 | |||
| 181 | const u32 depth{GetMipDepth(level)}; | ||
| 182 | u32 block_depth = 32; | ||
| 183 | while (block_depth > 1 && depth * 2 <= block_depth) { | ||
| 184 | block_depth >>= 1; | ||
| 185 | } | ||
| 186 | if (block_depth == 32 && GetMipBlockHeight(level) >= 4) { | ||
| 187 | return 16; | ||
| 188 | } | ||
| 189 | return block_depth; | ||
| 190 | } | ||
| 191 | |||
| 192 | std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { | ||
| 193 | std::size_t offset = 0; | ||
| 194 | for (u32 i = 0; i < level; i++) { | ||
| 195 | offset += GetInnerMipmapMemorySize(i, false, IsLayered(), false); | ||
| 196 | } | ||
| 197 | return offset; | ||
| 198 | } | ||
| 199 | |||
| 200 | std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { | ||
| 201 | std::size_t offset = 0; | ||
| 202 | for (u32 i = 0; i < level; i++) { | ||
| 203 | offset += GetInnerMipmapMemorySize(i, true, false, false); | ||
| 204 | } | ||
| 205 | return offset; | ||
| 206 | } | ||
| 207 | |||
| 208 | std::size_t SurfaceParams::GetGuestLayerSize() const { | ||
| 209 | return GetInnerMemorySize(false, true, false); | ||
| 210 | } | ||
| 211 | |||
| 212 | std::size_t SurfaceParams::GetHostLayerSize(u32 level) const { | ||
| 213 | return GetInnerMipmapMemorySize(level, true, IsLayered(), false); | ||
| 214 | } | ||
| 215 | |||
| 216 | bool SurfaceParams::IsFamiliar(const SurfaceParams& view_params) const { | ||
| 217 | if (std::tie(is_tiled, tile_width_spacing, pixel_format, component_type, type) != | ||
| 218 | std::tie(view_params.is_tiled, view_params.tile_width_spacing, view_params.pixel_format, | ||
| 219 | view_params.component_type, view_params.type)) { | ||
| 220 | return false; | ||
| 221 | } | ||
| 222 | |||
| 223 | const SurfaceTarget view_target{view_params.target}; | ||
| 224 | if (view_target == target) { | ||
| 225 | return true; | ||
| 226 | } | ||
| 227 | |||
| 228 | switch (target) { | ||
| 229 | case SurfaceTarget::Texture1D: | ||
| 230 | case SurfaceTarget::Texture2D: | ||
| 231 | case SurfaceTarget::Texture3D: | ||
| 232 | return false; | ||
| 233 | case SurfaceTarget::Texture1DArray: | ||
| 234 | return view_target == SurfaceTarget::Texture1D; | ||
| 235 | case SurfaceTarget::Texture2DArray: | ||
| 236 | return view_target == SurfaceTarget::Texture2D; | ||
| 237 | case SurfaceTarget::TextureCubemap: | ||
| 238 | return view_target == SurfaceTarget::Texture2D || | ||
| 239 | view_target == SurfaceTarget::Texture2DArray; | ||
| 240 | case SurfaceTarget::TextureCubeArray: | ||
| 241 | return view_target == SurfaceTarget::Texture2D || | ||
| 242 | view_target == SurfaceTarget::Texture2DArray || | ||
| 243 | view_target == SurfaceTarget::TextureCubemap; | ||
| 244 | default: | ||
| 245 | UNIMPLEMENTED_MSG("Unimplemented texture family={}", static_cast<u32>(target)); | ||
| 246 | return false; | ||
| 247 | } | ||
| 248 | } | ||
| 249 | |||
| 250 | bool SurfaceParams::IsPixelFormatZeta() const { | ||
| 251 | return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && | ||
| 252 | pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; | ||
| 253 | } | ||
| 254 | |||
| 255 | void SurfaceParams::CalculateCachedValues() { | ||
| 256 | guest_size_in_bytes = GetInnerMemorySize(false, false, false); | ||
| 257 | |||
| 258 | // ASTC is uncompressed in software, in emulated as RGBA8 | ||
| 259 | if (IsPixelFormatASTC(pixel_format)) { | ||
| 260 | host_size_in_bytes = width * height * depth * 4; | ||
| 261 | } else { | ||
| 262 | host_size_in_bytes = GetInnerMemorySize(true, false, false); | ||
| 263 | } | ||
| 264 | |||
| 265 | switch (target) { | ||
| 266 | case SurfaceTarget::Texture1D: | ||
| 267 | case SurfaceTarget::Texture2D: | ||
| 268 | case SurfaceTarget::Texture3D: | ||
| 269 | num_layers = 1; | ||
| 270 | break; | ||
| 271 | case SurfaceTarget::Texture1DArray: | ||
| 272 | case SurfaceTarget::Texture2DArray: | ||
| 273 | case SurfaceTarget::TextureCubemap: | ||
| 274 | case SurfaceTarget::TextureCubeArray: | ||
| 275 | num_layers = depth; | ||
| 276 | break; | ||
| 277 | default: | ||
| 278 | UNREACHABLE(); | ||
| 279 | } | ||
| 280 | } | ||
| 281 | |||
| 282 | std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only, | ||
| 283 | bool uncompressed) const { | ||
| 284 | const bool tiled{as_host_size ? false : is_tiled}; | ||
| 285 | const u32 tile_x{GetDefaultBlockWidth(pixel_format)}; | ||
| 286 | const u32 tile_y{GetDefaultBlockHeight(pixel_format)}; | ||
| 287 | const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), tile_x)}; | ||
| 288 | const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), tile_y)}; | ||
| 289 | const u32 depth{layer_only ? 1U : GetMipDepth(level)}; | ||
| 290 | return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(pixel_format), width, height, | ||
| 291 | depth, GetMipBlockHeight(level), GetMipBlockDepth(level)); | ||
| 292 | } | ||
| 293 | |||
| 294 | std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only, | ||
| 295 | bool uncompressed) const { | ||
| 296 | std::size_t size = 0; | ||
| 297 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 298 | size += GetInnerMipmapMemorySize(level, as_host_size, layer_only, uncompressed); | ||
| 299 | } | ||
| 300 | if (!as_host_size && is_tiled) { | ||
| 301 | size = Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); | ||
| 302 | } | ||
| 303 | return size; | ||
| 304 | } | ||
| 305 | |||
| 306 | std::map<u64, std::pair<u32, u32>> SurfaceParams::CreateViewOffsetMap() const { | ||
| 307 | std::map<u64, std::pair<u32, u32>> view_offset_map; | ||
| 308 | switch (target) { | ||
| 309 | case SurfaceTarget::Texture1D: | ||
| 310 | case SurfaceTarget::Texture2D: | ||
| 311 | case SurfaceTarget::Texture3D: { | ||
| 312 | constexpr u32 layer = 0; | ||
| 313 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 314 | const std::size_t offset{GetGuestMipmapLevelOffset(level)}; | ||
| 315 | view_offset_map.insert({offset, {layer, level}}); | ||
| 316 | } | ||
| 317 | break; | ||
| 318 | } | ||
| 319 | case SurfaceTarget::Texture1DArray: | ||
| 320 | case SurfaceTarget::Texture2DArray: | ||
| 321 | case SurfaceTarget::TextureCubemap: | ||
| 322 | case SurfaceTarget::TextureCubeArray: { | ||
| 323 | const std::size_t layer_size{GetGuestLayerSize()}; | ||
| 324 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 325 | const std::size_t level_offset{GetGuestMipmapLevelOffset(level)}; | ||
| 326 | for (u32 layer = 0; layer < num_layers; ++layer) { | ||
| 327 | const auto layer_offset{static_cast<std::size_t>(layer_size * layer)}; | ||
| 328 | const std::size_t offset{level_offset + layer_offset}; | ||
| 329 | view_offset_map.insert({offset, {layer, level}}); | ||
| 330 | } | ||
| 331 | } | ||
| 332 | break; | ||
| 333 | } | ||
| 334 | default: | ||
| 335 | UNIMPLEMENTED_MSG("Unimplemented surface target {}", static_cast<u32>(target)); | ||
| 336 | } | ||
| 337 | return view_offset_map; | ||
| 338 | } | ||
| 339 | |||
| 340 | bool SurfaceParams::IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const { | ||
| 341 | return IsDimensionValid(view_params, level) && IsDepthValid(view_params, level) && | ||
| 342 | IsInBounds(view_params, layer, level); | ||
| 343 | } | ||
| 344 | |||
| 345 | bool SurfaceParams::IsDimensionValid(const SurfaceParams& view_params, u32 level) const { | ||
| 346 | return view_params.width == GetMipWidth(level) && view_params.height == GetMipHeight(level); | ||
| 347 | } | ||
| 348 | |||
| 349 | bool SurfaceParams::IsDepthValid(const SurfaceParams& view_params, u32 level) const { | ||
| 350 | if (view_params.target != SurfaceTarget::Texture3D) { | ||
| 351 | return true; | ||
| 352 | } | ||
| 353 | return view_params.depth == GetMipDepth(level); | ||
| 354 | } | ||
| 355 | |||
| 356 | bool SurfaceParams::IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const { | ||
| 357 | return layer + view_params.num_layers <= num_layers && | ||
| 358 | level + view_params.num_levels <= num_levels; | ||
| 359 | } | ||
| 360 | |||
| 361 | std::size_t HasheableSurfaceParams::Hash() const { | ||
| 362 | return static_cast<std::size_t>( | ||
| 363 | Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this))); | ||
| 364 | } | ||
| 365 | |||
| 366 | bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const { | ||
| 367 | return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, | ||
| 368 | height, depth, pitch, unaligned_height, num_levels, pixel_format, | ||
| 369 | component_type, type, target) == | ||
| 370 | std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, | ||
| 371 | rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, | ||
| 372 | rhs.unaligned_height, rhs.num_levels, rhs.pixel_format, rhs.component_type, | ||
| 373 | rhs.type, rhs.target); | ||
| 374 | } | ||
| 375 | |||
| 376 | std::size_t ViewKey::Hash() const { | ||
| 377 | return static_cast<std::size_t>( | ||
| 378 | Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this))); | ||
| 379 | } | ||
| 380 | |||
| 381 | bool ViewKey::operator==(const ViewKey& rhs) const { | ||
| 382 | return std::tie(base_layer, num_layers, base_level, num_levels) == | ||
| 383 | std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels); | ||
| 384 | } | ||
| 385 | |||
| 386 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache.h b/src/video_core/texture_cache.h deleted file mode 100644 index 041551691..000000000 --- a/src/video_core/texture_cache.h +++ /dev/null | |||
| @@ -1,586 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <list> | ||
| 8 | #include <memory> | ||
| 9 | #include <set> | ||
| 10 | #include <tuple> | ||
| 11 | #include <type_traits> | ||
| 12 | #include <unordered_map> | ||
| 13 | |||
| 14 | #include <boost/icl/interval_map.hpp> | ||
| 15 | #include <boost/range/iterator_range.hpp> | ||
| 16 | |||
| 17 | #include "common/assert.h" | ||
| 18 | #include "common/common_types.h" | ||
| 19 | #include "core/memory.h" | ||
| 20 | #include "video_core/engines/fermi_2d.h" | ||
| 21 | #include "video_core/engines/maxwell_3d.h" | ||
| 22 | #include "video_core/gpu.h" | ||
| 23 | #include "video_core/rasterizer_interface.h" | ||
| 24 | #include "video_core/surface.h" | ||
| 25 | |||
| 26 | namespace Core { | ||
| 27 | class System; | ||
| 28 | } | ||
| 29 | |||
| 30 | namespace Tegra::Texture { | ||
| 31 | struct FullTextureInfo; | ||
| 32 | } | ||
| 33 | |||
| 34 | namespace VideoCore { | ||
| 35 | class RasterizerInterface; | ||
| 36 | } | ||
| 37 | |||
| 38 | namespace VideoCommon { | ||
| 39 | |||
| 40 | class HasheableSurfaceParams { | ||
| 41 | public: | ||
| 42 | std::size_t Hash() const; | ||
| 43 | |||
| 44 | bool operator==(const HasheableSurfaceParams& rhs) const; | ||
| 45 | |||
| 46 | protected: | ||
| 47 | // Avoid creation outside of a managed environment. | ||
| 48 | HasheableSurfaceParams() = default; | ||
| 49 | |||
| 50 | bool is_tiled; | ||
| 51 | u32 block_width; | ||
| 52 | u32 block_height; | ||
| 53 | u32 block_depth; | ||
| 54 | u32 tile_width_spacing; | ||
| 55 | u32 width; | ||
| 56 | u32 height; | ||
| 57 | u32 depth; | ||
| 58 | u32 pitch; | ||
| 59 | u32 unaligned_height; | ||
| 60 | u32 num_levels; | ||
| 61 | VideoCore::Surface::PixelFormat pixel_format; | ||
| 62 | VideoCore::Surface::ComponentType component_type; | ||
| 63 | VideoCore::Surface::SurfaceType type; | ||
| 64 | VideoCore::Surface::SurfaceTarget target; | ||
| 65 | }; | ||
| 66 | |||
| 67 | class SurfaceParams final : public HasheableSurfaceParams { | ||
| 68 | public: | ||
| 69 | /// Creates SurfaceCachedParams from a texture configuration. | ||
| 70 | static SurfaceParams CreateForTexture(Core::System& system, | ||
| 71 | const Tegra::Texture::FullTextureInfo& config); | ||
| 72 | |||
| 73 | /// Creates SurfaceCachedParams for a depth buffer configuration. | ||
| 74 | static SurfaceParams CreateForDepthBuffer( | ||
| 75 | Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, | ||
| 76 | u32 block_width, u32 block_height, u32 block_depth, | ||
| 77 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); | ||
| 78 | |||
| 79 | /// Creates SurfaceCachedParams from a framebuffer configuration. | ||
| 80 | static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); | ||
| 81 | |||
| 82 | /// Creates SurfaceCachedParams from a Fermi2D surface configuration. | ||
| 83 | static SurfaceParams CreateForFermiCopySurface( | ||
| 84 | const Tegra::Engines::Fermi2D::Regs::Surface& config); | ||
| 85 | |||
| 86 | bool IsTiled() const { | ||
| 87 | return is_tiled; | ||
| 88 | } | ||
| 89 | |||
| 90 | u32 GetBlockWidth() const { | ||
| 91 | return block_width; | ||
| 92 | } | ||
| 93 | |||
| 94 | u32 GetTileWidthSpacing() const { | ||
| 95 | return tile_width_spacing; | ||
| 96 | } | ||
| 97 | |||
| 98 | u32 GetWidth() const { | ||
| 99 | return width; | ||
| 100 | } | ||
| 101 | |||
| 102 | u32 GetHeight() const { | ||
| 103 | return height; | ||
| 104 | } | ||
| 105 | |||
| 106 | u32 GetDepth() const { | ||
| 107 | return depth; | ||
| 108 | } | ||
| 109 | |||
| 110 | u32 GetPitch() const { | ||
| 111 | return pitch; | ||
| 112 | } | ||
| 113 | |||
| 114 | u32 GetNumLevels() const { | ||
| 115 | return num_levels; | ||
| 116 | } | ||
| 117 | |||
| 118 | VideoCore::Surface::PixelFormat GetPixelFormat() const { | ||
| 119 | return pixel_format; | ||
| 120 | } | ||
| 121 | |||
| 122 | VideoCore::Surface::ComponentType GetComponentType() const { | ||
| 123 | return component_type; | ||
| 124 | } | ||
| 125 | |||
| 126 | VideoCore::Surface::SurfaceTarget GetTarget() const { | ||
| 127 | return target; | ||
| 128 | } | ||
| 129 | |||
| 130 | VideoCore::Surface::SurfaceType GetType() const { | ||
| 131 | return type; | ||
| 132 | } | ||
| 133 | |||
| 134 | std::size_t GetGuestSizeInBytes() const { | ||
| 135 | return guest_size_in_bytes; | ||
| 136 | } | ||
| 137 | |||
| 138 | std::size_t GetHostSizeInBytes() const { | ||
| 139 | return host_size_in_bytes; | ||
| 140 | } | ||
| 141 | |||
| 142 | u32 GetNumLayers() const { | ||
| 143 | return num_layers; | ||
| 144 | } | ||
| 145 | |||
| 146 | /// Returns the width of a given mipmap level. | ||
| 147 | u32 GetMipWidth(u32 level) const; | ||
| 148 | |||
| 149 | /// Returns the height of a given mipmap level. | ||
| 150 | u32 GetMipHeight(u32 level) const; | ||
| 151 | |||
| 152 | /// Returns the depth of a given mipmap level. | ||
| 153 | u32 GetMipDepth(u32 level) const; | ||
| 154 | |||
| 155 | /// Returns true if these parameters are from a layered surface. | ||
| 156 | bool IsLayered() const; | ||
| 157 | |||
| 158 | /// Returns the block height of a given mipmap level. | ||
| 159 | u32 GetMipBlockHeight(u32 level) const; | ||
| 160 | |||
| 161 | /// Returns the block depth of a given mipmap level. | ||
| 162 | u32 GetMipBlockDepth(u32 level) const; | ||
| 163 | |||
| 164 | /// Returns the offset in bytes in guest memory of a given mipmap level. | ||
| 165 | std::size_t GetGuestMipmapLevelOffset(u32 level) const; | ||
| 166 | |||
| 167 | /// Returns the offset in bytes in host memory (linear) of a given mipmap level. | ||
| 168 | std::size_t GetHostMipmapLevelOffset(u32 level) const; | ||
| 169 | |||
| 170 | /// Returns the size of a layer in bytes in guest memory. | ||
| 171 | std::size_t GetGuestLayerSize() const; | ||
| 172 | |||
| 173 | /// Returns the size of a layer in bytes in host memory for a given mipmap level. | ||
| 174 | std::size_t GetHostLayerSize(u32 level) const; | ||
| 175 | |||
| 176 | /// Returns true if another surface can be familiar with this. This is a loosely defined term | ||
| 177 | /// that reflects the possibility of these two surface parameters potentially being part of a | ||
| 178 | /// bigger superset. | ||
| 179 | bool IsFamiliar(const SurfaceParams& view_params) const; | ||
| 180 | |||
| 181 | /// Returns true if the pixel format is a depth and/or stencil format. | ||
| 182 | bool IsPixelFormatZeta() const; | ||
| 183 | |||
| 184 | /// Creates a map that redirects an address difference to a layer and mipmap level. | ||
| 185 | std::map<u64, std::pair<u32, u32>> CreateViewOffsetMap() const; | ||
| 186 | |||
| 187 | /// Returns true if the passed surface view parameters is equal or a valid subset of this. | ||
| 188 | bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const; | ||
| 189 | |||
| 190 | private: | ||
| 191 | /// Calculates values that can be deduced from HasheableSurfaceParams. | ||
| 192 | void CalculateCachedValues(); | ||
| 193 | |||
| 194 | /// Returns the size of a given mipmap level. | ||
| 195 | std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only, | ||
| 196 | bool uncompressed) const; | ||
| 197 | |||
| 198 | /// Returns the size of all mipmap levels and aligns as needed. | ||
| 199 | std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const; | ||
| 200 | |||
| 201 | /// Returns true if the passed view width and height match the size of this params in a given | ||
| 202 | /// mipmap level. | ||
| 203 | bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const; | ||
| 204 | |||
| 205 | /// Returns true if the passed view depth match the size of this params in a given mipmap level. | ||
| 206 | bool IsDepthValid(const SurfaceParams& view_params, u32 level) const; | ||
| 207 | |||
| 208 | /// Returns true if the passed view layers and mipmap levels are in bounds. | ||
| 209 | bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const; | ||
| 210 | |||
| 211 | std::size_t guest_size_in_bytes; | ||
| 212 | std::size_t host_size_in_bytes; | ||
| 213 | u32 num_layers; | ||
| 214 | }; | ||
| 215 | |||
| 216 | struct ViewKey { | ||
| 217 | std::size_t Hash() const; | ||
| 218 | |||
| 219 | bool operator==(const ViewKey& rhs) const; | ||
| 220 | |||
| 221 | u32 base_layer{}; | ||
| 222 | u32 num_layers{}; | ||
| 223 | u32 base_level{}; | ||
| 224 | u32 num_levels{}; | ||
| 225 | }; | ||
| 226 | |||
| 227 | } // namespace VideoCommon | ||
| 228 | |||
| 229 | namespace std { | ||
| 230 | |||
| 231 | template <> | ||
| 232 | struct hash<VideoCommon::SurfaceParams> { | ||
| 233 | std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept { | ||
| 234 | return k.Hash(); | ||
| 235 | } | ||
| 236 | }; | ||
| 237 | |||
| 238 | template <> | ||
| 239 | struct hash<VideoCommon::ViewKey> { | ||
| 240 | std::size_t operator()(const VideoCommon::ViewKey& k) const noexcept { | ||
| 241 | return k.Hash(); | ||
| 242 | } | ||
| 243 | }; | ||
| 244 | |||
| 245 | } // namespace std | ||
| 246 | |||
| 247 | namespace VideoCommon { | ||
| 248 | |||
| 249 | template <typename TView, typename TExecutionContext> | ||
| 250 | class SurfaceBase { | ||
| 251 | static_assert(std::is_trivially_copyable_v<TExecutionContext>); | ||
| 252 | |||
| 253 | public: | ||
| 254 | virtual void LoadBuffer() = 0; | ||
| 255 | |||
| 256 | virtual TExecutionContext FlushBuffer(TExecutionContext exctx) = 0; | ||
| 257 | |||
| 258 | virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0; | ||
| 259 | |||
| 260 | TView* TryGetView(VAddr view_addr, const SurfaceParams& view_params) { | ||
| 261 | if (view_addr < cpu_addr || !params.IsFamiliar(view_params)) { | ||
| 262 | // It can't be a view if it's in a prior address. | ||
| 263 | return {}; | ||
| 264 | } | ||
| 265 | |||
| 266 | const auto relative_offset{static_cast<u64>(view_addr - cpu_addr)}; | ||
| 267 | const auto it{view_offset_map.find(relative_offset)}; | ||
| 268 | if (it == view_offset_map.end()) { | ||
| 269 | // Couldn't find an aligned view. | ||
| 270 | return {}; | ||
| 271 | } | ||
| 272 | const auto [layer, level] = it->second; | ||
| 273 | |||
| 274 | if (!params.IsViewValid(view_params, layer, level)) { | ||
| 275 | return {}; | ||
| 276 | } | ||
| 277 | |||
| 278 | return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels()); | ||
| 279 | } | ||
| 280 | |||
| 281 | VAddr GetCpuAddr() const { | ||
| 282 | ASSERT(is_registered); | ||
| 283 | return cpu_addr; | ||
| 284 | } | ||
| 285 | |||
| 286 | u8* GetHostPtr() const { | ||
| 287 | ASSERT(is_registered); | ||
| 288 | return host_ptr; | ||
| 289 | } | ||
| 290 | |||
| 291 | CacheAddr GetCacheAddr() const { | ||
| 292 | ASSERT(is_registered); | ||
| 293 | return cache_addr; | ||
| 294 | } | ||
| 295 | |||
| 296 | std::size_t GetSizeInBytes() const { | ||
| 297 | return params.GetGuestSizeInBytes(); | ||
| 298 | } | ||
| 299 | |||
| 300 | void MarkAsModified(bool is_modified_) { | ||
| 301 | is_modified = is_modified_; | ||
| 302 | } | ||
| 303 | |||
| 304 | const SurfaceParams& GetSurfaceParams() const { | ||
| 305 | return params; | ||
| 306 | } | ||
| 307 | |||
| 308 | TView* GetView(VAddr view_addr, const SurfaceParams& view_params) { | ||
| 309 | TView* view{TryGetView(view_addr, view_params)}; | ||
| 310 | ASSERT(view != nullptr); | ||
| 311 | return view; | ||
| 312 | } | ||
| 313 | |||
| 314 | void Register(VAddr cpu_addr_, u8* host_ptr_) { | ||
| 315 | ASSERT(!is_registered); | ||
| 316 | is_registered = true; | ||
| 317 | cpu_addr = cpu_addr_; | ||
| 318 | host_ptr = host_ptr_; | ||
| 319 | cache_addr = ToCacheAddr(host_ptr_); | ||
| 320 | } | ||
| 321 | |||
| 322 | void Register(VAddr cpu_addr_) { | ||
| 323 | Register(cpu_addr_, Memory::GetPointer(cpu_addr_)); | ||
| 324 | } | ||
| 325 | |||
| 326 | void Unregister() { | ||
| 327 | ASSERT(is_registered); | ||
| 328 | is_registered = false; | ||
| 329 | } | ||
| 330 | |||
| 331 | bool IsRegistered() const { | ||
| 332 | return is_registered; | ||
| 333 | } | ||
| 334 | |||
| 335 | protected: | ||
| 336 | explicit SurfaceBase(const SurfaceParams& params) | ||
| 337 | : params{params}, view_offset_map{params.CreateViewOffsetMap()} {} | ||
| 338 | |||
| 339 | ~SurfaceBase() = default; | ||
| 340 | |||
| 341 | virtual std::unique_ptr<TView> CreateView(const ViewKey& view_key) = 0; | ||
| 342 | |||
| 343 | bool IsModified() const { | ||
| 344 | return is_modified; | ||
| 345 | } | ||
| 346 | |||
| 347 | const SurfaceParams params; | ||
| 348 | |||
| 349 | private: | ||
| 350 | TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) { | ||
| 351 | const ViewKey key{base_layer, num_layers, base_level, num_levels}; | ||
| 352 | const auto [entry, is_cache_miss] = views.try_emplace(key); | ||
| 353 | auto& view{entry->second}; | ||
| 354 | if (is_cache_miss) { | ||
| 355 | view = CreateView(key); | ||
| 356 | } | ||
| 357 | return view.get(); | ||
| 358 | } | ||
| 359 | |||
| 360 | const std::map<u64, std::pair<u32, u32>> view_offset_map; | ||
| 361 | |||
| 362 | VAddr cpu_addr{}; | ||
| 363 | u8* host_ptr{}; | ||
| 364 | CacheAddr cache_addr{}; | ||
| 365 | bool is_modified{}; | ||
| 366 | bool is_registered{}; | ||
| 367 | std::unordered_map<ViewKey, std::unique_ptr<TView>> views; | ||
| 368 | }; | ||
| 369 | |||
| 370 | template <typename TSurface, typename TView, typename TExecutionContext> | ||
| 371 | class TextureCache { | ||
| 372 | static_assert(std::is_trivially_copyable_v<TExecutionContext>); | ||
| 373 | using ResultType = std::tuple<TView*, TExecutionContext>; | ||
| 374 | using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface*>>; | ||
| 375 | using IntervalType = typename IntervalMap::interval_type; | ||
| 376 | |||
| 377 | public: | ||
| 378 | void InvalidateRegion(CacheAddr addr, std::size_t size) { | ||
| 379 | for (TSurface* surface : GetSurfacesInRegion(addr, size)) { | ||
| 380 | if (!surface->IsRegistered()) { | ||
| 381 | // Skip duplicates | ||
| 382 | continue; | ||
| 383 | } | ||
| 384 | Unregister(surface); | ||
| 385 | } | ||
| 386 | } | ||
| 387 | |||
| 388 | ResultType GetTextureSurface(TExecutionContext exctx, | ||
| 389 | const Tegra::Texture::FullTextureInfo& config) { | ||
| 390 | auto& memory_manager{system.GPU().MemoryManager()}; | ||
| 391 | const auto cpu_addr{memory_manager.GpuToCpuAddress(config.tic.Address())}; | ||
| 392 | if (!cpu_addr) { | ||
| 393 | return {{}, exctx}; | ||
| 394 | } | ||
| 395 | const auto params{SurfaceParams::CreateForTexture(system, config)}; | ||
| 396 | return GetSurfaceView(exctx, *cpu_addr, params, true); | ||
| 397 | } | ||
| 398 | |||
| 399 | ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) { | ||
| 400 | const auto& regs{system.GPU().Maxwell3D().regs}; | ||
| 401 | if (!regs.zeta.Address() || !regs.zeta_enable) { | ||
| 402 | return {{}, exctx}; | ||
| 403 | } | ||
| 404 | |||
| 405 | auto& memory_manager{system.GPU().MemoryManager()}; | ||
| 406 | const auto cpu_addr{memory_manager.GpuToCpuAddress(regs.zeta.Address())}; | ||
| 407 | if (!cpu_addr) { | ||
| 408 | return {{}, exctx}; | ||
| 409 | } | ||
| 410 | |||
| 411 | const auto depth_params{SurfaceParams::CreateForDepthBuffer( | ||
| 412 | system, regs.zeta_width, regs.zeta_height, regs.zeta.format, | ||
| 413 | regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, | ||
| 414 | regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; | ||
| 415 | return GetSurfaceView(exctx, *cpu_addr, depth_params, preserve_contents); | ||
| 416 | } | ||
| 417 | |||
| 418 | ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index, | ||
| 419 | bool preserve_contents) { | ||
| 420 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | ||
| 421 | |||
| 422 | const auto& regs{system.GPU().Maxwell3D().regs}; | ||
| 423 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || | ||
| 424 | regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { | ||
| 425 | return {{}, exctx}; | ||
| 426 | } | ||
| 427 | |||
| 428 | auto& memory_manager{system.GPU().MemoryManager()}; | ||
| 429 | const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; | ||
| 430 | const auto cpu_addr{memory_manager.GpuToCpuAddress( | ||
| 431 | config.Address() + config.base_layer * config.layer_stride * sizeof(u32))}; | ||
| 432 | if (!cpu_addr) { | ||
| 433 | return {{}, exctx}; | ||
| 434 | } | ||
| 435 | |||
| 436 | return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index), | ||
| 437 | preserve_contents); | ||
| 438 | } | ||
| 439 | |||
| 440 | ResultType GetFermiSurface(TExecutionContext exctx, | ||
| 441 | const Tegra::Engines::Fermi2D::Regs::Surface& config) { | ||
| 442 | const auto cpu_addr{system.GPU().MemoryManager().GpuToCpuAddress(config.Address())}; | ||
| 443 | ASSERT(cpu_addr); | ||
| 444 | return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFermiCopySurface(config), | ||
| 445 | true); | ||
| 446 | } | ||
| 447 | |||
| 448 | TSurface* TryFindFramebufferSurface(const u8* host_ptr) const { | ||
| 449 | const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; | ||
| 450 | return it != registered_surfaces.end() ? *it->second.begin() : nullptr; | ||
| 451 | } | ||
| 452 | |||
| 453 | protected: | ||
| 454 | TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) | ||
| 455 | : system{system}, rasterizer{rasterizer} {} | ||
| 456 | |||
| 457 | ~TextureCache() = default; | ||
| 458 | |||
| 459 | virtual ResultType TryFastGetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr, | ||
| 460 | const SurfaceParams& params, bool preserve_contents, | ||
| 461 | const std::vector<TSurface*>& overlaps) = 0; | ||
| 462 | |||
| 463 | virtual std::unique_ptr<TSurface> CreateSurface(const SurfaceParams& params) = 0; | ||
| 464 | |||
| 465 | void Register(TSurface* surface, VAddr cpu_addr, u8* host_ptr) { | ||
| 466 | surface->Register(cpu_addr, host_ptr); | ||
| 467 | registered_surfaces.add({GetSurfaceInterval(surface), {surface}}); | ||
| 468 | rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1); | ||
| 469 | } | ||
| 470 | |||
| 471 | void Unregister(TSurface* surface) { | ||
| 472 | registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}}); | ||
| 473 | rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1); | ||
| 474 | surface->Unregister(); | ||
| 475 | } | ||
| 476 | |||
| 477 | TSurface* GetUncachedSurface(const SurfaceParams& params) { | ||
| 478 | if (TSurface* surface = TryGetReservedSurface(params); surface) | ||
| 479 | return surface; | ||
| 480 | // No reserved surface available, create a new one and reserve it | ||
| 481 | auto new_surface{CreateSurface(params)}; | ||
| 482 | TSurface* surface{new_surface.get()}; | ||
| 483 | ReserveSurface(params, std::move(new_surface)); | ||
| 484 | return surface; | ||
| 485 | } | ||
| 486 | |||
| 487 | Core::System& system; | ||
| 488 | |||
| 489 | private: | ||
| 490 | ResultType GetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, const SurfaceParams& params, | ||
| 491 | bool preserve_contents) { | ||
| 492 | const auto host_ptr{Memory::GetPointer(cpu_addr)}; | ||
| 493 | const auto cache_addr{ToCacheAddr(host_ptr)}; | ||
| 494 | const auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; | ||
| 495 | if (overlaps.empty()) { | ||
| 496 | return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents); | ||
| 497 | } | ||
| 498 | |||
| 499 | if (overlaps.size() == 1) { | ||
| 500 | if (TView* view = overlaps[0]->TryGetView(cpu_addr, params); view) | ||
| 501 | return {view, exctx}; | ||
| 502 | } | ||
| 503 | |||
| 504 | TView* fast_view; | ||
| 505 | std::tie(fast_view, exctx) = | ||
| 506 | TryFastGetSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents, overlaps); | ||
| 507 | |||
| 508 | for (TSurface* surface : overlaps) { | ||
| 509 | if (!fast_view) { | ||
| 510 | // Flush even when we don't care about the contents, to preserve memory not written | ||
| 511 | // by the new surface. | ||
| 512 | exctx = surface->FlushBuffer(exctx); | ||
| 513 | } | ||
| 514 | Unregister(surface); | ||
| 515 | } | ||
| 516 | |||
| 517 | if (fast_view) { | ||
| 518 | return {fast_view, exctx}; | ||
| 519 | } | ||
| 520 | |||
| 521 | return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents); | ||
| 522 | } | ||
| 523 | |||
| 524 | ResultType LoadSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr, | ||
| 525 | const SurfaceParams& params, bool preserve_contents) { | ||
| 526 | TSurface* new_surface{GetUncachedSurface(params)}; | ||
| 527 | Register(new_surface, cpu_addr, host_ptr); | ||
| 528 | if (preserve_contents) { | ||
| 529 | exctx = LoadSurface(exctx, new_surface); | ||
| 530 | } | ||
| 531 | return {new_surface->GetView(cpu_addr, params), exctx}; | ||
| 532 | } | ||
| 533 | |||
| 534 | TExecutionContext LoadSurface(TExecutionContext exctx, TSurface* surface) { | ||
| 535 | surface->LoadBuffer(); | ||
| 536 | exctx = surface->UploadTexture(exctx); | ||
| 537 | surface->MarkAsModified(false); | ||
| 538 | return exctx; | ||
| 539 | } | ||
| 540 | |||
| 541 | std::vector<TSurface*> GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const { | ||
| 542 | if (size == 0) { | ||
| 543 | return {}; | ||
| 544 | } | ||
| 545 | const IntervalType interval{cache_addr, cache_addr + size}; | ||
| 546 | |||
| 547 | std::vector<TSurface*> surfaces; | ||
| 548 | for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { | ||
| 549 | surfaces.push_back(*pair.second.begin()); | ||
| 550 | } | ||
| 551 | return surfaces; | ||
| 552 | } | ||
| 553 | |||
| 554 | void ReserveSurface(const SurfaceParams& params, std::unique_ptr<TSurface> surface) { | ||
| 555 | surface_reserve[params].push_back(std::move(surface)); | ||
| 556 | } | ||
| 557 | |||
| 558 | TSurface* TryGetReservedSurface(const SurfaceParams& params) { | ||
| 559 | auto search{surface_reserve.find(params)}; | ||
| 560 | if (search == surface_reserve.end()) { | ||
| 561 | return {}; | ||
| 562 | } | ||
| 563 | for (auto& surface : search->second) { | ||
| 564 | if (!surface->IsRegistered()) { | ||
| 565 | return surface.get(); | ||
| 566 | } | ||
| 567 | } | ||
| 568 | return {}; | ||
| 569 | } | ||
| 570 | |||
| 571 | IntervalType GetSurfaceInterval(TSurface* surface) const { | ||
| 572 | return IntervalType::right_open(surface->GetCacheAddr(), | ||
| 573 | surface->GetCacheAddr() + surface->GetSizeInBytes()); | ||
| 574 | } | ||
| 575 | |||
| 576 | VideoCore::RasterizerInterface& rasterizer; | ||
| 577 | |||
| 578 | IntervalMap registered_surfaces; | ||
| 579 | |||
| 580 | /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have | ||
| 581 | /// previously been used. This is to prevent surfaces from being constantly created and | ||
| 582 | /// destroyed when used with different surface parameters. | ||
| 583 | std::unordered_map<SurfaceParams, std::list<std::unique_ptr<TSurface>>> surface_reserve; | ||
| 584 | }; | ||
| 585 | |||
| 586 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h new file mode 100644 index 000000000..9c21a0649 --- /dev/null +++ b/src/video_core/texture_cache/copy_params.h | |||
| @@ -0,0 +1,36 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace VideoCommon { | ||
| 10 | |||
| 11 | struct CopyParams { | ||
| 12 | constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y, | ||
| 13 | u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height, | ||
| 14 | u32 depth) | ||
| 15 | : source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x}, | ||
| 16 | dest_y{dest_y}, dest_z{dest_z}, source_level{source_level}, | ||
| 17 | dest_level{dest_level}, width{width}, height{height}, depth{depth} {} | ||
| 18 | |||
| 19 | constexpr CopyParams(u32 width, u32 height, u32 depth, u32 level) | ||
| 20 | : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level}, | ||
| 21 | dest_level{level}, width{width}, height{height}, depth{depth} {} | ||
| 22 | |||
| 23 | u32 source_x; | ||
| 24 | u32 source_y; | ||
| 25 | u32 source_z; | ||
| 26 | u32 dest_x; | ||
| 27 | u32 dest_y; | ||
| 28 | u32 dest_z; | ||
| 29 | u32 source_level; | ||
| 30 | u32 dest_level; | ||
| 31 | u32 width; | ||
| 32 | u32 height; | ||
| 33 | u32 depth; | ||
| 34 | }; | ||
| 35 | |||
| 36 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp new file mode 100644 index 000000000..683c49207 --- /dev/null +++ b/src/video_core/texture_cache/surface_base.cpp | |||
| @@ -0,0 +1,302 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/microprofile.h" | ||
| 8 | #include "video_core/memory_manager.h" | ||
| 9 | #include "video_core/texture_cache/surface_base.h" | ||
| 10 | #include "video_core/texture_cache/surface_params.h" | ||
| 11 | #include "video_core/textures/convert.h" | ||
| 12 | |||
| 13 | namespace VideoCommon { | ||
| 14 | |||
| 15 | MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128)); | ||
| 16 | MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128)); | ||
| 17 | |||
| 18 | using Tegra::Texture::ConvertFromGuestToHost; | ||
| 19 | using VideoCore::MortonSwizzleMode; | ||
| 20 | using VideoCore::Surface::SurfaceCompression; | ||
| 21 | |||
| 22 | StagingCache::StagingCache() = default; | ||
| 23 | |||
| 24 | StagingCache::~StagingCache() = default; | ||
| 25 | |||
| 26 | SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) | ||
| 27 | : params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr}, | ||
| 28 | mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) { | ||
| 29 | std::size_t offset = 0; | ||
| 30 | for (u32 level = 0; level < params.num_levels; ++level) { | ||
| 31 | const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; | ||
| 32 | mipmap_sizes[level] = mipmap_size; | ||
| 33 | mipmap_offsets[level] = offset; | ||
| 34 | offset += mipmap_size; | ||
| 35 | } | ||
| 36 | layer_size = offset; | ||
| 37 | if (params.is_layered) { | ||
| 38 | if (params.is_tiled) { | ||
| 39 | layer_size = | ||
| 40 | SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth); | ||
| 41 | } | ||
| 42 | guest_memory_size = layer_size * params.depth; | ||
| 43 | } else { | ||
| 44 | guest_memory_size = layer_size; | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const { | ||
| 49 | const u32 src_bpp{params.GetBytesPerPixel()}; | ||
| 50 | const u32 dst_bpp{rhs.GetBytesPerPixel()}; | ||
| 51 | const bool ib1 = params.IsBuffer(); | ||
| 52 | const bool ib2 = rhs.IsBuffer(); | ||
| 53 | if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) { | ||
| 54 | const bool cb1 = params.IsCompressed(); | ||
| 55 | const bool cb2 = rhs.IsCompressed(); | ||
| 56 | if (cb1 == cb2) { | ||
| 57 | return MatchTopologyResult::FullMatch; | ||
| 58 | } | ||
| 59 | return MatchTopologyResult::CompressUnmatch; | ||
| 60 | } | ||
| 61 | return MatchTopologyResult::None; | ||
| 62 | } | ||
| 63 | |||
| 64 | MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const { | ||
| 65 | // Buffer surface Check | ||
| 66 | if (params.IsBuffer()) { | ||
| 67 | const std::size_t wd1 = params.width * params.GetBytesPerPixel(); | ||
| 68 | const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); | ||
| 69 | if (wd1 == wd2) { | ||
| 70 | return MatchStructureResult::FullMatch; | ||
| 71 | } | ||
| 72 | return MatchStructureResult::None; | ||
| 73 | } | ||
| 74 | |||
| 75 | // Linear Surface check | ||
| 76 | if (!params.is_tiled) { | ||
| 77 | if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) { | ||
| 78 | if (params.width == rhs.width) { | ||
| 79 | return MatchStructureResult::FullMatch; | ||
| 80 | } else { | ||
| 81 | return MatchStructureResult::SemiMatch; | ||
| 82 | } | ||
| 83 | } | ||
| 84 | return MatchStructureResult::None; | ||
| 85 | } | ||
| 86 | |||
| 87 | // Tiled Surface check | ||
| 88 | if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, | ||
| 89 | params.tile_width_spacing, params.num_levels) == | ||
| 90 | std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, | ||
| 91 | rhs.tile_width_spacing, rhs.num_levels)) { | ||
| 92 | if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { | ||
| 93 | return MatchStructureResult::FullMatch; | ||
| 94 | } | ||
| 95 | const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format, | ||
| 96 | rhs.pixel_format); | ||
| 97 | const u32 hs = | ||
| 98 | SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); | ||
| 99 | const u32 w1 = params.GetBlockAlignedWidth(); | ||
| 100 | if (std::tie(w1, params.height) == std::tie(ws, hs)) { | ||
| 101 | return MatchStructureResult::SemiMatch; | ||
| 102 | } | ||
| 103 | } | ||
| 104 | return MatchStructureResult::None; | ||
| 105 | } | ||
| 106 | |||
| 107 | std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap( | ||
| 108 | const GPUVAddr candidate_gpu_addr) const { | ||
| 109 | if (gpu_addr == candidate_gpu_addr) { | ||
| 110 | return {{0, 0}}; | ||
| 111 | } | ||
| 112 | if (candidate_gpu_addr < gpu_addr) { | ||
| 113 | return {}; | ||
| 114 | } | ||
| 115 | const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)}; | ||
| 116 | const auto layer{static_cast<u32>(relative_address / layer_size)}; | ||
| 117 | const GPUVAddr mipmap_address = relative_address - layer_size * layer; | ||
| 118 | const auto mipmap_it = | ||
| 119 | Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); | ||
| 120 | if (mipmap_it == mipmap_offsets.end()) { | ||
| 121 | return {}; | ||
| 122 | } | ||
| 123 | const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))}; | ||
| 124 | return std::make_pair(layer, level); | ||
| 125 | } | ||
| 126 | |||
| 127 | std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const { | ||
| 128 | const u32 layers{params.depth}; | ||
| 129 | const u32 mipmaps{params.num_levels}; | ||
| 130 | std::vector<CopyParams> result; | ||
| 131 | result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps)); | ||
| 132 | |||
| 133 | for (u32 layer = 0; layer < layers; layer++) { | ||
| 134 | for (u32 level = 0; level < mipmaps; level++) { | ||
| 135 | const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); | ||
| 136 | const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); | ||
| 137 | result.emplace_back(width, height, layer, level); | ||
| 138 | } | ||
| 139 | } | ||
| 140 | return result; | ||
| 141 | } | ||
| 142 | |||
| 143 | std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const { | ||
| 144 | const u32 mipmaps{params.num_levels}; | ||
| 145 | std::vector<CopyParams> result; | ||
| 146 | result.reserve(mipmaps); | ||
| 147 | |||
| 148 | for (u32 level = 0; level < mipmaps; level++) { | ||
| 149 | const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); | ||
| 150 | const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); | ||
| 151 | const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; | ||
| 152 | result.emplace_back(width, height, depth, level); | ||
| 153 | } | ||
| 154 | return result; | ||
| 155 | } | ||
| 156 | |||
// Swizzles or deswizzles a single mipmap level between the guest layout in
// `memory` and the host-linear layout in `buffer`, in the direction selected
// by `mode`.
// NOTE(review): the `params` parameter shadows the `params` member; callers in
// this file pass the member itself, so both describe the same surface.
void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params,
                                  u8* buffer, u32 level) {
    const u32 width{params.GetMipWidth(level)};
    const u32 height{params.GetMipHeight(level)};
    const u32 block_height{params.GetMipBlockHeight(level)};
    const u32 block_depth{params.GetMipBlockDepth(level)};

    // Guest data for this level starts at the precomputed mipmap offset.
    std::size_t guest_offset{mipmap_offsets[level]};
    if (params.is_layered) {
        // Layered surfaces are processed one layer at a time with depth 1,
        // stepping layer_size on the guest side and the host layer size on the
        // host side.
        std::size_t host_offset{0};
        const std::size_t guest_stride = layer_size;
        const std::size_t host_stride = params.GetHostLayerSize(level);
        for (u32 layer = 0; layer < params.depth; ++layer) {
            MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1,
                          params.tile_width_spacing, buffer + host_offset, memory + guest_offset);
            guest_offset += guest_stride;
            host_offset += host_stride;
        }
    } else {
        // Non-layered surfaces are swizzled in one call covering the full
        // mipmap depth.
        MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth,
                      params.GetMipDepth(level), params.tile_width_spacing, buffer,
                      memory + guest_offset);
    }
}
| 181 | |||
// Reads this surface's guest memory into staging buffer 0: deswizzles tiled
// data, strips pitch padding from linear data, and finally converts formats
// that the host cannot consume directly.
void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
                                 StagingCache& staging_cache) {
    MICROPROFILE_SCOPE(GPU_Load_Texture);
    auto& staging_buffer = staging_cache.GetBuffer(0);
    u8* host_ptr;
    is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size);

    // Handle continuity of the guest memory block
    if (is_continuous) {
        // Use physical memory directly
        host_ptr = memory_manager.GetPointer(gpu_addr);
        if (!host_ptr) {
            return;
        }
    } else {
        // Use an extra temporary buffer and gather the scattered pages into it
        auto& tmp_buffer = staging_cache.GetBuffer(1);
        tmp_buffer.resize(guest_memory_size);
        host_ptr = tmp_buffer.data();
        memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
    }

    if (params.is_tiled) {
        ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
                   params.block_width, static_cast<u32>(params.target));
        // Deswizzle every mipmap level into its host offset.
        for (u32 level = 0; level < params.num_levels; ++level) {
            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
            SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
                        staging_buffer.data() + host_offset, level);
        }
    } else {
        ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented");
        const u32 bpp{params.GetBytesPerPixel()};
        const u32 block_width{params.GetDefaultBlockWidth()};
        const u32 block_height{params.GetDefaultBlockHeight()};
        // Round dimensions up to whole compression blocks.
        const u32 width{(params.width + block_width - 1) / block_width};
        const u32 height{(params.height + block_height - 1) / block_height};
        const u32 copy_size{width * bpp};
        if (params.pitch == copy_size) {
            // Rows are tightly packed; a single copy suffices.
            std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes());
        } else {
            // Copy row by row, skipping the pitch padding.
            const u8* start{host_ptr};
            u8* write_to{staging_buffer.data()};
            for (u32 h = height; h > 0; --h) {
                std::memcpy(write_to, start, copy_size);
                start += params.pitch;
                write_to += copy_size;
            }
        }
    }

    // Formats needing no conversion (or that stay compressed on the host) are done.
    auto compression_type = params.GetCompressionType();
    if (compression_type == SurfaceCompression::None ||
        compression_type == SurfaceCompression::Compressed)
        return;

    // Convert levels from the smallest mip down to level 0 — presumably so the
    // in-place (Rearranged) path never clobbers data pending conversion;
    // confirm against ConvertFromGuestToHost.
    for (u32 level_up = params.num_levels; level_up > 0; --level_up) {
        const u32 level = level_up - 1;
        const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)};
        const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged
                                                ? in_host_offset
                                                : params.GetConvertedMipmapOffset(level);
        u8* in_buffer = staging_buffer.data() + in_host_offset;
        u8* out_buffer = staging_buffer.data() + out_host_offset;
        ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
                               params.GetMipWidth(level), params.GetMipHeight(level),
                               params.GetMipDepth(level), true, true);
    }
}
| 251 | |||
// Writes staging buffer 0 back to guest memory, re-swizzling tiled surfaces
// and re-inserting pitch padding on linear ones.
void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
                                  StagingCache& staging_cache) {
    MICROPROFILE_SCOPE(GPU_Flush_Texture);
    auto& staging_buffer = staging_cache.GetBuffer(0);
    u8* host_ptr;

    // Handle continuity of the guest memory block
    if (is_continuous) {
        // Use physical memory directly
        host_ptr = memory_manager.GetPointer(gpu_addr);
        if (!host_ptr) {
            return;
        }
    } else {
        // Use an extra temporary buffer; it is scattered back to guest memory at the end
        auto& tmp_buffer = staging_cache.GetBuffer(1);
        tmp_buffer.resize(guest_memory_size);
        host_ptr = tmp_buffer.data();
    }

    if (params.is_tiled) {
        ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
        // Re-swizzle every mipmap level from its host offset into guest layout.
        for (u32 level = 0; level < params.num_levels; ++level) {
            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
            SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
                        staging_buffer.data() + host_offset, level);
        }
    } else {
        // Only single-level 2D linear surfaces are supported for flushing.
        ASSERT(params.target == SurfaceTarget::Texture2D);
        ASSERT(params.num_levels == 1);

        const u32 bpp{params.GetBytesPerPixel()};
        const u32 copy_size{params.width * bpp};
        if (params.pitch == copy_size) {
            // Rows are tightly packed; a single copy suffices.
            std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
        } else {
            // Copy row by row, re-inserting the pitch padding.
            u8* start{host_ptr};
            const u8* read_to{staging_buffer.data()};
            for (u32 h = params.height; h > 0; --h) {
                std::memcpy(start, read_to, copy_size);
                start += params.pitch;
                read_to += copy_size;
            }
        }
    }
    if (!is_continuous) {
        // Scatter the temporary buffer back into the non-contiguous guest pages.
        memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
    }
}
| 301 | |||
| 302 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h new file mode 100644 index 000000000..5e497e49f --- /dev/null +++ b/src/video_core/texture_cache/surface_base.h | |||
| @@ -0,0 +1,325 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/binary_find.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/gpu.h" | ||
| 15 | #include "video_core/morton.h" | ||
| 16 | #include "video_core/texture_cache/copy_params.h" | ||
| 17 | #include "video_core/texture_cache/surface_params.h" | ||
| 18 | #include "video_core/texture_cache/surface_view.h" | ||
| 19 | |||
| 20 | namespace Tegra { | ||
| 21 | class MemoryManager; | ||
| 22 | } | ||
| 23 | |||
| 24 | namespace VideoCommon { | ||
| 25 | |||
| 26 | using VideoCore::MortonSwizzleMode; | ||
| 27 | using VideoCore::Surface::SurfaceTarget; | ||
| 28 | |||
/// Result of comparing two surfaces' dimensional structure
/// (see SurfaceBaseImpl::MatchesStructure).
enum class MatchStructureResult : u32 {
    FullMatch = 0, ///< Dimensions match exactly
    SemiMatch = 1, ///< Dimensions are compatible but not identical
    None = 2,      ///< Structures are incompatible
};
| 34 | |||
/// Result of comparing two surfaces' memory topology
/// (see SurfaceBaseImpl::MatchesTopology).
enum class MatchTopologyResult : u32 {
    FullMatch = 0,       ///< Same bytes per pixel, tiling and compression state
    CompressUnmatch = 1, ///< Same layout but only one side is compressed
    None = 2,            ///< Different layout
};
| 40 | |||
/// Owns a set of reusable byte buffers used as staging storage for
/// guest<->host surface copies.
class StagingCache {
public:
    explicit StagingCache();
    ~StagingCache();

    /// Returns the staging buffer at `index`; callers resize it as needed.
    std::vector<u8>& GetBuffer(std::size_t index) {
        return staging_buffer[index];
    }

    const std::vector<u8>& GetBuffer(std::size_t index) const {
        return staging_buffer[index];
    }

    /// Sets the number of available staging buffers.
    void SetSize(std::size_t size) {
        staging_buffer.resize(size);
    }

private:
    std::vector<std::vector<u8>> staging_buffer;
};
| 61 | |||
| 62 | class SurfaceBaseImpl { | ||
| 63 | public: | ||
| 64 | void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); | ||
| 65 | |||
| 66 | void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); | ||
| 67 | |||
| 68 | GPUVAddr GetGpuAddr() const { | ||
| 69 | return gpu_addr; | ||
| 70 | } | ||
| 71 | |||
| 72 | bool Overlaps(const CacheAddr start, const CacheAddr end) const { | ||
| 73 | return (cache_addr < end) && (cache_addr_end > start); | ||
| 74 | } | ||
| 75 | |||
| 76 | bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { | ||
| 77 | const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size; | ||
| 78 | return (gpu_addr <= other_start && other_end <= gpu_addr_end); | ||
| 79 | } | ||
| 80 | |||
| 81 | // Use only when recycling a surface | ||
| 82 | void SetGpuAddr(const GPUVAddr new_addr) { | ||
| 83 | gpu_addr = new_addr; | ||
| 84 | } | ||
| 85 | |||
| 86 | VAddr GetCpuAddr() const { | ||
| 87 | return cpu_addr; | ||
| 88 | } | ||
| 89 | |||
| 90 | void SetCpuAddr(const VAddr new_addr) { | ||
| 91 | cpu_addr = new_addr; | ||
| 92 | } | ||
| 93 | |||
| 94 | CacheAddr GetCacheAddr() const { | ||
| 95 | return cache_addr; | ||
| 96 | } | ||
| 97 | |||
| 98 | CacheAddr GetCacheAddrEnd() const { | ||
| 99 | return cache_addr_end; | ||
| 100 | } | ||
| 101 | |||
| 102 | void SetCacheAddr(const CacheAddr new_addr) { | ||
| 103 | cache_addr = new_addr; | ||
| 104 | cache_addr_end = new_addr + guest_memory_size; | ||
| 105 | } | ||
| 106 | |||
| 107 | const SurfaceParams& GetSurfaceParams() const { | ||
| 108 | return params; | ||
| 109 | } | ||
| 110 | |||
| 111 | std::size_t GetSizeInBytes() const { | ||
| 112 | return guest_memory_size; | ||
| 113 | } | ||
| 114 | |||
| 115 | std::size_t GetHostSizeInBytes() const { | ||
| 116 | return host_memory_size; | ||
| 117 | } | ||
| 118 | |||
| 119 | std::size_t GetMipmapSize(const u32 level) const { | ||
| 120 | return mipmap_sizes[level]; | ||
| 121 | } | ||
| 122 | |||
| 123 | void MarkAsContinuous(const bool is_continuous) { | ||
| 124 | this->is_continuous = is_continuous; | ||
| 125 | } | ||
| 126 | |||
| 127 | bool IsContinuous() const { | ||
| 128 | return is_continuous; | ||
| 129 | } | ||
| 130 | |||
| 131 | bool IsLinear() const { | ||
| 132 | return !params.is_tiled; | ||
| 133 | } | ||
| 134 | |||
| 135 | bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { | ||
| 136 | return params.pixel_format == pixel_format; | ||
| 137 | } | ||
| 138 | |||
| 139 | VideoCore::Surface::PixelFormat GetFormat() const { | ||
| 140 | return params.pixel_format; | ||
| 141 | } | ||
| 142 | |||
| 143 | bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const { | ||
| 144 | return params.target == target; | ||
| 145 | } | ||
| 146 | |||
| 147 | MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const; | ||
| 148 | |||
| 149 | MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const; | ||
| 150 | |||
| 151 | bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const { | ||
| 152 | return std::tie(gpu_addr, params.target, params.num_levels) == | ||
| 153 | std::tie(other_gpu_addr, rhs.target, rhs.num_levels) && | ||
| 154 | params.target == SurfaceTarget::Texture2D && params.num_levels == 1; | ||
| 155 | } | ||
| 156 | |||
| 157 | std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const; | ||
| 158 | |||
| 159 | std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const { | ||
| 160 | return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params); | ||
| 161 | } | ||
| 162 | |||
| 163 | protected: | ||
| 164 | explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params); | ||
| 165 | ~SurfaceBaseImpl() = default; | ||
| 166 | |||
| 167 | virtual void DecorateSurfaceName() = 0; | ||
| 168 | |||
| 169 | const SurfaceParams params; | ||
| 170 | std::size_t layer_size; | ||
| 171 | std::size_t guest_memory_size; | ||
| 172 | const std::size_t host_memory_size; | ||
| 173 | GPUVAddr gpu_addr{}; | ||
| 174 | CacheAddr cache_addr{}; | ||
| 175 | CacheAddr cache_addr_end{}; | ||
| 176 | VAddr cpu_addr{}; | ||
| 177 | bool is_continuous{}; | ||
| 178 | |||
| 179 | std::vector<std::size_t> mipmap_sizes; | ||
| 180 | std::vector<std::size_t> mipmap_offsets; | ||
| 181 | |||
| 182 | private: | ||
| 183 | void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, | ||
| 184 | u32 level); | ||
| 185 | |||
| 186 | std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const; | ||
| 187 | |||
| 188 | std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const; | ||
| 189 | }; | ||
| 190 | |||
/// Common base for backend surfaces. TView is the backend's view type; views
/// created over this surface are cached in `views` keyed by ViewParams.
template <typename TView>
class SurfaceBase : public SurfaceBaseImpl {
public:
    /// Uploads staged host data into the backend texture.
    virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0;

    /// Downloads the backend texture into the staging buffer.
    virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0;

    void MarkAsModified(bool is_modified_, u64 tick) {
        // A surface currently bound as render target is always kept modified.
        is_modified = is_modified_ || is_target;
        modification_tick = tick;
    }

    void MarkAsRenderTarget(bool is_target_, u32 index_) {
        is_target = is_target_;
        index = index_;
    }

    void MarkAsPicked(bool is_picked_) {
        is_picked = is_picked_;
    }

    bool IsModified() const {
        return is_modified;
    }

    bool IsProtected() const {
        // Only 3D Slices are to be protected
        return is_target && params.block_depth > 0;
    }

    bool IsRenderTarget() const {
        return is_target;
    }

    /// Render-target slot set via MarkAsRenderTarget (NO_RT when unset).
    u32 GetRenderTarget() const {
        return index;
    }

    bool IsRegistered() const {
        return is_registered;
    }

    bool IsPicked() const {
        return is_picked;
    }

    void MarkAsRegistered(bool is_reg) {
        is_registered = is_reg;
    }

    u64 GetModificationTick() const {
        return modification_tick;
    }

    /// Returns a view spanning the whole surface using the overview's target.
    TView EmplaceOverview(const SurfaceParams& overview_params) {
        const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth};
        return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
    }

    /// Builds a view for a range that does not line up with one mipmap:
    /// either several layers of level 0, or several mipmaps of one layer.
    std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
                                              const GPUVAddr view_addr,
                                              const std::size_t candidate_size, const u32 mipmap,
                                              const u32 layer) {
        // Resolve where the candidate range ends inside this surface.
        const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)};
        if (!layer_mipmap) {
            return {};
        }
        const u32 end_layer{layer_mipmap->first};
        const u32 end_mipmap{layer_mipmap->second};
        if (layer != end_layer) {
            if (mipmap == 0 && end_mipmap == 0) {
                // Layer range over the base level only.
                return GetView(ViewParams(view_params.target, layer, end_layer - layer + 1, 0, 1));
            }
            return {};
        } else {
            // Mipmap range within a single layer.
            return GetView(
                ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap + 1));
        }
    }

    /// Tries to build a view for the given parameters/address inside this
    /// surface; empty optional when no compatible view exists.
    std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
                                     const std::size_t candidate_size) {
        // 3D targets and single-level non-layered surfaces never get sub-views.
        if (params.target == SurfaceTarget::Texture3D ||
            (params.num_levels == 1 && !params.is_layered) ||
            view_params.target == SurfaceTarget::Texture3D) {
            return {};
        }
        const auto layer_mipmap{GetLayerMipmap(view_addr)};
        if (!layer_mipmap) {
            return {};
        }
        const u32 layer{layer_mipmap->first};
        const u32 mipmap{layer_mipmap->second};
        if (GetMipmapSize(mipmap) != candidate_size) {
            // The size does not match one mipmap exactly; try an irregular view.
            return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer);
        }
        return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1));
    }

    TView GetMainView() const {
        return main_view;
    }

protected:
    explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params)
        : SurfaceBaseImpl(gpu_addr, params) {}

    ~SurfaceBase() = default;

    /// Backend hook: creates the actual view object for the given key.
    virtual TView CreateView(const ViewParams& view_key) = 0;

    TView main_view;
    std::unordered_map<ViewParams, TView> views;

private:
    /// Returns the cached view for `key`, creating it on first use.
    TView GetView(const ViewParams& key) {
        const auto [entry, is_cache_miss] = views.try_emplace(key);
        auto& view{entry->second};
        if (is_cache_miss) {
            view = CreateView(key);
        }
        return view;
    }

    // Sentinel meaning "not bound to any render-target slot".
    static constexpr u32 NO_RT = 0xFFFFFFFF;

    bool is_modified{};
    bool is_target{};
    bool is_registered{};
    bool is_picked{};
    u32 index{NO_RT};
    u64 modification_tick{};
};
| 324 | |||
| 325 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp new file mode 100644 index 000000000..1e4d3fb79 --- /dev/null +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -0,0 +1,389 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <map> | ||
| 6 | |||
| 7 | #include "common/alignment.h" | ||
| 8 | #include "common/bit_util.h" | ||
| 9 | #include "core/core.h" | ||
| 10 | #include "video_core/engines/shader_bytecode.h" | ||
| 11 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/texture_cache/surface_params.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | using VideoCore::Surface::ComponentTypeFromDepthFormat; | ||
| 17 | using VideoCore::Surface::ComponentTypeFromRenderTarget; | ||
| 18 | using VideoCore::Surface::ComponentTypeFromTexture; | ||
| 19 | using VideoCore::Surface::PixelFormat; | ||
| 20 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 21 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 22 | using VideoCore::Surface::PixelFormatFromTextureFormat; | ||
| 23 | using VideoCore::Surface::SurfaceTarget; | ||
| 24 | using VideoCore::Surface::SurfaceTargetFromTextureType; | ||
| 25 | using VideoCore::Surface::SurfaceType; | ||
| 26 | |||
| 27 | namespace { | ||
| 28 | |||
| 29 | SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { | ||
| 30 | switch (type) { | ||
| 31 | case Tegra::Shader::TextureType::Texture1D: | ||
| 32 | return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D; | ||
| 33 | case Tegra::Shader::TextureType::Texture2D: | ||
| 34 | return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; | ||
| 35 | case Tegra::Shader::TextureType::Texture3D: | ||
| 36 | ASSERT(!is_array); | ||
| 37 | return SurfaceTarget::Texture3D; | ||
| 38 | case Tegra::Shader::TextureType::TextureCube: | ||
| 39 | return is_array ? SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap; | ||
| 40 | default: | ||
| 41 | UNREACHABLE(); | ||
| 42 | return SurfaceTarget::Texture2D; | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) { | ||
| 47 | switch (type) { | ||
| 48 | case Tegra::Shader::ImageType::Texture1D: | ||
| 49 | return SurfaceTarget::Texture1D; | ||
| 50 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 51 | return SurfaceTarget::TextureBuffer; | ||
| 52 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 53 | return SurfaceTarget::Texture1DArray; | ||
| 54 | case Tegra::Shader::ImageType::Texture2D: | ||
| 55 | return SurfaceTarget::Texture2D; | ||
| 56 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 57 | return SurfaceTarget::Texture2DArray; | ||
| 58 | case Tegra::Shader::ImageType::Texture3D: | ||
| 59 | return SurfaceTarget::Texture3D; | ||
| 60 | default: | ||
| 61 | UNREACHABLE(); | ||
| 62 | return SurfaceTarget::Texture2D; | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { | ||
| 67 | return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); | ||
| 68 | } | ||
| 69 | |||
| 70 | } // Anonymous namespace | ||
| 71 | |||
| 72 | SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& tic, | ||
| 73 | const VideoCommon::Shader::Sampler& entry) { | ||
| 74 | SurfaceParams params; | ||
| 75 | params.is_tiled = tic.IsTiled(); | ||
| 76 | params.srgb_conversion = tic.IsSrgbConversionEnabled(); | ||
| 77 | params.block_width = params.is_tiled ? tic.BlockWidth() : 0, | ||
| 78 | params.block_height = params.is_tiled ? tic.BlockHeight() : 0, | ||
| 79 | params.block_depth = params.is_tiled ? tic.BlockDepth() : 0, | ||
| 80 | params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; | ||
| 81 | params.pixel_format = | ||
| 82 | PixelFormatFromTextureFormat(tic.format, tic.r_type.Value(), params.srgb_conversion); | ||
| 83 | params.type = GetFormatType(params.pixel_format); | ||
| 84 | if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) { | ||
| 85 | switch (params.pixel_format) { | ||
| 86 | case PixelFormat::R16U: | ||
| 87 | case PixelFormat::R16F: { | ||
| 88 | params.pixel_format = PixelFormat::Z16; | ||
| 89 | break; | ||
| 90 | } | ||
| 91 | case PixelFormat::R32F: { | ||
| 92 | params.pixel_format = PixelFormat::Z32F; | ||
| 93 | break; | ||
| 94 | } | ||
| 95 | default: { | ||
| 96 | UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", | ||
| 97 | static_cast<u32>(params.pixel_format)); | ||
| 98 | } | ||
| 99 | } | ||
| 100 | params.type = GetFormatType(params.pixel_format); | ||
| 101 | } | ||
| 102 | params.component_type = ComponentTypeFromTexture(tic.r_type.Value()); | ||
| 103 | params.type = GetFormatType(params.pixel_format); | ||
| 104 | // TODO: on 1DBuffer we should use the tic info. | ||
| 105 | if (tic.IsBuffer()) { | ||
| 106 | params.target = SurfaceTarget::TextureBuffer; | ||
| 107 | params.width = tic.Width(); | ||
| 108 | params.pitch = params.width * params.GetBytesPerPixel(); | ||
| 109 | params.height = 1; | ||
| 110 | params.depth = 1; | ||
| 111 | params.num_levels = 1; | ||
| 112 | params.emulated_levels = 1; | ||
| 113 | params.is_layered = false; | ||
| 114 | } else { | ||
| 115 | params.target = TextureTypeToSurfaceTarget(entry.GetType(), entry.IsArray()); | ||
| 116 | params.width = tic.Width(); | ||
| 117 | params.height = tic.Height(); | ||
| 118 | params.depth = tic.Depth(); | ||
| 119 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); | ||
| 120 | if (params.target == SurfaceTarget::TextureCubemap || | ||
| 121 | params.target == SurfaceTarget::TextureCubeArray) { | ||
| 122 | params.depth *= 6; | ||
| 123 | } | ||
| 124 | params.num_levels = tic.max_mip_level + 1; | ||
| 125 | params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); | ||
| 126 | params.is_layered = params.IsLayered(); | ||
| 127 | } | ||
| 128 | return params; | ||
| 129 | } | ||
| 130 | |||
| 131 | SurfaceParams SurfaceParams::CreateForImage(const Tegra::Texture::TICEntry& tic, | ||
| 132 | const VideoCommon::Shader::Image& entry) { | ||
| 133 | SurfaceParams params; | ||
| 134 | params.is_tiled = tic.IsTiled(); | ||
| 135 | params.srgb_conversion = tic.IsSrgbConversionEnabled(); | ||
| 136 | params.block_width = params.is_tiled ? tic.BlockWidth() : 0, | ||
| 137 | params.block_height = params.is_tiled ? tic.BlockHeight() : 0, | ||
| 138 | params.block_depth = params.is_tiled ? tic.BlockDepth() : 0, | ||
| 139 | params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; | ||
| 140 | params.pixel_format = | ||
| 141 | PixelFormatFromTextureFormat(tic.format, tic.r_type.Value(), params.srgb_conversion); | ||
| 142 | params.type = GetFormatType(params.pixel_format); | ||
| 143 | params.component_type = ComponentTypeFromTexture(tic.r_type.Value()); | ||
| 144 | params.type = GetFormatType(params.pixel_format); | ||
| 145 | params.target = ImageTypeToSurfaceTarget(entry.GetType()); | ||
| 146 | // TODO: on 1DBuffer we should use the tic info. | ||
| 147 | if (tic.IsBuffer()) { | ||
| 148 | params.target = SurfaceTarget::TextureBuffer; | ||
| 149 | params.width = tic.Width(); | ||
| 150 | params.pitch = params.width * params.GetBytesPerPixel(); | ||
| 151 | params.height = 1; | ||
| 152 | params.depth = 1; | ||
| 153 | params.num_levels = 1; | ||
| 154 | params.emulated_levels = 1; | ||
| 155 | params.is_layered = false; | ||
| 156 | } else { | ||
| 157 | params.width = tic.Width(); | ||
| 158 | params.height = tic.Height(); | ||
| 159 | params.depth = tic.Depth(); | ||
| 160 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); | ||
| 161 | if (params.target == SurfaceTarget::TextureCubemap || | ||
| 162 | params.target == SurfaceTarget::TextureCubeArray) { | ||
| 163 | params.depth *= 6; | ||
| 164 | } | ||
| 165 | params.num_levels = tic.max_mip_level + 1; | ||
| 166 | params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); | ||
| 167 | params.is_layered = params.IsLayered(); | ||
| 168 | } | ||
| 169 | return params; | ||
| 170 | } | ||
| 171 | |||
// Builds surface parameters for the bound depth (zeta) buffer.
// NOTE(review): `system` is unused in this factory; presumably kept for
// signature symmetry with the other Create* helpers.
SurfaceParams SurfaceParams::CreateForDepthBuffer(
    Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
    u32 block_width, u32 block_height, u32 block_depth,
    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
    SurfaceParams params;
    params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
    params.srgb_conversion = false;
    // Block dimension exponents are clamped to 5 — presumably the hardware
    // maximum; confirm against the Maxwell block-linear layout docs.
    params.block_width = std::min(block_width, 5U);
    params.block_height = std::min(block_height, 5U);
    params.block_depth = std::min(block_depth, 5U);
    params.tile_width_spacing = 1;
    params.pixel_format = PixelFormatFromDepthFormat(format);
    params.component_type = ComponentTypeFromDepthFormat(format);
    params.type = GetFormatType(params.pixel_format);
    params.width = zeta_width;
    params.height = zeta_height;
    // Depth buffers are always treated as single-level 2D surfaces.
    params.target = SurfaceTarget::Texture2D;
    params.depth = 1;
    params.pitch = 0;
    params.num_levels = 1;
    params.emulated_levels = 1;
    params.is_layered = false;
    return params;
}
| 196 | |||
| 197 | SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) { | ||
| 198 | const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; | ||
| 199 | SurfaceParams params; | ||
| 200 | params.is_tiled = | ||
| 201 | config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | ||
| 202 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || | ||
| 203 | config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; | ||
| 204 | params.block_width = config.memory_layout.block_width; | ||
| 205 | params.block_height = config.memory_layout.block_height; | ||
| 206 | params.block_depth = config.memory_layout.block_depth; | ||
| 207 | params.tile_width_spacing = 1; | ||
| 208 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 209 | params.component_type = ComponentTypeFromRenderTarget(config.format); | ||
| 210 | params.type = GetFormatType(params.pixel_format); | ||
| 211 | if (params.is_tiled) { | ||
| 212 | params.pitch = 0; | ||
| 213 | params.width = config.width; | ||
| 214 | } else { | ||
| 215 | const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT; | ||
| 216 | params.pitch = config.width; | ||
| 217 | params.width = params.pitch / bpp; | ||
| 218 | } | ||
| 219 | params.height = config.height; | ||
| 220 | params.depth = 1; | ||
| 221 | params.target = SurfaceTarget::Texture2D; | ||
| 222 | params.num_levels = 1; | ||
| 223 | params.emulated_levels = 1; | ||
| 224 | params.is_layered = false; | ||
| 225 | return params; | ||
| 226 | } | ||
| 227 | |||
| 228 | SurfaceParams SurfaceParams::CreateForFermiCopySurface( | ||
| 229 | const Tegra::Engines::Fermi2D::Regs::Surface& config) { | ||
| 230 | SurfaceParams params{}; | ||
| 231 | params.is_tiled = !config.linear; | ||
| 232 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || | ||
| 233 | config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; | ||
| 234 | params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 5U) : 0, | ||
| 235 | params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0, | ||
| 236 | params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0, | ||
| 237 | params.tile_width_spacing = 1; | ||
| 238 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 239 | params.component_type = ComponentTypeFromRenderTarget(config.format); | ||
| 240 | params.type = GetFormatType(params.pixel_format); | ||
| 241 | params.width = config.width; | ||
| 242 | params.height = config.height; | ||
| 243 | params.pitch = config.pitch; | ||
| 244 | // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters | ||
| 245 | params.target = SurfaceTarget::Texture2D; | ||
| 246 | params.depth = 1; | ||
| 247 | params.num_levels = 1; | ||
| 248 | params.emulated_levels = 1; | ||
| 249 | params.is_layered = params.IsLayered(); | ||
| 250 | return params; | ||
| 251 | } | ||
| 252 | |||
| 253 | bool SurfaceParams::IsLayered() const { | ||
| 254 | switch (target) { | ||
| 255 | case SurfaceTarget::Texture1DArray: | ||
| 256 | case SurfaceTarget::Texture2DArray: | ||
| 257 | case SurfaceTarget::TextureCubemap: | ||
| 258 | case SurfaceTarget::TextureCubeArray: | ||
| 259 | return true; | ||
| 260 | default: | ||
| 261 | return false; | ||
| 262 | } | ||
| 263 | } | ||
| 264 | |||
| 265 | // Auto block resizing algorithm from: | ||
| 266 | // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c | ||
| 267 | u32 SurfaceParams::GetMipBlockHeight(u32 level) const { | ||
| 268 | if (level == 0) { | ||
| 269 | return this->block_height; | ||
| 270 | } | ||
| 271 | |||
| 272 | const u32 height_new{GetMipHeight(level)}; | ||
| 273 | const u32 default_block_height{GetDefaultBlockHeight()}; | ||
| 274 | const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height}; | ||
| 275 | const u32 block_height_new = Common::Log2Ceil32(blocks_in_y); | ||
| 276 | return std::clamp(block_height_new, 3U, 7U) - 3U; | ||
| 277 | } | ||
| 278 | |||
| 279 | u32 SurfaceParams::GetMipBlockDepth(u32 level) const { | ||
| 280 | if (level == 0) { | ||
| 281 | return this->block_depth; | ||
| 282 | } | ||
| 283 | if (is_layered) { | ||
| 284 | return 0; | ||
| 285 | } | ||
| 286 | |||
| 287 | const u32 depth_new{GetMipDepth(level)}; | ||
| 288 | const u32 block_depth_new = Common::Log2Ceil32(depth_new); | ||
| 289 | if (block_depth_new > 4) { | ||
| 290 | return 5 - (GetMipBlockHeight(level) >= 2); | ||
| 291 | } | ||
| 292 | return block_depth_new; | ||
| 293 | } | ||
| 294 | |||
| 295 | std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { | ||
| 296 | std::size_t offset = 0; | ||
| 297 | for (u32 i = 0; i < level; i++) { | ||
| 298 | offset += GetInnerMipmapMemorySize(i, false, false); | ||
| 299 | } | ||
| 300 | return offset; | ||
| 301 | } | ||
| 302 | |||
| 303 | std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { | ||
| 304 | std::size_t offset = 0; | ||
| 305 | for (u32 i = 0; i < level; i++) { | ||
| 306 | offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); | ||
| 307 | } | ||
| 308 | return offset; | ||
| 309 | } | ||
| 310 | |||
| 311 | std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const { | ||
| 312 | std::size_t offset = 0; | ||
| 313 | for (u32 i = 0; i < level; i++) { | ||
| 314 | offset += GetConvertedMipmapSize(i); | ||
| 315 | } | ||
| 316 | return offset; | ||
| 317 | } | ||
| 318 | |||
| 319 | std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { | ||
| 320 | constexpr std::size_t rgba8_bpp = 4ULL; | ||
| 321 | const std::size_t width_t = GetMipWidth(level); | ||
| 322 | const std::size_t height_t = GetMipHeight(level); | ||
| 323 | const std::size_t depth_t = is_layered ? depth : GetMipDepth(level); | ||
| 324 | return width_t * height_t * depth_t * rgba8_bpp; | ||
| 325 | } | ||
| 326 | |||
| 327 | std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { | ||
| 328 | std::size_t size = 0; | ||
| 329 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 330 | size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); | ||
| 331 | } | ||
| 332 | if (is_tiled && is_layered) { | ||
| 333 | return Common::AlignBits(size, | ||
| 334 | Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); | ||
| 335 | } | ||
| 336 | return size; | ||
| 337 | } | ||
| 338 | |||
| 339 | std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, | ||
| 340 | bool uncompressed) const { | ||
| 341 | const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; | ||
| 342 | const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; | ||
| 343 | const u32 depth{is_layered ? 1U : GetMipDepth(level)}; | ||
| 344 | if (is_tiled) { | ||
| 345 | return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height, | ||
| 346 | depth, GetMipBlockHeight(level), | ||
| 347 | GetMipBlockDepth(level)); | ||
| 348 | } else if (as_host_size || IsBuffer()) { | ||
| 349 | return GetBytesPerPixel() * width * height * depth; | ||
| 350 | } else { | ||
| 351 | // Linear Texture Case | ||
| 352 | return pitch * height * depth; | ||
| 353 | } | ||
| 354 | } | ||
| 355 | |||
| 356 | bool SurfaceParams::operator==(const SurfaceParams& rhs) const { | ||
| 357 | return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, | ||
| 358 | height, depth, pitch, num_levels, pixel_format, component_type, type, target) == | ||
| 359 | std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, | ||
| 360 | rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, | ||
| 361 | rhs.num_levels, rhs.pixel_format, rhs.component_type, rhs.type, rhs.target); | ||
| 362 | } | ||
| 363 | |||
| 364 | std::string SurfaceParams::TargetName() const { | ||
| 365 | switch (target) { | ||
| 366 | case SurfaceTarget::Texture1D: | ||
| 367 | return "1D"; | ||
| 368 | case SurfaceTarget::TextureBuffer: | ||
| 369 | return "TexBuffer"; | ||
| 370 | case SurfaceTarget::Texture2D: | ||
| 371 | return "2D"; | ||
| 372 | case SurfaceTarget::Texture3D: | ||
| 373 | return "3D"; | ||
| 374 | case SurfaceTarget::Texture1DArray: | ||
| 375 | return "1DArray"; | ||
| 376 | case SurfaceTarget::Texture2DArray: | ||
| 377 | return "2DArray"; | ||
| 378 | case SurfaceTarget::TextureCubemap: | ||
| 379 | return "Cube"; | ||
| 380 | case SurfaceTarget::TextureCubeArray: | ||
| 381 | return "CubeArray"; | ||
| 382 | default: | ||
| 383 | LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); | ||
| 384 | UNREACHABLE(); | ||
| 385 | return fmt::format("TUK({})", static_cast<u32>(target)); | ||
| 386 | } | ||
| 387 | } | ||
| 388 | |||
| 389 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h new file mode 100644 index 000000000..c58e7f8a4 --- /dev/null +++ b/src/video_core/texture_cache/surface_params.h | |||
| @@ -0,0 +1,286 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/alignment.h" | ||
| 8 | #include "common/bit_util.h" | ||
| 9 | #include "common/cityhash.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/fermi_2d.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/shader/shader_ir.h" | ||
| 14 | #include "video_core/surface.h" | ||
| 15 | #include "video_core/textures/decoders.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | using VideoCore::Surface::SurfaceCompression; | ||
| 20 | |||
| 21 | class SurfaceParams { | ||
| 22 | public: | ||
| 23 | /// Creates SurfaceCachedParams from a texture configuration. | ||
| 24 | static SurfaceParams CreateForTexture(const Tegra::Texture::TICEntry& tic, | ||
| 25 | const VideoCommon::Shader::Sampler& entry); | ||
| 26 | |||
| 27 | /// Creates SurfaceCachedParams from an image configuration. | ||
| 28 | static SurfaceParams CreateForImage(const Tegra::Texture::TICEntry& tic, | ||
| 29 | const VideoCommon::Shader::Image& entry); | ||
| 30 | |||
| 31 | /// Creates SurfaceCachedParams for a depth buffer configuration. | ||
| 32 | static SurfaceParams CreateForDepthBuffer( | ||
| 33 | Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, | ||
| 34 | u32 block_width, u32 block_height, u32 block_depth, | ||
| 35 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); | ||
| 36 | |||
| 37 | /// Creates SurfaceCachedParams from a framebuffer configuration. | ||
| 38 | static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); | ||
| 39 | |||
| 40 | /// Creates SurfaceCachedParams from a Fermi2D surface configuration. | ||
| 41 | static SurfaceParams CreateForFermiCopySurface( | ||
| 42 | const Tegra::Engines::Fermi2D::Regs::Surface& config); | ||
| 43 | |||
| 44 | std::size_t Hash() const { | ||
| 45 | return static_cast<std::size_t>( | ||
| 46 | Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this))); | ||
| 47 | } | ||
| 48 | |||
| 49 | bool operator==(const SurfaceParams& rhs) const; | ||
| 50 | |||
| 51 | bool operator!=(const SurfaceParams& rhs) const { | ||
| 52 | return !operator==(rhs); | ||
| 53 | } | ||
| 54 | |||
| 55 | std::size_t GetGuestSizeInBytes() const { | ||
| 56 | return GetInnerMemorySize(false, false, false); | ||
| 57 | } | ||
| 58 | |||
| 59 | std::size_t GetHostSizeInBytes() const { | ||
| 60 | std::size_t host_size_in_bytes; | ||
| 61 | if (GetCompressionType() == SurfaceCompression::Converted) { | ||
| 62 | // ASTC is uncompressed in software, and emulated as RGBA8 | ||
| 63 | host_size_in_bytes = 0; | ||
| 64 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 65 | host_size_in_bytes += GetConvertedMipmapSize(level); | ||
| 66 | } | ||
| 67 | } else { | ||
| 68 | host_size_in_bytes = GetInnerMemorySize(true, false, false); | ||
| 69 | } | ||
| 70 | return host_size_in_bytes; | ||
| 71 | } | ||
| 72 | |||
| 73 | u32 GetBlockAlignedWidth() const { | ||
| 74 | return Common::AlignUp(width, 64 / GetBytesPerPixel()); | ||
| 75 | } | ||
| 76 | |||
| 77 | /// Returns the width of a given mipmap level. | ||
| 78 | u32 GetMipWidth(u32 level) const { | ||
| 79 | return std::max(1U, width >> level); | ||
| 80 | } | ||
| 81 | |||
| 82 | /// Returns the height of a given mipmap level. | ||
| 83 | u32 GetMipHeight(u32 level) const { | ||
| 84 | return std::max(1U, height >> level); | ||
| 85 | } | ||
| 86 | |||
| 87 | /// Returns the depth of a given mipmap level. | ||
| 88 | u32 GetMipDepth(u32 level) const { | ||
| 89 | return is_layered ? depth : std::max(1U, depth >> level); | ||
| 90 | } | ||
| 91 | |||
| 92 | /// Returns the block height of a given mipmap level. | ||
| 93 | u32 GetMipBlockHeight(u32 level) const; | ||
| 94 | |||
| 95 | /// Returns the block depth of a given mipmap level. | ||
| 96 | u32 GetMipBlockDepth(u32 level) const; | ||
| 97 | |||
| 98 | /// Returns the best possible row/pitch alignment for the surface. | ||
| 99 | u32 GetRowAlignment(u32 level) const { | ||
| 100 | const u32 bpp = | ||
| 101 | GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel(); | ||
| 102 | return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); | ||
| 103 | } | ||
| 104 | |||
| 105 | /// Returns the offset in bytes in guest memory of a given mipmap level. | ||
| 106 | std::size_t GetGuestMipmapLevelOffset(u32 level) const; | ||
| 107 | |||
| 108 | /// Returns the offset in bytes in host memory (linear) of a given mipmap level. | ||
| 109 | std::size_t GetHostMipmapLevelOffset(u32 level) const; | ||
| 110 | |||
| 111 | /// Returns the offset in bytes in host memory (linear) of a given mipmap level | ||
| 112 | /// for a texture that is converted in host gpu. | ||
| 113 | std::size_t GetConvertedMipmapOffset(u32 level) const; | ||
| 114 | |||
| 115 | /// Returns the size in bytes in guest memory of a given mipmap level. | ||
| 116 | std::size_t GetGuestMipmapSize(u32 level) const { | ||
| 117 | return GetInnerMipmapMemorySize(level, false, false); | ||
| 118 | } | ||
| 119 | |||
| 120 | /// Returns the size in bytes in host memory (linear) of a given mipmap level. | ||
| 121 | std::size_t GetHostMipmapSize(u32 level) const { | ||
| 122 | return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); | ||
| 123 | } | ||
| 124 | |||
| 125 | std::size_t GetConvertedMipmapSize(u32 level) const; | ||
| 126 | |||
| 127 | /// Returns the size of a layer in bytes in guest memory. | ||
| 128 | std::size_t GetGuestLayerSize() const { | ||
| 129 | return GetLayerSize(false, false); | ||
| 130 | } | ||
| 131 | |||
| 132 | /// Returns the size of a layer in bytes in host memory for a given mipmap level. | ||
| 133 | std::size_t GetHostLayerSize(u32 level) const { | ||
| 134 | ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D); | ||
| 135 | return GetInnerMipmapMemorySize(level, true, false); | ||
| 136 | } | ||
| 137 | |||
| 138 | /// Returns the max possible mipmap that the texture can have in host gpu | ||
| 139 | u32 MaxPossibleMipmap() const { | ||
| 140 | const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; | ||
| 141 | const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U; | ||
| 142 | const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h); | ||
| 143 | if (target != VideoCore::Surface::SurfaceTarget::Texture3D) | ||
| 144 | return max_mipmap; | ||
| 145 | return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); | ||
| 146 | } | ||
| 147 | |||
| 148 | /// Returns if the guest surface is a compressed surface. | ||
| 149 | bool IsCompressed() const { | ||
| 150 | return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; | ||
| 151 | } | ||
| 152 | |||
| 153 | /// Returns the default block width. | ||
| 154 | u32 GetDefaultBlockWidth() const { | ||
| 155 | return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); | ||
| 156 | } | ||
| 157 | |||
| 158 | /// Returns the default block height. | ||
| 159 | u32 GetDefaultBlockHeight() const { | ||
| 160 | return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); | ||
| 161 | } | ||
| 162 | |||
| 163 | /// Returns the bits per pixel. | ||
| 164 | u32 GetBitsPerPixel() const { | ||
| 165 | return VideoCore::Surface::GetFormatBpp(pixel_format); | ||
| 166 | } | ||
| 167 | |||
| 168 | /// Returns the bytes per pixel. | ||
| 169 | u32 GetBytesPerPixel() const { | ||
| 170 | return VideoCore::Surface::GetBytesPerPixel(pixel_format); | ||
| 171 | } | ||
| 172 | |||
| 173 | /// Returns true if the pixel format is a depth and/or stencil format. | ||
| 174 | bool IsPixelFormatZeta() const { | ||
| 175 | return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && | ||
| 176 | pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; | ||
| 177 | } | ||
| 178 | |||
| 179 | /// Returns how the compression should be handled for this texture. | ||
| 180 | SurfaceCompression GetCompressionType() const { | ||
| 181 | return VideoCore::Surface::GetFormatCompressionType(pixel_format); | ||
| 182 | } | ||
| 183 | |||
| 184 | /// Returns true if the surface is a TextureBuffer type of surface. | ||
| 185 | bool IsBuffer() const { | ||
| 186 | return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; | ||
| 187 | } | ||
| 188 | |||
| 189 | /// Returns the debug name of the texture for use in graphic debuggers. | ||
| 190 | std::string TargetName() const; | ||
| 191 | |||
| 192 | // Helper used for out of class size calculations | ||
| 193 | static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, | ||
| 194 | const u32 block_depth) { | ||
| 195 | return Common::AlignBits(out_size, | ||
| 196 | Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); | ||
| 197 | } | ||
| 198 | |||
| 199 | /// Converts a width from a type of surface into another. This helps represent the | ||
| 200 | /// equivalent value between compressed/non-compressed textures. | ||
| 201 | static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, | ||
| 202 | VideoCore::Surface::PixelFormat pixel_format_to) { | ||
| 203 | const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); | ||
| 204 | const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); | ||
| 205 | return (width * bw2 + bw1 - 1) / bw1; | ||
| 206 | } | ||
| 207 | |||
| 208 | /// Converts a height from a type of surface into another. This helps represent the | ||
| 209 | /// equivalent value between compressed/non-compressed textures. | ||
| 210 | static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, | ||
| 211 | VideoCore::Surface::PixelFormat pixel_format_to) { | ||
| 212 | const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); | ||
| 213 | const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); | ||
| 214 | return (height * bh2 + bh1 - 1) / bh1; | ||
| 215 | } | ||
| 216 | |||
| 217 | // Finds the maximum possible width between 2 2D layers of different formats | ||
| 218 | static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, | ||
| 219 | const u32 src_level, const u32 dst_level) { | ||
| 220 | const u32 bw1 = src_params.GetDefaultBlockWidth(); | ||
| 221 | const u32 bw2 = dst_params.GetDefaultBlockWidth(); | ||
| 222 | const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; | ||
| 223 | const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; | ||
| 224 | return std::min(t_src_width, t_dst_width); | ||
| 225 | } | ||
| 226 | |||
| 227 | // Finds the maximum possible height between 2 2D layers of different formats | ||
| 228 | static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params, | ||
| 229 | const u32 src_level, const u32 dst_level) { | ||
| 230 | const u32 bh1 = src_params.GetDefaultBlockHeight(); | ||
| 231 | const u32 bh2 = dst_params.GetDefaultBlockHeight(); | ||
| 232 | const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; | ||
| 233 | const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; | ||
| 234 | return std::min(t_src_height, t_dst_height); | ||
| 235 | } | ||
| 236 | |||
| 237 | bool is_tiled; | ||
| 238 | bool srgb_conversion; | ||
| 239 | bool is_layered; | ||
| 240 | u32 block_width; | ||
| 241 | u32 block_height; | ||
| 242 | u32 block_depth; | ||
| 243 | u32 tile_width_spacing; | ||
| 244 | u32 width; | ||
| 245 | u32 height; | ||
| 246 | u32 depth; | ||
| 247 | u32 pitch; | ||
| 248 | u32 num_levels; | ||
| 249 | u32 emulated_levels; | ||
| 250 | VideoCore::Surface::PixelFormat pixel_format; | ||
| 251 | VideoCore::Surface::ComponentType component_type; | ||
| 252 | VideoCore::Surface::SurfaceType type; | ||
| 253 | VideoCore::Surface::SurfaceTarget target; | ||
| 254 | |||
| 255 | private: | ||
| 256 | /// Returns the size of a given mipmap level inside a layer. | ||
| 257 | std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; | ||
| 258 | |||
| 259 | /// Returns the size of all mipmap levels and aligns as needed. | ||
| 260 | std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { | ||
| 261 | return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : depth); | ||
| 262 | } | ||
| 263 | |||
| 264 | /// Returns the size of a layer | ||
| 265 | std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; | ||
| 266 | |||
| 267 | std::size_t GetNumLayers() const { | ||
| 268 | return is_layered ? depth : 1; | ||
| 269 | } | ||
| 270 | |||
| 271 | /// Returns true if these parameters are from a layered surface. | ||
| 272 | bool IsLayered() const; | ||
| 273 | }; | ||
| 274 | |||
| 275 | } // namespace VideoCommon | ||
| 276 | |||
| 277 | namespace std { | ||
| 278 | |||
| 279 | template <> | ||
| 280 | struct hash<VideoCommon::SurfaceParams> { | ||
| 281 | std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept { | ||
| 282 | return k.Hash(); | ||
| 283 | } | ||
| 284 | }; | ||
| 285 | |||
| 286 | } // namespace std | ||
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp new file mode 100644 index 000000000..57a1f5803 --- /dev/null +++ b/src/video_core/texture_cache/surface_view.cpp | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/texture_cache/surface_view.h" | ||
| 9 | |||
| 10 | namespace VideoCommon { | ||
| 11 | |||
| 12 | std::size_t ViewParams::Hash() const { | ||
| 13 | return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^ | ||
| 14 | (static_cast<std::size_t>(base_level) << 24) ^ | ||
| 15 | (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36); | ||
| 16 | } | ||
| 17 | |||
| 18 | bool ViewParams::operator==(const ViewParams& rhs) const { | ||
| 19 | return std::tie(base_layer, num_layers, base_level, num_levels, target) == | ||
| 20 | std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target); | ||
| 21 | } | ||
| 22 | |||
| 23 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h new file mode 100644 index 000000000..b17fd11a9 --- /dev/null +++ b/src/video_core/texture_cache/surface_view.h | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <functional> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/texture_cache/surface_params.h" | ||
| 12 | |||
| 13 | namespace VideoCommon { | ||
| 14 | |||
| 15 | struct ViewParams { | ||
| 16 | constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer, | ||
| 17 | u32 num_layers, u32 base_level, u32 num_levels) | ||
| 18 | : target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level}, | ||
| 19 | num_levels{num_levels} {} | ||
| 20 | |||
| 21 | std::size_t Hash() const; | ||
| 22 | |||
| 23 | bool operator==(const ViewParams& rhs) const; | ||
| 24 | |||
| 25 | bool IsLayered() const { | ||
| 26 | switch (target) { | ||
| 27 | case VideoCore::Surface::SurfaceTarget::Texture1DArray: | ||
| 28 | case VideoCore::Surface::SurfaceTarget::Texture2DArray: | ||
| 29 | case VideoCore::Surface::SurfaceTarget::TextureCubemap: | ||
| 30 | case VideoCore::Surface::SurfaceTarget::TextureCubeArray: | ||
| 31 | return true; | ||
| 32 | default: | ||
| 33 | return false; | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | VideoCore::Surface::SurfaceTarget target{}; | ||
| 38 | u32 base_layer{}; | ||
| 39 | u32 num_layers{}; | ||
| 40 | u32 base_level{}; | ||
| 41 | u32 num_levels{}; | ||
| 42 | }; | ||
| 43 | |||
| 44 | class ViewBase { | ||
| 45 | public: | ||
| 46 | constexpr explicit ViewBase(const ViewParams& params) : params{params} {} | ||
| 47 | |||
| 48 | constexpr const ViewParams& GetViewParams() const { | ||
| 49 | return params; | ||
| 50 | } | ||
| 51 | |||
| 52 | protected: | ||
| 53 | ViewParams params; | ||
| 54 | }; | ||
| 55 | |||
| 56 | } // namespace VideoCommon | ||
| 57 | |||
| 58 | namespace std { | ||
| 59 | |||
| 60 | template <> | ||
| 61 | struct hash<VideoCommon::ViewParams> { | ||
| 62 | std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept { | ||
| 63 | return k.Hash(); | ||
| 64 | } | ||
| 65 | }; | ||
| 66 | |||
| 67 | } // namespace std | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h new file mode 100644 index 000000000..877c6635d --- /dev/null +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -0,0 +1,835 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <array> | ||
| 9 | #include <memory> | ||
| 10 | #include <mutex> | ||
| 11 | #include <set> | ||
| 12 | #include <tuple> | ||
| 13 | #include <unordered_map> | ||
| 14 | #include <vector> | ||
| 15 | |||
| 16 | #include <boost/icl/interval_map.hpp> | ||
| 17 | #include <boost/range/iterator_range.hpp> | ||
| 18 | |||
| 19 | #include "common/assert.h" | ||
| 20 | #include "common/common_types.h" | ||
| 21 | #include "common/math_util.h" | ||
| 22 | #include "core/core.h" | ||
| 23 | #include "core/memory.h" | ||
| 24 | #include "core/settings.h" | ||
| 25 | #include "video_core/engines/fermi_2d.h" | ||
| 26 | #include "video_core/engines/maxwell_3d.h" | ||
| 27 | #include "video_core/gpu.h" | ||
| 28 | #include "video_core/memory_manager.h" | ||
| 29 | #include "video_core/rasterizer_interface.h" | ||
| 30 | #include "video_core/surface.h" | ||
| 31 | #include "video_core/texture_cache/copy_params.h" | ||
| 32 | #include "video_core/texture_cache/surface_base.h" | ||
| 33 | #include "video_core/texture_cache/surface_params.h" | ||
| 34 | #include "video_core/texture_cache/surface_view.h" | ||
| 35 | |||
| 36 | namespace Tegra::Texture { | ||
| 37 | struct FullTextureInfo; | ||
| 38 | } | ||
| 39 | |||
| 40 | namespace VideoCore { | ||
| 41 | class RasterizerInterface; | ||
| 42 | } | ||
| 43 | |||
| 44 | namespace VideoCommon { | ||
| 45 | |||
| 46 | using VideoCore::Surface::PixelFormat; | ||
| 47 | |||
| 48 | using VideoCore::Surface::SurfaceTarget; | ||
| 49 | using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; | ||
| 50 | |||
| 51 | template <typename TSurface, typename TView> | ||
| 52 | class TextureCache { | ||
| 53 | using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>; | ||
| 54 | using IntervalType = typename IntervalMap::interval_type; | ||
| 55 | |||
| 56 | public: | ||
| 57 | void InvalidateRegion(CacheAddr addr, std::size_t size) { | ||
| 58 | std::lock_guard lock{mutex}; | ||
| 59 | |||
| 60 | for (const auto& surface : GetSurfacesInRegion(addr, size)) { | ||
| 61 | Unregister(surface); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | /*** | ||
| 66 | * `Guard` guarantees that render targets don't unregister themselves if they | ||
| 67 | * collide. Protection is currently only done on 3D slices. | ||
| 68 | ***/ | ||
| 69 | void GuardRenderTargets(bool new_guard) { | ||
| 70 | guard_render_targets = new_guard; | ||
| 71 | } | ||
| 72 | |||
| 73 | void GuardSamplers(bool new_guard) { | ||
| 74 | guard_samplers = new_guard; | ||
| 75 | } | ||
| 76 | |||
| 77 | void FlushRegion(CacheAddr addr, std::size_t size) { | ||
| 78 | std::lock_guard lock{mutex}; | ||
| 79 | |||
| 80 | auto surfaces = GetSurfacesInRegion(addr, size); | ||
| 81 | if (surfaces.empty()) { | ||
| 82 | return; | ||
| 83 | } | ||
| 84 | std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) { | ||
| 85 | return a->GetModificationTick() < b->GetModificationTick(); | ||
| 86 | }); | ||
| 87 | for (const auto& surface : surfaces) { | ||
| 88 | FlushSurface(surface); | ||
| 89 | } | ||
| 90 | } | ||
| 91 | |||
| 92 | TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, | ||
| 93 | const VideoCommon::Shader::Sampler& entry) { | ||
| 94 | std::lock_guard lock{mutex}; | ||
| 95 | const auto gpu_addr{tic.Address()}; | ||
| 96 | if (!gpu_addr) { | ||
| 97 | return {}; | ||
| 98 | } | ||
| 99 | const auto params{SurfaceParams::CreateForTexture(tic, entry)}; | ||
| 100 | const auto [surface, view] = GetSurface(gpu_addr, params, true, false); | ||
| 101 | if (guard_samplers) { | ||
| 102 | sampled_textures.push_back(surface); | ||
| 103 | } | ||
| 104 | return view; | ||
| 105 | } | ||
| 106 | |||
| 107 | TView GetImageSurface(const Tegra::Texture::TICEntry& tic, | ||
| 108 | const VideoCommon::Shader::Image& entry) { | ||
| 109 | std::lock_guard lock{mutex}; | ||
| 110 | const auto gpu_addr{tic.Address()}; | ||
| 111 | if (!gpu_addr) { | ||
| 112 | return {}; | ||
| 113 | } | ||
| 114 | const auto params{SurfaceParams::CreateForImage(tic, entry)}; | ||
| 115 | const auto [surface, view] = GetSurface(gpu_addr, params, true, false); | ||
| 116 | if (guard_samplers) { | ||
| 117 | sampled_textures.push_back(surface); | ||
| 118 | } | ||
| 119 | return view; | ||
| 120 | } | ||
| 121 | |||
| 122 | bool TextureBarrier() { | ||
| 123 | const bool any_rt = | ||
| 124 | std::any_of(sampled_textures.begin(), sampled_textures.end(), | ||
| 125 | [](const auto& surface) { return surface->IsRenderTarget(); }); | ||
| 126 | sampled_textures.clear(); | ||
| 127 | return any_rt; | ||
| 128 | } | ||
| 129 | |||
| 130 | TView GetDepthBufferSurface(bool preserve_contents) { | ||
| 131 | std::lock_guard lock{mutex}; | ||
| 132 | auto& maxwell3d = system.GPU().Maxwell3D(); | ||
| 133 | |||
| 134 | if (!maxwell3d.dirty.depth_buffer) { | ||
| 135 | return depth_buffer.view; | ||
| 136 | } | ||
| 137 | maxwell3d.dirty.depth_buffer = false; | ||
| 138 | |||
| 139 | const auto& regs{maxwell3d.regs}; | ||
| 140 | const auto gpu_addr{regs.zeta.Address()}; | ||
| 141 | if (!gpu_addr || !regs.zeta_enable) { | ||
| 142 | SetEmptyDepthBuffer(); | ||
| 143 | return {}; | ||
| 144 | } | ||
| 145 | const auto depth_params{SurfaceParams::CreateForDepthBuffer( | ||
| 146 | system, regs.zeta_width, regs.zeta_height, regs.zeta.format, | ||
| 147 | regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, | ||
| 148 | regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; | ||
| 149 | auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); | ||
| 150 | if (depth_buffer.target) | ||
| 151 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); | ||
| 152 | depth_buffer.target = surface_view.first; | ||
| 153 | depth_buffer.view = surface_view.second; | ||
| 154 | if (depth_buffer.target) | ||
| 155 | depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT); | ||
| 156 | return surface_view.second; | ||
| 157 | } | ||
| 158 | |||
| 159 | TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { | ||
| 160 | std::lock_guard lock{mutex}; | ||
| 161 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | ||
| 162 | auto& maxwell3d = system.GPU().Maxwell3D(); | ||
| 163 | if (!maxwell3d.dirty.render_target[index]) { | ||
| 164 | return render_targets[index].view; | ||
| 165 | } | ||
| 166 | maxwell3d.dirty.render_target[index] = false; | ||
| 167 | |||
| 168 | const auto& regs{maxwell3d.regs}; | ||
| 169 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || | ||
| 170 | regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { | ||
| 171 | SetEmptyColorBuffer(index); | ||
| 172 | return {}; | ||
| 173 | } | ||
| 174 | |||
| 175 | const auto& config{regs.rt[index]}; | ||
| 176 | const auto gpu_addr{config.Address()}; | ||
| 177 | if (!gpu_addr) { | ||
| 178 | SetEmptyColorBuffer(index); | ||
| 179 | return {}; | ||
| 180 | } | ||
| 181 | |||
| 182 | auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), | ||
| 183 | preserve_contents, true); | ||
| 184 | if (render_targets[index].target) | ||
| 185 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); | ||
| 186 | render_targets[index].target = surface_view.first; | ||
| 187 | render_targets[index].view = surface_view.second; | ||
| 188 | if (render_targets[index].target) | ||
| 189 | render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index)); | ||
| 190 | return surface_view.second; | ||
| 191 | } | ||
| 192 | |||
| 193 | void MarkColorBufferInUse(std::size_t index) { | ||
| 194 | if (auto& render_target = render_targets[index].target) { | ||
| 195 | render_target->MarkAsModified(true, Tick()); | ||
| 196 | } | ||
| 197 | } | ||
| 198 | |||
| 199 | void MarkDepthBufferInUse() { | ||
| 200 | if (depth_buffer.target) { | ||
| 201 | depth_buffer.target->MarkAsModified(true, Tick()); | ||
| 202 | } | ||
| 203 | } | ||
| 204 | |||
| 205 | void SetEmptyDepthBuffer() { | ||
| 206 | if (depth_buffer.target == nullptr) { | ||
| 207 | return; | ||
| 208 | } | ||
| 209 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); | ||
| 210 | depth_buffer.target = nullptr; | ||
| 211 | depth_buffer.view = nullptr; | ||
| 212 | } | ||
| 213 | |||
| 214 | void SetEmptyColorBuffer(std::size_t index) { | ||
| 215 | if (render_targets[index].target == nullptr) { | ||
| 216 | return; | ||
| 217 | } | ||
| 218 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); | ||
| 219 | render_targets[index].target = nullptr; | ||
| 220 | render_targets[index].view = nullptr; | ||
| 221 | } | ||
| 222 | |||
| 223 | void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, | ||
| 224 | const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, | ||
| 225 | const Tegra::Engines::Fermi2D::Config& copy_config) { | ||
| 226 | std::lock_guard lock{mutex}; | ||
| 227 | std::pair<TSurface, TView> dst_surface = GetFermiSurface(dst_config); | ||
| 228 | std::pair<TSurface, TView> src_surface = GetFermiSurface(src_config); | ||
| 229 | ImageBlit(src_surface.second, dst_surface.second, copy_config); | ||
| 230 | dst_surface.first->MarkAsModified(true, Tick()); | ||
| 231 | } | ||
| 232 | |||
| 233 | TSurface TryFindFramebufferSurface(const u8* host_ptr) { | ||
| 234 | const CacheAddr cache_addr = ToCacheAddr(host_ptr); | ||
| 235 | if (!cache_addr) { | ||
| 236 | return nullptr; | ||
| 237 | } | ||
| 238 | const CacheAddr page = cache_addr >> registry_page_bits; | ||
| 239 | std::vector<TSurface>& list = registry[page]; | ||
| 240 | for (auto& surface : list) { | ||
| 241 | if (surface->GetCacheAddr() == cache_addr) { | ||
| 242 | return surface; | ||
| 243 | } | ||
| 244 | } | ||
| 245 | return nullptr; | ||
| 246 | } | ||
| 247 | |||
| 248 | u64 Tick() { | ||
| 249 | return ++ticks; | ||
| 250 | } | ||
| 251 | |||
protected:
    /// Initializes cache bookkeeping: clears all framebuffer bindings, sizes the
    /// staging cache, and fills the "sibling" format table used to reuse surfaces
    /// across bit-compatible depth/color pixel formats.
    TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
        : system{system}, rasterizer{rasterizer} {
        // Targets default to null, so these calls are no-ops that make the
        // "nothing bound" starting invariant explicit.
        for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
            SetEmptyColorBuffer(i);
        }

        SetEmptyDepthBuffer();
        staging_cache.SetSize(2);

        // Record format pairings in both directions so either side maps to the other.
        const auto make_siblings = [this](PixelFormat a, PixelFormat b) {
            siblings_table[static_cast<std::size_t>(a)] = b;
            siblings_table[static_cast<std::size_t>(b)] = a;
        };
        std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
        make_siblings(PixelFormat::Z16, PixelFormat::R16U);
        make_siblings(PixelFormat::Z32F, PixelFormat::R32F);
        make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F);

        sampled_textures.reserve(64);
    }

    ~TextureCache() = default;
| 275 | |||
    /// Creates a backend-specific surface for the given GPU address and parameters.
    virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0;

    /// Copies a subresource region from one surface to another on the host GPU.
    virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface,
                           const CopyParams& copy_params) = 0;

    /// Blits between two views, honoring the Fermi2D copy configuration.
    virtual void ImageBlit(TView& src_view, TView& dst_view,
                           const Tegra::Engines::Fermi2D::Config& copy_config) = 0;

    // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
    // and reading it from a separate buffer.
    virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
| 287 | |||
| 288 | void ManageRenderTargetUnregister(TSurface& surface) { | ||
| 289 | auto& maxwell3d = system.GPU().Maxwell3D(); | ||
| 290 | const u32 index = surface->GetRenderTarget(); | ||
| 291 | if (index == DEPTH_RT) { | ||
| 292 | maxwell3d.dirty.depth_buffer = true; | ||
| 293 | } else { | ||
| 294 | maxwell3d.dirty.render_target[index] = true; | ||
| 295 | } | ||
| 296 | maxwell3d.dirty.render_settings = true; | ||
| 297 | } | ||
| 298 | |||
    /// Registers a surface in the cache: resolves its host/CPU addresses, inserts it
    /// into the page registry and L1 cache, and notifies the rasterizer.
    void Register(TSurface surface) {
        const GPUVAddr gpu_addr = surface->GetGpuAddr();
        const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
        const std::size_t size = surface->GetSizeInBytes();
        const std::optional<VAddr> cpu_addr =
            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
        if (!cache_ptr || !cpu_addr) {
            // A surface without a host or CPU mapping cannot be tracked; give up.
            LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
                         gpu_addr);
            return;
        }
        // Surfaces spanning non-contiguous GPU memory need different flush/load handling.
        const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
        surface->MarkAsContinuous(continuous);
        surface->SetCacheAddr(cache_ptr);
        surface->SetCpuAddr(*cpu_addr);
        RegisterInnerCache(surface);
        surface->MarkAsRegistered(true);
        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
    }
| 318 | |||
    /// Removes a surface from the cache and places it in the reserve for reuse.
    /// Protected surfaces are skipped entirely while `guard_render_targets` is set.
    void Unregister(TSurface surface) {
        if (guard_render_targets && surface->IsProtected()) {
            return;
        }
        if (!guard_render_targets && surface->IsRenderTarget()) {
            // Dirty the matching Maxwell3D binding so it is rebound on the next draw.
            ManageRenderTargetUnregister(surface);
        }
        const std::size_t size = surface->GetSizeInBytes();
        const VAddr cpu_addr = surface->GetCpuAddr();
        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
        UnregisterInnerCache(surface);
        surface->MarkAsRegistered(false);
        // Keep the object around so identical parameters can reuse it later.
        ReserveSurface(surface->GetSurfaceParams(), surface);
    }
| 333 | |||
| 334 | TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { | ||
| 335 | if (const auto surface = TryGetReservedSurface(params); surface) { | ||
| 336 | surface->SetGpuAddr(gpu_addr); | ||
| 337 | return surface; | ||
| 338 | } | ||
| 339 | // No reserved surface available, create a new one and reserve it | ||
| 340 | auto new_surface{CreateSurface(gpu_addr, params)}; | ||
| 341 | return new_surface; | ||
| 342 | } | ||
| 343 | |||
| 344 | std::pair<TSurface, TView> GetFermiSurface( | ||
| 345 | const Tegra::Engines::Fermi2D::Regs::Surface& config) { | ||
| 346 | SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); | ||
| 347 | const GPUVAddr gpu_addr = config.Address(); | ||
| 348 | return GetSurface(gpu_addr, params, true, false); | ||
| 349 | } | ||
| 350 | |||
    Core::System& system;

private:
    /// How to dispose of overlapping surfaces that cannot be matched to a candidate.
    enum class RecycleStrategy : u32 {
        Ignore = 0,     // Unregister the overlaps and load the new surface from memory.
        Flush = 1,      // Flush the overlaps to guest memory first, then load from it.
        BufferCopy = 3, // Move the data across through a host buffer copy.
    };
| 359 | |||
| 360 | /** | ||
| 361 | * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle. | ||
| 362 | * @param overlaps, the overlapping surfaces registered in the cache. | ||
| 363 | * @param params, the paremeters on the new surface. | ||
| 364 | * @param gpu_addr, the starting address of the new surface. | ||
| 365 | * @param untopological, tells the recycler that the texture has no way to match the overlaps | ||
| 366 | * due to topological reasons. | ||
| 367 | **/ | ||
| 368 | RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, | ||
| 369 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { | ||
| 370 | if (Settings::values.use_accurate_gpu_emulation) { | ||
| 371 | return RecycleStrategy::Flush; | ||
| 372 | } | ||
| 373 | // 3D Textures decision | ||
| 374 | if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { | ||
| 375 | return RecycleStrategy::Flush; | ||
| 376 | } | ||
| 377 | for (auto s : overlaps) { | ||
| 378 | const auto& s_params = s->GetSurfaceParams(); | ||
| 379 | if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { | ||
| 380 | return RecycleStrategy::Flush; | ||
| 381 | } | ||
| 382 | } | ||
| 383 | // Untopological decision | ||
| 384 | if (untopological == MatchTopologyResult::CompressUnmatch) { | ||
| 385 | return RecycleStrategy::Flush; | ||
| 386 | } | ||
| 387 | if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) { | ||
| 388 | return RecycleStrategy::Flush; | ||
| 389 | } | ||
| 390 | return RecycleStrategy::Ignore; | ||
| 391 | } | ||
| 392 | |||
    /**
     * Decides what to do with textures we can't resolve in the cache.
     * It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the
     * overlaps and loads the new texture. Flush flushes all the overlaps into memory and loads
     * the new surface from that data.
     * @param overlaps The overlapping surfaces registered in the cache.
     * @param params The parameters of the new surface.
     * @param gpu_addr The starting address of the new surface.
     * @param preserve_contents Tells if the new surface should be loaded from memory or
     * left blank.
     * @param untopological Tells the recycler that the texture has no way to match the overlaps
     * due to topological reasons.
     **/
    std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
                                              const SurfaceParams& params, const GPUVAddr gpu_addr,
                                              const bool preserve_contents,
                                              const MatchTopologyResult untopological) {
        // Only honor preserve_contents for the Ignore path under accurate GPU emulation;
        // in fast mode the reload is skipped as a performance trade-off.
        const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation;
        for (auto& surface : overlaps) {
            Unregister(surface);
        }
        switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
        case RecycleStrategy::Ignore: {
            return InitializeSurface(gpu_addr, params, do_load);
        }
        case RecycleStrategy::Flush: {
            // Flush in modification order so the most recent data lands last in guest memory.
            std::sort(overlaps.begin(), overlaps.end(),
                      [](const TSurface& a, const TSurface& b) -> bool {
                          return a->GetModificationTick() < b->GetModificationTick();
                      });
            for (auto& surface : overlaps) {
                FlushSurface(surface);
            }
            return InitializeSurface(gpu_addr, params, preserve_contents);
        }
        case RecycleStrategy::BufferCopy: {
            auto new_surface = GetUncachedSurface(gpu_addr, params);
            BufferCopy(overlaps[0], new_surface);
            return {new_surface, new_surface->GetMainView()};
        }
        default: {
            UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
            return InitializeSurface(gpu_addr, params, do_load);
        }
        }
    }
| 438 | |||
    /**
     * Takes a single surface and recreates it into another that may differ in
     * format, target or width alignment.
     * @param current_surface The registered surface in the cache which we want to convert.
     * @param params The new surface params which we'll use to recreate the surface.
     * @param is_render Whether the new surface will be used as a render target.
     **/
    std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
                                              bool is_render) {
        const auto gpu_addr = current_surface->GetGpuAddr();
        const auto& cr_params = current_surface->GetSurfaceParams();
        TSurface new_surface;
        // For non-render usage, keep the current sibling (bit-compatible) pixel format
        // so the data can be copied image-to-image instead of through a buffer.
        if (cr_params.pixel_format != params.pixel_format && !is_render &&
            GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
            SurfaceParams new_params = params;
            new_params.pixel_format = cr_params.pixel_format;
            new_params.component_type = cr_params.component_type;
            new_params.type = cr_params.type;
            new_surface = GetUncachedSurface(gpu_addr, new_params);
        } else {
            new_surface = GetUncachedSurface(gpu_addr, params);
        }
        const auto& final_params = new_surface->GetSurfaceParams();
        if (cr_params.type != final_params.type ||
            (cr_params.component_type != final_params.component_type)) {
            // Incompatible types: round-trip the data through a host buffer.
            BufferCopy(current_surface, new_surface);
        } else {
            // Compatible types: copy each subresource ("brick") directly on the GPU.
            std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
            for (auto& brick : bricks) {
                ImageCopy(current_surface, new_surface, brick);
            }
        }
        Unregister(current_surface);
        Register(new_surface);
        new_surface->MarkAsModified(current_surface->IsModified(), Tick());
        return {new_surface, new_surface->GetMainView()};
    }
| 475 | |||
    /**
     * Takes a single surface and checks it against the new surface's params. If it's an
     * exact match, we return the main view of the registered surface. If its formats
     * don't match, we rebuild the surface (we call this a `Mirage`). If formats match
     * but the targets don't, we create an overview View of the registered surface.
     * @param current_surface The registered surface in the cache which we want to convert.
     * @param params The new surface params which we want to check.
     * @param is_render Whether the surface will be used as a render target.
     **/
    std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
                                                     const SurfaceParams& params, bool is_render) {
        const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
        const bool matches_target = current_surface->MatchTarget(params.target);
        const auto match_check = [&]() -> std::pair<TSurface, TView> {
            if (matches_target) {
                return {current_surface, current_surface->GetMainView()};
            }
            // Same format but different target: expose it through an overview view.
            return {current_surface, current_surface->EmplaceOverview(params)};
        };
        if (!is_mirage) {
            return match_check();
        }
        // Sibling formats are bit-compatible outside rendering, so no rebuild is needed.
        if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) {
            return match_check();
        }
        return RebuildSurface(current_surface, params, is_render);
    }
| 502 | |||
| 503 | /** | ||
| 504 | * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface | ||
| 505 | * matches the candidate in some way, we got no guarantess here. We try to see if the overlaps | ||
| 506 | * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface | ||
| 507 | * for them, else we return nothing. | ||
| 508 | * @param overlaps, the overlapping surfaces registered in the cache. | ||
| 509 | * @param params, the paremeters on the new surface. | ||
| 510 | * @param gpu_addr, the starting address of the new surface. | ||
| 511 | **/ | ||
| 512 | std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, | ||
| 513 | const SurfaceParams& params, | ||
| 514 | const GPUVAddr gpu_addr) { | ||
| 515 | if (params.target == SurfaceTarget::Texture3D) { | ||
| 516 | return {}; | ||
| 517 | } | ||
| 518 | bool modified = false; | ||
| 519 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 520 | u32 passed_tests = 0; | ||
| 521 | for (auto& surface : overlaps) { | ||
| 522 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | ||
| 523 | if (src_params.is_layered || src_params.num_levels > 1) { | ||
| 524 | // We send this cases to recycle as they are more complex to handle | ||
| 525 | return {}; | ||
| 526 | } | ||
| 527 | const std::size_t candidate_size = surface->GetSizeInBytes(); | ||
| 528 | auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; | ||
| 529 | if (!mipmap_layer) { | ||
| 530 | continue; | ||
| 531 | } | ||
| 532 | const auto [layer, mipmap] = *mipmap_layer; | ||
| 533 | if (new_surface->GetMipmapSize(mipmap) != candidate_size) { | ||
| 534 | continue; | ||
| 535 | } | ||
| 536 | modified |= surface->IsModified(); | ||
| 537 | // Now we got all the data set up | ||
| 538 | const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); | ||
| 539 | const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); | ||
| 540 | const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1); | ||
| 541 | passed_tests++; | ||
| 542 | ImageCopy(surface, new_surface, copy_params); | ||
| 543 | } | ||
| 544 | if (passed_tests == 0) { | ||
| 545 | return {}; | ||
| 546 | // In Accurate GPU all tests should pass, else we recycle | ||
| 547 | } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { | ||
| 548 | return {}; | ||
| 549 | } | ||
| 550 | for (auto surface : overlaps) { | ||
| 551 | Unregister(surface); | ||
| 552 | } | ||
| 553 | new_surface->MarkAsModified(modified, Tick()); | ||
| 554 | Register(new_surface); | ||
| 555 | return {{new_surface, new_surface->GetMainView()}}; | ||
| 556 | } | ||
| 557 | |||
    /**
     * Gets the starting address and parameters of a candidate surface and tries
     * to find a matching surface within the cache. This is done in 3 big steps. The first is to
     * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
     * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from
     * memory else we move to step 3. Step 3 consists on figuring the relationship between the
     * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many
     * overlaps. If there's many, we just try to reconstruct a new surface out of them based on the
     * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to
     * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface
     * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface.
     * @param gpu_addr The starting address of the candidate surface.
     * @param params The parameters of the candidate surface.
     * @param preserve_contents Tells if the new surface should be loaded from memory or
     * left blank.
     * @param is_render Whether the candidate surface will be used as a render target.
     **/
    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
                                          bool preserve_contents, bool is_render) {
        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
        const auto cache_addr{ToCacheAddr(host_ptr)};

        // Step 0: guarantee a valid surface
        if (!cache_addr) {
            // Return a null surface if it's invalid
            SurfaceParams new_params = params;
            new_params.width = 1;
            new_params.height = 1;
            new_params.depth = 1;
            new_params.block_height = 0;
            new_params.block_depth = 0;
            return InitializeSurface(gpu_addr, new_params, false);
        }

        // Step 1
        // Check Level 1 Cache for a fast structural match. If candidate surface
        // matches at certain level we are pretty much done.
        if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
            TSurface& current_surface = iter->second;
            const auto topological_result = current_surface->MatchesTopology(params);
            if (topological_result != MatchTopologyResult::FullMatch) {
                std::vector<TSurface> overlaps{current_surface};
                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                      topological_result);
            }
            const auto struct_result = current_surface->MatchesStructure(params);
            if (struct_result != MatchStructureResult::None &&
                (params.target != SurfaceTarget::Texture3D ||
                 current_surface->MatchTarget(params.target))) {
                if (struct_result == MatchStructureResult::FullMatch) {
                    return ManageStructuralMatch(current_surface, params, is_render);
                } else {
                    return RebuildSurface(current_surface, params, is_render);
                }
            }
        }

        // Step 2
        // Obtain all possible overlaps in the memory region
        const std::size_t candidate_size = params.GetGuestSizeInBytes();
        auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};

        // If none are found, we are done. we just load the surface and create it.
        if (overlaps.empty()) {
            return InitializeSurface(gpu_addr, params, preserve_contents);
        }

        // Step 3
        // Now we need to figure the relationship between the texture and its overlaps
        // we do a topological test to ensure we can find some relationship. If it fails
        // immediately recycle the texture
        for (const auto& surface : overlaps) {
            const auto topological_result = surface->MatchesTopology(params);
            if (topological_result != MatchTopologyResult::FullMatch) {
                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                      topological_result);
            }
        }

        // Split cases between 1 overlap or many.
        if (overlaps.size() == 1) {
            TSurface current_surface = overlaps[0];
            // First check if the surface is within the overlap. If not, it means
            // two things either the candidate surface is a supertexture of the overlap
            // or they don't match in any known way.
            if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
                if (current_surface->GetGpuAddr() == gpu_addr) {
                    std::optional<std::pair<TSurface, TView>> view =
                        TryReconstructSurface(overlaps, params, gpu_addr);
                    if (view) {
                        return *view;
                    }
                }
                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                      MatchTopologyResult::FullMatch);
            }
            // Now we check if the candidate is a mipmap/layer of the overlap
            std::optional<TView> view =
                current_surface->EmplaceView(params, gpu_addr, candidate_size);
            if (view) {
                const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
                if (is_mirage) {
                    // On a mirage view, we need to recreate the surface under this new view
                    // and then obtain a view again.
                    SurfaceParams new_params = current_surface->GetSurfaceParams();
                    const u32 wh = SurfaceParams::ConvertWidth(
                        new_params.width, new_params.pixel_format, params.pixel_format);
                    const u32 hh = SurfaceParams::ConvertHeight(
                        new_params.height, new_params.pixel_format, params.pixel_format);
                    new_params.width = wh;
                    new_params.height = hh;
                    new_params.pixel_format = params.pixel_format;
                    std::pair<TSurface, TView> pair =
                        RebuildSurface(current_surface, new_params, is_render);
                    std::optional<TView> mirage_view =
                        pair.first->EmplaceView(params, gpu_addr, candidate_size);
                    if (mirage_view)
                        return {pair.first, *mirage_view};
                    return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                          MatchTopologyResult::FullMatch);
                }
                return {current_surface, *view};
            }
        } else {
            // If there are many overlaps, odds are they are subtextures of the candidate
            // surface. We try to construct a new surface based on the candidate parameters,
            // using the overlaps. If a single overlap fails, this will fail.
            std::optional<std::pair<TSurface, TView>> view =
                TryReconstructSurface(overlaps, params, gpu_addr);
            if (view) {
                return *view;
            }
        }
        // We failed all the tests, recycle the overlaps into a new texture.
        return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                              MatchTopologyResult::FullMatch);
    }
| 693 | |||
| 694 | std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, | ||
| 695 | bool preserve_contents) { | ||
| 696 | auto new_surface{GetUncachedSurface(gpu_addr, params)}; | ||
| 697 | Register(new_surface); | ||
| 698 | if (preserve_contents) { | ||
| 699 | LoadSurface(new_surface); | ||
| 700 | } | ||
| 701 | return {new_surface, new_surface->GetMainView()}; | ||
| 702 | } | ||
| 703 | |||
| 704 | void LoadSurface(const TSurface& surface) { | ||
| 705 | staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); | ||
| 706 | surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache); | ||
| 707 | surface->UploadTexture(staging_cache.GetBuffer(0)); | ||
| 708 | surface->MarkAsModified(false, Tick()); | ||
| 709 | } | ||
| 710 | |||
| 711 | void FlushSurface(const TSurface& surface) { | ||
| 712 | if (!surface->IsModified()) { | ||
| 713 | return; | ||
| 714 | } | ||
| 715 | staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); | ||
| 716 | surface->DownloadTexture(staging_cache.GetBuffer(0)); | ||
| 717 | surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache); | ||
| 718 | surface->MarkAsModified(false, Tick()); | ||
| 719 | } | ||
| 720 | |||
| 721 | void RegisterInnerCache(TSurface& surface) { | ||
| 722 | const CacheAddr cache_addr = surface->GetCacheAddr(); | ||
| 723 | CacheAddr start = cache_addr >> registry_page_bits; | ||
| 724 | const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; | ||
| 725 | l1_cache[cache_addr] = surface; | ||
| 726 | while (start <= end) { | ||
| 727 | registry[start].push_back(surface); | ||
| 728 | start++; | ||
| 729 | } | ||
| 730 | } | ||
| 731 | |||
| 732 | void UnregisterInnerCache(TSurface& surface) { | ||
| 733 | const CacheAddr cache_addr = surface->GetCacheAddr(); | ||
| 734 | CacheAddr start = cache_addr >> registry_page_bits; | ||
| 735 | const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; | ||
| 736 | l1_cache.erase(cache_addr); | ||
| 737 | while (start <= end) { | ||
| 738 | auto& reg{registry[start]}; | ||
| 739 | reg.erase(std::find(reg.begin(), reg.end(), surface)); | ||
| 740 | start++; | ||
| 741 | } | ||
| 742 | } | ||
| 743 | |||
| 744 | std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { | ||
| 745 | if (size == 0) { | ||
| 746 | return {}; | ||
| 747 | } | ||
| 748 | const CacheAddr cache_addr_end = cache_addr + size; | ||
| 749 | CacheAddr start = cache_addr >> registry_page_bits; | ||
| 750 | const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; | ||
| 751 | std::vector<TSurface> surfaces; | ||
| 752 | while (start <= end) { | ||
| 753 | std::vector<TSurface>& list = registry[start]; | ||
| 754 | for (auto& surface : list) { | ||
| 755 | if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { | ||
| 756 | surface->MarkAsPicked(true); | ||
| 757 | surfaces.push_back(surface); | ||
| 758 | } | ||
| 759 | } | ||
| 760 | start++; | ||
| 761 | } | ||
| 762 | for (auto& surface : surfaces) { | ||
| 763 | surface->MarkAsPicked(false); | ||
| 764 | } | ||
| 765 | return surfaces; | ||
| 766 | } | ||
| 767 | |||
| 768 | void ReserveSurface(const SurfaceParams& params, TSurface surface) { | ||
| 769 | surface_reserve[params].push_back(std::move(surface)); | ||
| 770 | } | ||
| 771 | |||
| 772 | TSurface TryGetReservedSurface(const SurfaceParams& params) { | ||
| 773 | auto search{surface_reserve.find(params)}; | ||
| 774 | if (search == surface_reserve.end()) { | ||
| 775 | return {}; | ||
| 776 | } | ||
| 777 | for (auto& surface : search->second) { | ||
| 778 | if (!surface->IsRegistered()) { | ||
| 779 | return surface; | ||
| 780 | } | ||
| 781 | } | ||
| 782 | return {}; | ||
| 783 | } | ||
| 784 | |||
    /// Returns the interchangeable ("sibling") pixel format registered for `format`,
    /// or PixelFormat::Invalid when no sibling exists (see the constructor's table).
    constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
        return siblings_table[static_cast<std::size_t>(format)];
    }
| 788 | |||
    /// A framebuffer attachment binding: the owning surface plus the bound view.
    struct FramebufferTargetInfo {
        TSurface target;
        TView view;
    };

    VideoCore::RasterizerInterface& rasterizer;

    // Monotonic counter used to order surface modifications (advanced by Tick()).
    u64 ticks{};

    // Guards the cache for protection conflicts.
    bool guard_render_targets{};
    bool guard_samplers{};

    // The siblings table is for formats that can inter exchange with one another
    // without causing issues. This is only valid when a conflict occurs on a non
    // rendering use.
    std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table;

    // The internal cache is different for the texture cache. It's based on buckets
    // of 1MB. This fits better for the purpose of this cache as textures are normally
    // large in size.
    static constexpr u64 registry_page_bits{20};
    static constexpr u64 registry_page_size{1 << registry_page_bits};
    std::unordered_map<CacheAddr, std::vector<TSurface>> registry;

    // Sentinel render-target indices: DEPTH_RT marks the depth attachment,
    // NO_RT marks a surface not bound as a render target.
    static constexpr u32 DEPTH_RT = 8;
    static constexpr u32 NO_RT = 0xFFFFFFFF;

    // The L1 cache is used for fast texture lookup before checking the overlaps.
    // This avoids calculating sizes and other work.
    std::unordered_map<CacheAddr, TSurface> l1_cache;

    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
    /// previously been used. This is to prevent surfaces from being constantly created and
    /// destroyed when used with different surface parameters.
    std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve;
    std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
        render_targets;
    FramebufferTargetInfo depth_buffer;

    // Surfaces sampled since the last TextureBarrier() check (see guard_samplers).
    std::vector<TSurface> sampled_textures;
| 830 | |||
| 831 | StagingCache staging_cache; | ||
| 832 | std::recursive_mutex mutex; | ||
| 833 | }; | ||
| 834 | |||
| 835 | } // namespace VideoCommon | ||
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp index 82050bd51..f3efa7eb0 100644 --- a/src/video_core/textures/convert.cpp +++ b/src/video_core/textures/convert.cpp | |||
| @@ -62,19 +62,19 @@ static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) { | |||
| 62 | SwapS8Z24ToZ24S8<true>(data, width, height); | 62 | SwapS8Z24ToZ24S8<true>(data, width, height); |
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, | 65 | void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width, |
| 66 | bool convert_astc, bool convert_s8z24) { | 66 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24) { |
| 67 | if (convert_astc && IsPixelFormatASTC(pixel_format)) { | 67 | if (convert_astc && IsPixelFormatASTC(pixel_format)) { |
| 68 | // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. | 68 | // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. |
| 69 | u32 block_width{}; | 69 | u32 block_width{}; |
| 70 | u32 block_height{}; | 70 | u32 block_height{}; |
| 71 | std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); | 71 | std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); |
| 72 | const std::vector<u8> rgba8_data = | 72 | const std::vector<u8> rgba8_data = Tegra::Texture::ASTC::Decompress( |
| 73 | Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height); | 73 | in_data, width, height, depth, block_width, block_height); |
| 74 | std::copy(rgba8_data.begin(), rgba8_data.end(), data); | 74 | std::copy(rgba8_data.begin(), rgba8_data.end(), out_data); |
| 75 | 75 | ||
| 76 | } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { | 76 | } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { |
| 77 | Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height); | 77 | Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height); |
| 78 | } | 78 | } |
| 79 | } | 79 | } |
| 80 | 80 | ||
| @@ -90,4 +90,4 @@ void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 h | |||
| 90 | } | 90 | } |
| 91 | } | 91 | } |
| 92 | 92 | ||
| 93 | } // namespace Tegra::Texture \ No newline at end of file | 93 | } // namespace Tegra::Texture |
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h index 12542e71c..d5d6c77bb 100644 --- a/src/video_core/textures/convert.h +++ b/src/video_core/textures/convert.h | |||
| @@ -12,10 +12,11 @@ enum class PixelFormat; | |||
| 12 | 12 | ||
| 13 | namespace Tegra::Texture { | 13 | namespace Tegra::Texture { |
| 14 | 14 | ||
| 15 | void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, | 15 | void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format, |
| 16 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24); | 16 | u32 width, u32 height, u32 depth, bool convert_astc, |
| 17 | bool convert_s8z24); | ||
| 17 | 18 | ||
| 18 | void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, | 19 | void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, |
| 19 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24); | 20 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24); |
| 20 | 21 | ||
| 21 | } // namespace Tegra::Texture \ No newline at end of file | 22 | } // namespace Tegra::Texture |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 217805386..7df5f1452 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -36,10 +36,16 @@ struct alignas(64) SwizzleTable { | |||
| 36 | std::array<std::array<u16, M>, N> values{}; | 36 | std::array<std::array<u16, M>, N> values{}; |
| 37 | }; | 37 | }; |
| 38 | 38 | ||
| 39 | constexpr u32 gob_size_x = 64; | 39 | constexpr u32 gob_size_x_shift = 6; |
| 40 | constexpr u32 gob_size_y = 8; | 40 | constexpr u32 gob_size_y_shift = 3; |
| 41 | constexpr u32 gob_size_z = 1; | 41 | constexpr u32 gob_size_z_shift = 0; |
| 42 | constexpr u32 gob_size = gob_size_x * gob_size_y * gob_size_z; | 42 | constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift; |
| 43 | |||
| 44 | constexpr u32 gob_size_x = 1U << gob_size_x_shift; | ||
| 45 | constexpr u32 gob_size_y = 1U << gob_size_y_shift; | ||
| 46 | constexpr u32 gob_size_z = 1U << gob_size_z_shift; | ||
| 47 | constexpr u32 gob_size = 1U << gob_size_shift; | ||
| 48 | |||
| 43 | constexpr u32 fast_swizzle_align = 16; | 49 | constexpr u32 fast_swizzle_align = 16; |
| 44 | 50 | ||
| 45 | constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>(); | 51 | constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>(); |
| @@ -171,14 +177,16 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool | |||
| 171 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | 177 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, |
| 172 | u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, | 178 | u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, |
| 173 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { | 179 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { |
| 180 | const u32 block_height_size{1U << block_height}; | ||
| 181 | const u32 block_depth_size{1U << block_depth}; | ||
| 174 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { | 182 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { |
| 175 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | 183 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, |
| 176 | bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, | 184 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, |
| 177 | width_spacing); | 185 | block_depth_size, width_spacing); |
| 178 | } else { | 186 | } else { |
| 179 | SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | 187 | SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, |
| 180 | bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, | 188 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, |
| 181 | width_spacing); | 189 | block_depth_size, width_spacing); |
| 182 | } | 190 | } |
| 183 | } | 191 | } |
| 184 | 192 | ||
| @@ -248,18 +256,22 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, | |||
| 248 | } | 256 | } |
| 249 | 257 | ||
| 250 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 258 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 251 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) { | 259 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, |
| 260 | u32 block_height_bit, u32 offset_x, u32 offset_y) { | ||
| 261 | const u32 block_height = 1U << block_height_bit; | ||
| 252 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / | 262 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / |
| 253 | gob_size_x}; | 263 | gob_size_x}; |
| 254 | for (u32 line = 0; line < subrect_height; ++line) { | 264 | for (u32 line = 0; line < subrect_height; ++line) { |
| 265 | const u32 dst_y = line + offset_y; | ||
| 255 | const u32 gob_address_y = | 266 | const u32 gob_address_y = |
| 256 | (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + | 267 | (dst_y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + |
| 257 | ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size; | 268 | ((dst_y % (gob_size_y * block_height)) / gob_size_y) * gob_size; |
| 258 | const auto& table = legacy_swizzle_table[line % gob_size_y]; | 269 | const auto& table = legacy_swizzle_table[dst_y % gob_size_y]; |
| 259 | for (u32 x = 0; x < subrect_width; ++x) { | 270 | for (u32 x = 0; x < subrect_width; ++x) { |
| 271 | const u32 dst_x = x + offset_x; | ||
| 260 | const u32 gob_address = | 272 | const u32 gob_address = |
| 261 | gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; | 273 | gob_address_y + (dst_x * bytes_per_pixel / gob_size_x) * gob_size * block_height; |
| 262 | const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; | 274 | const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % gob_size_x]; |
| 263 | u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; | 275 | u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; |
| 264 | u8* dest_addr = swizzled_data + swizzled_offset; | 276 | u8* dest_addr = swizzled_data + swizzled_offset; |
| 265 | 277 | ||
| @@ -269,8 +281,9 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 | |||
| 269 | } | 281 | } |
| 270 | 282 | ||
| 271 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | 283 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, |
| 272 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, | 284 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, |
| 273 | u32 offset_x, u32 offset_y) { | 285 | u32 block_height_bit, u32 offset_x, u32 offset_y) { |
| 286 | const u32 block_height = 1U << block_height_bit; | ||
| 274 | for (u32 line = 0; line < subrect_height; ++line) { | 287 | for (u32 line = 0; line < subrect_height; ++line) { |
| 275 | const u32 y2 = line + offset_y; | 288 | const u32 y2 = line + offset_y; |
| 276 | const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + | 289 | const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + |
| @@ -289,8 +302,9 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 289 | } | 302 | } |
| 290 | 303 | ||
| 291 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | 304 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, |
| 292 | const u32 block_height, const std::size_t copy_size, const u8* source_data, | 305 | const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, |
| 293 | u8* swizzle_data) { | 306 | u8* swizzle_data) { |
| 307 | const u32 block_height = 1U << block_height_bit; | ||
| 294 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; | 308 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; |
| 295 | std::size_t count = 0; | 309 | std::size_t count = 0; |
| 296 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { | 310 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { |
| @@ -356,9 +370,9 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat | |||
| 356 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 370 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 357 | u32 block_height, u32 block_depth) { | 371 | u32 block_height, u32 block_depth) { |
| 358 | if (tiled) { | 372 | if (tiled) { |
| 359 | const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gob_size_x); | 373 | const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift); |
| 360 | const u32 aligned_height = Common::AlignUp(height, gob_size_y * block_height); | 374 | const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height); |
| 361 | const u32 aligned_depth = Common::AlignUp(depth, gob_size_z * block_depth); | 375 | const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth); |
| 362 | return aligned_width * aligned_height * aligned_depth; | 376 | return aligned_width * aligned_height * aligned_depth; |
| 363 | } else { | 377 | } else { |
| 364 | return width * height * depth * bytes_per_pixel; | 378 | return width * height * depth * bytes_per_pixel; |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index e072d8401..f1e3952bc 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -12,8 +12,8 @@ namespace Tegra::Texture { | |||
| 12 | 12 | ||
| 13 | // GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents | 13 | // GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents |
| 14 | // an small rect of (64/bytes_per_pixel)X8. | 14 | // an small rect of (64/bytes_per_pixel)X8. |
| 15 | inline std::size_t GetGOBSize() { | 15 | inline std::size_t GetGOBSizeShift() { |
| 16 | return 512; | 16 | return 9; |
| 17 | } | 17 | } |
| 18 | 18 | ||
| 19 | /// Unswizzles a swizzled texture without changing its format. | 19 | /// Unswizzles a swizzled texture without changing its format. |
| @@ -44,7 +44,8 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height | |||
| 44 | 44 | ||
| 45 | /// Copies an untiled subrectangle into a tiled surface. | 45 | /// Copies an untiled subrectangle into a tiled surface. |
| 46 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 46 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 47 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height); | 47 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, |
| 48 | u32 offset_x, u32 offset_y); | ||
| 48 | 49 | ||
| 49 | /// Copies a tiled subrectangle into a linear surface. | 50 | /// Copies a tiled subrectangle into a linear surface. |
| 50 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | 51 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, |
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 219bfd559..e36bc2c04 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h | |||
| @@ -52,9 +52,9 @@ enum class TextureFormat : u32 { | |||
| 52 | DXT45 = 0x26, | 52 | DXT45 = 0x26, |
| 53 | DXN1 = 0x27, | 53 | DXN1 = 0x27, |
| 54 | DXN2 = 0x28, | 54 | DXN2 = 0x28, |
| 55 | Z24S8 = 0x29, | 55 | S8Z24 = 0x29, |
| 56 | X8Z24 = 0x2a, | 56 | X8Z24 = 0x2a, |
| 57 | S8Z24 = 0x2b, | 57 | Z24S8 = 0x2b, |
| 58 | X4V4Z24__COV4R4V = 0x2c, | 58 | X4V4Z24__COV4R4V = 0x2c, |
| 59 | X4V4Z24__COV8R8V = 0x2d, | 59 | X4V4Z24__COV8R8V = 0x2d, |
| 60 | V8Z24__COV4R12V = 0x2e, | 60 | V8Z24__COV4R12V = 0x2e, |
| @@ -172,12 +172,16 @@ struct TICEntry { | |||
| 172 | BitField<26, 1, u32> use_header_opt_control; | 172 | BitField<26, 1, u32> use_header_opt_control; |
| 173 | BitField<27, 1, u32> depth_texture; | 173 | BitField<27, 1, u32> depth_texture; |
| 174 | BitField<28, 4, u32> max_mip_level; | 174 | BitField<28, 4, u32> max_mip_level; |
| 175 | |||
| 176 | BitField<0, 16, u32> buffer_high_width_minus_one; | ||
| 175 | }; | 177 | }; |
| 176 | union { | 178 | union { |
| 177 | BitField<0, 16, u32> width_minus_1; | 179 | BitField<0, 16, u32> width_minus_1; |
| 178 | BitField<22, 1, u32> srgb_conversion; | 180 | BitField<22, 1, u32> srgb_conversion; |
| 179 | BitField<23, 4, TextureType> texture_type; | 181 | BitField<23, 4, TextureType> texture_type; |
| 180 | BitField<29, 3, u32> border_size; | 182 | BitField<29, 3, u32> border_size; |
| 183 | |||
| 184 | BitField<0, 16, u32> buffer_low_width_minus_one; | ||
| 181 | }; | 185 | }; |
| 182 | union { | 186 | union { |
| 183 | BitField<0, 16, u32> height_minus_1; | 187 | BitField<0, 16, u32> height_minus_1; |
| @@ -206,7 +210,10 @@ struct TICEntry { | |||
| 206 | } | 210 | } |
| 207 | 211 | ||
| 208 | u32 Width() const { | 212 | u32 Width() const { |
| 209 | return width_minus_1 + 1; | 213 | if (header_version != TICHeaderVersion::OneDBuffer) { |
| 214 | return width_minus_1 + 1; | ||
| 215 | } | ||
| 216 | return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1; | ||
| 210 | } | 217 | } |
| 211 | 218 | ||
| 212 | u32 Height() const { | 219 | u32 Height() const { |
| @@ -219,20 +226,17 @@ struct TICEntry { | |||
| 219 | 226 | ||
| 220 | u32 BlockWidth() const { | 227 | u32 BlockWidth() const { |
| 221 | ASSERT(IsTiled()); | 228 | ASSERT(IsTiled()); |
| 222 | // The block height is stored in log2 format. | 229 | return block_width; |
| 223 | return 1 << block_width; | ||
| 224 | } | 230 | } |
| 225 | 231 | ||
| 226 | u32 BlockHeight() const { | 232 | u32 BlockHeight() const { |
| 227 | ASSERT(IsTiled()); | 233 | ASSERT(IsTiled()); |
| 228 | // The block height is stored in log2 format. | 234 | return block_height; |
| 229 | return 1 << block_height; | ||
| 230 | } | 235 | } |
| 231 | 236 | ||
| 232 | u32 BlockDepth() const { | 237 | u32 BlockDepth() const { |
| 233 | ASSERT(IsTiled()); | 238 | ASSERT(IsTiled()); |
| 234 | // The block height is stored in log2 format. | 239 | return block_depth; |
| 235 | return 1 << block_depth; | ||
| 236 | } | 240 | } |
| 237 | 241 | ||
| 238 | bool IsTiled() const { | 242 | bool IsTiled() const { |
| @@ -240,6 +244,15 @@ struct TICEntry { | |||
| 240 | header_version == TICHeaderVersion::BlockLinearColorKey; | 244 | header_version == TICHeaderVersion::BlockLinearColorKey; |
| 241 | } | 245 | } |
| 242 | 246 | ||
| 247 | bool IsLineal() const { | ||
| 248 | return header_version == TICHeaderVersion::Pitch || | ||
| 249 | header_version == TICHeaderVersion::PitchColorKey; | ||
| 250 | } | ||
| 251 | |||
| 252 | bool IsBuffer() const { | ||
| 253 | return header_version == TICHeaderVersion::OneDBuffer; | ||
| 254 | } | ||
| 255 | |||
| 243 | bool IsSrgbConversionEnabled() const { | 256 | bool IsSrgbConversionEnabled() const { |
| 244 | return srgb_conversion != 0; | 257 | return srgb_conversion != 0; |
| 245 | } | 258 | } |
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index 3dc0e47d0..f051e17b4 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | set(CMAKE_AUTOMOC ON) | 1 | set(CMAKE_AUTOMOC ON) |
| 2 | set(CMAKE_AUTORCC ON) | 2 | set(CMAKE_AUTORCC ON) |
| 3 | set(CMAKE_AUTOUIC ON) | ||
| 3 | set(CMAKE_INCLUDE_CURRENT_DIR ON) | 4 | set(CMAKE_INCLUDE_CURRENT_DIR ON) |
| 4 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules) | 5 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules) |
| 5 | 6 | ||
| @@ -7,6 +8,7 @@ add_executable(yuzu | |||
| 7 | Info.plist | 8 | Info.plist |
| 8 | about_dialog.cpp | 9 | about_dialog.cpp |
| 9 | about_dialog.h | 10 | about_dialog.h |
| 11 | aboutdialog.ui | ||
| 10 | applets/error.cpp | 12 | applets/error.cpp |
| 11 | applets/error.h | 13 | applets/error.h |
| 12 | applets/profile_select.cpp | 14 | applets/profile_select.cpp |
| @@ -17,42 +19,59 @@ add_executable(yuzu | |||
| 17 | applets/web_browser.h | 19 | applets/web_browser.h |
| 18 | bootmanager.cpp | 20 | bootmanager.cpp |
| 19 | bootmanager.h | 21 | bootmanager.h |
| 22 | compatdb.ui | ||
| 20 | compatibility_list.cpp | 23 | compatibility_list.cpp |
| 21 | compatibility_list.h | 24 | compatibility_list.h |
| 22 | configuration/config.cpp | 25 | configuration/config.cpp |
| 23 | configuration/config.h | 26 | configuration/config.h |
| 27 | configuration/configure.ui | ||
| 24 | configuration/configure_audio.cpp | 28 | configuration/configure_audio.cpp |
| 25 | configuration/configure_audio.h | 29 | configuration/configure_audio.h |
| 30 | configuration/configure_audio.ui | ||
| 26 | configuration/configure_debug.cpp | 31 | configuration/configure_debug.cpp |
| 27 | configuration/configure_debug.h | 32 | configuration/configure_debug.h |
| 33 | configuration/configure_debug.ui | ||
| 28 | configuration/configure_dialog.cpp | 34 | configuration/configure_dialog.cpp |
| 29 | configuration/configure_dialog.h | 35 | configuration/configure_dialog.h |
| 30 | configuration/configure_gamelist.cpp | 36 | configuration/configure_gamelist.cpp |
| 31 | configuration/configure_gamelist.h | 37 | configuration/configure_gamelist.h |
| 38 | configuration/configure_gamelist.ui | ||
| 32 | configuration/configure_general.cpp | 39 | configuration/configure_general.cpp |
| 33 | configuration/configure_general.h | 40 | configuration/configure_general.h |
| 41 | configuration/configure_general.ui | ||
| 34 | configuration/configure_graphics.cpp | 42 | configuration/configure_graphics.cpp |
| 35 | configuration/configure_graphics.h | 43 | configuration/configure_graphics.h |
| 44 | configuration/configure_graphics.ui | ||
| 36 | configuration/configure_hotkeys.cpp | 45 | configuration/configure_hotkeys.cpp |
| 37 | configuration/configure_hotkeys.h | 46 | configuration/configure_hotkeys.h |
| 47 | configuration/configure_hotkeys.ui | ||
| 38 | configuration/configure_input.cpp | 48 | configuration/configure_input.cpp |
| 39 | configuration/configure_input.h | 49 | configuration/configure_input.h |
| 50 | configuration/configure_input.ui | ||
| 40 | configuration/configure_input_player.cpp | 51 | configuration/configure_input_player.cpp |
| 41 | configuration/configure_input_player.h | 52 | configuration/configure_input_player.h |
| 53 | configuration/configure_input_player.ui | ||
| 42 | configuration/configure_input_simple.cpp | 54 | configuration/configure_input_simple.cpp |
| 43 | configuration/configure_input_simple.h | 55 | configuration/configure_input_simple.h |
| 56 | configuration/configure_input_simple.ui | ||
| 44 | configuration/configure_mouse_advanced.cpp | 57 | configuration/configure_mouse_advanced.cpp |
| 45 | configuration/configure_mouse_advanced.h | 58 | configuration/configure_mouse_advanced.h |
| 59 | configuration/configure_mouse_advanced.ui | ||
| 60 | configuration/configure_per_general.cpp | ||
| 61 | configuration/configure_per_general.h | ||
| 62 | configuration/configure_per_general.ui | ||
| 46 | configuration/configure_profile_manager.cpp | 63 | configuration/configure_profile_manager.cpp |
| 47 | configuration/configure_profile_manager.h | 64 | configuration/configure_profile_manager.h |
| 65 | configuration/configure_profile_manager.ui | ||
| 48 | configuration/configure_system.cpp | 66 | configuration/configure_system.cpp |
| 49 | configuration/configure_system.h | 67 | configuration/configure_system.h |
| 50 | configuration/configure_per_general.cpp | 68 | configuration/configure_system.ui |
| 51 | configuration/configure_per_general.h | ||
| 52 | configuration/configure_touchscreen_advanced.cpp | 69 | configuration/configure_touchscreen_advanced.cpp |
| 53 | configuration/configure_touchscreen_advanced.h | 70 | configuration/configure_touchscreen_advanced.h |
| 71 | configuration/configure_touchscreen_advanced.ui | ||
| 54 | configuration/configure_web.cpp | 72 | configuration/configure_web.cpp |
| 55 | configuration/configure_web.h | 73 | configuration/configure_web.h |
| 74 | configuration/configure_web.ui | ||
| 56 | debugger/graphics/graphics_breakpoint_observer.cpp | 75 | debugger/graphics/graphics_breakpoint_observer.cpp |
| 57 | debugger/graphics/graphics_breakpoint_observer.h | 76 | debugger/graphics/graphics_breakpoint_observer.h |
| 58 | debugger/graphics/graphics_breakpoints.cpp | 77 | debugger/graphics/graphics_breakpoints.cpp |
| @@ -72,12 +91,14 @@ add_executable(yuzu | |||
| 72 | game_list_worker.h | 91 | game_list_worker.h |
| 73 | loading_screen.cpp | 92 | loading_screen.cpp |
| 74 | loading_screen.h | 93 | loading_screen.h |
| 94 | loading_screen.ui | ||
| 75 | hotkeys.cpp | 95 | hotkeys.cpp |
| 76 | hotkeys.h | 96 | hotkeys.h |
| 77 | main.cpp | 97 | main.cpp |
| 78 | main.h | 98 | main.h |
| 79 | ui_settings.cpp | 99 | main.ui |
| 80 | ui_settings.h | 100 | uisettings.cpp |
| 101 | uisettings.h | ||
| 81 | util/limitable_input_dialog.cpp | 102 | util/limitable_input_dialog.cpp |
| 82 | util/limitable_input_dialog.h | 103 | util/limitable_input_dialog.h |
| 83 | util/sequence_dialog/sequence_dialog.cpp | 104 | util/sequence_dialog/sequence_dialog.cpp |
| @@ -89,44 +110,18 @@ add_executable(yuzu | |||
| 89 | yuzu.rc | 110 | yuzu.rc |
| 90 | ) | 111 | ) |
| 91 | 112 | ||
| 92 | set(UIS | ||
| 93 | aboutdialog.ui | ||
| 94 | configuration/configure.ui | ||
| 95 | configuration/configure_audio.ui | ||
| 96 | configuration/configure_debug.ui | ||
| 97 | configuration/configure_gamelist.ui | ||
| 98 | configuration/configure_general.ui | ||
| 99 | configuration/configure_graphics.ui | ||
| 100 | configuration/configure_hotkeys.ui | ||
| 101 | configuration/configure_input.ui | ||
| 102 | configuration/configure_input_player.ui | ||
| 103 | configuration/configure_input_simple.ui | ||
| 104 | configuration/configure_mouse_advanced.ui | ||
| 105 | configuration/configure_per_general.ui | ||
| 106 | configuration/configure_profile_manager.ui | ||
| 107 | configuration/configure_system.ui | ||
| 108 | configuration/configure_touchscreen_advanced.ui | ||
| 109 | configuration/configure_web.ui | ||
| 110 | compatdb.ui | ||
| 111 | loading_screen.ui | ||
| 112 | main.ui | ||
| 113 | ) | ||
| 114 | |||
| 115 | file(GLOB COMPAT_LIST | 113 | file(GLOB COMPAT_LIST |
| 116 | ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.qrc | 114 | ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.qrc |
| 117 | ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.json) | 115 | ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.json) |
| 118 | file(GLOB_RECURSE ICONS ${PROJECT_SOURCE_DIR}/dist/icons/*) | 116 | file(GLOB_RECURSE ICONS ${PROJECT_SOURCE_DIR}/dist/icons/*) |
| 119 | file(GLOB_RECURSE THEMES ${PROJECT_SOURCE_DIR}/dist/qt_themes/*) | 117 | file(GLOB_RECURSE THEMES ${PROJECT_SOURCE_DIR}/dist/qt_themes/*) |
| 120 | 118 | ||
| 121 | qt5_wrap_ui(UI_HDRS ${UIS}) | ||
| 122 | 119 | ||
| 123 | target_sources(yuzu | 120 | target_sources(yuzu |
| 124 | PRIVATE | 121 | PRIVATE |
| 125 | ${COMPAT_LIST} | 122 | ${COMPAT_LIST} |
| 126 | ${ICONS} | 123 | ${ICONS} |
| 127 | ${THEMES} | 124 | ${THEMES} |
| 128 | ${UI_HDRS} | ||
| 129 | ${UIS} | ||
| 130 | ) | 125 | ) |
| 131 | 126 | ||
| 132 | if (APPLE) | 127 | if (APPLE) |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 5a456e603..f594106bf 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | #include "core/hle/service/hid/controllers/npad.h" | 11 | #include "core/hle/service/hid/controllers/npad.h" |
| 12 | #include "input_common/main.h" | 12 | #include "input_common/main.h" |
| 13 | #include "yuzu/configuration/config.h" | 13 | #include "yuzu/configuration/config.h" |
| 14 | #include "yuzu/ui_settings.h" | 14 | #include "yuzu/uisettings.h" |
| 15 | 15 | ||
| 16 | Config::Config() { | 16 | Config::Config() { |
| 17 | // TODO: Don't hardcode the path; let the frontend decide where to put the config files. | 17 | // TODO: Don't hardcode the path; let the frontend decide where to put the config files. |
| @@ -436,7 +436,6 @@ void Config::ReadControlValues() { | |||
| 436 | void Config::ReadCoreValues() { | 436 | void Config::ReadCoreValues() { |
| 437 | qt_config->beginGroup(QStringLiteral("Core")); | 437 | qt_config->beginGroup(QStringLiteral("Core")); |
| 438 | 438 | ||
| 439 | Settings::values.use_cpu_jit = ReadSetting(QStringLiteral("use_cpu_jit"), true).toBool(); | ||
| 440 | Settings::values.use_multi_core = ReadSetting(QStringLiteral("use_multi_core"), false).toBool(); | 439 | Settings::values.use_multi_core = ReadSetting(QStringLiteral("use_multi_core"), false).toBool(); |
| 441 | 440 | ||
| 442 | qt_config->endGroup(); | 441 | qt_config->endGroup(); |
| @@ -475,6 +474,7 @@ void Config::ReadDebuggingValues() { | |||
| 475 | Settings::values.dump_nso = ReadSetting(QStringLiteral("dump_nso"), false).toBool(); | 474 | Settings::values.dump_nso = ReadSetting(QStringLiteral("dump_nso"), false).toBool(); |
| 476 | Settings::values.reporting_services = | 475 | Settings::values.reporting_services = |
| 477 | ReadSetting(QStringLiteral("reporting_services"), false).toBool(); | 476 | ReadSetting(QStringLiteral("reporting_services"), false).toBool(); |
| 477 | Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool(); | ||
| 478 | 478 | ||
| 479 | qt_config->endGroup(); | 479 | qt_config->endGroup(); |
| 480 | } | 480 | } |
| @@ -516,10 +516,38 @@ void Config::ReadPathValues() { | |||
| 516 | 516 | ||
| 517 | UISettings::values.roms_path = ReadSetting(QStringLiteral("romsPath")).toString(); | 517 | UISettings::values.roms_path = ReadSetting(QStringLiteral("romsPath")).toString(); |
| 518 | UISettings::values.symbols_path = ReadSetting(QStringLiteral("symbolsPath")).toString(); | 518 | UISettings::values.symbols_path = ReadSetting(QStringLiteral("symbolsPath")).toString(); |
| 519 | UISettings::values.game_directory_path = | 519 | UISettings::values.screenshot_path = ReadSetting(QStringLiteral("screenshotPath")).toString(); |
| 520 | UISettings::values.game_dir_deprecated = | ||
| 520 | ReadSetting(QStringLiteral("gameListRootDir"), QStringLiteral(".")).toString(); | 521 | ReadSetting(QStringLiteral("gameListRootDir"), QStringLiteral(".")).toString(); |
| 521 | UISettings::values.game_directory_deepscan = | 522 | UISettings::values.game_dir_deprecated_deepscan = |
| 522 | ReadSetting(QStringLiteral("gameListDeepScan"), false).toBool(); | 523 | ReadSetting(QStringLiteral("gameListDeepScan"), false).toBool(); |
| 524 | const int gamedirs_size = qt_config->beginReadArray(QStringLiteral("gamedirs")); | ||
| 525 | for (int i = 0; i < gamedirs_size; ++i) { | ||
| 526 | qt_config->setArrayIndex(i); | ||
| 527 | UISettings::GameDir game_dir; | ||
| 528 | game_dir.path = ReadSetting(QStringLiteral("path")).toString(); | ||
| 529 | game_dir.deep_scan = ReadSetting(QStringLiteral("deep_scan"), false).toBool(); | ||
| 530 | game_dir.expanded = ReadSetting(QStringLiteral("expanded"), true).toBool(); | ||
| 531 | UISettings::values.game_dirs.append(game_dir); | ||
| 532 | } | ||
| 533 | qt_config->endArray(); | ||
| 534 | // create NAND and SD card directories if empty, these are not removable through the UI, | ||
| 535 | // also carries over old game list settings if present | ||
| 536 | if (UISettings::values.game_dirs.isEmpty()) { | ||
| 537 | UISettings::GameDir game_dir; | ||
| 538 | game_dir.path = QStringLiteral("SDMC"); | ||
| 539 | game_dir.expanded = true; | ||
| 540 | UISettings::values.game_dirs.append(game_dir); | ||
| 541 | game_dir.path = QStringLiteral("UserNAND"); | ||
| 542 | UISettings::values.game_dirs.append(game_dir); | ||
| 543 | game_dir.path = QStringLiteral("SysNAND"); | ||
| 544 | UISettings::values.game_dirs.append(game_dir); | ||
| 545 | if (UISettings::values.game_dir_deprecated != QStringLiteral(".")) { | ||
| 546 | game_dir.path = UISettings::values.game_dir_deprecated; | ||
| 547 | game_dir.deep_scan = UISettings::values.game_dir_deprecated_deepscan; | ||
| 548 | UISettings::values.game_dirs.append(game_dir); | ||
| 549 | } | ||
| 550 | } | ||
| 523 | UISettings::values.recent_files = ReadSetting(QStringLiteral("recentFiles")).toStringList(); | 551 | UISettings::values.recent_files = ReadSetting(QStringLiteral("recentFiles")).toStringList(); |
| 524 | 552 | ||
| 525 | qt_config->endGroup(); | 553 | qt_config->endGroup(); |
| @@ -829,7 +857,6 @@ void Config::SaveControlValues() { | |||
| 829 | void Config::SaveCoreValues() { | 857 | void Config::SaveCoreValues() { |
| 830 | qt_config->beginGroup(QStringLiteral("Core")); | 858 | qt_config->beginGroup(QStringLiteral("Core")); |
| 831 | 859 | ||
| 832 | WriteSetting(QStringLiteral("use_cpu_jit"), Settings::values.use_cpu_jit, true); | ||
| 833 | WriteSetting(QStringLiteral("use_multi_core"), Settings::values.use_multi_core, false); | 860 | WriteSetting(QStringLiteral("use_multi_core"), Settings::values.use_multi_core, false); |
| 834 | 861 | ||
| 835 | qt_config->endGroup(); | 862 | qt_config->endGroup(); |
| @@ -858,6 +885,7 @@ void Config::SaveDebuggingValues() { | |||
| 858 | QString::fromStdString(Settings::values.program_args), QStringLiteral("")); | 885 | QString::fromStdString(Settings::values.program_args), QStringLiteral("")); |
| 859 | WriteSetting(QStringLiteral("dump_exefs"), Settings::values.dump_exefs, false); | 886 | WriteSetting(QStringLiteral("dump_exefs"), Settings::values.dump_exefs, false); |
| 860 | WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false); | 887 | WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false); |
| 888 | WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false); | ||
| 861 | 889 | ||
| 862 | qt_config->endGroup(); | 890 | qt_config->endGroup(); |
| 863 | } | 891 | } |
| @@ -898,10 +926,15 @@ void Config::SavePathValues() { | |||
| 898 | WriteSetting(QStringLiteral("romsPath"), UISettings::values.roms_path); | 926 | WriteSetting(QStringLiteral("romsPath"), UISettings::values.roms_path); |
| 899 | WriteSetting(QStringLiteral("symbolsPath"), UISettings::values.symbols_path); | 927 | WriteSetting(QStringLiteral("symbolsPath"), UISettings::values.symbols_path); |
| 900 | WriteSetting(QStringLiteral("screenshotPath"), UISettings::values.screenshot_path); | 928 | WriteSetting(QStringLiteral("screenshotPath"), UISettings::values.screenshot_path); |
| 901 | WriteSetting(QStringLiteral("gameListRootDir"), UISettings::values.game_directory_path, | 929 | qt_config->beginWriteArray(QStringLiteral("gamedirs")); |
| 902 | QStringLiteral(".")); | 930 | for (int i = 0; i < UISettings::values.game_dirs.size(); ++i) { |
| 903 | WriteSetting(QStringLiteral("gameListDeepScan"), UISettings::values.game_directory_deepscan, | 931 | qt_config->setArrayIndex(i); |
| 904 | false); | 932 | const auto& game_dir = UISettings::values.game_dirs[i]; |
| 933 | WriteSetting(QStringLiteral("path"), game_dir.path); | ||
| 934 | WriteSetting(QStringLiteral("deep_scan"), game_dir.deep_scan, false); | ||
| 935 | WriteSetting(QStringLiteral("expanded"), game_dir.expanded, true); | ||
| 936 | } | ||
| 937 | qt_config->endArray(); | ||
| 905 | WriteSetting(QStringLiteral("recentFiles"), UISettings::values.recent_files); | 938 | WriteSetting(QStringLiteral("recentFiles"), UISettings::values.recent_files); |
| 906 | 939 | ||
| 907 | qt_config->endGroup(); | 940 | qt_config->endGroup(); |
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index 63426fe4f..5b7e03056 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp | |||
| @@ -12,13 +12,13 @@ | |||
| 12 | #include "ui_configure_debug.h" | 12 | #include "ui_configure_debug.h" |
| 13 | #include "yuzu/configuration/configure_debug.h" | 13 | #include "yuzu/configuration/configure_debug.h" |
| 14 | #include "yuzu/debugger/console.h" | 14 | #include "yuzu/debugger/console.h" |
| 15 | #include "yuzu/ui_settings.h" | 15 | #include "yuzu/uisettings.h" |
| 16 | 16 | ||
| 17 | ConfigureDebug::ConfigureDebug(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureDebug) { | 17 | ConfigureDebug::ConfigureDebug(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureDebug) { |
| 18 | ui->setupUi(this); | 18 | ui->setupUi(this); |
| 19 | SetConfiguration(); | 19 | SetConfiguration(); |
| 20 | 20 | ||
| 21 | connect(ui->open_log_button, &QPushButton::pressed, []() { | 21 | connect(ui->open_log_button, &QPushButton::clicked, []() { |
| 22 | QString path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::LogDir)); | 22 | QString path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::LogDir)); |
| 23 | QDesktopServices::openUrl(QUrl::fromLocalFile(path)); | 23 | QDesktopServices::openUrl(QUrl::fromLocalFile(path)); |
| 24 | }); | 24 | }); |
| @@ -37,6 +37,7 @@ void ConfigureDebug::SetConfiguration() { | |||
| 37 | ui->dump_exefs->setChecked(Settings::values.dump_exefs); | 37 | ui->dump_exefs->setChecked(Settings::values.dump_exefs); |
| 38 | ui->dump_decompressed_nso->setChecked(Settings::values.dump_nso); | 38 | ui->dump_decompressed_nso->setChecked(Settings::values.dump_nso); |
| 39 | ui->reporting_services->setChecked(Settings::values.reporting_services); | 39 | ui->reporting_services->setChecked(Settings::values.reporting_services); |
| 40 | ui->quest_flag->setChecked(Settings::values.quest_flag); | ||
| 40 | } | 41 | } |
| 41 | 42 | ||
| 42 | void ConfigureDebug::ApplyConfiguration() { | 43 | void ConfigureDebug::ApplyConfiguration() { |
| @@ -48,6 +49,7 @@ void ConfigureDebug::ApplyConfiguration() { | |||
| 48 | Settings::values.dump_exefs = ui->dump_exefs->isChecked(); | 49 | Settings::values.dump_exefs = ui->dump_exefs->isChecked(); |
| 49 | Settings::values.dump_nso = ui->dump_decompressed_nso->isChecked(); | 50 | Settings::values.dump_nso = ui->dump_decompressed_nso->isChecked(); |
| 50 | Settings::values.reporting_services = ui->reporting_services->isChecked(); | 51 | Settings::values.reporting_services = ui->reporting_services->isChecked(); |
| 52 | Settings::values.quest_flag = ui->quest_flag->isChecked(); | ||
| 51 | Debugger::ToggleConsole(); | 53 | Debugger::ToggleConsole(); |
| 52 | Log::Filter filter; | 54 | Log::Filter filter; |
| 53 | filter.ParseFilterString(Settings::values.log_filter); | 55 | filter.ParseFilterString(Settings::values.log_filter); |
diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index 4a7e3dc3d..7e109cef0 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | <x>0</x> | 7 | <x>0</x> |
| 8 | <y>0</y> | 8 | <y>0</y> |
| 9 | <width>400</width> | 9 | <width>400</width> |
| 10 | <height>357</height> | 10 | <height>474</height> |
| 11 | </rect> | 11 | </rect> |
| 12 | </property> | 12 | </property> |
| 13 | <property name="windowTitle"> | 13 | <property name="windowTitle"> |
| @@ -181,6 +181,22 @@ | |||
| 181 | </widget> | 181 | </widget> |
| 182 | </item> | 182 | </item> |
| 183 | <item> | 183 | <item> |
| 184 | <widget class="QGroupBox" name="groupBox_5"> | ||
| 185 | <property name="title"> | ||
| 186 | <string>Advanced</string> | ||
| 187 | </property> | ||
| 188 | <layout class="QVBoxLayout" name="verticalLayout"> | ||
| 189 | <item> | ||
| 190 | <widget class="QCheckBox" name="quest_flag"> | ||
| 191 | <property name="text"> | ||
| 192 | <string>Kiosk (Quest) Mode</string> | ||
| 193 | </property> | ||
| 194 | </widget> | ||
| 195 | </item> | ||
| 196 | </layout> | ||
| 197 | </widget> | ||
| 198 | </item> | ||
| 199 | <item> | ||
| 184 | <spacer name="verticalSpacer"> | 200 | <spacer name="verticalSpacer"> |
| 185 | <property name="orientation"> | 201 | <property name="orientation"> |
| 186 | <enum>Qt::Vertical</enum> | 202 | <enum>Qt::Vertical</enum> |
diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp index e636964e3..775e3f2ea 100644 --- a/src/yuzu/configuration/configure_dialog.cpp +++ b/src/yuzu/configuration/configure_dialog.cpp | |||
| @@ -68,12 +68,14 @@ void ConfigureDialog::RetranslateUI() { | |||
| 68 | ui->tabWidget->setCurrentIndex(old_index); | 68 | ui->tabWidget->setCurrentIndex(old_index); |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | Q_DECLARE_METATYPE(QList<QWidget*>); | ||
| 72 | |||
| 71 | void ConfigureDialog::PopulateSelectionList() { | 73 | void ConfigureDialog::PopulateSelectionList() { |
| 72 | const std::array<std::pair<QString, QStringList>, 4> items{ | 74 | const std::array<std::pair<QString, QList<QWidget*>>, 4> items{ |
| 73 | {{tr("General"), {tr("General"), tr("Web"), tr("Debug"), tr("Game List")}}, | 75 | {{tr("General"), {ui->generalTab, ui->webTab, ui->debugTab, ui->gameListTab}}, |
| 74 | {tr("System"), {tr("System"), tr("Profiles"), tr("Audio")}}, | 76 | {tr("System"), {ui->systemTab, ui->profileManagerTab, ui->audioTab}}, |
| 75 | {tr("Graphics"), {tr("Graphics")}}, | 77 | {tr("Graphics"), {ui->graphicsTab}}, |
| 76 | {tr("Controls"), {tr("Input"), tr("Hotkeys")}}}, | 78 | {tr("Controls"), {ui->inputTab, ui->hotkeysTab}}}, |
| 77 | }; | 79 | }; |
| 78 | 80 | ||
| 79 | [[maybe_unused]] const QSignalBlocker blocker(ui->selectorList); | 81 | [[maybe_unused]] const QSignalBlocker blocker(ui->selectorList); |
| @@ -81,7 +83,7 @@ void ConfigureDialog::PopulateSelectionList() { | |||
| 81 | ui->selectorList->clear(); | 83 | ui->selectorList->clear(); |
| 82 | for (const auto& entry : items) { | 84 | for (const auto& entry : items) { |
| 83 | auto* const item = new QListWidgetItem(entry.first); | 85 | auto* const item = new QListWidgetItem(entry.first); |
| 84 | item->setData(Qt::UserRole, entry.second); | 86 | item->setData(Qt::UserRole, QVariant::fromValue(entry.second)); |
| 85 | 87 | ||
| 86 | ui->selectorList->addItem(item); | 88 | ui->selectorList->addItem(item); |
| 87 | } | 89 | } |
| @@ -93,24 +95,26 @@ void ConfigureDialog::UpdateVisibleTabs() { | |||
| 93 | return; | 95 | return; |
| 94 | } | 96 | } |
| 95 | 97 | ||
| 96 | const std::map<QString, QWidget*> widgets = { | 98 | const std::map<QWidget*, QString> widgets = { |
| 97 | {tr("General"), ui->generalTab}, | 99 | {ui->generalTab, tr("General")}, |
| 98 | {tr("System"), ui->systemTab}, | 100 | {ui->systemTab, tr("System")}, |
| 99 | {tr("Profiles"), ui->profileManagerTab}, | 101 | {ui->profileManagerTab, tr("Profiles")}, |
| 100 | {tr("Input"), ui->inputTab}, | 102 | {ui->inputTab, tr("Input")}, |
| 101 | {tr("Hotkeys"), ui->hotkeysTab}, | 103 | {ui->hotkeysTab, tr("Hotkeys")}, |
| 102 | {tr("Graphics"), ui->graphicsTab}, | 104 | {ui->graphicsTab, tr("Graphics")}, |
| 103 | {tr("Audio"), ui->audioTab}, | 105 | {ui->audioTab, tr("Audio")}, |
| 104 | {tr("Debug"), ui->debugTab}, | 106 | {ui->debugTab, tr("Debug")}, |
| 105 | {tr("Web"), ui->webTab}, | 107 | {ui->webTab, tr("Web")}, |
| 106 | {tr("Game List"), ui->gameListTab}, | 108 | {ui->gameListTab, tr("Game List")}, |
| 107 | }; | 109 | }; |
| 108 | 110 | ||
| 109 | [[maybe_unused]] const QSignalBlocker blocker(ui->tabWidget); | 111 | [[maybe_unused]] const QSignalBlocker blocker(ui->tabWidget); |
| 110 | 112 | ||
| 111 | ui->tabWidget->clear(); | 113 | ui->tabWidget->clear(); |
| 112 | const QStringList tabs = items[0]->data(Qt::UserRole).toStringList(); | 114 | |
| 113 | for (const auto& tab : tabs) { | 115 | const QList<QWidget*> tabs = qvariant_cast<QList<QWidget*>>(items[0]->data(Qt::UserRole)); |
| 114 | ui->tabWidget->addTab(widgets.find(tab)->second, tab); | 116 | |
| 117 | for (const auto tab : tabs) { | ||
| 118 | ui->tabWidget->addTab(tab, widgets.at(tab)); | ||
| 115 | } | 119 | } |
| 116 | } | 120 | } |
diff --git a/src/yuzu/configuration/configure_gamelist.cpp b/src/yuzu/configuration/configure_gamelist.cpp index d1724ba89..daedbc33e 100644 --- a/src/yuzu/configuration/configure_gamelist.cpp +++ b/src/yuzu/configuration/configure_gamelist.cpp | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | #include "core/settings.h" | 9 | #include "core/settings.h" |
| 10 | #include "ui_configure_gamelist.h" | 10 | #include "ui_configure_gamelist.h" |
| 11 | #include "yuzu/configuration/configure_gamelist.h" | 11 | #include "yuzu/configuration/configure_gamelist.h" |
| 12 | #include "yuzu/ui_settings.h" | 12 | #include "yuzu/uisettings.h" |
| 13 | 13 | ||
| 14 | namespace { | 14 | namespace { |
| 15 | constexpr std::array default_icon_sizes{ | 15 | constexpr std::array default_icon_sizes{ |
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp index 06d368dfc..10bcd650e 100644 --- a/src/yuzu/configuration/configure_general.cpp +++ b/src/yuzu/configuration/configure_general.cpp | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | #include "core/settings.h" | 6 | #include "core/settings.h" |
| 7 | #include "ui_configure_general.h" | 7 | #include "ui_configure_general.h" |
| 8 | #include "yuzu/configuration/configure_general.h" | 8 | #include "yuzu/configuration/configure_general.h" |
| 9 | #include "yuzu/ui_settings.h" | 9 | #include "yuzu/uisettings.h" |
| 10 | 10 | ||
| 11 | ConfigureGeneral::ConfigureGeneral(QWidget* parent) | 11 | ConfigureGeneral::ConfigureGeneral(QWidget* parent) |
| 12 | : QWidget(parent), ui(new Ui::ConfigureGeneral) { | 12 | : QWidget(parent), ui(new Ui::ConfigureGeneral) { |
| @@ -20,30 +20,29 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent) | |||
| 20 | 20 | ||
| 21 | SetConfiguration(); | 21 | SetConfiguration(); |
| 22 | 22 | ||
| 23 | connect(ui->toggle_deepscan, &QCheckBox::stateChanged, this, | 23 | connect(ui->toggle_frame_limit, &QCheckBox::toggled, ui->frame_limit, &QSpinBox::setEnabled); |
| 24 | [] { UISettings::values.is_game_list_reload_pending.exchange(true); }); | ||
| 25 | |||
| 26 | ui->use_cpu_jit->setEnabled(!Core::System::GetInstance().IsPoweredOn()); | ||
| 27 | } | 24 | } |
| 28 | 25 | ||
| 29 | ConfigureGeneral::~ConfigureGeneral() = default; | 26 | ConfigureGeneral::~ConfigureGeneral() = default; |
| 30 | 27 | ||
| 31 | void ConfigureGeneral::SetConfiguration() { | 28 | void ConfigureGeneral::SetConfiguration() { |
| 32 | ui->toggle_deepscan->setChecked(UISettings::values.game_directory_deepscan); | ||
| 33 | ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing); | 29 | ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing); |
| 34 | ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot); | 30 | ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot); |
| 35 | ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme)); | 31 | ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme)); |
| 36 | ui->use_cpu_jit->setChecked(Settings::values.use_cpu_jit); | 32 | |
| 33 | ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit); | ||
| 34 | ui->frame_limit->setEnabled(ui->toggle_frame_limit->isChecked()); | ||
| 35 | ui->frame_limit->setValue(Settings::values.frame_limit); | ||
| 37 | } | 36 | } |
| 38 | 37 | ||
| 39 | void ConfigureGeneral::ApplyConfiguration() { | 38 | void ConfigureGeneral::ApplyConfiguration() { |
| 40 | UISettings::values.game_directory_deepscan = ui->toggle_deepscan->isChecked(); | ||
| 41 | UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked(); | 39 | UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked(); |
| 42 | UISettings::values.select_user_on_boot = ui->toggle_user_on_boot->isChecked(); | 40 | UISettings::values.select_user_on_boot = ui->toggle_user_on_boot->isChecked(); |
| 43 | UISettings::values.theme = | 41 | UISettings::values.theme = |
| 44 | ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString(); | 42 | ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString(); |
| 45 | 43 | ||
| 46 | Settings::values.use_cpu_jit = ui->use_cpu_jit->isChecked(); | 44 | Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked(); |
| 45 | Settings::values.frame_limit = ui->frame_limit->value(); | ||
| 47 | } | 46 | } |
| 48 | 47 | ||
| 49 | void ConfigureGeneral::changeEvent(QEvent* event) { | 48 | void ConfigureGeneral::changeEvent(QEvent* event) { |
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui index 1a5721fe7..0bb91d64b 100644 --- a/src/yuzu/configuration/configure_general.ui +++ b/src/yuzu/configuration/configure_general.ui | |||
| @@ -25,11 +25,31 @@ | |||
| 25 | <item> | 25 | <item> |
| 26 | <layout class="QVBoxLayout" name="GeneralVerticalLayout"> | 26 | <layout class="QVBoxLayout" name="GeneralVerticalLayout"> |
| 27 | <item> | 27 | <item> |
| 28 | <widget class="QCheckBox" name="toggle_deepscan"> | 28 | <layout class="QHBoxLayout" name="horizontalLayout_2"> |
| 29 | <property name="text"> | 29 | <item> |
| 30 | <string>Search sub-directories for games</string> | 30 | <widget class="QCheckBox" name="toggle_frame_limit"> |
| 31 | </property> | 31 | <property name="text"> |
| 32 | </widget> | 32 | <string>Limit Speed Percent</string> |
| 33 | </property> | ||
| 34 | </widget> | ||
| 35 | </item> | ||
| 36 | <item> | ||
| 37 | <widget class="QSpinBox" name="frame_limit"> | ||
| 38 | <property name="suffix"> | ||
| 39 | <string>%</string> | ||
| 40 | </property> | ||
| 41 | <property name="minimum"> | ||
| 42 | <number>1</number> | ||
| 43 | </property> | ||
| 44 | <property name="maximum"> | ||
| 45 | <number>9999</number> | ||
| 46 | </property> | ||
| 47 | <property name="value"> | ||
| 48 | <number>100</number> | ||
| 49 | </property> | ||
| 50 | </widget> | ||
| 51 | </item> | ||
| 52 | </layout> | ||
| 33 | </item> | 53 | </item> |
| 34 | <item> | 54 | <item> |
| 35 | <widget class="QCheckBox" name="toggle_check_exit"> | 55 | <widget class="QCheckBox" name="toggle_check_exit"> |
| @@ -51,26 +71,6 @@ | |||
| 51 | </widget> | 71 | </widget> |
| 52 | </item> | 72 | </item> |
| 53 | <item> | 73 | <item> |
| 54 | <widget class="QGroupBox" name="PerformanceGroupBox"> | ||
| 55 | <property name="title"> | ||
| 56 | <string>Performance</string> | ||
| 57 | </property> | ||
| 58 | <layout class="QHBoxLayout" name="PerformanceHorizontalLayout"> | ||
| 59 | <item> | ||
| 60 | <layout class="QVBoxLayout" name="PerformanceVerticalLayout"> | ||
| 61 | <item> | ||
| 62 | <widget class="QCheckBox" name="use_cpu_jit"> | ||
| 63 | <property name="text"> | ||
| 64 | <string>Enable CPU JIT</string> | ||
| 65 | </property> | ||
| 66 | </widget> | ||
| 67 | </item> | ||
| 68 | </layout> | ||
| 69 | </item> | ||
| 70 | </layout> | ||
| 71 | </widget> | ||
| 72 | </item> | ||
| 73 | <item> | ||
| 74 | <widget class="QGroupBox" name="theme_group_box"> | 74 | <widget class="QGroupBox" name="theme_group_box"> |
| 75 | <property name="title"> | 75 | <property name="title"> |
| 76 | <string>Theme</string> | 76 | <string>Theme</string> |
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 2b17b250c..2c9e322c9 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp | |||
| @@ -55,7 +55,6 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) | |||
| 55 | 55 | ||
| 56 | SetConfiguration(); | 56 | SetConfiguration(); |
| 57 | 57 | ||
| 58 | connect(ui->toggle_frame_limit, &QCheckBox::toggled, ui->frame_limit, &QSpinBox::setEnabled); | ||
| 59 | connect(ui->bg_button, &QPushButton::clicked, this, [this] { | 58 | connect(ui->bg_button, &QPushButton::clicked, this, [this] { |
| 60 | const QColor new_bg_color = QColorDialog::getColor(bg_color); | 59 | const QColor new_bg_color = QColorDialog::getColor(bg_color); |
| 61 | if (!new_bg_color.isValid()) { | 60 | if (!new_bg_color.isValid()) { |
| @@ -72,9 +71,6 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 72 | 71 | ||
| 73 | ui->resolution_factor_combobox->setCurrentIndex( | 72 | ui->resolution_factor_combobox->setCurrentIndex( |
| 74 | static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); | 73 | static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); |
| 75 | ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit); | ||
| 76 | ui->frame_limit->setEnabled(ui->toggle_frame_limit->isChecked()); | ||
| 77 | ui->frame_limit->setValue(Settings::values.frame_limit); | ||
| 78 | ui->use_disk_shader_cache->setEnabled(runtime_lock); | 74 | ui->use_disk_shader_cache->setEnabled(runtime_lock); |
| 79 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); | 75 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); |
| 80 | ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); | 76 | ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); |
| @@ -89,8 +85,6 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 89 | void ConfigureGraphics::ApplyConfiguration() { | 85 | void ConfigureGraphics::ApplyConfiguration() { |
| 90 | Settings::values.resolution_factor = | 86 | Settings::values.resolution_factor = |
| 91 | ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); | 87 | ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); |
| 92 | Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked(); | ||
| 93 | Settings::values.frame_limit = ui->frame_limit->value(); | ||
| 94 | Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); | 88 | Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); |
| 95 | Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); | 89 | Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); |
| 96 | Settings::values.use_asynchronous_gpu_emulation = | 90 | Settings::values.use_asynchronous_gpu_emulation = |
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 15ab18ecd..0309ee300 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui | |||
| @@ -23,33 +23,6 @@ | |||
| 23 | </property> | 23 | </property> |
| 24 | <layout class="QVBoxLayout" name="verticalLayout_2"> | 24 | <layout class="QVBoxLayout" name="verticalLayout_2"> |
| 25 | <item> | 25 | <item> |
| 26 | <layout class="QHBoxLayout" name="horizontalLayout_2"> | ||
| 27 | <item> | ||
| 28 | <widget class="QCheckBox" name="toggle_frame_limit"> | ||
| 29 | <property name="text"> | ||
| 30 | <string>Limit Speed Percent</string> | ||
| 31 | </property> | ||
| 32 | </widget> | ||
| 33 | </item> | ||
| 34 | <item> | ||
| 35 | <widget class="QSpinBox" name="frame_limit"> | ||
| 36 | <property name="suffix"> | ||
| 37 | <string>%</string> | ||
| 38 | </property> | ||
| 39 | <property name="minimum"> | ||
| 40 | <number>1</number> | ||
| 41 | </property> | ||
| 42 | <property name="maximum"> | ||
| 43 | <number>9999</number> | ||
| 44 | </property> | ||
| 45 | <property name="value"> | ||
| 46 | <number>100</number> | ||
| 47 | </property> | ||
| 48 | </widget> | ||
| 49 | </item> | ||
| 50 | </layout> | ||
| 51 | </item> | ||
| 52 | <item> | ||
| 53 | <widget class="QCheckBox" name="use_disk_shader_cache"> | 26 | <widget class="QCheckBox" name="use_disk_shader_cache"> |
| 54 | <property name="text"> | 27 | <property name="text"> |
| 55 | <string>Use disk shader cache</string> | 28 | <string>Use disk shader cache</string> |
diff --git a/src/yuzu/configuration/configure_input.cpp b/src/yuzu/configuration/configure_input.cpp index 4dd775aab..7613197f2 100644 --- a/src/yuzu/configuration/configure_input.cpp +++ b/src/yuzu/configuration/configure_input.cpp | |||
| @@ -79,7 +79,7 @@ ConfigureInput::ConfigureInput(QWidget* parent) | |||
| 79 | LoadConfiguration(); | 79 | LoadConfiguration(); |
| 80 | UpdateUIEnabled(); | 80 | UpdateUIEnabled(); |
| 81 | 81 | ||
| 82 | connect(ui->restore_defaults_button, &QPushButton::pressed, this, | 82 | connect(ui->restore_defaults_button, &QPushButton::clicked, this, |
| 83 | &ConfigureInput::RestoreDefaults); | 83 | &ConfigureInput::RestoreDefaults); |
| 84 | 84 | ||
| 85 | for (auto* enabled : players_controller) { | 85 | for (auto* enabled : players_controller) { |
| @@ -96,20 +96,20 @@ ConfigureInput::ConfigureInput(QWidget* parent) | |||
| 96 | &ConfigureInput::UpdateUIEnabled); | 96 | &ConfigureInput::UpdateUIEnabled); |
| 97 | 97 | ||
| 98 | for (std::size_t i = 0; i < players_configure.size(); ++i) { | 98 | for (std::size_t i = 0; i < players_configure.size(); ++i) { |
| 99 | connect(players_configure[i], &QPushButton::pressed, this, | 99 | connect(players_configure[i], &QPushButton::clicked, this, |
| 100 | [this, i] { CallConfigureDialog<ConfigureInputPlayer>(*this, i, false); }); | 100 | [this, i] { CallConfigureDialog<ConfigureInputPlayer>(*this, i, false); }); |
| 101 | } | 101 | } |
| 102 | 102 | ||
| 103 | connect(ui->handheld_configure, &QPushButton::pressed, this, | 103 | connect(ui->handheld_configure, &QPushButton::clicked, this, |
| 104 | [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 8, false); }); | 104 | [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 8, false); }); |
| 105 | 105 | ||
| 106 | connect(ui->debug_configure, &QPushButton::pressed, this, | 106 | connect(ui->debug_configure, &QPushButton::clicked, this, |
| 107 | [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 9, true); }); | 107 | [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 9, true); }); |
| 108 | 108 | ||
| 109 | connect(ui->mouse_advanced, &QPushButton::pressed, this, | 109 | connect(ui->mouse_advanced, &QPushButton::clicked, this, |
| 110 | [this] { CallConfigureDialog<ConfigureMouseAdvanced>(*this); }); | 110 | [this] { CallConfigureDialog<ConfigureMouseAdvanced>(*this); }); |
| 111 | 111 | ||
| 112 | connect(ui->touchscreen_advanced, &QPushButton::pressed, this, | 112 | connect(ui->touchscreen_advanced, &QPushButton::clicked, this, |
| 113 | [this] { CallConfigureDialog<ConfigureTouchscreenAdvanced>(*this); }); | 113 | [this] { CallConfigureDialog<ConfigureTouchscreenAdvanced>(*this); }); |
| 114 | } | 114 | } |
| 115 | 115 | ||
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index 916baccc1..a968cfb5d 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp | |||
| @@ -244,7 +244,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i | |||
| 244 | } | 244 | } |
| 245 | 245 | ||
| 246 | button->setContextMenuPolicy(Qt::CustomContextMenu); | 246 | button->setContextMenuPolicy(Qt::CustomContextMenu); |
| 247 | connect(button, &QPushButton::released, [=] { | 247 | connect(button, &QPushButton::clicked, [=] { |
| 248 | HandleClick( | 248 | HandleClick( |
| 249 | button_map[button_id], | 249 | button_map[button_id], |
| 250 | [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; }, | 250 | [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; }, |
| @@ -273,7 +273,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i | |||
| 273 | } | 273 | } |
| 274 | 274 | ||
| 275 | analog_button->setContextMenuPolicy(Qt::CustomContextMenu); | 275 | analog_button->setContextMenuPolicy(Qt::CustomContextMenu); |
| 276 | connect(analog_button, &QPushButton::released, [=]() { | 276 | connect(analog_button, &QPushButton::clicked, [=]() { |
| 277 | HandleClick(analog_map_buttons[analog_id][sub_button_id], | 277 | HandleClick(analog_map_buttons[analog_id][sub_button_id], |
| 278 | [=](const Common::ParamPackage& params) { | 278 | [=](const Common::ParamPackage& params) { |
| 279 | SetAnalogButton(params, analogs_param[analog_id], | 279 | SetAnalogButton(params, analogs_param[analog_id], |
| @@ -300,19 +300,22 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i | |||
| 300 | menu_location)); | 300 | menu_location)); |
| 301 | }); | 301 | }); |
| 302 | } | 302 | } |
| 303 | connect(analog_map_stick[analog_id], &QPushButton::released, [=] { | 303 | connect(analog_map_stick[analog_id], &QPushButton::clicked, [=] { |
| 304 | QMessageBox::information(this, tr("Information"), | 304 | if (QMessageBox::information( |
| 305 | tr("After pressing OK, first move your joystick horizontally, " | 305 | this, tr("Information"), |
| 306 | "and then vertically.")); | 306 | tr("After pressing OK, first move your joystick horizontally, " |
| 307 | HandleClick( | 307 | "and then vertically."), |
| 308 | analog_map_stick[analog_id], | 308 | QMessageBox::Ok | QMessageBox::Cancel) == QMessageBox::Ok) { |
| 309 | [=](const Common::ParamPackage& params) { analogs_param[analog_id] = params; }, | 309 | HandleClick( |
| 310 | InputCommon::Polling::DeviceType::Analog); | 310 | analog_map_stick[analog_id], |
| 311 | [=](const Common::ParamPackage& params) { analogs_param[analog_id] = params; }, | ||
| 312 | InputCommon::Polling::DeviceType::Analog); | ||
| 313 | } | ||
| 311 | }); | 314 | }); |
| 312 | } | 315 | } |
| 313 | 316 | ||
| 314 | connect(ui->buttonClearAll, &QPushButton::released, [this] { ClearAll(); }); | 317 | connect(ui->buttonClearAll, &QPushButton::clicked, [this] { ClearAll(); }); |
| 315 | connect(ui->buttonRestoreDefaults, &QPushButton::released, [this] { RestoreDefaults(); }); | 318 | connect(ui->buttonRestoreDefaults, &QPushButton::clicked, [this] { RestoreDefaults(); }); |
| 316 | 319 | ||
| 317 | timeout_timer->setSingleShot(true); | 320 | timeout_timer->setSingleShot(true); |
| 318 | connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); }); | 321 | connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); }); |
diff --git a/src/yuzu/configuration/configure_input_simple.cpp b/src/yuzu/configuration/configure_input_simple.cpp index 864803ea3..ab3a11d30 100644 --- a/src/yuzu/configuration/configure_input_simple.cpp +++ b/src/yuzu/configuration/configure_input_simple.cpp | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | #include "yuzu/configuration/configure_input.h" | 9 | #include "yuzu/configuration/configure_input.h" |
| 10 | #include "yuzu/configuration/configure_input_player.h" | 10 | #include "yuzu/configuration/configure_input_player.h" |
| 11 | #include "yuzu/configuration/configure_input_simple.h" | 11 | #include "yuzu/configuration/configure_input_simple.h" |
| 12 | #include "yuzu/ui_settings.h" | 12 | #include "yuzu/uisettings.h" |
| 13 | 13 | ||
| 14 | namespace { | 14 | namespace { |
| 15 | 15 | ||
| @@ -101,7 +101,7 @@ ConfigureInputSimple::ConfigureInputSimple(QWidget* parent) | |||
| 101 | 101 | ||
| 102 | connect(ui->profile_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this, | 102 | connect(ui->profile_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this, |
| 103 | &ConfigureInputSimple::OnSelectProfile); | 103 | &ConfigureInputSimple::OnSelectProfile); |
| 104 | connect(ui->profile_configure, &QPushButton::pressed, this, &ConfigureInputSimple::OnConfigure); | 104 | connect(ui->profile_configure, &QPushButton::clicked, this, &ConfigureInputSimple::OnConfigure); |
| 105 | 105 | ||
| 106 | LoadConfiguration(); | 106 | LoadConfiguration(); |
| 107 | } | 107 | } |
diff --git a/src/yuzu/configuration/configure_mouse_advanced.cpp b/src/yuzu/configuration/configure_mouse_advanced.cpp index b7305e653..0a4abe34f 100644 --- a/src/yuzu/configuration/configure_mouse_advanced.cpp +++ b/src/yuzu/configuration/configure_mouse_advanced.cpp | |||
| @@ -83,7 +83,7 @@ ConfigureMouseAdvanced::ConfigureMouseAdvanced(QWidget* parent) | |||
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | button->setContextMenuPolicy(Qt::CustomContextMenu); | 85 | button->setContextMenuPolicy(Qt::CustomContextMenu); |
| 86 | connect(button, &QPushButton::released, [=] { | 86 | connect(button, &QPushButton::clicked, [=] { |
| 87 | HandleClick( | 87 | HandleClick( |
| 88 | button_map[button_id], | 88 | button_map[button_id], |
| 89 | [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; }, | 89 | [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; }, |
| @@ -104,8 +104,8 @@ ConfigureMouseAdvanced::ConfigureMouseAdvanced(QWidget* parent) | |||
| 104 | }); | 104 | }); |
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | connect(ui->buttonClearAll, &QPushButton::released, [this] { ClearAll(); }); | 107 | connect(ui->buttonClearAll, &QPushButton::clicked, [this] { ClearAll(); }); |
| 108 | connect(ui->buttonRestoreDefaults, &QPushButton::released, [this] { RestoreDefaults(); }); | 108 | connect(ui->buttonRestoreDefaults, &QPushButton::clicked, [this] { RestoreDefaults(); }); |
| 109 | 109 | ||
| 110 | timeout_timer->setSingleShot(true); | 110 | timeout_timer->setSingleShot(true); |
| 111 | connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); }); | 111 | connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); }); |
diff --git a/src/yuzu/configuration/configure_per_general.cpp b/src/yuzu/configuration/configure_per_general.cpp index 90336e235..d7f259f12 100644 --- a/src/yuzu/configuration/configure_per_general.cpp +++ b/src/yuzu/configuration/configure_per_general.cpp | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | #include "yuzu/configuration/config.h" | 23 | #include "yuzu/configuration/config.h" |
| 24 | #include "yuzu/configuration/configure_input.h" | 24 | #include "yuzu/configuration/configure_input.h" |
| 25 | #include "yuzu/configuration/configure_per_general.h" | 25 | #include "yuzu/configuration/configure_per_general.h" |
| 26 | #include "yuzu/ui_settings.h" | 26 | #include "yuzu/uisettings.h" |
| 27 | #include "yuzu/util/util.h" | 27 | #include "yuzu/util/util.h" |
| 28 | 28 | ||
| 29 | ConfigurePerGameGeneral::ConfigurePerGameGeneral(QWidget* parent, u64 title_id) | 29 | ConfigurePerGameGeneral::ConfigurePerGameGeneral(QWidget* parent, u64 title_id) |
diff --git a/src/yuzu/configuration/configure_profile_manager.cpp b/src/yuzu/configuration/configure_profile_manager.cpp index c90f4cdd8..f53423440 100644 --- a/src/yuzu/configuration/configure_profile_manager.cpp +++ b/src/yuzu/configuration/configure_profile_manager.cpp | |||
| @@ -108,10 +108,10 @@ ConfigureProfileManager ::ConfigureProfileManager(QWidget* parent) | |||
| 108 | 108 | ||
| 109 | connect(tree_view, &QTreeView::clicked, this, &ConfigureProfileManager::SelectUser); | 109 | connect(tree_view, &QTreeView::clicked, this, &ConfigureProfileManager::SelectUser); |
| 110 | 110 | ||
| 111 | connect(ui->pm_add, &QPushButton::pressed, this, &ConfigureProfileManager::AddUser); | 111 | connect(ui->pm_add, &QPushButton::clicked, this, &ConfigureProfileManager::AddUser); |
| 112 | connect(ui->pm_rename, &QPushButton::pressed, this, &ConfigureProfileManager::RenameUser); | 112 | connect(ui->pm_rename, &QPushButton::clicked, this, &ConfigureProfileManager::RenameUser); |
| 113 | connect(ui->pm_remove, &QPushButton::pressed, this, &ConfigureProfileManager::DeleteUser); | 113 | connect(ui->pm_remove, &QPushButton::clicked, this, &ConfigureProfileManager::DeleteUser); |
| 114 | connect(ui->pm_set_image, &QPushButton::pressed, this, &ConfigureProfileManager::SetUserImage); | 114 | connect(ui->pm_set_image, &QPushButton::clicked, this, &ConfigureProfileManager::SetUserImage); |
| 115 | 115 | ||
| 116 | scene = new QGraphicsScene; | 116 | scene = new QGraphicsScene; |
| 117 | ui->current_user_icon->setScene(scene); | 117 | ui->current_user_icon->setScene(scene); |
diff --git a/src/yuzu/configuration/configure_touchscreen_advanced.cpp b/src/yuzu/configuration/configure_touchscreen_advanced.cpp index 8ced28c75..7d7cc00b7 100644 --- a/src/yuzu/configuration/configure_touchscreen_advanced.cpp +++ b/src/yuzu/configuration/configure_touchscreen_advanced.cpp | |||
| @@ -11,7 +11,7 @@ ConfigureTouchscreenAdvanced::ConfigureTouchscreenAdvanced(QWidget* parent) | |||
| 11 | : QDialog(parent), ui(std::make_unique<Ui::ConfigureTouchscreenAdvanced>()) { | 11 | : QDialog(parent), ui(std::make_unique<Ui::ConfigureTouchscreenAdvanced>()) { |
| 12 | ui->setupUi(this); | 12 | ui->setupUi(this); |
| 13 | 13 | ||
| 14 | connect(ui->restore_defaults_button, &QPushButton::pressed, this, | 14 | connect(ui->restore_defaults_button, &QPushButton::clicked, this, |
| 15 | &ConfigureTouchscreenAdvanced::RestoreDefaults); | 15 | &ConfigureTouchscreenAdvanced::RestoreDefaults); |
| 16 | 16 | ||
| 17 | LoadConfiguration(); | 17 | LoadConfiguration(); |
diff --git a/src/yuzu/configuration/configure_web.cpp b/src/yuzu/configuration/configure_web.cpp index 5a70ef168..336b062b3 100644 --- a/src/yuzu/configuration/configure_web.cpp +++ b/src/yuzu/configuration/configure_web.cpp | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | #include "core/telemetry_session.h" | 9 | #include "core/telemetry_session.h" |
| 10 | #include "ui_configure_web.h" | 10 | #include "ui_configure_web.h" |
| 11 | #include "yuzu/configuration/configure_web.h" | 11 | #include "yuzu/configuration/configure_web.h" |
| 12 | #include "yuzu/ui_settings.h" | 12 | #include "yuzu/uisettings.h" |
| 13 | 13 | ||
| 14 | ConfigureWeb::ConfigureWeb(QWidget* parent) | 14 | ConfigureWeb::ConfigureWeb(QWidget* parent) |
| 15 | : QWidget(parent), ui(std::make_unique<Ui::ConfigureWeb>()) { | 15 | : QWidget(parent), ui(std::make_unique<Ui::ConfigureWeb>()) { |
diff --git a/src/yuzu/debugger/console.cpp b/src/yuzu/debugger/console.cpp index 320898f6a..207ff4d58 100644 --- a/src/yuzu/debugger/console.cpp +++ b/src/yuzu/debugger/console.cpp | |||
| @@ -10,7 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | #include "common/logging/backend.h" | 11 | #include "common/logging/backend.h" |
| 12 | #include "yuzu/debugger/console.h" | 12 | #include "yuzu/debugger/console.h" |
| 13 | #include "yuzu/ui_settings.h" | 13 | #include "yuzu/uisettings.h" |
| 14 | 14 | ||
| 15 | namespace Debugger { | 15 | namespace Debugger { |
| 16 | void ToggleConsole() { | 16 | void ToggleConsole() { |
diff --git a/src/yuzu/discord_impl.cpp b/src/yuzu/discord_impl.cpp index 9d87a41eb..ea0079353 100644 --- a/src/yuzu/discord_impl.cpp +++ b/src/yuzu/discord_impl.cpp | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | #include "core/core.h" | 9 | #include "core/core.h" |
| 10 | #include "core/loader/loader.h" | 10 | #include "core/loader/loader.h" |
| 11 | #include "yuzu/discord_impl.h" | 11 | #include "yuzu/discord_impl.h" |
| 12 | #include "yuzu/ui_settings.h" | 12 | #include "yuzu/uisettings.h" |
| 13 | 13 | ||
| 14 | namespace DiscordRPC { | 14 | namespace DiscordRPC { |
| 15 | 15 | ||
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index 1885587af..d5fab2f1f 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | #include "yuzu/game_list_p.h" | 23 | #include "yuzu/game_list_p.h" |
| 24 | #include "yuzu/game_list_worker.h" | 24 | #include "yuzu/game_list_worker.h" |
| 25 | #include "yuzu/main.h" | 25 | #include "yuzu/main.h" |
| 26 | #include "yuzu/ui_settings.h" | 26 | #include "yuzu/uisettings.h" |
| 27 | 27 | ||
| 28 | GameListSearchField::KeyReleaseEater::KeyReleaseEater(GameList* gamelist) : gamelist{gamelist} {} | 28 | GameListSearchField::KeyReleaseEater::KeyReleaseEater(GameList* gamelist) : gamelist{gamelist} {} |
| 29 | 29 | ||
| @@ -34,7 +34,6 @@ bool GameListSearchField::KeyReleaseEater::eventFilter(QObject* obj, QEvent* eve | |||
| 34 | return QObject::eventFilter(obj, event); | 34 | return QObject::eventFilter(obj, event); |
| 35 | 35 | ||
| 36 | QKeyEvent* keyEvent = static_cast<QKeyEvent*>(event); | 36 | QKeyEvent* keyEvent = static_cast<QKeyEvent*>(event); |
| 37 | int rowCount = gamelist->tree_view->model()->rowCount(); | ||
| 38 | QString edit_filter_text = gamelist->search_field->edit_filter->text().toLower(); | 37 | QString edit_filter_text = gamelist->search_field->edit_filter->text().toLower(); |
| 39 | 38 | ||
| 40 | // If the searchfield's text hasn't changed special function keys get checked | 39 | // If the searchfield's text hasn't changed special function keys get checked |
| @@ -56,19 +55,9 @@ bool GameListSearchField::KeyReleaseEater::eventFilter(QObject* obj, QEvent* eve | |||
| 56 | // If there is only one result launch this game | 55 | // If there is only one result launch this game |
| 57 | case Qt::Key_Return: | 56 | case Qt::Key_Return: |
| 58 | case Qt::Key_Enter: { | 57 | case Qt::Key_Enter: { |
| 59 | QStandardItemModel* item_model = new QStandardItemModel(gamelist->tree_view); | 58 | if (gamelist->search_field->visible == 1) { |
| 60 | QModelIndex root_index = item_model->invisibleRootItem()->index(); | 59 | QString file_path = gamelist->getLastFilterResultItem(); |
| 61 | QStandardItem* child_file; | 60 | |
| 62 | QString file_path; | ||
| 63 | int resultCount = 0; | ||
| 64 | for (int i = 0; i < rowCount; ++i) { | ||
| 65 | if (!gamelist->tree_view->isRowHidden(i, root_index)) { | ||
| 66 | ++resultCount; | ||
| 67 | child_file = gamelist->item_model->item(i, 0); | ||
| 68 | file_path = child_file->data(GameListItemPath::FullPathRole).toString(); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | if (resultCount == 1) { | ||
| 72 | // To avoid loading error dialog loops while confirming them using enter | 61 | // To avoid loading error dialog loops while confirming them using enter |
| 73 | // Also users usually want to run a different game after closing one | 62 | // Also users usually want to run a different game after closing one |
| 74 | gamelist->search_field->edit_filter->clear(); | 63 | gamelist->search_field->edit_filter->clear(); |
| @@ -88,9 +77,31 @@ bool GameListSearchField::KeyReleaseEater::eventFilter(QObject* obj, QEvent* eve | |||
| 88 | } | 77 | } |
| 89 | 78 | ||
| 90 | void GameListSearchField::setFilterResult(int visible, int total) { | 79 | void GameListSearchField::setFilterResult(int visible, int total) { |
| 80 | this->visible = visible; | ||
| 81 | this->total = total; | ||
| 82 | |||
| 91 | label_filter_result->setText(tr("%1 of %n result(s)", "", total).arg(visible)); | 83 | label_filter_result->setText(tr("%1 of %n result(s)", "", total).arg(visible)); |
| 92 | } | 84 | } |
| 93 | 85 | ||
| 86 | QString GameList::getLastFilterResultItem() const { | ||
| 87 | QStandardItem* folder; | ||
| 88 | QStandardItem* child; | ||
| 89 | QString file_path; | ||
| 90 | const int folder_count = item_model->rowCount(); | ||
| 91 | for (int i = 0; i < folder_count; ++i) { | ||
| 92 | folder = item_model->item(i, 0); | ||
| 93 | const QModelIndex folder_index = folder->index(); | ||
| 94 | const int children_count = folder->rowCount(); | ||
| 95 | for (int j = 0; j < children_count; ++j) { | ||
| 96 | if (!tree_view->isRowHidden(j, folder_index)) { | ||
| 97 | child = folder->child(j, 0); | ||
| 98 | file_path = child->data(GameListItemPath::FullPathRole).toString(); | ||
| 99 | } | ||
| 100 | } | ||
| 101 | } | ||
| 102 | return file_path; | ||
| 103 | } | ||
| 104 | |||
| 94 | void GameListSearchField::clear() { | 105 | void GameListSearchField::clear() { |
| 95 | edit_filter->clear(); | 106 | edit_filter->clear(); |
| 96 | } | 107 | } |
| @@ -147,45 +158,120 @@ static bool ContainsAllWords(const QString& haystack, const QString& userinput) | |||
| 147 | [&haystack](const QString& s) { return haystack.contains(s); }); | 158 | [&haystack](const QString& s) { return haystack.contains(s); }); |
| 148 | } | 159 | } |
| 149 | 160 | ||
| 161 | // Syncs the expanded state of Game Directories with settings to persist across sessions | ||
| 162 | void GameList::onItemExpanded(const QModelIndex& item) { | ||
| 163 | const auto type = item.data(GameListItem::TypeRole).value<GameListItemType>(); | ||
| 164 | if (type == GameListItemType::CustomDir || type == GameListItemType::SdmcDir || | ||
| 165 | type == GameListItemType::UserNandDir || type == GameListItemType::SysNandDir) | ||
| 166 | item.data(GameListDir::GameDirRole).value<UISettings::GameDir*>()->expanded = | ||
| 167 | tree_view->isExpanded(item); | ||
| 168 | } | ||
| 169 | |||
| 150 | // Event in order to filter the gamelist after editing the searchfield | 170 | // Event in order to filter the gamelist after editing the searchfield |
| 151 | void GameList::onTextChanged(const QString& new_text) { | 171 | void GameList::onTextChanged(const QString& new_text) { |
| 152 | const int row_count = tree_view->model()->rowCount(); | 172 | const int folder_count = tree_view->model()->rowCount(); |
| 153 | const QString edit_filter_text = new_text.toLower(); | 173 | QString edit_filter_text = new_text.toLower(); |
| 154 | const QModelIndex root_index = item_model->invisibleRootItem()->index(); | 174 | QStandardItem* folder; |
| 175 | QStandardItem* child; | ||
| 176 | int children_total = 0; | ||
| 177 | QModelIndex root_index = item_model->invisibleRootItem()->index(); | ||
| 155 | 178 | ||
| 156 | // If the searchfield is empty every item is visible | 179 | // If the searchfield is empty every item is visible |
| 157 | // Otherwise the filter gets applied | 180 | // Otherwise the filter gets applied |
| 158 | if (edit_filter_text.isEmpty()) { | 181 | if (edit_filter_text.isEmpty()) { |
| 159 | for (int i = 0; i < row_count; ++i) { | 182 | for (int i = 0; i < folder_count; ++i) { |
| 160 | tree_view->setRowHidden(i, root_index, false); | 183 | folder = item_model->item(i, 0); |
| 184 | const QModelIndex folder_index = folder->index(); | ||
| 185 | const int children_count = folder->rowCount(); | ||
| 186 | for (int j = 0; j < children_count; ++j) { | ||
| 187 | ++children_total; | ||
| 188 | tree_view->setRowHidden(j, folder_index, false); | ||
| 189 | } | ||
| 161 | } | 190 | } |
| 162 | search_field->setFilterResult(row_count, row_count); | 191 | search_field->setFilterResult(children_total, children_total); |
| 163 | } else { | 192 | } else { |
| 164 | int result_count = 0; | 193 | int result_count = 0; |
| 165 | for (int i = 0; i < row_count; ++i) { | 194 | for (int i = 0; i < folder_count; ++i) { |
| 166 | const QStandardItem* child_file = item_model->item(i, 0); | 195 | folder = item_model->item(i, 0); |
| 167 | const QString file_path = | 196 | const QModelIndex folder_index = folder->index(); |
| 168 | child_file->data(GameListItemPath::FullPathRole).toString().toLower(); | 197 | const int children_count = folder->rowCount(); |
| 169 | const QString file_title = | 198 | for (int j = 0; j < children_count; ++j) { |
| 170 | child_file->data(GameListItemPath::TitleRole).toString().toLower(); | 199 | ++children_total; |
| 171 | const QString file_program_id = | 200 | const QStandardItem* child = folder->child(j, 0); |
| 172 | child_file->data(GameListItemPath::ProgramIdRole).toString().toLower(); | 201 | const QString file_path = |
| 173 | 202 | child->data(GameListItemPath::FullPathRole).toString().toLower(); | |
| 174 | // Only items which filename in combination with its title contains all words | 203 | const QString file_title = |
| 175 | // that are in the searchfield will be visible in the gamelist | 204 | child->data(GameListItemPath::TitleRole).toString().toLower(); |
| 176 | // The search is case insensitive because of toLower() | 205 | const QString file_program_id = |
| 177 | // I decided not to use Qt::CaseInsensitive in containsAllWords to prevent | 206 | child->data(GameListItemPath::ProgramIdRole).toString().toLower(); |
| 178 | // multiple conversions of edit_filter_text for each game in the gamelist | 207 | |
| 179 | const QString file_name = file_path.mid(file_path.lastIndexOf(QLatin1Char{'/'}) + 1) + | 208 | // Only items which filename in combination with its title contains all words |
| 180 | QLatin1Char{' '} + file_title; | 209 | // that are in the searchfield will be visible in the gamelist |
| 181 | if (ContainsAllWords(file_name, edit_filter_text) || | 210 | // The search is case insensitive because of toLower() |
| 182 | (file_program_id.count() == 16 && edit_filter_text.contains(file_program_id))) { | 211 | // I decided not to use Qt::CaseInsensitive in containsAllWords to prevent |
| 183 | tree_view->setRowHidden(i, root_index, false); | 212 | // multiple conversions of edit_filter_text for each game in the gamelist |
| 184 | ++result_count; | 213 | const QString file_name = |
| 185 | } else { | 214 | file_path.mid(file_path.lastIndexOf(QLatin1Char{'/'}) + 1) + QLatin1Char{' '} + |
| 186 | tree_view->setRowHidden(i, root_index, true); | 215 | file_title; |
| 216 | if (ContainsAllWords(file_name, edit_filter_text) || | ||
| 217 | (file_program_id.count() == 16 && edit_filter_text.contains(file_program_id))) { | ||
| 218 | tree_view->setRowHidden(j, folder_index, false); | ||
| 219 | ++result_count; | ||
| 220 | } else { | ||
| 221 | tree_view->setRowHidden(j, folder_index, true); | ||
| 222 | } | ||
| 223 | search_field->setFilterResult(result_count, children_total); | ||
| 187 | } | 224 | } |
| 188 | search_field->setFilterResult(result_count, row_count); | 225 | } |
| 226 | } | ||
| 227 | } | ||
| 228 | |||
| 229 | void GameList::onUpdateThemedIcons() { | ||
| 230 | for (int i = 0; i < item_model->invisibleRootItem()->rowCount(); i++) { | ||
| 231 | QStandardItem* child = item_model->invisibleRootItem()->child(i); | ||
| 232 | |||
| 233 | const int icon_size = std::min(static_cast<int>(UISettings::values.icon_size), 64); | ||
| 234 | switch (child->data(GameListItem::TypeRole).value<GameListItemType>()) { | ||
| 235 | case GameListItemType::SdmcDir: | ||
| 236 | child->setData( | ||
| 237 | QIcon::fromTheme(QStringLiteral("sd_card")) | ||
| 238 | .pixmap(icon_size) | ||
| 239 | .scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation), | ||
| 240 | Qt::DecorationRole); | ||
| 241 | break; | ||
| 242 | case GameListItemType::UserNandDir: | ||
| 243 | child->setData( | ||
| 244 | QIcon::fromTheme(QStringLiteral("chip")) | ||
| 245 | .pixmap(icon_size) | ||
| 246 | .scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation), | ||
| 247 | Qt::DecorationRole); | ||
| 248 | break; | ||
| 249 | case GameListItemType::SysNandDir: | ||
| 250 | child->setData( | ||
| 251 | QIcon::fromTheme(QStringLiteral("chip")) | ||
| 252 | .pixmap(icon_size) | ||
| 253 | .scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation), | ||
| 254 | Qt::DecorationRole); | ||
| 255 | break; | ||
| 256 | case GameListItemType::CustomDir: { | ||
| 257 | const UISettings::GameDir* game_dir = | ||
| 258 | child->data(GameListDir::GameDirRole).value<UISettings::GameDir*>(); | ||
| 259 | const QString icon_name = QFileInfo::exists(game_dir->path) | ||
| 260 | ? QStringLiteral("folder") | ||
| 261 | : QStringLiteral("bad_folder"); | ||
| 262 | child->setData( | ||
| 263 | QIcon::fromTheme(icon_name).pixmap(icon_size).scaled( | ||
| 264 | icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation), | ||
| 265 | Qt::DecorationRole); | ||
| 266 | break; | ||
| 267 | } | ||
| 268 | case GameListItemType::AddDir: | ||
| 269 | child->setData( | ||
| 270 | QIcon::fromTheme(QStringLiteral("plus")) | ||
| 271 | .pixmap(icon_size) | ||
| 272 | .scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation), | ||
| 273 | Qt::DecorationRole); | ||
| 274 | break; | ||
| 189 | } | 275 | } |
| 190 | } | 276 | } |
| 191 | } | 277 | } |
| @@ -214,7 +300,6 @@ GameList::GameList(FileSys::VirtualFilesystem vfs, FileSys::ManualContentProvide | |||
| 214 | tree_view->setHorizontalScrollMode(QHeaderView::ScrollPerPixel); | 300 | tree_view->setHorizontalScrollMode(QHeaderView::ScrollPerPixel); |
| 215 | tree_view->setSortingEnabled(true); | 301 | tree_view->setSortingEnabled(true); |
| 216 | tree_view->setEditTriggers(QHeaderView::NoEditTriggers); | 302 | tree_view->setEditTriggers(QHeaderView::NoEditTriggers); |
| 217 | tree_view->setUniformRowHeights(true); | ||
| 218 | tree_view->setContextMenuPolicy(Qt::CustomContextMenu); | 303 | tree_view->setContextMenuPolicy(Qt::CustomContextMenu); |
| 219 | tree_view->setStyleSheet(QStringLiteral("QTreeView{ border: none; }")); | 304 | tree_view->setStyleSheet(QStringLiteral("QTreeView{ border: none; }")); |
| 220 | 305 | ||
| @@ -230,12 +315,16 @@ GameList::GameList(FileSys::VirtualFilesystem vfs, FileSys::ManualContentProvide | |||
| 230 | item_model->setHeaderData(COLUMN_FILE_TYPE - 1, Qt::Horizontal, tr("File type")); | 315 | item_model->setHeaderData(COLUMN_FILE_TYPE - 1, Qt::Horizontal, tr("File type")); |
| 231 | item_model->setHeaderData(COLUMN_SIZE - 1, Qt::Horizontal, tr("Size")); | 316 | item_model->setHeaderData(COLUMN_SIZE - 1, Qt::Horizontal, tr("Size")); |
| 232 | } | 317 | } |
| 318 | item_model->setSortRole(GameListItemPath::TitleRole); | ||
| 233 | 319 | ||
| 320 | connect(main_window, &GMainWindow::UpdateThemedIcons, this, &GameList::onUpdateThemedIcons); | ||
| 234 | connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry); | 321 | connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry); |
| 235 | connect(tree_view, &QTreeView::customContextMenuRequested, this, &GameList::PopupContextMenu); | 322 | connect(tree_view, &QTreeView::customContextMenuRequested, this, &GameList::PopupContextMenu); |
| 323 | connect(tree_view, &QTreeView::expanded, this, &GameList::onItemExpanded); | ||
| 324 | connect(tree_view, &QTreeView::collapsed, this, &GameList::onItemExpanded); | ||
| 236 | 325 | ||
| 237 | // We must register all custom types with the Qt Automoc system so that we are able to use it | 326 | // We must register all custom types with the Qt Automoc system so that we are able to use |
| 238 | // with signals/slots. In this case, QList falls under the umbrells of custom types. | 327 | // it with signals/slots. In this case, QList falls under the umbrells of custom types. |
| 239 | qRegisterMetaType<QList<QStandardItem*>>("QList<QStandardItem*>"); | 328 | qRegisterMetaType<QList<QStandardItem*>>("QList<QStandardItem*>"); |
| 240 | 329 | ||
| 241 | layout->setContentsMargins(0, 0, 0, 0); | 330 | layout->setContentsMargins(0, 0, 0, 0); |
| @@ -263,38 +352,68 @@ void GameList::clearFilter() { | |||
| 263 | search_field->clear(); | 352 | search_field->clear(); |
| 264 | } | 353 | } |
| 265 | 354 | ||
| 266 | void GameList::AddEntry(const QList<QStandardItem*>& entry_items) { | 355 | void GameList::AddDirEntry(GameListDir* entry_items) { |
| 267 | item_model->invisibleRootItem()->appendRow(entry_items); | 356 | item_model->invisibleRootItem()->appendRow(entry_items); |
| 357 | tree_view->setExpanded( | ||
| 358 | entry_items->index(), | ||
| 359 | entry_items->data(GameListDir::GameDirRole).value<UISettings::GameDir*>()->expanded); | ||
| 268 | } | 360 | } |
| 269 | 361 | ||
| 270 | void GameList::ValidateEntry(const QModelIndex& item) { | 362 | void GameList::AddEntry(const QList<QStandardItem*>& entry_items, GameListDir* parent) { |
| 271 | // We don't care about the individual QStandardItem that was selected, but its row. | 363 | parent->appendRow(entry_items); |
| 272 | const int row = item_model->itemFromIndex(item)->row(); | 364 | } |
| 273 | const QStandardItem* child_file = item_model->invisibleRootItem()->child(row, COLUMN_NAME); | ||
| 274 | const QString file_path = child_file->data(GameListItemPath::FullPathRole).toString(); | ||
| 275 | |||
| 276 | if (file_path.isEmpty()) | ||
| 277 | return; | ||
| 278 | |||
| 279 | if (!QFileInfo::exists(file_path)) | ||
| 280 | return; | ||
| 281 | 365 | ||
| 282 | const QFileInfo file_info{file_path}; | 366 | void GameList::ValidateEntry(const QModelIndex& item) { |
| 283 | if (file_info.isDir()) { | 367 | const auto selected = item.sibling(item.row(), 0); |
| 284 | const QDir dir{file_path}; | 368 | |
| 285 | const QStringList matching_main = dir.entryList({QStringLiteral("main")}, QDir::Files); | 369 | switch (selected.data(GameListItem::TypeRole).value<GameListItemType>()) { |
| 286 | if (matching_main.size() == 1) { | 370 | case GameListItemType::Game: { |
| 287 | emit GameChosen(dir.path() + QDir::separator() + matching_main[0]); | 371 | const QString file_path = selected.data(GameListItemPath::FullPathRole).toString(); |
| 372 | if (file_path.isEmpty()) | ||
| 373 | return; | ||
| 374 | const QFileInfo file_info(file_path); | ||
| 375 | if (!file_info.exists()) | ||
| 376 | return; | ||
| 377 | |||
| 378 | if (file_info.isDir()) { | ||
| 379 | const QDir dir{file_path}; | ||
| 380 | const QStringList matching_main = dir.entryList({QStringLiteral("main")}, QDir::Files); | ||
| 381 | if (matching_main.size() == 1) { | ||
| 382 | emit GameChosen(dir.path() + QDir::separator() + matching_main[0]); | ||
| 383 | } | ||
| 384 | return; | ||
| 288 | } | 385 | } |
| 289 | return; | 386 | |
| 387 | // Users usually want to run a different game after closing one | ||
| 388 | search_field->clear(); | ||
| 389 | emit GameChosen(file_path); | ||
| 390 | break; | ||
| 290 | } | 391 | } |
| 392 | case GameListItemType::AddDir: | ||
| 393 | emit AddDirectory(); | ||
| 394 | break; | ||
| 395 | } | ||
| 396 | } | ||
| 291 | 397 | ||
| 292 | // Users usually want to run a diffrent game after closing one | 398 | bool GameList::isEmpty() const { |
| 293 | search_field->clear(); | 399 | for (int i = 0; i < item_model->rowCount(); i++) { |
| 294 | emit GameChosen(file_path); | 400 | const QStandardItem* child = item_model->invisibleRootItem()->child(i); |
| 401 | const auto type = static_cast<GameListItemType>(child->type()); | ||
| 402 | if (!child->hasChildren() && | ||
| 403 | (type == GameListItemType::SdmcDir || type == GameListItemType::UserNandDir || | ||
| 404 | type == GameListItemType::SysNandDir)) { | ||
| 405 | item_model->invisibleRootItem()->removeRow(child->row()); | ||
| 406 | i--; | ||
| 407 | }; | ||
| 408 | } | ||
| 409 | return !item_model->invisibleRootItem()->hasChildren(); | ||
| 295 | } | 410 | } |
| 296 | 411 | ||
| 297 | void GameList::DonePopulating(QStringList watch_list) { | 412 | void GameList::DonePopulating(QStringList watch_list) { |
| 413 | emit ShowList(!isEmpty()); | ||
| 414 | |||
| 415 | item_model->invisibleRootItem()->appendRow(new GameListAddDir()); | ||
| 416 | |||
| 298 | // Clear out the old directories to watch for changes and add the new ones | 417 | // Clear out the old directories to watch for changes and add the new ones |
| 299 | auto watch_dirs = watcher->directories(); | 418 | auto watch_dirs = watcher->directories(); |
| 300 | if (!watch_dirs.isEmpty()) { | 419 | if (!watch_dirs.isEmpty()) { |
| @@ -311,9 +430,13 @@ void GameList::DonePopulating(QStringList watch_list) { | |||
| 311 | QCoreApplication::processEvents(); | 430 | QCoreApplication::processEvents(); |
| 312 | } | 431 | } |
| 313 | tree_view->setEnabled(true); | 432 | tree_view->setEnabled(true); |
| 314 | int rowCount = tree_view->model()->rowCount(); | 433 | const int folder_count = tree_view->model()->rowCount(); |
| 315 | search_field->setFilterResult(rowCount, rowCount); | 434 | int children_total = 0; |
| 316 | if (rowCount > 0) { | 435 | for (int i = 0; i < folder_count; ++i) { |
| 436 | children_total += item_model->item(i, 0)->rowCount(); | ||
| 437 | } | ||
| 438 | search_field->setFilterResult(children_total, children_total); | ||
| 439 | if (children_total > 0) { | ||
| 317 | search_field->setFocus(); | 440 | search_field->setFocus(); |
| 318 | } | 441 | } |
| 319 | } | 442 | } |
| @@ -323,12 +446,27 @@ void GameList::PopupContextMenu(const QPoint& menu_location) { | |||
| 323 | if (!item.isValid()) | 446 | if (!item.isValid()) |
| 324 | return; | 447 | return; |
| 325 | 448 | ||
| 326 | int row = item_model->itemFromIndex(item)->row(); | 449 | const auto selected = item.sibling(item.row(), 0); |
| 327 | QStandardItem* child_file = item_model->invisibleRootItem()->child(row, COLUMN_NAME); | ||
| 328 | u64 program_id = child_file->data(GameListItemPath::ProgramIdRole).toULongLong(); | ||
| 329 | std::string path = child_file->data(GameListItemPath::FullPathRole).toString().toStdString(); | ||
| 330 | |||
| 331 | QMenu context_menu; | 450 | QMenu context_menu; |
| 451 | switch (selected.data(GameListItem::TypeRole).value<GameListItemType>()) { | ||
| 452 | case GameListItemType::Game: | ||
| 453 | AddGamePopup(context_menu, selected.data(GameListItemPath::ProgramIdRole).toULongLong(), | ||
| 454 | selected.data(GameListItemPath::FullPathRole).toString().toStdString()); | ||
| 455 | break; | ||
| 456 | case GameListItemType::CustomDir: | ||
| 457 | AddPermDirPopup(context_menu, selected); | ||
| 458 | AddCustomDirPopup(context_menu, selected); | ||
| 459 | break; | ||
| 460 | case GameListItemType::SdmcDir: | ||
| 461 | case GameListItemType::UserNandDir: | ||
| 462 | case GameListItemType::SysNandDir: | ||
| 463 | AddPermDirPopup(context_menu, selected); | ||
| 464 | break; | ||
| 465 | } | ||
| 466 | context_menu.exec(tree_view->viewport()->mapToGlobal(menu_location)); | ||
| 467 | } | ||
| 468 | |||
| 469 | void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, std::string path) { | ||
| 332 | QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location")); | 470 | QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location")); |
| 333 | QAction* open_lfs_location = context_menu.addAction(tr("Open Mod Data Location")); | 471 | QAction* open_lfs_location = context_menu.addAction(tr("Open Mod Data Location")); |
| 334 | QAction* open_transferable_shader_cache = | 472 | QAction* open_transferable_shader_cache = |
| @@ -344,19 +482,86 @@ void GameList::PopupContextMenu(const QPoint& menu_location) { | |||
| 344 | auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id); | 482 | auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id); |
| 345 | navigate_to_gamedb_entry->setVisible(it != compatibility_list.end() && program_id != 0); | 483 | navigate_to_gamedb_entry->setVisible(it != compatibility_list.end() && program_id != 0); |
| 346 | 484 | ||
| 347 | connect(open_save_location, &QAction::triggered, | 485 | connect(open_save_location, &QAction::triggered, [this, program_id]() { |
| 348 | [&]() { emit OpenFolderRequested(program_id, GameListOpenTarget::SaveData); }); | 486 | emit OpenFolderRequested(program_id, GameListOpenTarget::SaveData); |
| 349 | connect(open_lfs_location, &QAction::triggered, | 487 | }); |
| 350 | [&]() { emit OpenFolderRequested(program_id, GameListOpenTarget::ModData); }); | 488 | connect(open_lfs_location, &QAction::triggered, [this, program_id]() { |
| 489 | emit OpenFolderRequested(program_id, GameListOpenTarget::ModData); | ||
| 490 | }); | ||
| 351 | connect(open_transferable_shader_cache, &QAction::triggered, | 491 | connect(open_transferable_shader_cache, &QAction::triggered, |
| 352 | [&]() { emit OpenTransferableShaderCacheRequested(program_id); }); | 492 | [this, program_id]() { emit OpenTransferableShaderCacheRequested(program_id); }); |
| 353 | connect(dump_romfs, &QAction::triggered, [&]() { emit DumpRomFSRequested(program_id, path); }); | 493 | connect(dump_romfs, &QAction::triggered, |
| 354 | connect(copy_tid, &QAction::triggered, [&]() { emit CopyTIDRequested(program_id); }); | 494 | [this, program_id, path]() { emit DumpRomFSRequested(program_id, path); }); |
| 355 | connect(navigate_to_gamedb_entry, &QAction::triggered, | 495 | connect(copy_tid, &QAction::triggered, |
| 356 | [&]() { emit NavigateToGamedbEntryRequested(program_id, compatibility_list); }); | 496 | [this, program_id]() { emit CopyTIDRequested(program_id); }); |
| 357 | connect(properties, &QAction::triggered, [&]() { emit OpenPerGameGeneralRequested(path); }); | 497 | connect(navigate_to_gamedb_entry, &QAction::triggered, [this, program_id]() { |
| 498 | emit NavigateToGamedbEntryRequested(program_id, compatibility_list); | ||
| 499 | }); | ||
| 500 | connect(properties, &QAction::triggered, | ||
| 501 | [this, path]() { emit OpenPerGameGeneralRequested(path); }); | ||
| 502 | }; | ||
| 503 | |||
| 504 | void GameList::AddCustomDirPopup(QMenu& context_menu, QModelIndex selected) { | ||
| 505 | UISettings::GameDir& game_dir = | ||
| 506 | *selected.data(GameListDir::GameDirRole).value<UISettings::GameDir*>(); | ||
| 507 | |||
| 508 | QAction* deep_scan = context_menu.addAction(tr("Scan Subfolders")); | ||
| 509 | QAction* delete_dir = context_menu.addAction(tr("Remove Game Directory")); | ||
| 510 | |||
| 511 | deep_scan->setCheckable(true); | ||
| 512 | deep_scan->setChecked(game_dir.deep_scan); | ||
| 513 | |||
| 514 | connect(deep_scan, &QAction::triggered, [this, &game_dir] { | ||
| 515 | game_dir.deep_scan = !game_dir.deep_scan; | ||
| 516 | PopulateAsync(UISettings::values.game_dirs); | ||
| 517 | }); | ||
| 518 | connect(delete_dir, &QAction::triggered, [this, &game_dir, selected] { | ||
| 519 | UISettings::values.game_dirs.removeOne(game_dir); | ||
| 520 | item_model->invisibleRootItem()->removeRow(selected.row()); | ||
| 521 | }); | ||
| 522 | } | ||
| 358 | 523 | ||
| 359 | context_menu.exec(tree_view->viewport()->mapToGlobal(menu_location)); | 524 | void GameList::AddPermDirPopup(QMenu& context_menu, QModelIndex selected) { |
| 525 | UISettings::GameDir& game_dir = | ||
| 526 | *selected.data(GameListDir::GameDirRole).value<UISettings::GameDir*>(); | ||
| 527 | |||
| 528 | QAction* move_up = context_menu.addAction(tr(u8"\U000025b2 Move Up")); | ||
| 529 | QAction* move_down = context_menu.addAction(tr(u8"\U000025bc Move Down ")); | ||
| 530 | QAction* open_directory_location = context_menu.addAction(tr("Open Directory Location")); | ||
| 531 | |||
| 532 | const int row = selected.row(); | ||
| 533 | |||
| 534 | move_up->setEnabled(row > 0); | ||
| 535 | move_down->setEnabled(row < item_model->rowCount() - 2); | ||
| 536 | |||
| 537 | connect(move_up, &QAction::triggered, [this, selected, row, &game_dir] { | ||
| 538 | // find the indices of the items in settings and swap them | ||
| 539 | std::swap(UISettings::values.game_dirs[UISettings::values.game_dirs.indexOf(game_dir)], | ||
| 540 | UISettings::values.game_dirs[UISettings::values.game_dirs.indexOf( | ||
| 541 | *selected.sibling(row - 1, 0) | ||
| 542 | .data(GameListDir::GameDirRole) | ||
| 543 | .value<UISettings::GameDir*>())]); | ||
| 544 | // move the treeview items | ||
| 545 | QList<QStandardItem*> item = item_model->takeRow(row); | ||
| 546 | item_model->invisibleRootItem()->insertRow(row - 1, item); | ||
| 547 | tree_view->setExpanded(selected, game_dir.expanded); | ||
| 548 | }); | ||
| 549 | |||
| 550 | connect(move_down, &QAction::triggered, [this, selected, row, &game_dir] { | ||
| 551 | // find the indices of the items in settings and swap them | ||
| 552 | std::swap(UISettings::values.game_dirs[UISettings::values.game_dirs.indexOf(game_dir)], | ||
| 553 | UISettings::values.game_dirs[UISettings::values.game_dirs.indexOf( | ||
| 554 | *selected.sibling(row + 1, 0) | ||
| 555 | .data(GameListDir::GameDirRole) | ||
| 556 | .value<UISettings::GameDir*>())]); | ||
| 557 | // move the treeview items | ||
| 558 | const QList<QStandardItem*> item = item_model->takeRow(row); | ||
| 559 | item_model->invisibleRootItem()->insertRow(row + 1, item); | ||
| 560 | tree_view->setExpanded(selected, game_dir.expanded); | ||
| 561 | }); | ||
| 562 | |||
| 563 | connect(open_directory_location, &QAction::triggered, | ||
| 564 | [this, game_dir] { emit OpenDirectory(game_dir.path); }); | ||
| 360 | } | 565 | } |
| 361 | 566 | ||
| 362 | void GameList::LoadCompatibilityList() { | 567 | void GameList::LoadCompatibilityList() { |
| @@ -403,14 +608,7 @@ void GameList::LoadCompatibilityList() { | |||
| 403 | } | 608 | } |
| 404 | } | 609 | } |
| 405 | 610 | ||
| 406 | void GameList::PopulateAsync(const QString& dir_path, bool deep_scan) { | 611 | void GameList::PopulateAsync(QVector<UISettings::GameDir>& game_dirs) { |
| 407 | const QFileInfo dir_info{dir_path}; | ||
| 408 | if (!dir_info.exists() || !dir_info.isDir()) { | ||
| 409 | LOG_ERROR(Frontend, "Could not find game list folder at {}", dir_path.toStdString()); | ||
| 410 | search_field->setFilterResult(0, 0); | ||
| 411 | return; | ||
| 412 | } | ||
| 413 | |||
| 414 | tree_view->setEnabled(false); | 612 | tree_view->setEnabled(false); |
| 415 | 613 | ||
| 416 | // Update the columns in case UISettings has changed | 614 | // Update the columns in case UISettings has changed |
| @@ -433,17 +631,19 @@ void GameList::PopulateAsync(const QString& dir_path, bool deep_scan) { | |||
| 433 | 631 | ||
| 434 | // Delete any rows that might already exist if we're repopulating | 632 | // Delete any rows that might already exist if we're repopulating |
| 435 | item_model->removeRows(0, item_model->rowCount()); | 633 | item_model->removeRows(0, item_model->rowCount()); |
| 634 | search_field->clear(); | ||
| 436 | 635 | ||
| 437 | emit ShouldCancelWorker(); | 636 | emit ShouldCancelWorker(); |
| 438 | 637 | ||
| 439 | GameListWorker* worker = | 638 | GameListWorker* worker = new GameListWorker(vfs, provider, game_dirs, compatibility_list); |
| 440 | new GameListWorker(vfs, provider, dir_path, deep_scan, compatibility_list); | ||
| 441 | 639 | ||
| 442 | connect(worker, &GameListWorker::EntryReady, this, &GameList::AddEntry, Qt::QueuedConnection); | 640 | connect(worker, &GameListWorker::EntryReady, this, &GameList::AddEntry, Qt::QueuedConnection); |
| 641 | connect(worker, &GameListWorker::DirEntryReady, this, &GameList::AddDirEntry, | ||
| 642 | Qt::QueuedConnection); | ||
| 443 | connect(worker, &GameListWorker::Finished, this, &GameList::DonePopulating, | 643 | connect(worker, &GameListWorker::Finished, this, &GameList::DonePopulating, |
| 444 | Qt::QueuedConnection); | 644 | Qt::QueuedConnection); |
| 445 | // Use DirectConnection here because worker->Cancel() is thread-safe and we want it to cancel | 645 | // Use DirectConnection here because worker->Cancel() is thread-safe and we want it to |
| 446 | // without delay. | 646 | // cancel without delay. |
| 447 | connect(this, &GameList::ShouldCancelWorker, worker, &GameListWorker::Cancel, | 647 | connect(this, &GameList::ShouldCancelWorker, worker, &GameListWorker::Cancel, |
| 448 | Qt::DirectConnection); | 648 | Qt::DirectConnection); |
| 449 | 649 | ||
| @@ -471,10 +671,40 @@ const QStringList GameList::supported_file_extensions = { | |||
| 471 | QStringLiteral("xci"), QStringLiteral("nsp"), QStringLiteral("kip")}; | 671 | QStringLiteral("xci"), QStringLiteral("nsp"), QStringLiteral("kip")}; |
| 472 | 672 | ||
| 473 | void GameList::RefreshGameDirectory() { | 673 | void GameList::RefreshGameDirectory() { |
| 474 | if (!UISettings::values.game_directory_path.isEmpty() && current_worker != nullptr) { | 674 | if (!UISettings::values.game_dirs.isEmpty() && current_worker != nullptr) { |
| 475 | LOG_INFO(Frontend, "Change detected in the games directory. Reloading game list."); | 675 | LOG_INFO(Frontend, "Change detected in the games directory. Reloading game list."); |
| 476 | search_field->clear(); | 676 | PopulateAsync(UISettings::values.game_dirs); |
| 477 | PopulateAsync(UISettings::values.game_directory_path, | ||
| 478 | UISettings::values.game_directory_deepscan); | ||
| 479 | } | 677 | } |
| 480 | } | 678 | } |
| 679 | |||
| 680 | GameListPlaceholder::GameListPlaceholder(GMainWindow* parent) : QWidget{parent} { | ||
| 681 | connect(parent, &GMainWindow::UpdateThemedIcons, this, | ||
| 682 | &GameListPlaceholder::onUpdateThemedIcons); | ||
| 683 | |||
| 684 | layout = new QVBoxLayout; | ||
| 685 | image = new QLabel; | ||
| 686 | text = new QLabel; | ||
| 687 | layout->setAlignment(Qt::AlignCenter); | ||
| 688 | image->setPixmap(QIcon::fromTheme(QStringLiteral("plus_folder")).pixmap(200)); | ||
| 689 | |||
| 690 | text->setText(tr("Double-click to add a new folder to the game list")); | ||
| 691 | QFont font = text->font(); | ||
| 692 | font.setPointSize(20); | ||
| 693 | text->setFont(font); | ||
| 694 | text->setAlignment(Qt::AlignHCenter); | ||
| 695 | image->setAlignment(Qt::AlignHCenter); | ||
| 696 | |||
| 697 | layout->addWidget(image); | ||
| 698 | layout->addWidget(text); | ||
| 699 | setLayout(layout); | ||
| 700 | } | ||
| 701 | |||
| 702 | GameListPlaceholder::~GameListPlaceholder() = default; | ||
| 703 | |||
| 704 | void GameListPlaceholder::onUpdateThemedIcons() { | ||
| 705 | image->setPixmap(QIcon::fromTheme(QStringLiteral("plus_folder")).pixmap(200)); | ||
| 706 | } | ||
| 707 | |||
| 708 | void GameListPlaceholder::mouseDoubleClickEvent(QMouseEvent* event) { | ||
| 709 | emit GameListPlaceholder::AddDirectory(); | ||
| 710 | } | ||
diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h index f8f8bd6c5..878d94413 100644 --- a/src/yuzu/game_list.h +++ b/src/yuzu/game_list.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <QHBoxLayout> | 8 | #include <QHBoxLayout> |
| 9 | #include <QLabel> | 9 | #include <QLabel> |
| 10 | #include <QLineEdit> | 10 | #include <QLineEdit> |
| 11 | #include <QList> | ||
| 11 | #include <QModelIndex> | 12 | #include <QModelIndex> |
| 12 | #include <QSettings> | 13 | #include <QSettings> |
| 13 | #include <QStandardItem> | 14 | #include <QStandardItem> |
| @@ -16,13 +17,16 @@ | |||
| 16 | #include <QToolButton> | 17 | #include <QToolButton> |
| 17 | #include <QTreeView> | 18 | #include <QTreeView> |
| 18 | #include <QVBoxLayout> | 19 | #include <QVBoxLayout> |
| 20 | #include <QVector> | ||
| 19 | #include <QWidget> | 21 | #include <QWidget> |
| 20 | 22 | ||
| 21 | #include "common/common_types.h" | 23 | #include "common/common_types.h" |
| 24 | #include "uisettings.h" | ||
| 22 | #include "yuzu/compatibility_list.h" | 25 | #include "yuzu/compatibility_list.h" |
| 23 | 26 | ||
| 24 | class GameListWorker; | 27 | class GameListWorker; |
| 25 | class GameListSearchField; | 28 | class GameListSearchField; |
| 29 | class GameListDir; | ||
| 26 | class GMainWindow; | 30 | class GMainWindow; |
| 27 | 31 | ||
| 28 | namespace FileSys { | 32 | namespace FileSys { |
| @@ -52,12 +56,14 @@ public: | |||
| 52 | FileSys::ManualContentProvider* provider, GMainWindow* parent = nullptr); | 56 | FileSys::ManualContentProvider* provider, GMainWindow* parent = nullptr); |
| 53 | ~GameList() override; | 57 | ~GameList() override; |
| 54 | 58 | ||
| 59 | QString getLastFilterResultItem() const; | ||
| 55 | void clearFilter(); | 60 | void clearFilter(); |
| 56 | void setFilterFocus(); | 61 | void setFilterFocus(); |
| 57 | void setFilterVisible(bool visibility); | 62 | void setFilterVisible(bool visibility); |
| 63 | bool isEmpty() const; | ||
| 58 | 64 | ||
| 59 | void LoadCompatibilityList(); | 65 | void LoadCompatibilityList(); |
| 60 | void PopulateAsync(const QString& dir_path, bool deep_scan); | 66 | void PopulateAsync(QVector<UISettings::GameDir>& game_dirs); |
| 61 | 67 | ||
| 62 | void SaveInterfaceLayout(); | 68 | void SaveInterfaceLayout(); |
| 63 | void LoadInterfaceLayout(); | 69 | void LoadInterfaceLayout(); |
| @@ -74,19 +80,29 @@ signals: | |||
| 74 | void NavigateToGamedbEntryRequested(u64 program_id, | 80 | void NavigateToGamedbEntryRequested(u64 program_id, |
| 75 | const CompatibilityList& compatibility_list); | 81 | const CompatibilityList& compatibility_list); |
| 76 | void OpenPerGameGeneralRequested(const std::string& file); | 82 | void OpenPerGameGeneralRequested(const std::string& file); |
| 83 | void OpenDirectory(const QString& directory); | ||
| 84 | void AddDirectory(); | ||
| 85 | void ShowList(bool show); | ||
| 77 | 86 | ||
| 78 | private slots: | 87 | private slots: |
| 88 | void onItemExpanded(const QModelIndex& item); | ||
| 79 | void onTextChanged(const QString& new_text); | 89 | void onTextChanged(const QString& new_text); |
| 80 | void onFilterCloseClicked(); | 90 | void onFilterCloseClicked(); |
| 91 | void onUpdateThemedIcons(); | ||
| 81 | 92 | ||
| 82 | private: | 93 | private: |
| 83 | void AddEntry(const QList<QStandardItem*>& entry_items); | 94 | void AddDirEntry(GameListDir* entry_items); |
| 95 | void AddEntry(const QList<QStandardItem*>& entry_items, GameListDir* parent); | ||
| 84 | void ValidateEntry(const QModelIndex& item); | 96 | void ValidateEntry(const QModelIndex& item); |
| 85 | void DonePopulating(QStringList watch_list); | 97 | void DonePopulating(QStringList watch_list); |
| 86 | 98 | ||
| 87 | void PopupContextMenu(const QPoint& menu_location); | ||
| 88 | void RefreshGameDirectory(); | 99 | void RefreshGameDirectory(); |
| 89 | 100 | ||
| 101 | void PopupContextMenu(const QPoint& menu_location); | ||
| 102 | void AddGamePopup(QMenu& context_menu, u64 program_id, std::string path); | ||
| 103 | void AddCustomDirPopup(QMenu& context_menu, QModelIndex selected); | ||
| 104 | void AddPermDirPopup(QMenu& context_menu, QModelIndex selected); | ||
| 105 | |||
| 90 | std::shared_ptr<FileSys::VfsFilesystem> vfs; | 106 | std::shared_ptr<FileSys::VfsFilesystem> vfs; |
| 91 | FileSys::ManualContentProvider* provider; | 107 | FileSys::ManualContentProvider* provider; |
| 92 | GameListSearchField* search_field; | 108 | GameListSearchField* search_field; |
| @@ -102,3 +118,24 @@ private: | |||
| 102 | }; | 118 | }; |
| 103 | 119 | ||
| 104 | Q_DECLARE_METATYPE(GameListOpenTarget); | 120 | Q_DECLARE_METATYPE(GameListOpenTarget); |
| 121 | |||
| 122 | class GameListPlaceholder : public QWidget { | ||
| 123 | Q_OBJECT | ||
| 124 | public: | ||
| 125 | explicit GameListPlaceholder(GMainWindow* parent = nullptr); | ||
| 126 | ~GameListPlaceholder(); | ||
| 127 | |||
| 128 | signals: | ||
| 129 | void AddDirectory(); | ||
| 130 | |||
| 131 | private slots: | ||
| 132 | void onUpdateThemedIcons(); | ||
| 133 | |||
| 134 | protected: | ||
| 135 | void mouseDoubleClickEvent(QMouseEvent* event) override; | ||
| 136 | |||
| 137 | private: | ||
| 138 | QVBoxLayout* layout = nullptr; | ||
| 139 | QLabel* image = nullptr; | ||
| 140 | QLabel* text = nullptr; | ||
| 141 | }; | ||
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h index 0b458ef48..a8d888fee 100644 --- a/src/yuzu/game_list_p.h +++ b/src/yuzu/game_list_p.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <utility> | 10 | #include <utility> |
| 11 | 11 | ||
| 12 | #include <QCoreApplication> | 12 | #include <QCoreApplication> |
| 13 | #include <QFileInfo> | ||
| 13 | #include <QImage> | 14 | #include <QImage> |
| 14 | #include <QObject> | 15 | #include <QObject> |
| 15 | #include <QStandardItem> | 16 | #include <QStandardItem> |
| @@ -19,9 +20,20 @@ | |||
| 19 | #include "common/common_types.h" | 20 | #include "common/common_types.h" |
| 20 | #include "common/logging/log.h" | 21 | #include "common/logging/log.h" |
| 21 | #include "common/string_util.h" | 22 | #include "common/string_util.h" |
| 22 | #include "yuzu/ui_settings.h" | 23 | #include "yuzu/uisettings.h" |
| 23 | #include "yuzu/util/util.h" | 24 | #include "yuzu/util/util.h" |
| 24 | 25 | ||
| 26 | enum class GameListItemType { | ||
| 27 | Game = QStandardItem::UserType + 1, | ||
| 28 | CustomDir = QStandardItem::UserType + 2, | ||
| 29 | SdmcDir = QStandardItem::UserType + 3, | ||
| 30 | UserNandDir = QStandardItem::UserType + 4, | ||
| 31 | SysNandDir = QStandardItem::UserType + 5, | ||
| 32 | AddDir = QStandardItem::UserType + 6 | ||
| 33 | }; | ||
| 34 | |||
| 35 | Q_DECLARE_METATYPE(GameListItemType); | ||
| 36 | |||
| 25 | /** | 37 | /** |
| 26 | * Gets the default icon (for games without valid title metadata) | 38 | * Gets the default icon (for games without valid title metadata) |
| 27 | * @param size The desired width and height of the default icon. | 39 | * @param size The desired width and height of the default icon. |
| @@ -36,8 +48,13 @@ static QPixmap GetDefaultIcon(u32 size) { | |||
| 36 | class GameListItem : public QStandardItem { | 48 | class GameListItem : public QStandardItem { |
| 37 | 49 | ||
| 38 | public: | 50 | public: |
| 51 | // used to access type from item index | ||
| 52 | static const int TypeRole = Qt::UserRole + 1; | ||
| 53 | static const int SortRole = Qt::UserRole + 2; | ||
| 39 | GameListItem() = default; | 54 | GameListItem() = default; |
| 40 | explicit GameListItem(const QString& string) : QStandardItem(string) {} | 55 | GameListItem(const QString& string) : QStandardItem(string) { |
| 56 | setData(string, SortRole); | ||
| 57 | } | ||
| 41 | }; | 58 | }; |
| 42 | 59 | ||
| 43 | /** | 60 | /** |
| @@ -48,14 +65,15 @@ public: | |||
| 48 | */ | 65 | */ |
| 49 | class GameListItemPath : public GameListItem { | 66 | class GameListItemPath : public GameListItem { |
| 50 | public: | 67 | public: |
| 51 | static const int FullPathRole = Qt::UserRole + 1; | 68 | static const int TitleRole = SortRole; |
| 52 | static const int TitleRole = Qt::UserRole + 2; | 69 | static const int FullPathRole = SortRole + 1; |
| 53 | static const int ProgramIdRole = Qt::UserRole + 3; | 70 | static const int ProgramIdRole = SortRole + 2; |
| 54 | static const int FileTypeRole = Qt::UserRole + 4; | 71 | static const int FileTypeRole = SortRole + 3; |
| 55 | 72 | ||
| 56 | GameListItemPath() = default; | 73 | GameListItemPath() = default; |
| 57 | GameListItemPath(const QString& game_path, const std::vector<u8>& picture_data, | 74 | GameListItemPath(const QString& game_path, const std::vector<u8>& picture_data, |
| 58 | const QString& game_name, const QString& game_type, u64 program_id) { | 75 | const QString& game_name, const QString& game_type, u64 program_id) { |
| 76 | setData(type(), TypeRole); | ||
| 59 | setData(game_path, FullPathRole); | 77 | setData(game_path, FullPathRole); |
| 60 | setData(game_name, TitleRole); | 78 | setData(game_name, TitleRole); |
| 61 | setData(qulonglong(program_id), ProgramIdRole); | 79 | setData(qulonglong(program_id), ProgramIdRole); |
| @@ -72,6 +90,10 @@ public: | |||
| 72 | setData(picture, Qt::DecorationRole); | 90 | setData(picture, Qt::DecorationRole); |
| 73 | } | 91 | } |
| 74 | 92 | ||
| 93 | int type() const override { | ||
| 94 | return static_cast<int>(GameListItemType::Game); | ||
| 95 | } | ||
| 96 | |||
| 75 | QVariant data(int role) const override { | 97 | QVariant data(int role) const override { |
| 76 | if (role == Qt::DisplayRole) { | 98 | if (role == Qt::DisplayRole) { |
| 77 | std::string filename; | 99 | std::string filename; |
| @@ -103,9 +125,11 @@ public: | |||
| 103 | class GameListItemCompat : public GameListItem { | 125 | class GameListItemCompat : public GameListItem { |
| 104 | Q_DECLARE_TR_FUNCTIONS(GameListItemCompat) | 126 | Q_DECLARE_TR_FUNCTIONS(GameListItemCompat) |
| 105 | public: | 127 | public: |
| 106 | static const int CompatNumberRole = Qt::UserRole + 1; | 128 | static const int CompatNumberRole = SortRole; |
| 107 | GameListItemCompat() = default; | 129 | GameListItemCompat() = default; |
| 108 | explicit GameListItemCompat(const QString& compatibility) { | 130 | explicit GameListItemCompat(const QString& compatibility) { |
| 131 | setData(type(), TypeRole); | ||
| 132 | |||
| 109 | struct CompatStatus { | 133 | struct CompatStatus { |
| 110 | QString color; | 134 | QString color; |
| 111 | const char* text; | 135 | const char* text; |
| @@ -135,6 +159,10 @@ public: | |||
| 135 | setData(CreateCirclePixmapFromColor(status.color), Qt::DecorationRole); | 159 | setData(CreateCirclePixmapFromColor(status.color), Qt::DecorationRole); |
| 136 | } | 160 | } |
| 137 | 161 | ||
| 162 | int type() const override { | ||
| 163 | return static_cast<int>(GameListItemType::Game); | ||
| 164 | } | ||
| 165 | |||
| 138 | bool operator<(const QStandardItem& other) const override { | 166 | bool operator<(const QStandardItem& other) const override { |
| 139 | return data(CompatNumberRole) < other.data(CompatNumberRole); | 167 | return data(CompatNumberRole) < other.data(CompatNumberRole); |
| 140 | } | 168 | } |
| @@ -146,12 +174,12 @@ public: | |||
| 146 | * human-readable string representation will be displayed to the user. | 174 | * human-readable string representation will be displayed to the user. |
| 147 | */ | 175 | */ |
| 148 | class GameListItemSize : public GameListItem { | 176 | class GameListItemSize : public GameListItem { |
| 149 | |||
| 150 | public: | 177 | public: |
| 151 | static const int SizeRole = Qt::UserRole + 1; | 178 | static const int SizeRole = SortRole; |
| 152 | 179 | ||
| 153 | GameListItemSize() = default; | 180 | GameListItemSize() = default; |
| 154 | explicit GameListItemSize(const qulonglong size_bytes) { | 181 | explicit GameListItemSize(const qulonglong size_bytes) { |
| 182 | setData(type(), TypeRole); | ||
| 155 | setData(size_bytes, SizeRole); | 183 | setData(size_bytes, SizeRole); |
| 156 | } | 184 | } |
| 157 | 185 | ||
| @@ -167,6 +195,10 @@ public: | |||
| 167 | } | 195 | } |
| 168 | } | 196 | } |
| 169 | 197 | ||
| 198 | int type() const override { | ||
| 199 | return static_cast<int>(GameListItemType::Game); | ||
| 200 | } | ||
| 201 | |||
| 170 | /** | 202 | /** |
| 171 | * This operator is, in practice, only used by the TreeView sorting systems. | 203 | * This operator is, in practice, only used by the TreeView sorting systems. |
| 172 | * Override it so that it will correctly sort by numerical value instead of by string | 204 | * Override it so that it will correctly sort by numerical value instead of by string |
| @@ -177,6 +209,82 @@ public: | |||
| 177 | } | 209 | } |
| 178 | }; | 210 | }; |
| 179 | 211 | ||
| 212 | class GameListDir : public GameListItem { | ||
| 213 | public: | ||
| 214 | static const int GameDirRole = Qt::UserRole + 2; | ||
| 215 | |||
| 216 | explicit GameListDir(UISettings::GameDir& directory, | ||
| 217 | GameListItemType dir_type = GameListItemType::CustomDir) | ||
| 218 | : dir_type{dir_type} { | ||
| 219 | setData(type(), TypeRole); | ||
| 220 | |||
| 221 | UISettings::GameDir* game_dir = &directory; | ||
| 222 | setData(QVariant::fromValue(game_dir), GameDirRole); | ||
| 223 | |||
| 224 | const int icon_size = std::min(static_cast<int>(UISettings::values.icon_size), 64); | ||
| 225 | switch (dir_type) { | ||
| 226 | case GameListItemType::SdmcDir: | ||
| 227 | setData( | ||
| 228 | QIcon::fromTheme(QStringLiteral("sd_card")) | ||
| 229 | .pixmap(icon_size) | ||
| 230 | .scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation), | ||
| 231 | Qt::DecorationRole); | ||
| 232 | setData(QObject::tr("Installed SD Titles"), Qt::DisplayRole); | ||
| 233 | break; | ||
| 234 | case GameListItemType::UserNandDir: | ||
| 235 | setData( | ||
| 236 | QIcon::fromTheme(QStringLiteral("chip")) | ||
| 237 | .pixmap(icon_size) | ||
| 238 | .scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation), | ||
| 239 | Qt::DecorationRole); | ||
| 240 | setData(QObject::tr("Installed NAND Titles"), Qt::DisplayRole); | ||
| 241 | break; | ||
| 242 | case GameListItemType::SysNandDir: | ||
| 243 | setData( | ||
| 244 | QIcon::fromTheme(QStringLiteral("chip")) | ||
| 245 | .pixmap(icon_size) | ||
| 246 | .scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation), | ||
| 247 | Qt::DecorationRole); | ||
| 248 | setData(QObject::tr("System Titles"), Qt::DisplayRole); | ||
| 249 | break; | ||
| 250 | case GameListItemType::CustomDir: | ||
| 251 | const QString icon_name = QFileInfo::exists(game_dir->path) | ||
| 252 | ? QStringLiteral("folder") | ||
| 253 | : QStringLiteral("bad_folder"); | ||
| 254 | setData(QIcon::fromTheme(icon_name).pixmap(icon_size).scaled( | ||
| 255 | icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation), | ||
| 256 | Qt::DecorationRole); | ||
| 257 | setData(game_dir->path, Qt::DisplayRole); | ||
| 258 | break; | ||
| 259 | }; | ||
| 260 | }; | ||
| 261 | |||
| 262 | int type() const override { | ||
| 263 | return static_cast<int>(dir_type); | ||
| 264 | } | ||
| 265 | |||
| 266 | private: | ||
| 267 | GameListItemType dir_type; | ||
| 268 | }; | ||
| 269 | |||
| 270 | class GameListAddDir : public GameListItem { | ||
| 271 | public: | ||
| 272 | explicit GameListAddDir() { | ||
| 273 | setData(type(), TypeRole); | ||
| 274 | |||
| 275 | const int icon_size = std::min(static_cast<int>(UISettings::values.icon_size), 64); | ||
| 276 | setData(QIcon::fromTheme(QStringLiteral("plus")) | ||
| 277 | .pixmap(icon_size) | ||
| 278 | .scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation), | ||
| 279 | Qt::DecorationRole); | ||
| 280 | setData(QObject::tr("Add New Game Directory"), Qt::DisplayRole); | ||
| 281 | } | ||
| 282 | |||
| 283 | int type() const override { | ||
| 284 | return static_cast<int>(GameListItemType::AddDir); | ||
| 285 | } | ||
| 286 | }; | ||
| 287 | |||
| 180 | class GameList; | 288 | class GameList; |
| 181 | class QHBoxLayout; | 289 | class QHBoxLayout; |
| 182 | class QTreeView; | 290 | class QTreeView; |
| @@ -208,6 +316,9 @@ private: | |||
| 208 | // EventFilter in order to process systemkeys while editing the searchfield | 316 | // EventFilter in order to process systemkeys while editing the searchfield |
| 209 | bool eventFilter(QObject* obj, QEvent* event) override; | 317 | bool eventFilter(QObject* obj, QEvent* event) override; |
| 210 | }; | 318 | }; |
| 319 | int visible; | ||
| 320 | int total; | ||
| 321 | |||
| 211 | QHBoxLayout* layout_filter = nullptr; | 322 | QHBoxLayout* layout_filter = nullptr; |
| 212 | QTreeView* tree_view = nullptr; | 323 | QTreeView* tree_view = nullptr; |
| 213 | QLabel* label_filter = nullptr; | 324 | QLabel* label_filter = nullptr; |
diff --git a/src/yuzu/game_list_worker.cpp b/src/yuzu/game_list_worker.cpp index 4f30e9147..fd21a9761 100644 --- a/src/yuzu/game_list_worker.cpp +++ b/src/yuzu/game_list_worker.cpp | |||
| @@ -29,7 +29,7 @@ | |||
| 29 | #include "yuzu/game_list.h" | 29 | #include "yuzu/game_list.h" |
| 30 | #include "yuzu/game_list_p.h" | 30 | #include "yuzu/game_list_p.h" |
| 31 | #include "yuzu/game_list_worker.h" | 31 | #include "yuzu/game_list_worker.h" |
| 32 | #include "yuzu/ui_settings.h" | 32 | #include "yuzu/uisettings.h" |
| 33 | 33 | ||
| 34 | namespace { | 34 | namespace { |
| 35 | 35 | ||
| @@ -223,21 +223,37 @@ QList<QStandardItem*> MakeGameListEntry(const std::string& path, const std::stri | |||
| 223 | } // Anonymous namespace | 223 | } // Anonymous namespace |
| 224 | 224 | ||
| 225 | GameListWorker::GameListWorker(FileSys::VirtualFilesystem vfs, | 225 | GameListWorker::GameListWorker(FileSys::VirtualFilesystem vfs, |
| 226 | FileSys::ManualContentProvider* provider, QString dir_path, | 226 | FileSys::ManualContentProvider* provider, |
| 227 | bool deep_scan, const CompatibilityList& compatibility_list) | 227 | QVector<UISettings::GameDir>& game_dirs, |
| 228 | : vfs(std::move(vfs)), provider(provider), dir_path(std::move(dir_path)), deep_scan(deep_scan), | 228 | const CompatibilityList& compatibility_list) |
| 229 | : vfs(std::move(vfs)), provider(provider), game_dirs(game_dirs), | ||
| 229 | compatibility_list(compatibility_list) {} | 230 | compatibility_list(compatibility_list) {} |
| 230 | 231 | ||
| 231 | GameListWorker::~GameListWorker() = default; | 232 | GameListWorker::~GameListWorker() = default; |
| 232 | 233 | ||
| 233 | void GameListWorker::AddTitlesToGameList() { | 234 | void GameListWorker::AddTitlesToGameList(GameListDir* parent_dir) { |
| 234 | const auto& cache = dynamic_cast<FileSys::ContentProviderUnion&>( | 235 | using namespace FileSys; |
| 235 | Core::System::GetInstance().GetContentProvider()); | 236 | |
| 236 | const auto installed_games = cache.ListEntriesFilterOrigin( | 237 | const auto& cache = |
| 237 | std::nullopt, FileSys::TitleType::Application, FileSys::ContentRecordType::Program); | 238 | dynamic_cast<ContentProviderUnion&>(Core::System::GetInstance().GetContentProvider()); |
| 239 | |||
| 240 | std::vector<std::pair<ContentProviderUnionSlot, ContentProviderEntry>> installed_games; | ||
| 241 | installed_games = cache.ListEntriesFilterOrigin(std::nullopt, TitleType::Application, | ||
| 242 | ContentRecordType::Program); | ||
| 243 | |||
| 244 | if (parent_dir->type() == static_cast<int>(GameListItemType::SdmcDir)) { | ||
| 245 | installed_games = cache.ListEntriesFilterOrigin( | ||
| 246 | ContentProviderUnionSlot::SDMC, TitleType::Application, ContentRecordType::Program); | ||
| 247 | } else if (parent_dir->type() == static_cast<int>(GameListItemType::UserNandDir)) { | ||
| 248 | installed_games = cache.ListEntriesFilterOrigin( | ||
| 249 | ContentProviderUnionSlot::UserNAND, TitleType::Application, ContentRecordType::Program); | ||
| 250 | } else if (parent_dir->type() == static_cast<int>(GameListItemType::SysNandDir)) { | ||
| 251 | installed_games = cache.ListEntriesFilterOrigin( | ||
| 252 | ContentProviderUnionSlot::SysNAND, TitleType::Application, ContentRecordType::Program); | ||
| 253 | } | ||
| 238 | 254 | ||
| 239 | for (const auto& [slot, game] : installed_games) { | 255 | for (const auto& [slot, game] : installed_games) { |
| 240 | if (slot == FileSys::ContentProviderUnionSlot::FrontendManual) | 256 | if (slot == ContentProviderUnionSlot::FrontendManual) |
| 241 | continue; | 257 | continue; |
| 242 | 258 | ||
| 243 | const auto file = cache.GetEntryUnparsed(game.title_id, game.type); | 259 | const auto file = cache.GetEntryUnparsed(game.title_id, game.type); |
| @@ -250,21 +266,22 @@ void GameListWorker::AddTitlesToGameList() { | |||
| 250 | u64 program_id = 0; | 266 | u64 program_id = 0; |
| 251 | loader->ReadProgramId(program_id); | 267 | loader->ReadProgramId(program_id); |
| 252 | 268 | ||
| 253 | const FileSys::PatchManager patch{program_id}; | 269 | const PatchManager patch{program_id}; |
| 254 | const auto control = cache.GetEntry(game.title_id, FileSys::ContentRecordType::Control); | 270 | const auto control = cache.GetEntry(game.title_id, ContentRecordType::Control); |
| 255 | if (control != nullptr) | 271 | if (control != nullptr) |
| 256 | GetMetadataFromControlNCA(patch, *control, icon, name); | 272 | GetMetadataFromControlNCA(patch, *control, icon, name); |
| 257 | 273 | ||
| 258 | emit EntryReady(MakeGameListEntry(file->GetFullPath(), name, icon, *loader, program_id, | 274 | emit EntryReady(MakeGameListEntry(file->GetFullPath(), name, icon, *loader, program_id, |
| 259 | compatibility_list, patch)); | 275 | compatibility_list, patch), |
| 276 | parent_dir); | ||
| 260 | } | 277 | } |
| 261 | } | 278 | } |
| 262 | 279 | ||
| 263 | void GameListWorker::ScanFileSystem(ScanTarget target, const std::string& dir_path, | 280 | void GameListWorker::ScanFileSystem(ScanTarget target, const std::string& dir_path, |
| 264 | unsigned int recursion) { | 281 | unsigned int recursion, GameListDir* parent_dir) { |
| 265 | const auto callback = [this, target, recursion](u64* num_entries_out, | 282 | const auto callback = [this, target, recursion, |
| 266 | const std::string& directory, | 283 | parent_dir](u64* num_entries_out, const std::string& directory, |
| 267 | const std::string& virtual_name) -> bool { | 284 | const std::string& virtual_name) -> bool { |
| 268 | if (stop_processing) { | 285 | if (stop_processing) { |
| 269 | // Breaks the callback loop. | 286 | // Breaks the callback loop. |
| 270 | return false; | 287 | return false; |
| @@ -317,11 +334,12 @@ void GameListWorker::ScanFileSystem(ScanTarget target, const std::string& dir_pa | |||
| 317 | const FileSys::PatchManager patch{program_id}; | 334 | const FileSys::PatchManager patch{program_id}; |
| 318 | 335 | ||
| 319 | emit EntryReady(MakeGameListEntry(physical_name, name, icon, *loader, program_id, | 336 | emit EntryReady(MakeGameListEntry(physical_name, name, icon, *loader, program_id, |
| 320 | compatibility_list, patch)); | 337 | compatibility_list, patch), |
| 338 | parent_dir); | ||
| 321 | } | 339 | } |
| 322 | } else if (is_dir && recursion > 0) { | 340 | } else if (is_dir && recursion > 0) { |
| 323 | watch_list.append(QString::fromStdString(physical_name)); | 341 | watch_list.append(QString::fromStdString(physical_name)); |
| 324 | ScanFileSystem(target, physical_name, recursion - 1); | 342 | ScanFileSystem(target, physical_name, recursion - 1, parent_dir); |
| 325 | } | 343 | } |
| 326 | 344 | ||
| 327 | return true; | 345 | return true; |
| @@ -332,12 +350,32 @@ void GameListWorker::ScanFileSystem(ScanTarget target, const std::string& dir_pa | |||
| 332 | 350 | ||
| 333 | void GameListWorker::run() { | 351 | void GameListWorker::run() { |
| 334 | stop_processing = false; | 352 | stop_processing = false; |
| 335 | watch_list.append(dir_path); | 353 | |
| 336 | provider->ClearAllEntries(); | 354 | for (UISettings::GameDir& game_dir : game_dirs) { |
| 337 | ScanFileSystem(ScanTarget::FillManualContentProvider, dir_path.toStdString(), | 355 | if (game_dir.path == QStringLiteral("SDMC")) { |
| 338 | deep_scan ? 256 : 0); | 356 | auto* const game_list_dir = new GameListDir(game_dir, GameListItemType::SdmcDir); |
| 339 | AddTitlesToGameList(); | 357 | emit DirEntryReady({game_list_dir}); |
| 340 | ScanFileSystem(ScanTarget::PopulateGameList, dir_path.toStdString(), deep_scan ? 256 : 0); | 358 | AddTitlesToGameList(game_list_dir); |
| 359 | } else if (game_dir.path == QStringLiteral("UserNAND")) { | ||
| 360 | auto* const game_list_dir = new GameListDir(game_dir, GameListItemType::UserNandDir); | ||
| 361 | emit DirEntryReady({game_list_dir}); | ||
| 362 | AddTitlesToGameList(game_list_dir); | ||
| 363 | } else if (game_dir.path == QStringLiteral("SysNAND")) { | ||
| 364 | auto* const game_list_dir = new GameListDir(game_dir, GameListItemType::SysNandDir); | ||
| 365 | emit DirEntryReady({game_list_dir}); | ||
| 366 | AddTitlesToGameList(game_list_dir); | ||
| 367 | } else { | ||
| 368 | watch_list.append(game_dir.path); | ||
| 369 | auto* const game_list_dir = new GameListDir(game_dir); | ||
| 370 | emit DirEntryReady({game_list_dir}); | ||
| 371 | provider->ClearAllEntries(); | ||
| 372 | ScanFileSystem(ScanTarget::FillManualContentProvider, game_dir.path.toStdString(), 2, | ||
| 373 | game_list_dir); | ||
| 374 | ScanFileSystem(ScanTarget::PopulateGameList, game_dir.path.toStdString(), | ||
| 375 | game_dir.deep_scan ? 256 : 0, game_list_dir); | ||
| 376 | } | ||
| 377 | }; | ||
| 378 | |||
| 341 | emit Finished(watch_list); | 379 | emit Finished(watch_list); |
| 342 | } | 380 | } |
| 343 | 381 | ||
diff --git a/src/yuzu/game_list_worker.h b/src/yuzu/game_list_worker.h index 7c3074af9..6e52fca89 100644 --- a/src/yuzu/game_list_worker.h +++ b/src/yuzu/game_list_worker.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <QObject> | 14 | #include <QObject> |
| 15 | #include <QRunnable> | 15 | #include <QRunnable> |
| 16 | #include <QString> | 16 | #include <QString> |
| 17 | #include <QVector> | ||
| 17 | 18 | ||
| 18 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 19 | #include "yuzu/compatibility_list.h" | 20 | #include "yuzu/compatibility_list.h" |
| @@ -33,9 +34,10 @@ class GameListWorker : public QObject, public QRunnable { | |||
| 33 | Q_OBJECT | 34 | Q_OBJECT |
| 34 | 35 | ||
| 35 | public: | 36 | public: |
| 36 | GameListWorker(std::shared_ptr<FileSys::VfsFilesystem> vfs, | 37 | explicit GameListWorker(std::shared_ptr<FileSys::VfsFilesystem> vfs, |
| 37 | FileSys::ManualContentProvider* provider, QString dir_path, bool deep_scan, | 38 | FileSys::ManualContentProvider* provider, |
| 38 | const CompatibilityList& compatibility_list); | 39 | QVector<UISettings::GameDir>& game_dirs, |
| 40 | const CompatibilityList& compatibility_list); | ||
| 39 | ~GameListWorker() override; | 41 | ~GameListWorker() override; |
| 40 | 42 | ||
| 41 | /// Starts the processing of directory tree information. | 43 | /// Starts the processing of directory tree information. |
| @@ -48,31 +50,33 @@ signals: | |||
| 48 | /** | 50 | /** |
| 49 | * The `EntryReady` signal is emitted once an entry has been prepared and is ready | 51 | * The `EntryReady` signal is emitted once an entry has been prepared and is ready |
| 50 | * to be added to the game list. | 52 | * to be added to the game list. |
| 51 | * @param entry_items a list with `QStandardItem`s that make up the columns of the new entry. | 53 | * @param entry_items a list with `QStandardItem`s that make up the columns of the new |
| 54 | * entry. | ||
| 52 | */ | 55 | */ |
| 53 | void EntryReady(QList<QStandardItem*> entry_items); | 56 | void DirEntryReady(GameListDir* entry_items); |
| 57 | void EntryReady(QList<QStandardItem*> entry_items, GameListDir* parent_dir); | ||
| 54 | 58 | ||
| 55 | /** | 59 | /** |
| 56 | * After the worker has traversed the game directory looking for entries, this signal is emitted | 60 | * After the worker has traversed the game directory looking for entries, this signal is |
| 57 | * with a list of folders that should be watched for changes as well. | 61 | * emitted with a list of folders that should be watched for changes as well. |
| 58 | */ | 62 | */ |
| 59 | void Finished(QStringList watch_list); | 63 | void Finished(QStringList watch_list); |
| 60 | 64 | ||
| 61 | private: | 65 | private: |
| 62 | void AddTitlesToGameList(); | 66 | void AddTitlesToGameList(GameListDir* parent_dir); |
| 63 | 67 | ||
| 64 | enum class ScanTarget { | 68 | enum class ScanTarget { |
| 65 | FillManualContentProvider, | 69 | FillManualContentProvider, |
| 66 | PopulateGameList, | 70 | PopulateGameList, |
| 67 | }; | 71 | }; |
| 68 | 72 | ||
| 69 | void ScanFileSystem(ScanTarget target, const std::string& dir_path, unsigned int recursion = 0); | 73 | void ScanFileSystem(ScanTarget target, const std::string& dir_path, unsigned int recursion, |
| 74 | GameListDir* parent_dir); | ||
| 70 | 75 | ||
| 71 | std::shared_ptr<FileSys::VfsFilesystem> vfs; | 76 | std::shared_ptr<FileSys::VfsFilesystem> vfs; |
| 72 | FileSys::ManualContentProvider* provider; | 77 | FileSys::ManualContentProvider* provider; |
| 73 | QStringList watch_list; | 78 | QStringList watch_list; |
| 74 | QString dir_path; | ||
| 75 | bool deep_scan; | ||
| 76 | const CompatibilityList& compatibility_list; | 79 | const CompatibilityList& compatibility_list; |
| 80 | QVector<UISettings::GameDir>& game_dirs; | ||
| 77 | std::atomic_bool stop_processing; | 81 | std::atomic_bool stop_processing; |
| 78 | }; | 82 | }; |
diff --git a/src/yuzu/hotkeys.cpp b/src/yuzu/hotkeys.cpp index 4582e7f21..d4e97fa16 100644 --- a/src/yuzu/hotkeys.cpp +++ b/src/yuzu/hotkeys.cpp | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | #include <QTreeWidgetItem> | 7 | #include <QTreeWidgetItem> |
| 8 | #include <QtGlobal> | 8 | #include <QtGlobal> |
| 9 | #include "yuzu/hotkeys.h" | 9 | #include "yuzu/hotkeys.h" |
| 10 | #include "yuzu/ui_settings.h" | 10 | #include "yuzu/uisettings.h" |
| 11 | 11 | ||
| 12 | HotkeyRegistry::HotkeyRegistry() = default; | 12 | HotkeyRegistry::HotkeyRegistry() = default; |
| 13 | HotkeyRegistry::~HotkeyRegistry() = default; | 13 | HotkeyRegistry::~HotkeyRegistry() = default; |
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 47e46f574..8304c6517 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -6,6 +6,9 @@ | |||
| 6 | #include <clocale> | 6 | #include <clocale> |
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <thread> | 8 | #include <thread> |
| 9 | #ifdef __APPLE__ | ||
| 10 | #include <unistd.h> // for chdir | ||
| 11 | #endif | ||
| 9 | 12 | ||
| 10 | // VFS includes must be before glad as they will conflict with Windows file api, which uses defines. | 13 | // VFS includes must be before glad as they will conflict with Windows file api, which uses defines. |
| 11 | #include "applets/error.h" | 14 | #include "applets/error.h" |
| @@ -100,7 +103,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual | |||
| 100 | #include "yuzu/hotkeys.h" | 103 | #include "yuzu/hotkeys.h" |
| 101 | #include "yuzu/loading_screen.h" | 104 | #include "yuzu/loading_screen.h" |
| 102 | #include "yuzu/main.h" | 105 | #include "yuzu/main.h" |
| 103 | #include "yuzu/ui_settings.h" | 106 | #include "yuzu/uisettings.h" |
| 104 | 107 | ||
| 105 | #ifdef USE_DISCORD_PRESENCE | 108 | #ifdef USE_DISCORD_PRESENCE |
| 106 | #include "yuzu/discord_impl.h" | 109 | #include "yuzu/discord_impl.h" |
| @@ -119,6 +122,7 @@ Q_IMPORT_PLUGIN(QWindowsIntegrationPlugin); | |||
| 119 | #endif | 122 | #endif |
| 120 | 123 | ||
| 121 | #ifdef _WIN32 | 124 | #ifdef _WIN32 |
| 125 | #include <windows.h> | ||
| 122 | extern "C" { | 126 | extern "C" { |
| 123 | // tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable | 127 | // tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable |
| 124 | // graphics | 128 | // graphics |
| @@ -215,8 +219,7 @@ GMainWindow::GMainWindow() | |||
| 215 | OnReinitializeKeys(ReinitializeKeyBehavior::NoWarning); | 219 | OnReinitializeKeys(ReinitializeKeyBehavior::NoWarning); |
| 216 | 220 | ||
| 217 | game_list->LoadCompatibilityList(); | 221 | game_list->LoadCompatibilityList(); |
| 218 | game_list->PopulateAsync(UISettings::values.game_directory_path, | 222 | game_list->PopulateAsync(UISettings::values.game_dirs); |
| 219 | UISettings::values.game_directory_deepscan); | ||
| 220 | 223 | ||
| 221 | // Show one-time "callout" messages to the user | 224 | // Show one-time "callout" messages to the user |
| 222 | ShowTelemetryCallout(); | 225 | ShowTelemetryCallout(); |
| @@ -426,6 +429,10 @@ void GMainWindow::InitializeWidgets() { | |||
| 426 | game_list = new GameList(vfs, provider.get(), this); | 429 | game_list = new GameList(vfs, provider.get(), this); |
| 427 | ui.horizontalLayout->addWidget(game_list); | 430 | ui.horizontalLayout->addWidget(game_list); |
| 428 | 431 | ||
| 432 | game_list_placeholder = new GameListPlaceholder(this); | ||
| 433 | ui.horizontalLayout->addWidget(game_list_placeholder); | ||
| 434 | game_list_placeholder->setVisible(false); | ||
| 435 | |||
| 429 | loading_screen = new LoadingScreen(this); | 436 | loading_screen = new LoadingScreen(this); |
| 430 | loading_screen->hide(); | 437 | loading_screen->hide(); |
| 431 | ui.horizontalLayout->addWidget(loading_screen); | 438 | ui.horizontalLayout->addWidget(loading_screen); |
| @@ -659,6 +666,7 @@ void GMainWindow::RestoreUIState() { | |||
| 659 | 666 | ||
| 660 | void GMainWindow::ConnectWidgetEvents() { | 667 | void GMainWindow::ConnectWidgetEvents() { |
| 661 | connect(game_list, &GameList::GameChosen, this, &GMainWindow::OnGameListLoadFile); | 668 | connect(game_list, &GameList::GameChosen, this, &GMainWindow::OnGameListLoadFile); |
| 669 | connect(game_list, &GameList::OpenDirectory, this, &GMainWindow::OnGameListOpenDirectory); | ||
| 662 | connect(game_list, &GameList::OpenFolderRequested, this, &GMainWindow::OnGameListOpenFolder); | 670 | connect(game_list, &GameList::OpenFolderRequested, this, &GMainWindow::OnGameListOpenFolder); |
| 663 | connect(game_list, &GameList::OpenTransferableShaderCacheRequested, this, | 671 | connect(game_list, &GameList::OpenTransferableShaderCacheRequested, this, |
| 664 | &GMainWindow::OnTransferableShaderCacheOpenFile); | 672 | &GMainWindow::OnTransferableShaderCacheOpenFile); |
| @@ -666,6 +674,11 @@ void GMainWindow::ConnectWidgetEvents() { | |||
| 666 | connect(game_list, &GameList::CopyTIDRequested, this, &GMainWindow::OnGameListCopyTID); | 674 | connect(game_list, &GameList::CopyTIDRequested, this, &GMainWindow::OnGameListCopyTID); |
| 667 | connect(game_list, &GameList::NavigateToGamedbEntryRequested, this, | 675 | connect(game_list, &GameList::NavigateToGamedbEntryRequested, this, |
| 668 | &GMainWindow::OnGameListNavigateToGamedbEntry); | 676 | &GMainWindow::OnGameListNavigateToGamedbEntry); |
| 677 | connect(game_list, &GameList::AddDirectory, this, &GMainWindow::OnGameListAddDirectory); | ||
| 678 | connect(game_list_placeholder, &GameListPlaceholder::AddDirectory, this, | ||
| 679 | &GMainWindow::OnGameListAddDirectory); | ||
| 680 | connect(game_list, &GameList::ShowList, this, &GMainWindow::OnGameListShowList); | ||
| 681 | |||
| 669 | connect(game_list, &GameList::OpenPerGameGeneralRequested, this, | 682 | connect(game_list, &GameList::OpenPerGameGeneralRequested, this, |
| 670 | &GMainWindow::OnGameListOpenPerGameProperties); | 683 | &GMainWindow::OnGameListOpenPerGameProperties); |
| 671 | 684 | ||
| @@ -683,8 +696,6 @@ void GMainWindow::ConnectMenuEvents() { | |||
| 683 | connect(ui.action_Load_Folder, &QAction::triggered, this, &GMainWindow::OnMenuLoadFolder); | 696 | connect(ui.action_Load_Folder, &QAction::triggered, this, &GMainWindow::OnMenuLoadFolder); |
| 684 | connect(ui.action_Install_File_NAND, &QAction::triggered, this, | 697 | connect(ui.action_Install_File_NAND, &QAction::triggered, this, |
| 685 | &GMainWindow::OnMenuInstallToNAND); | 698 | &GMainWindow::OnMenuInstallToNAND); |
| 686 | connect(ui.action_Select_Game_List_Root, &QAction::triggered, this, | ||
| 687 | &GMainWindow::OnMenuSelectGameListRoot); | ||
| 688 | connect(ui.action_Select_NAND_Directory, &QAction::triggered, this, | 699 | connect(ui.action_Select_NAND_Directory, &QAction::triggered, this, |
| 689 | [this] { OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget::NAND); }); | 700 | [this] { OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget::NAND); }); |
| 690 | connect(ui.action_Select_SDMC_Directory, &QAction::triggered, this, | 701 | connect(ui.action_Select_SDMC_Directory, &QAction::triggered, this, |
| @@ -747,9 +758,24 @@ void GMainWindow::OnDisplayTitleBars(bool show) { | |||
| 747 | } | 758 | } |
| 748 | } | 759 | } |
| 749 | 760 | ||
| 761 | void GMainWindow::PreventOSSleep() { | ||
| 762 | #ifdef _WIN32 | ||
| 763 | SetThreadExecutionState(ES_CONTINUOUS | ES_SYSTEM_REQUIRED | ES_DISPLAY_REQUIRED); | ||
| 764 | #endif | ||
| 765 | } | ||
| 766 | |||
| 767 | void GMainWindow::AllowOSSleep() { | ||
| 768 | #ifdef _WIN32 | ||
| 769 | SetThreadExecutionState(ES_CONTINUOUS); | ||
| 770 | #endif | ||
| 771 | } | ||
| 772 | |||
| 750 | QStringList GMainWindow::GetUnsupportedGLExtensions() { | 773 | QStringList GMainWindow::GetUnsupportedGLExtensions() { |
| 751 | QStringList unsupported_ext; | 774 | QStringList unsupported_ext; |
| 752 | 775 | ||
| 776 | if (!GLAD_GL_ARB_buffer_storage) { | ||
| 777 | unsupported_ext.append(QStringLiteral("ARB_buffer_storage")); | ||
| 778 | } | ||
| 753 | if (!GLAD_GL_ARB_direct_state_access) { | 779 | if (!GLAD_GL_ARB_direct_state_access) { |
| 754 | unsupported_ext.append(QStringLiteral("ARB_direct_state_access")); | 780 | unsupported_ext.append(QStringLiteral("ARB_direct_state_access")); |
| 755 | } | 781 | } |
| @@ -934,6 +960,7 @@ void GMainWindow::BootGame(const QString& filename) { | |||
| 934 | // Update the GUI | 960 | // Update the GUI |
| 935 | if (ui.action_Single_Window_Mode->isChecked()) { | 961 | if (ui.action_Single_Window_Mode->isChecked()) { |
| 936 | game_list->hide(); | 962 | game_list->hide(); |
| 963 | game_list_placeholder->hide(); | ||
| 937 | } | 964 | } |
| 938 | status_bar_update_timer.start(2000); | 965 | status_bar_update_timer.start(2000); |
| 939 | 966 | ||
| @@ -963,6 +990,8 @@ void GMainWindow::BootGame(const QString& filename) { | |||
| 963 | } | 990 | } |
| 964 | 991 | ||
| 965 | void GMainWindow::ShutdownGame() { | 992 | void GMainWindow::ShutdownGame() { |
| 993 | AllowOSSleep(); | ||
| 994 | |||
| 966 | discord_rpc->Pause(); | 995 | discord_rpc->Pause(); |
| 967 | emu_thread->RequestStop(); | 996 | emu_thread->RequestStop(); |
| 968 | 997 | ||
| @@ -989,7 +1018,10 @@ void GMainWindow::ShutdownGame() { | |||
| 989 | render_window->hide(); | 1018 | render_window->hide(); |
| 990 | loading_screen->hide(); | 1019 | loading_screen->hide(); |
| 991 | loading_screen->Clear(); | 1020 | loading_screen->Clear(); |
| 992 | game_list->show(); | 1021 | if (game_list->isEmpty()) |
| 1022 | game_list_placeholder->show(); | ||
| 1023 | else | ||
| 1024 | game_list->show(); | ||
| 993 | game_list->setFilterFocus(); | 1025 | game_list->setFilterFocus(); |
| 994 | 1026 | ||
| 995 | UpdateWindowTitle(); | 1027 | UpdateWindowTitle(); |
| @@ -1280,6 +1312,47 @@ void GMainWindow::OnGameListNavigateToGamedbEntry(u64 program_id, | |||
| 1280 | QDesktopServices::openUrl(QUrl(QStringLiteral("https://yuzu-emu.org/game/") + directory)); | 1312 | QDesktopServices::openUrl(QUrl(QStringLiteral("https://yuzu-emu.org/game/") + directory)); |
| 1281 | } | 1313 | } |
| 1282 | 1314 | ||
| 1315 | void GMainWindow::OnGameListOpenDirectory(const QString& directory) { | ||
| 1316 | QString path; | ||
| 1317 | if (directory == QStringLiteral("SDMC")) { | ||
| 1318 | path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir) + | ||
| 1319 | "Nintendo/Contents/registered"); | ||
| 1320 | } else if (directory == QStringLiteral("UserNAND")) { | ||
| 1321 | path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) + | ||
| 1322 | "user/Contents/registered"); | ||
| 1323 | } else if (directory == QStringLiteral("SysNAND")) { | ||
| 1324 | path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) + | ||
| 1325 | "system/Contents/registered"); | ||
| 1326 | } else { | ||
| 1327 | path = directory; | ||
| 1328 | } | ||
| 1329 | if (!QFileInfo::exists(path)) { | ||
| 1330 | QMessageBox::critical(this, tr("Error Opening %1").arg(path), tr("Folder does not exist!")); | ||
| 1331 | return; | ||
| 1332 | } | ||
| 1333 | QDesktopServices::openUrl(QUrl::fromLocalFile(path)); | ||
| 1334 | } | ||
| 1335 | |||
| 1336 | void GMainWindow::OnGameListAddDirectory() { | ||
| 1337 | const QString dir_path = QFileDialog::getExistingDirectory(this, tr("Select Directory")); | ||
| 1338 | if (dir_path.isEmpty()) | ||
| 1339 | return; | ||
| 1340 | UISettings::GameDir game_dir{dir_path, false, true}; | ||
| 1341 | if (!UISettings::values.game_dirs.contains(game_dir)) { | ||
| 1342 | UISettings::values.game_dirs.append(game_dir); | ||
| 1343 | game_list->PopulateAsync(UISettings::values.game_dirs); | ||
| 1344 | } else { | ||
| 1345 | LOG_WARNING(Frontend, "Selected directory is already in the game list"); | ||
| 1346 | } | ||
| 1347 | } | ||
| 1348 | |||
| 1349 | void GMainWindow::OnGameListShowList(bool show) { | ||
| 1350 | if (emulation_running && ui.action_Single_Window_Mode->isChecked()) | ||
| 1351 | return; | ||
| 1352 | game_list->setVisible(show); | ||
| 1353 | game_list_placeholder->setVisible(!show); | ||
| 1354 | }; | ||
| 1355 | |||
| 1283 | void GMainWindow::OnGameListOpenPerGameProperties(const std::string& file) { | 1356 | void GMainWindow::OnGameListOpenPerGameProperties(const std::string& file) { |
| 1284 | u64 title_id{}; | 1357 | u64 title_id{}; |
| 1285 | const auto v_file = Core::GetGameFileFromPath(vfs, file); | 1358 | const auto v_file = Core::GetGameFileFromPath(vfs, file); |
| @@ -1298,8 +1371,7 @@ void GMainWindow::OnGameListOpenPerGameProperties(const std::string& file) { | |||
| 1298 | 1371 | ||
| 1299 | const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false); | 1372 | const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false); |
| 1300 | if (reload) { | 1373 | if (reload) { |
| 1301 | game_list->PopulateAsync(UISettings::values.game_directory_path, | 1374 | game_list->PopulateAsync(UISettings::values.game_dirs); |
| 1302 | UISettings::values.game_directory_deepscan); | ||
| 1303 | } | 1375 | } |
| 1304 | 1376 | ||
| 1305 | config->Save(); | 1377 | config->Save(); |
| @@ -1389,8 +1461,7 @@ void GMainWindow::OnMenuInstallToNAND() { | |||
| 1389 | const auto success = [this]() { | 1461 | const auto success = [this]() { |
| 1390 | QMessageBox::information(this, tr("Successfully Installed"), | 1462 | QMessageBox::information(this, tr("Successfully Installed"), |
| 1391 | tr("The file was successfully installed.")); | 1463 | tr("The file was successfully installed.")); |
| 1392 | game_list->PopulateAsync(UISettings::values.game_directory_path, | 1464 | game_list->PopulateAsync(UISettings::values.game_dirs); |
| 1393 | UISettings::values.game_directory_deepscan); | ||
| 1394 | FileUtil::DeleteDirRecursively(FileUtil::GetUserPath(FileUtil::UserPath::CacheDir) + | 1465 | FileUtil::DeleteDirRecursively(FileUtil::GetUserPath(FileUtil::UserPath::CacheDir) + |
| 1395 | DIR_SEP + "game_list"); | 1466 | DIR_SEP + "game_list"); |
| 1396 | }; | 1467 | }; |
| @@ -1515,14 +1586,6 @@ void GMainWindow::OnMenuInstallToNAND() { | |||
| 1515 | } | 1586 | } |
| 1516 | } | 1587 | } |
| 1517 | 1588 | ||
| 1518 | void GMainWindow::OnMenuSelectGameListRoot() { | ||
| 1519 | QString dir_path = QFileDialog::getExistingDirectory(this, tr("Select Directory")); | ||
| 1520 | if (!dir_path.isEmpty()) { | ||
| 1521 | UISettings::values.game_directory_path = dir_path; | ||
| 1522 | game_list->PopulateAsync(dir_path, UISettings::values.game_directory_deepscan); | ||
| 1523 | } | ||
| 1524 | } | ||
| 1525 | |||
| 1526 | void GMainWindow::OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget target) { | 1589 | void GMainWindow::OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget target) { |
| 1527 | const auto res = QMessageBox::information( | 1590 | const auto res = QMessageBox::information( |
| 1528 | this, tr("Changing Emulated Directory"), | 1591 | this, tr("Changing Emulated Directory"), |
| @@ -1541,8 +1604,7 @@ void GMainWindow::OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget target) | |||
| 1541 | : FileUtil::UserPath::NANDDir, | 1604 | : FileUtil::UserPath::NANDDir, |
| 1542 | dir_path.toStdString()); | 1605 | dir_path.toStdString()); |
| 1543 | Service::FileSystem::CreateFactories(*vfs); | 1606 | Service::FileSystem::CreateFactories(*vfs); |
| 1544 | game_list->PopulateAsync(UISettings::values.game_directory_path, | 1607 | game_list->PopulateAsync(UISettings::values.game_dirs); |
| 1545 | UISettings::values.game_directory_deepscan); | ||
| 1546 | } | 1608 | } |
| 1547 | } | 1609 | } |
| 1548 | 1610 | ||
| @@ -1564,6 +1626,8 @@ void GMainWindow::OnMenuRecentFile() { | |||
| 1564 | } | 1626 | } |
| 1565 | 1627 | ||
| 1566 | void GMainWindow::OnStartGame() { | 1628 | void GMainWindow::OnStartGame() { |
| 1629 | PreventOSSleep(); | ||
| 1630 | |||
| 1567 | emu_thread->SetRunning(true); | 1631 | emu_thread->SetRunning(true); |
| 1568 | 1632 | ||
| 1569 | qRegisterMetaType<Core::Frontend::SoftwareKeyboardParameters>( | 1633 | qRegisterMetaType<Core::Frontend::SoftwareKeyboardParameters>( |
| @@ -1595,6 +1659,8 @@ void GMainWindow::OnPauseGame() { | |||
| 1595 | ui.action_Pause->setEnabled(false); | 1659 | ui.action_Pause->setEnabled(false); |
| 1596 | ui.action_Stop->setEnabled(true); | 1660 | ui.action_Stop->setEnabled(true); |
| 1597 | ui.action_Capture_Screenshot->setEnabled(false); | 1661 | ui.action_Capture_Screenshot->setEnabled(false); |
| 1662 | |||
| 1663 | AllowOSSleep(); | ||
| 1598 | } | 1664 | } |
| 1599 | 1665 | ||
| 1600 | void GMainWindow::OnStopGame() { | 1666 | void GMainWindow::OnStopGame() { |
| @@ -1702,11 +1768,11 @@ void GMainWindow::OnConfigure() { | |||
| 1702 | if (UISettings::values.enable_discord_presence != old_discord_presence) { | 1768 | if (UISettings::values.enable_discord_presence != old_discord_presence) { |
| 1703 | SetDiscordEnabled(UISettings::values.enable_discord_presence); | 1769 | SetDiscordEnabled(UISettings::values.enable_discord_presence); |
| 1704 | } | 1770 | } |
| 1771 | emit UpdateThemedIcons(); | ||
| 1705 | 1772 | ||
| 1706 | const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false); | 1773 | const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false); |
| 1707 | if (reload) { | 1774 | if (reload) { |
| 1708 | game_list->PopulateAsync(UISettings::values.game_directory_path, | 1775 | game_list->PopulateAsync(UISettings::values.game_dirs); |
| 1709 | UISettings::values.game_directory_deepscan); | ||
| 1710 | } | 1776 | } |
| 1711 | 1777 | ||
| 1712 | config->Save(); | 1778 | config->Save(); |
| @@ -1840,13 +1906,14 @@ void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string det | |||
| 1840 | "data, or other bugs."); | 1906 | "data, or other bugs."); |
| 1841 | switch (result) { | 1907 | switch (result) { |
| 1842 | case Core::System::ResultStatus::ErrorSystemFiles: { | 1908 | case Core::System::ResultStatus::ErrorSystemFiles: { |
| 1843 | QString message = tr("yuzu was unable to locate a Switch system archive"); | 1909 | QString message; |
| 1844 | if (!details.empty()) { | 1910 | if (details.empty()) { |
| 1845 | message.append(tr(": %1. ").arg(QString::fromStdString(details))); | 1911 | message = |
| 1912 | tr("yuzu was unable to locate a Switch system archive. %1").arg(common_message); | ||
| 1846 | } else { | 1913 | } else { |
| 1847 | message.append(tr(". ")); | 1914 | message = tr("yuzu was unable to locate a Switch system archive: %1. %2") |
| 1915 | .arg(QString::fromStdString(details), common_message); | ||
| 1848 | } | 1916 | } |
| 1849 | message.append(common_message); | ||
| 1850 | 1917 | ||
| 1851 | answer = QMessageBox::question(this, tr("System Archive Not Found"), message, | 1918 | answer = QMessageBox::question(this, tr("System Archive Not Found"), message, |
| 1852 | QMessageBox::Yes | QMessageBox::No, QMessageBox::No); | 1919 | QMessageBox::Yes | QMessageBox::No, QMessageBox::No); |
| @@ -1855,8 +1922,8 @@ void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string det | |||
| 1855 | } | 1922 | } |
| 1856 | 1923 | ||
| 1857 | case Core::System::ResultStatus::ErrorSharedFont: { | 1924 | case Core::System::ResultStatus::ErrorSharedFont: { |
| 1858 | QString message = tr("yuzu was unable to locate the Switch shared fonts. "); | 1925 | const QString message = |
| 1859 | message.append(common_message); | 1926 | tr("yuzu was unable to locate the Switch shared fonts. %1").arg(common_message); |
| 1860 | answer = QMessageBox::question(this, tr("Shared Fonts Not Found"), message, | 1927 | answer = QMessageBox::question(this, tr("Shared Fonts Not Found"), message, |
| 1861 | QMessageBox::Yes | QMessageBox::No, QMessageBox::No); | 1928 | QMessageBox::Yes | QMessageBox::No, QMessageBox::No); |
| 1862 | status_message = tr("Shared Font Missing"); | 1929 | status_message = tr("Shared Font Missing"); |
| @@ -1969,8 +2036,7 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) { | |||
| 1969 | Service::FileSystem::CreateFactories(*vfs); | 2036 | Service::FileSystem::CreateFactories(*vfs); |
| 1970 | 2037 | ||
| 1971 | if (behavior == ReinitializeKeyBehavior::Warning) { | 2038 | if (behavior == ReinitializeKeyBehavior::Warning) { |
| 1972 | game_list->PopulateAsync(UISettings::values.game_directory_path, | 2039 | game_list->PopulateAsync(UISettings::values.game_dirs); |
| 1973 | UISettings::values.game_directory_deepscan); | ||
| 1974 | } | 2040 | } |
| 1975 | } | 2041 | } |
| 1976 | 2042 | ||
| @@ -2135,7 +2201,6 @@ void GMainWindow::UpdateUITheme() { | |||
| 2135 | } | 2201 | } |
| 2136 | 2202 | ||
| 2137 | QIcon::setThemeSearchPaths(theme_paths); | 2203 | QIcon::setThemeSearchPaths(theme_paths); |
| 2138 | emit UpdateThemedIcons(); | ||
| 2139 | } | 2204 | } |
| 2140 | 2205 | ||
| 2141 | void GMainWindow::SetDiscordEnabled([[maybe_unused]] bool state) { | 2206 | void GMainWindow::SetDiscordEnabled([[maybe_unused]] bool state) { |
| @@ -2164,6 +2229,14 @@ int main(int argc, char* argv[]) { | |||
| 2164 | QCoreApplication::setOrganizationName(QStringLiteral("yuzu team")); | 2229 | QCoreApplication::setOrganizationName(QStringLiteral("yuzu team")); |
| 2165 | QCoreApplication::setApplicationName(QStringLiteral("yuzu")); | 2230 | QCoreApplication::setApplicationName(QStringLiteral("yuzu")); |
| 2166 | 2231 | ||
| 2232 | #ifdef __APPLE__ | ||
| 2233 | // If you start a bundle (binary) on OSX without the Terminal, the working directory is "/". | ||
| 2234 | // But since we require the working directory to be the executable path for the location of the | ||
| 2235 | // user folder in the Qt Frontend, we need to cd into that working directory | ||
| 2236 | const std::string bin_path = FileUtil::GetBundleDirectory() + DIR_SEP + ".."; | ||
| 2237 | chdir(bin_path.c_str()); | ||
| 2238 | #endif | ||
| 2239 | |||
| 2167 | // Enables the core to make the qt created contexts current on std::threads | 2240 | // Enables the core to make the qt created contexts current on std::threads |
| 2168 | QCoreApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity); | 2241 | QCoreApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity); |
| 2169 | QApplication app(argc, argv); | 2242 | QApplication app(argc, argv); |
diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 1137bbc7a..7d16188cb 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h | |||
| @@ -30,6 +30,7 @@ class ProfilerWidget; | |||
| 30 | class QLabel; | 30 | class QLabel; |
| 31 | class WaitTreeWidget; | 31 | class WaitTreeWidget; |
| 32 | enum class GameListOpenTarget; | 32 | enum class GameListOpenTarget; |
| 33 | class GameListPlaceholder; | ||
| 33 | 34 | ||
| 34 | namespace Core::Frontend { | 35 | namespace Core::Frontend { |
| 35 | struct SoftwareKeyboardParameters; | 36 | struct SoftwareKeyboardParameters; |
| @@ -130,6 +131,9 @@ private: | |||
| 130 | void ConnectWidgetEvents(); | 131 | void ConnectWidgetEvents(); |
| 131 | void ConnectMenuEvents(); | 132 | void ConnectMenuEvents(); |
| 132 | 133 | ||
| 134 | void PreventOSSleep(); | ||
| 135 | void AllowOSSleep(); | ||
| 136 | |||
| 133 | QStringList GetUnsupportedGLExtensions(); | 137 | QStringList GetUnsupportedGLExtensions(); |
| 134 | bool LoadROM(const QString& filename); | 138 | bool LoadROM(const QString& filename); |
| 135 | void BootGame(const QString& filename); | 139 | void BootGame(const QString& filename); |
| @@ -183,12 +187,13 @@ private slots: | |||
| 183 | void OnGameListCopyTID(u64 program_id); | 187 | void OnGameListCopyTID(u64 program_id); |
| 184 | void OnGameListNavigateToGamedbEntry(u64 program_id, | 188 | void OnGameListNavigateToGamedbEntry(u64 program_id, |
| 185 | const CompatibilityList& compatibility_list); | 189 | const CompatibilityList& compatibility_list); |
| 190 | void OnGameListOpenDirectory(const QString& directory); | ||
| 191 | void OnGameListAddDirectory(); | ||
| 192 | void OnGameListShowList(bool show); | ||
| 186 | void OnGameListOpenPerGameProperties(const std::string& file); | 193 | void OnGameListOpenPerGameProperties(const std::string& file); |
| 187 | void OnMenuLoadFile(); | 194 | void OnMenuLoadFile(); |
| 188 | void OnMenuLoadFolder(); | 195 | void OnMenuLoadFolder(); |
| 189 | void OnMenuInstallToNAND(); | 196 | void OnMenuInstallToNAND(); |
| 190 | /// Called whenever a user selects the "File->Select Game List Root" menu item | ||
| 191 | void OnMenuSelectGameListRoot(); | ||
| 192 | /// Called whenever a user select the "File->Select -- Directory" where -- is NAND or SD Card | 197 | /// Called whenever a user select the "File->Select -- Directory" where -- is NAND or SD Card |
| 193 | void OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget target); | 198 | void OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget target); |
| 194 | void OnMenuRecentFile(); | 199 | void OnMenuRecentFile(); |
| @@ -220,6 +225,8 @@ private: | |||
| 220 | GameList* game_list; | 225 | GameList* game_list; |
| 221 | LoadingScreen* loading_screen; | 226 | LoadingScreen* loading_screen; |
| 222 | 227 | ||
| 228 | GameListPlaceholder* game_list_placeholder; | ||
| 229 | |||
| 223 | // Status bar elements | 230 | // Status bar elements |
| 224 | QLabel* message_label = nullptr; | 231 | QLabel* message_label = nullptr; |
| 225 | QLabel* emu_speed_label = nullptr; | 232 | QLabel* emu_speed_label = nullptr; |
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui index ffcabb495..a1ce3c0c3 100644 --- a/src/yuzu/main.ui +++ b/src/yuzu/main.ui | |||
| @@ -62,7 +62,6 @@ | |||
| 62 | <addaction name="action_Load_File"/> | 62 | <addaction name="action_Load_File"/> |
| 63 | <addaction name="action_Load_Folder"/> | 63 | <addaction name="action_Load_Folder"/> |
| 64 | <addaction name="separator"/> | 64 | <addaction name="separator"/> |
| 65 | <addaction name="action_Select_Game_List_Root"/> | ||
| 66 | <addaction name="menu_recent_files"/> | 65 | <addaction name="menu_recent_files"/> |
| 67 | <addaction name="separator"/> | 66 | <addaction name="separator"/> |
| 68 | <addaction name="action_Select_NAND_Directory"/> | 67 | <addaction name="action_Select_NAND_Directory"/> |
diff --git a/src/yuzu/ui_settings.cpp b/src/yuzu/uisettings.cpp index 4bdc302e0..7f7d247a3 100644 --- a/src/yuzu/ui_settings.cpp +++ b/src/yuzu/uisettings.cpp | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "ui_settings.h" | 5 | #include "yuzu/uisettings.h" |
| 6 | 6 | ||
| 7 | namespace UISettings { | 7 | namespace UISettings { |
| 8 | 8 | ||
diff --git a/src/yuzu/ui_settings.h b/src/yuzu/uisettings.h index a62cd6911..c57290006 100644 --- a/src/yuzu/ui_settings.h +++ b/src/yuzu/uisettings.h | |||
| @@ -8,8 +8,10 @@ | |||
| 8 | #include <atomic> | 8 | #include <atomic> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | #include <QByteArray> | 10 | #include <QByteArray> |
| 11 | #include <QMetaType> | ||
| 11 | #include <QString> | 12 | #include <QString> |
| 12 | #include <QStringList> | 13 | #include <QStringList> |
| 14 | #include <QVector> | ||
| 13 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 14 | 16 | ||
| 15 | namespace UISettings { | 17 | namespace UISettings { |
| @@ -25,6 +27,18 @@ struct Shortcut { | |||
| 25 | using Themes = std::array<std::pair<const char*, const char*>, 2>; | 27 | using Themes = std::array<std::pair<const char*, const char*>, 2>; |
| 26 | extern const Themes themes; | 28 | extern const Themes themes; |
| 27 | 29 | ||
| 30 | struct GameDir { | ||
| 31 | QString path; | ||
| 32 | bool deep_scan; | ||
| 33 | bool expanded; | ||
| 34 | bool operator==(const GameDir& rhs) const { | ||
| 35 | return path == rhs.path; | ||
| 36 | }; | ||
| 37 | bool operator!=(const GameDir& rhs) const { | ||
| 38 | return !operator==(rhs); | ||
| 39 | }; | ||
| 40 | }; | ||
| 41 | |||
| 28 | struct Values { | 42 | struct Values { |
| 29 | QByteArray geometry; | 43 | QByteArray geometry; |
| 30 | QByteArray state; | 44 | QByteArray state; |
| @@ -55,8 +69,9 @@ struct Values { | |||
| 55 | QString roms_path; | 69 | QString roms_path; |
| 56 | QString symbols_path; | 70 | QString symbols_path; |
| 57 | QString screenshot_path; | 71 | QString screenshot_path; |
| 58 | QString game_directory_path; | 72 | QString game_dir_deprecated; |
| 59 | bool game_directory_deepscan; | 73 | bool game_dir_deprecated_deepscan; |
| 74 | QVector<UISettings::GameDir> game_dirs; | ||
| 60 | QStringList recent_files; | 75 | QStringList recent_files; |
| 61 | 76 | ||
| 62 | QString theme; | 77 | QString theme; |
| @@ -84,3 +99,5 @@ struct Values { | |||
| 84 | 99 | ||
| 85 | extern Values values; | 100 | extern Values values; |
| 86 | } // namespace UISettings | 101 | } // namespace UISettings |
| 102 | |||
| 103 | Q_DECLARE_METATYPE(UISettings::GameDir*); | ||
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 9ac92e937..067d58d80 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -340,7 +340,6 @@ void Config::ReadValues() { | |||
| 340 | } | 340 | } |
| 341 | 341 | ||
| 342 | // Core | 342 | // Core |
| 343 | Settings::values.use_cpu_jit = sdl2_config->GetBoolean("Core", "use_cpu_jit", true); | ||
| 344 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); | 343 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); |
| 345 | 344 | ||
| 346 | // Renderer | 345 | // Renderer |
| @@ -383,6 +382,7 @@ void Config::ReadValues() { | |||
| 383 | Settings::values.dump_nso = sdl2_config->GetBoolean("Debugging", "dump_nso", false); | 382 | Settings::values.dump_nso = sdl2_config->GetBoolean("Debugging", "dump_nso", false); |
| 384 | Settings::values.reporting_services = | 383 | Settings::values.reporting_services = |
| 385 | sdl2_config->GetBoolean("Debugging", "reporting_services", false); | 384 | sdl2_config->GetBoolean("Debugging", "reporting_services", false); |
| 385 | Settings::values.quest_flag = sdl2_config->GetBoolean("Debugging", "quest_flag", false); | ||
| 386 | 386 | ||
| 387 | const auto title_list = sdl2_config->Get("AddOns", "title_ids", ""); | 387 | const auto title_list = sdl2_config->Get("AddOns", "title_ids", ""); |
| 388 | std::stringstream ss(title_list); | 388 | std::stringstream ss(title_list); |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 6538af098..0cfc111a6 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -76,10 +76,6 @@ motion_device= | |||
| 76 | touch_device= | 76 | touch_device= |
| 77 | 77 | ||
| 78 | [Core] | 78 | [Core] |
| 79 | # Whether to use the Just-In-Time (JIT) compiler for CPU emulation | ||
| 80 | # 0: Interpreter (slow), 1 (default): JIT (fast) | ||
| 81 | use_cpu_jit = | ||
| 82 | |||
| 83 | # Whether to use multi-core for CPU emulation | 79 | # Whether to use multi-core for CPU emulation |
| 84 | # 0 (default): Disabled, 1: Enabled | 80 | # 0 (default): Disabled, 1: Enabled |
| 85 | use_multi_core= | 81 | use_multi_core= |
| @@ -224,6 +220,9 @@ gdbstub_port=24689 | |||
| 224 | dump_exefs=false | 220 | dump_exefs=false |
| 225 | # Determines whether or not yuzu will dump all NSOs it attempts to load while loading them | 221 | # Determines whether or not yuzu will dump all NSOs it attempts to load while loading them |
| 226 | dump_nso=false | 222 | dump_nso=false |
| 223 | # Determines whether or not yuzu will report to the game that the emulated console is in Kiosk Mode | ||
| 224 | # false: Retail/Normal Mode (default), true: Kiosk Mode | ||
| 225 | quest_flag = | ||
| 227 | 226 | ||
| 228 | [WebService] | 227 | [WebService] |
| 229 | # Whether or not to enable telemetry | 228 | # Whether or not to enable telemetry |
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index e2d3df180..f91b071bf 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp | |||
| @@ -52,6 +52,10 @@ private: | |||
| 52 | bool EmuWindow_SDL2_GL::SupportsRequiredGLExtensions() { | 52 | bool EmuWindow_SDL2_GL::SupportsRequiredGLExtensions() { |
| 53 | std::vector<std::string> unsupported_ext; | 53 | std::vector<std::string> unsupported_ext; |
| 54 | 54 | ||
| 55 | if (!GLAD_GL_ARB_buffer_storage) | ||
| 56 | unsupported_ext.push_back("ARB_buffer_storage"); | ||
| 57 | if (!GLAD_GL_ARB_direct_state_access) | ||
| 58 | unsupported_ext.push_back("ARB_direct_state_access"); | ||
| 55 | if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev) | 59 | if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev) |
| 56 | unsupported_ext.push_back("ARB_vertex_type_10f_11f_11f_rev"); | 60 | unsupported_ext.push_back("ARB_vertex_type_10f_11f_11f_rev"); |
| 57 | if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge) | 61 | if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge) |
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index d7e0d408d..9a11dc6c3 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp | |||
| @@ -114,7 +114,6 @@ void Config::ReadValues() { | |||
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | // Core | 116 | // Core |
| 117 | Settings::values.use_cpu_jit = sdl2_config->GetBoolean("Core", "use_cpu_jit", true); | ||
| 118 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); | 117 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); |
| 119 | 118 | ||
| 120 | // Renderer | 119 | // Renderer |
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h index 46a9960cd..9a3e86d68 100644 --- a/src/yuzu_tester/default_ini.h +++ b/src/yuzu_tester/default_ini.h | |||
| @@ -8,10 +8,6 @@ namespace DefaultINI { | |||
| 8 | 8 | ||
| 9 | const char* sdl2_config_file = R"( | 9 | const char* sdl2_config_file = R"( |
| 10 | [Core] | 10 | [Core] |
| 11 | # Whether to use the Just-In-Time (JIT) compiler for CPU emulation | ||
| 12 | # 0: Interpreter (slow), 1 (default): JIT (fast) | ||
| 13 | use_cpu_jit = | ||
| 14 | |||
| 15 | # Whether to use multi-core for CPU emulation | 11 | # Whether to use multi-core for CPU emulation |
| 16 | # 0 (default): Disabled, 1: Enabled | 12 | # 0 (default): Disabled, 1: Enabled |
| 17 | use_multi_core= | 13 | use_multi_core= |
diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp index b589c3de3..0ee97aa54 100644 --- a/src/yuzu_tester/yuzu.cpp +++ b/src/yuzu_tester/yuzu.cpp | |||
| @@ -92,7 +92,6 @@ int main(int argc, char** argv) { | |||
| 92 | 92 | ||
| 93 | int option_index = 0; | 93 | int option_index = 0; |
| 94 | 94 | ||
| 95 | char* endarg; | ||
| 96 | #ifdef _WIN32 | 95 | #ifdef _WIN32 |
| 97 | int argc_w; | 96 | int argc_w; |
| 98 | auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w); | 97 | auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w); |
| @@ -226,7 +225,7 @@ int main(int argc, char** argv) { | |||
| 226 | 225 | ||
| 227 | switch (load_result) { | 226 | switch (load_result) { |
| 228 | case Core::System::ResultStatus::ErrorGetLoader: | 227 | case Core::System::ResultStatus::ErrorGetLoader: |
| 229 | LOG_CRITICAL(Frontend, "Failed to obtain loader for %s!", filepath.c_str()); | 228 | LOG_CRITICAL(Frontend, "Failed to obtain loader for {}!", filepath); |
| 230 | return -1; | 229 | return -1; |
| 231 | case Core::System::ResultStatus::ErrorLoader: | 230 | case Core::System::ResultStatus::ErrorLoader: |
| 232 | LOG_CRITICAL(Frontend, "Failed to load ROM!"); | 231 | LOG_CRITICAL(Frontend, "Failed to load ROM!"); |