summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/CMakeLists.txt4
-rw-r--r--src/core/file_sys/partition_filesystem.cpp24
-rw-r--r--src/core/file_sys/partition_filesystem.h8
-rw-r--r--src/core/hle/kernel/address_arbiter.cpp173
-rw-r--r--src/core/hle/kernel/address_arbiter.h32
-rw-r--r--src/core/hle/kernel/errors.h12
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp5
-rw-r--r--src/core/hle/kernel/mutex.cpp4
-rw-r--r--src/core/hle/kernel/svc.cpp68
-rw-r--r--src/core/hle/kernel/svc_wrap.h14
-rw-r--r--src/core/hle/kernel/thread.cpp6
-rw-r--r--src/core/hle/kernel/thread.h4
-rw-r--r--src/core/hle/service/audio/audren_u.cpp159
-rw-r--r--src/core/hle/service/audio/audren_u.h37
-rw-r--r--src/core/hle/service/hid/hid.cpp7
-rw-r--r--src/core/hle/service/hid/hid.h2
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp3
-rw-r--r--src/core/hle/service/set/set.cpp5
-rw-r--r--src/core/loader/loader.cpp10
-rw-r--r--src/core/loader/loader.h1
-rw-r--r--src/core/loader/nca.cpp303
-rw-r--r--src/core/loader/nca.h49
-rw-r--r--src/core/loader/nso.cpp79
-rw-r--r--src/core/loader/nso.h3
-rw-r--r--src/core/memory.cpp4
-rw-r--r--src/core/memory.h7
-rw-r--r--src/video_core/CMakeLists.txt4
-rw-r--r--src/video_core/command_processor.cpp6
-rw-r--r--src/video_core/debug_utils/debug_utils.h6
-rw-r--r--src/video_core/engines/fermi_2d.cpp1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp5
-rw-r--r--src/video_core/engines/maxwell_dma.cpp69
-rw-r--r--src/video_core/engines/maxwell_dma.h155
-rw-r--r--src/video_core/engines/shader_bytecode.h62
-rw-r--r--src/video_core/gpu.cpp2
-rw-r--r--src/video_core/gpu.h3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp64
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h16
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp336
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp4
-rw-r--r--src/video_core/textures/astc.cpp1646
-rw-r--r--src/video_core/textures/astc.h15
-rw-r--r--src/video_core/textures/decoders.cpp3
-rw-r--r--src/yuzu/CMakeLists.txt3
-rw-r--r--src/yuzu/debugger/registers.cpp190
-rw-r--r--src/yuzu/debugger/registers.h42
-rw-r--r--src/yuzu/debugger/registers.ui40
-rw-r--r--src/yuzu/debugger/wait_tree.cpp4
-rw-r--r--src/yuzu/game_list.cpp32
-rw-r--r--src/yuzu/main.cpp31
-rw-r--r--src/yuzu/main.h3
-rw-r--r--src/yuzu/main.ui6
55 files changed, 3198 insertions, 587 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index ba5b02174..51e4088d2 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -40,6 +40,8 @@ add_library(core STATIC
40 hle/config_mem.h 40 hle/config_mem.h
41 hle/ipc.h 41 hle/ipc.h
42 hle/ipc_helpers.h 42 hle/ipc_helpers.h
43 hle/kernel/address_arbiter.cpp
44 hle/kernel/address_arbiter.h
43 hle/kernel/client_port.cpp 45 hle/kernel/client_port.cpp
44 hle/kernel/client_port.h 46 hle/kernel/client_port.h
45 hle/kernel/client_session.cpp 47 hle/kernel/client_session.cpp
@@ -257,6 +259,8 @@ add_library(core STATIC
257 loader/linker.h 259 loader/linker.h
258 loader/loader.cpp 260 loader/loader.cpp
259 loader/loader.h 261 loader/loader.h
262 loader/nca.cpp
263 loader/nca.h
260 loader/nro.cpp 264 loader/nro.cpp
261 loader/nro.h 265 loader/nro.h
262 loader/nso.cpp 266 loader/nso.cpp
diff --git a/src/core/file_sys/partition_filesystem.cpp b/src/core/file_sys/partition_filesystem.cpp
index 808254ecc..874b9e23b 100644
--- a/src/core/file_sys/partition_filesystem.cpp
+++ b/src/core/file_sys/partition_filesystem.cpp
@@ -19,13 +19,20 @@ Loader::ResultStatus PartitionFilesystem::Load(const std::string& file_path, siz
19 if (file.GetSize() < sizeof(Header)) 19 if (file.GetSize() < sizeof(Header))
20 return Loader::ResultStatus::Error; 20 return Loader::ResultStatus::Error;
21 21
22 file.Seek(offset, SEEK_SET);
22 // For cartridges, HFSs can get very large, so we need to calculate the size up to 23 // For cartridges, HFSs can get very large, so we need to calculate the size up to
23 // the actual content itself instead of just blindly reading in the entire file. 24 // the actual content itself instead of just blindly reading in the entire file.
24 Header pfs_header; 25 Header pfs_header;
25 if (!file.ReadBytes(&pfs_header, sizeof(Header))) 26 if (!file.ReadBytes(&pfs_header, sizeof(Header)))
26 return Loader::ResultStatus::Error; 27 return Loader::ResultStatus::Error;
27 28
28 bool is_hfs = (memcmp(pfs_header.magic.data(), "HFS", 3) == 0); 29 if (pfs_header.magic != Common::MakeMagic('H', 'F', 'S', '0') &&
30 pfs_header.magic != Common::MakeMagic('P', 'F', 'S', '0')) {
31 return Loader::ResultStatus::ErrorInvalidFormat;
32 }
33
34 bool is_hfs = pfs_header.magic == Common::MakeMagic('H', 'F', 'S', '0');
35
29 size_t entry_size = is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry); 36 size_t entry_size = is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry);
30 size_t metadata_size = 37 size_t metadata_size =
31 sizeof(Header) + (pfs_header.num_entries * entry_size) + pfs_header.strtab_size; 38 sizeof(Header) + (pfs_header.num_entries * entry_size) + pfs_header.strtab_size;
@@ -50,7 +57,12 @@ Loader::ResultStatus PartitionFilesystem::Load(const std::vector<u8>& file_data,
50 return Loader::ResultStatus::Error; 57 return Loader::ResultStatus::Error;
51 58
52 memcpy(&pfs_header, &file_data[offset], sizeof(Header)); 59 memcpy(&pfs_header, &file_data[offset], sizeof(Header));
53 is_hfs = (memcmp(pfs_header.magic.data(), "HFS", 3) == 0); 60 if (pfs_header.magic != Common::MakeMagic('H', 'F', 'S', '0') &&
61 pfs_header.magic != Common::MakeMagic('P', 'F', 'S', '0')) {
62 return Loader::ResultStatus::ErrorInvalidFormat;
63 }
64
65 is_hfs = pfs_header.magic == Common::MakeMagic('H', 'F', 'S', '0');
54 66
55 size_t entries_offset = offset + sizeof(Header); 67 size_t entries_offset = offset + sizeof(Header);
56 size_t entry_size = is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry); 68 size_t entry_size = is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry);
@@ -73,21 +85,21 @@ u32 PartitionFilesystem::GetNumEntries() const {
73 return pfs_header.num_entries; 85 return pfs_header.num_entries;
74} 86}
75 87
76u64 PartitionFilesystem::GetEntryOffset(int index) const { 88u64 PartitionFilesystem::GetEntryOffset(u32 index) const {
77 if (index > GetNumEntries()) 89 if (index > GetNumEntries())
78 return 0; 90 return 0;
79 91
80 return content_offset + pfs_entries[index].fs_entry.offset; 92 return content_offset + pfs_entries[index].fs_entry.offset;
81} 93}
82 94
83u64 PartitionFilesystem::GetEntrySize(int index) const { 95u64 PartitionFilesystem::GetEntrySize(u32 index) const {
84 if (index > GetNumEntries()) 96 if (index > GetNumEntries())
85 return 0; 97 return 0;
86 98
87 return pfs_entries[index].fs_entry.size; 99 return pfs_entries[index].fs_entry.size;
88} 100}
89 101
90std::string PartitionFilesystem::GetEntryName(int index) const { 102std::string PartitionFilesystem::GetEntryName(u32 index) const {
91 if (index > GetNumEntries()) 103 if (index > GetNumEntries())
92 return ""; 104 return "";
93 105
@@ -113,7 +125,7 @@ u64 PartitionFilesystem::GetFileSize(const std::string& name) const {
113} 125}
114 126
115void PartitionFilesystem::Print() const { 127void PartitionFilesystem::Print() const {
116 NGLOG_DEBUG(Service_FS, "Magic: {:.4}", pfs_header.magic.data()); 128 NGLOG_DEBUG(Service_FS, "Magic: {}", pfs_header.magic);
117 NGLOG_DEBUG(Service_FS, "Files: {}", pfs_header.num_entries); 129 NGLOG_DEBUG(Service_FS, "Files: {}", pfs_header.num_entries);
118 for (u32 i = 0; i < pfs_header.num_entries; i++) { 130 for (u32 i = 0; i < pfs_header.num_entries; i++) {
119 NGLOG_DEBUG(Service_FS, " > File {}: {} (0x{:X} bytes, at 0x{:X})", i, 131 NGLOG_DEBUG(Service_FS, " > File {}: {} (0x{:X} bytes, at 0x{:X})", i,
diff --git a/src/core/file_sys/partition_filesystem.h b/src/core/file_sys/partition_filesystem.h
index 573c90057..9c5810cf1 100644
--- a/src/core/file_sys/partition_filesystem.h
+++ b/src/core/file_sys/partition_filesystem.h
@@ -27,9 +27,9 @@ public:
27 Loader::ResultStatus Load(const std::vector<u8>& file_data, size_t offset = 0); 27 Loader::ResultStatus Load(const std::vector<u8>& file_data, size_t offset = 0);
28 28
29 u32 GetNumEntries() const; 29 u32 GetNumEntries() const;
30 u64 GetEntryOffset(int index) const; 30 u64 GetEntryOffset(u32 index) const;
31 u64 GetEntrySize(int index) const; 31 u64 GetEntrySize(u32 index) const;
32 std::string GetEntryName(int index) const; 32 std::string GetEntryName(u32 index) const;
33 u64 GetFileOffset(const std::string& name) const; 33 u64 GetFileOffset(const std::string& name) const;
34 u64 GetFileSize(const std::string& name) const; 34 u64 GetFileSize(const std::string& name) const;
35 35
@@ -37,7 +37,7 @@ public:
37 37
38private: 38private:
39 struct Header { 39 struct Header {
40 std::array<char, 4> magic; 40 u32_le magic;
41 u32_le num_entries; 41 u32_le num_entries;
42 u32_le strtab_size; 42 u32_le strtab_size;
43 INSERT_PADDING_BYTES(0x4); 43 INSERT_PADDING_BYTES(0x4);
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
new file mode 100644
index 000000000..e9c8369d7
--- /dev/null
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -0,0 +1,173 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_funcs.h"
7#include "common/common_types.h"
8#include "core/core.h"
9#include "core/hle/kernel/errors.h"
10#include "core/hle/kernel/kernel.h"
11#include "core/hle/kernel/process.h"
12#include "core/hle/kernel/thread.h"
13#include "core/hle/lock.h"
14#include "core/memory.h"
15
16namespace Kernel {
17namespace AddressArbiter {
18
19// Performs actual address waiting logic.
20static ResultCode WaitForAddress(VAddr address, s64 timeout) {
21 SharedPtr<Thread> current_thread = GetCurrentThread();
22 current_thread->arb_wait_address = address;
23 current_thread->status = THREADSTATUS_WAIT_ARB;
24 current_thread->wakeup_callback = nullptr;
25
26 current_thread->WakeAfterDelay(timeout);
27
28 Core::System::GetInstance().CpuCore(current_thread->processor_id).PrepareReschedule();
29 return RESULT_TIMEOUT;
30}
31
32// Gets the threads waiting on an address.
33static void GetThreadsWaitingOnAddress(std::vector<SharedPtr<Thread>>& waiting_threads,
34 VAddr address) {
35 auto RetrieveWaitingThreads =
36 [](size_t core_index, std::vector<SharedPtr<Thread>>& waiting_threads, VAddr arb_addr) {
37 const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
38 auto& thread_list = scheduler->GetThreadList();
39
40 for (auto& thread : thread_list) {
41 if (thread->arb_wait_address == arb_addr)
42 waiting_threads.push_back(thread);
43 }
44 };
45
46 // Retrieve a list of all threads that are waiting for this address.
47 RetrieveWaitingThreads(0, waiting_threads, address);
48 RetrieveWaitingThreads(1, waiting_threads, address);
49 RetrieveWaitingThreads(2, waiting_threads, address);
50 RetrieveWaitingThreads(3, waiting_threads, address);
51 // Sort them by priority, such that the highest priority ones come first.
52 std::sort(waiting_threads.begin(), waiting_threads.end(),
53 [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
54 return lhs->current_priority < rhs->current_priority;
55 });
56}
57
58// Wake up num_to_wake (or all) threads in a vector.
59static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
60 // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
61 // them all.
62 size_t last = waiting_threads.size();
63 if (num_to_wake > 0)
64 last = num_to_wake;
65
66 // Signal the waiting threads.
67 for (size_t i = 0; i < last; i++) {
68 ASSERT(waiting_threads[i]->status = THREADSTATUS_WAIT_ARB);
69 waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
70 waiting_threads[i]->arb_wait_address = 0;
71 waiting_threads[i]->ResumeFromWait();
72 }
73}
74
75// Signals an address being waited on.
76ResultCode SignalToAddress(VAddr address, s32 num_to_wake) {
77 // Get threads waiting on the address.
78 std::vector<SharedPtr<Thread>> waiting_threads;
79 GetThreadsWaitingOnAddress(waiting_threads, address);
80
81 WakeThreads(waiting_threads, num_to_wake);
82 return RESULT_SUCCESS;
83}
84
85// Signals an address being waited on and increments its value if equal to the value argument.
86ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) {
87 // Ensure that we can write to the address.
88 if (!Memory::IsValidVirtualAddress(address)) {
89 return ERR_INVALID_ADDRESS_STATE;
90 }
91
92 if (static_cast<s32>(Memory::Read32(address)) == value) {
93 Memory::Write32(address, static_cast<u32>(value + 1));
94 } else {
95 return ERR_INVALID_STATE;
96 }
97
98 return SignalToAddress(address, num_to_wake);
99}
100
101// Signals an address being waited on and modifies its value based on waiting thread count if equal
102// to the value argument.
103ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
104 s32 num_to_wake) {
105 // Ensure that we can write to the address.
106 if (!Memory::IsValidVirtualAddress(address)) {
107 return ERR_INVALID_ADDRESS_STATE;
108 }
109
110 // Get threads waiting on the address.
111 std::vector<SharedPtr<Thread>> waiting_threads;
112 GetThreadsWaitingOnAddress(waiting_threads, address);
113
114 // Determine the modified value depending on the waiting count.
115 s32 updated_value;
116 if (waiting_threads.size() == 0) {
117 updated_value = value - 1;
118 } else if (num_to_wake <= 0 || waiting_threads.size() <= num_to_wake) {
119 updated_value = value + 1;
120 } else {
121 updated_value = value;
122 }
123
124 if (static_cast<s32>(Memory::Read32(address)) == value) {
125 Memory::Write32(address, static_cast<u32>(updated_value));
126 } else {
127 return ERR_INVALID_STATE;
128 }
129
130 WakeThreads(waiting_threads, num_to_wake);
131 return RESULT_SUCCESS;
132}
133
134// Waits on an address if the value passed is less than the argument value, optionally decrementing.
135ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) {
136 // Ensure that we can read the address.
137 if (!Memory::IsValidVirtualAddress(address)) {
138 return ERR_INVALID_ADDRESS_STATE;
139 }
140
141 s32 cur_value = static_cast<s32>(Memory::Read32(address));
142 if (cur_value < value) {
143 Memory::Write32(address, static_cast<u32>(cur_value - 1));
144 } else {
145 return ERR_INVALID_STATE;
146 }
147 // Short-circuit without rescheduling, if timeout is zero.
148 if (timeout == 0) {
149 return RESULT_TIMEOUT;
150 }
151
152 return WaitForAddress(address, timeout);
153}
154
155// Waits on an address if the value passed is equal to the argument value.
156ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
157 // Ensure that we can read the address.
158 if (!Memory::IsValidVirtualAddress(address)) {
159 return ERR_INVALID_ADDRESS_STATE;
160 }
161 // Only wait for the address if equal.
162 if (static_cast<s32>(Memory::Read32(address)) != value) {
163 return ERR_INVALID_STATE;
164 }
165 // Short-circuit without rescheduling, if timeout is zero.
166 if (timeout == 0) {
167 return RESULT_TIMEOUT;
168 }
169
170 return WaitForAddress(address, timeout);
171}
172} // namespace AddressArbiter
173} // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
new file mode 100644
index 000000000..f20f3dbc0
--- /dev/null
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -0,0 +1,32 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/result.h"
8
9namespace Kernel {
10
11namespace AddressArbiter {
/// Wait modes for address arbitration. The WaitForAddress SVC casts its raw `type` argument to
/// this enum and dispatches on it; any other value yields ERR_INVALID_ENUM_VALUE there.
12enum class ArbitrationType {
13 WaitIfLessThan = 0, ///< Dispatches to WaitForAddressIfLessThan with should_decrement = false.
14 DecrementAndWaitIfLessThan = 1, ///< Dispatches to WaitForAddressIfLessThan with should_decrement = true.
15 WaitIfEqual = 2, ///< Dispatches to WaitForAddressIfEqual.
16};
17
/// Signal modes for address arbitration. The SignalToAddress SVC casts its raw `type` argument
/// to this enum and dispatches on it; any other value yields ERR_INVALID_ENUM_VALUE there.
18enum class SignalType {
19 Signal = 0, ///< Dispatches to SignalToAddress (the `value` argument is not used).
20 IncrementAndSignalIfEqual = 1, ///< Dispatches to IncrementAndSignalToAddressIfEqual.
21 ModifyByWaitingCountAndSignalIfEqual = 2, ///< Dispatches to ModifyByWaitingCountAndSignalToAddressIfEqual.
22};
23
24ResultCode SignalToAddress(VAddr address, s32 num_to_wake);
25ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
26ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
27
28ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement);
29ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
30} // namespace AddressArbiter
31
32} // namespace Kernel
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index e1b5430bf..221cb1bb5 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -20,13 +20,16 @@ enum {
20 MaxConnectionsReached = 52, 20 MaxConnectionsReached = 52,
21 21
22 // Confirmed Switch OS error codes 22 // Confirmed Switch OS error codes
23 MisalignedAddress = 102, 23 InvalidAddress = 102,
24 InvalidMemoryState = 106,
24 InvalidProcessorId = 113, 25 InvalidProcessorId = 113,
25 InvalidHandle = 114, 26 InvalidHandle = 114,
26 InvalidCombination = 116, 27 InvalidCombination = 116,
27 Timeout = 117, 28 Timeout = 117,
28 SynchronizationCanceled = 118, 29 SynchronizationCanceled = 118,
29 TooLarge = 119, 30 TooLarge = 119,
31 InvalidEnumValue = 120,
32 InvalidState = 125,
30}; 33};
31} 34}
32 35
@@ -39,14 +42,15 @@ constexpr ResultCode ERR_SESSION_CLOSED_BY_REMOTE(-1);
39constexpr ResultCode ERR_PORT_NAME_TOO_LONG(-1); 42constexpr ResultCode ERR_PORT_NAME_TOO_LONG(-1);
40constexpr ResultCode ERR_WRONG_PERMISSION(-1); 43constexpr ResultCode ERR_WRONG_PERMISSION(-1);
41constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED(-1); 44constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED(-1);
42constexpr ResultCode ERR_INVALID_ENUM_VALUE(-1); 45constexpr ResultCode ERR_INVALID_ENUM_VALUE(ErrorModule::Kernel, ErrCodes::InvalidEnumValue);
43constexpr ResultCode ERR_INVALID_ENUM_VALUE_FND(-1); 46constexpr ResultCode ERR_INVALID_ENUM_VALUE_FND(-1);
44constexpr ResultCode ERR_INVALID_COMBINATION(-1); 47constexpr ResultCode ERR_INVALID_COMBINATION(-1);
45constexpr ResultCode ERR_INVALID_COMBINATION_KERNEL(-1); 48constexpr ResultCode ERR_INVALID_COMBINATION_KERNEL(-1);
46constexpr ResultCode ERR_OUT_OF_MEMORY(-1); 49constexpr ResultCode ERR_OUT_OF_MEMORY(-1);
47constexpr ResultCode ERR_INVALID_ADDRESS(-1); 50constexpr ResultCode ERR_INVALID_ADDRESS(ErrorModule::Kernel, ErrCodes::InvalidAddress);
48constexpr ResultCode ERR_INVALID_ADDRESS_STATE(-1); 51constexpr ResultCode ERR_INVALID_ADDRESS_STATE(ErrorModule::Kernel, ErrCodes::InvalidMemoryState);
49constexpr ResultCode ERR_INVALID_HANDLE(ErrorModule::Kernel, ErrCodes::InvalidHandle); 52constexpr ResultCode ERR_INVALID_HANDLE(ErrorModule::Kernel, ErrCodes::InvalidHandle);
53constexpr ResultCode ERR_INVALID_STATE(ErrorModule::Kernel, ErrCodes::InvalidState);
50constexpr ResultCode ERR_INVALID_POINTER(-1); 54constexpr ResultCode ERR_INVALID_POINTER(-1);
51constexpr ResultCode ERR_INVALID_OBJECT_ADDR(-1); 55constexpr ResultCode ERR_INVALID_OBJECT_ADDR(-1);
52constexpr ResultCode ERR_NOT_AUTHORIZED(-1); 56constexpr ResultCode ERR_NOT_AUTHORIZED(-1);
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 01904467e..b0d83f401 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -271,6 +271,11 @@ std::vector<u8> HLERequestContext::ReadBuffer(int buffer_index) const {
271} 271}
272 272
273size_t HLERequestContext::WriteBuffer(const void* buffer, size_t size, int buffer_index) const { 273size_t HLERequestContext::WriteBuffer(const void* buffer, size_t size, int buffer_index) const {
274 if (size == 0) {
275 NGLOG_WARNING(Core, "skip empty buffer write");
276 return 0;
277 }
278
274 const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()}; 279 const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()};
275 const size_t buffer_size{GetWriteBufferSize(buffer_index)}; 280 const size_t buffer_size{GetWriteBufferSize(buffer_index)};
276 if (size > buffer_size) { 281 if (size > buffer_size) {
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index bc144f3de..65560226d 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -59,7 +59,7 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
59 Handle requesting_thread_handle) { 59 Handle requesting_thread_handle) {
60 // The mutex address must be 4-byte aligned 60 // The mutex address must be 4-byte aligned
61 if ((address % sizeof(u32)) != 0) { 61 if ((address % sizeof(u32)) != 0) {
62 return ResultCode(ErrorModule::Kernel, ErrCodes::MisalignedAddress); 62 return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidAddress);
63 } 63 }
64 64
65 SharedPtr<Thread> holding_thread = g_handle_table.Get<Thread>(holding_thread_handle); 65 SharedPtr<Thread> holding_thread = g_handle_table.Get<Thread>(holding_thread_handle);
@@ -97,7 +97,7 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
97ResultCode Mutex::Release(VAddr address) { 97ResultCode Mutex::Release(VAddr address) {
98 // The mutex address must be 4-byte aligned 98 // The mutex address must be 4-byte aligned
99 if ((address % sizeof(u32)) != 0) { 99 if ((address % sizeof(u32)) != 0) {
100 return ResultCode(ErrorModule::Kernel, ErrCodes::MisalignedAddress); 100 return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidAddress);
101 } 101 }
102 102
103 auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(GetCurrentThread(), address); 103 auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(GetCurrentThread(), address);
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index ec3601e8b..1a36e0d02 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -11,6 +11,7 @@
11#include "common/string_util.h" 11#include "common/string_util.h"
12#include "core/core.h" 12#include "core/core.h"
13#include "core/core_timing.h" 13#include "core/core_timing.h"
14#include "core/hle/kernel/address_arbiter.h"
14#include "core/hle/kernel/client_port.h" 15#include "core/hle/kernel/client_port.h"
15#include "core/hle/kernel/client_session.h" 16#include "core/hle/kernel/client_session.h"
16#include "core/hle/kernel/event.h" 17#include "core/hle/kernel/event.h"
@@ -316,6 +317,11 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
316 "(STUBBED) Attempted to query privileged process id bounds, returned 0"); 317 "(STUBBED) Attempted to query privileged process id bounds, returned 0");
317 *result = 0; 318 *result = 0;
318 break; 319 break;
320 case GetInfoType::UserExceptionContextAddr:
321 NGLOG_WARNING(Kernel_SVC,
322 "(STUBBED) Attempted to query user exception context address, returned 0");
323 *result = 0;
324 break;
319 default: 325 default:
320 UNIMPLEMENTED(); 326 UNIMPLEMENTED();
321 } 327 }
@@ -575,7 +581,7 @@ static void SleepThread(s64 nanoseconds) {
575 Core::System::GetInstance().PrepareReschedule(); 581 Core::System::GetInstance().PrepareReschedule();
576} 582}
577 583
578/// Signal process wide key atomic 584/// Wait process wide key atomic
579static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_variable_addr, 585static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_variable_addr,
580 Handle thread_handle, s64 nano_seconds) { 586 Handle thread_handle, s64 nano_seconds) {
581 NGLOG_TRACE( 587 NGLOG_TRACE(
@@ -684,6 +690,58 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
684 return RESULT_SUCCESS; 690 return RESULT_SUCCESS;
685} 691}
686 692
693// Wait for an address (via Address Arbiter)
694static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout) {
695 NGLOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}",
696 address, type, value, timeout);
697 // If the passed address is a kernel virtual address, return invalid memory state.
698 if (Memory::IsKernelVirtualAddress(address)) {
699 return ERR_INVALID_ADDRESS_STATE;
700 }
701 // If the address is not properly aligned to 4 bytes, return invalid address.
702 if (address % sizeof(u32) != 0) {
703 return ERR_INVALID_ADDRESS;
704 }
705
706 switch (static_cast<AddressArbiter::ArbitrationType>(type)) {
707 case AddressArbiter::ArbitrationType::WaitIfLessThan:
708 return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false);
709 case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan:
710 return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true);
711 case AddressArbiter::ArbitrationType::WaitIfEqual:
712 return AddressArbiter::WaitForAddressIfEqual(address, value, timeout);
713 default:
714 return ERR_INVALID_ENUM_VALUE;
715 }
716}
717
718// Signals to an address (via Address Arbiter)
719static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to_wake) {
720 NGLOG_WARNING(Kernel_SVC,
721 "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}", address,
722 type, value, num_to_wake);
723 // If the passed address is a kernel virtual address, return invalid memory state.
724 if (Memory::IsKernelVirtualAddress(address)) {
725 return ERR_INVALID_ADDRESS_STATE;
726 }
727 // If the address is not properly aligned to 4 bytes, return invalid address.
728 if (address % sizeof(u32) != 0) {
729 return ERR_INVALID_ADDRESS;
730 }
731
732 switch (static_cast<AddressArbiter::SignalType>(type)) {
733 case AddressArbiter::SignalType::Signal:
734 return AddressArbiter::SignalToAddress(address, num_to_wake);
735 case AddressArbiter::SignalType::IncrementAndSignalIfEqual:
736 return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
737 case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual:
738 return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
739 num_to_wake);
740 default:
741 return ERR_INVALID_ENUM_VALUE;
742 }
743}
744
687/// This returns the total CPU ticks elapsed since the CPU was powered-on 745/// This returns the total CPU ticks elapsed since the CPU was powered-on
688static u64 GetSystemTick() { 746static u64 GetSystemTick() {
689 const u64 result{CoreTiming::GetTicks()}; 747 const u64 result{CoreTiming::GetTicks()};
@@ -744,7 +802,7 @@ static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
744 ASSERT(thread->owner_process->ideal_processor != THREADPROCESSORID_DEFAULT); 802 ASSERT(thread->owner_process->ideal_processor != THREADPROCESSORID_DEFAULT);
745 // Set the target CPU to the one specified in the process' exheader. 803 // Set the target CPU to the one specified in the process' exheader.
746 core = thread->owner_process->ideal_processor; 804 core = thread->owner_process->ideal_processor;
747 mask = 1 << core; 805 mask = 1ull << core;
748 } 806 }
749 807
750 if (mask == 0) { 808 if (mask == 0) {
@@ -761,7 +819,7 @@ static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
761 } 819 }
762 820
763 // Error out if the input core isn't enabled in the input mask. 821 // Error out if the input core isn't enabled in the input mask.
764 if (core < Core::NUM_CPU_CORES && (mask & (1 << core)) == 0) { 822 if (core < Core::NUM_CPU_CORES && (mask & (1ull << core)) == 0) {
765 return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidCombination); 823 return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidCombination);
766 } 824 }
767 825
@@ -856,8 +914,8 @@ static const FunctionDef SVC_Table[] = {
856 {0x31, nullptr, "GetResourceLimitCurrentValue"}, 914 {0x31, nullptr, "GetResourceLimitCurrentValue"},
857 {0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"}, 915 {0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"},
858 {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"}, 916 {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"},
859 {0x34, nullptr, "WaitForAddress"}, 917 {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"},
860 {0x35, nullptr, "SignalToAddress"}, 918 {0x35, SvcWrap<SignalToAddress>, "SignalToAddress"},
861 {0x36, nullptr, "Unknown"}, 919 {0x36, nullptr, "Unknown"},
862 {0x37, nullptr, "Unknown"}, 920 {0x37, nullptr, "Unknown"},
863 {0x38, nullptr, "Unknown"}, 921 {0x38, nullptr, "Unknown"},
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 40aa88cc1..79c3fe31b 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -179,6 +179,20 @@ void SvcWrap() {
179 FuncReturn(retval); 179 FuncReturn(retval);
180} 180}
181 181
182template <ResultCode func(u64, u32, s32, s64)>
183void SvcWrap() {
184 FuncReturn(
185 func(PARAM(0), (u32)(PARAM(1) & 0xFFFFFFFF), (s32)(PARAM(2) & 0xFFFFFFFF), (s64)PARAM(3))
186 .raw);
187}
188
189template <ResultCode func(u64, u32, s32, s32)>
190void SvcWrap() {
191 FuncReturn(func(PARAM(0), (u32)(PARAM(1) & 0xFFFFFFFF), (s32)(PARAM(2) & 0xFFFFFFFF),
192 (s32)(PARAM(3) & 0xFFFFFFFF))
193 .raw);
194}
195
182//////////////////////////////////////////////////////////////////////////////////////////////////// 196////////////////////////////////////////////////////////////////////////////////////////////////////
183// Function wrappers that return type u32 197// Function wrappers that return type u32
184 198
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index cffa7ca83..2f333ec34 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -140,6 +140,11 @@ static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) {
140 } 140 }
141 } 141 }
142 142
143 if (thread->arb_wait_address != 0) {
144 ASSERT(thread->status == THREADSTATUS_WAIT_ARB);
145 thread->arb_wait_address = 0;
146 }
147
143 if (resume) 148 if (resume)
144 thread->ResumeFromWait(); 149 thread->ResumeFromWait();
145} 150}
@@ -179,6 +184,7 @@ void Thread::ResumeFromWait() {
179 case THREADSTATUS_WAIT_SLEEP: 184 case THREADSTATUS_WAIT_SLEEP:
180 case THREADSTATUS_WAIT_IPC: 185 case THREADSTATUS_WAIT_IPC:
181 case THREADSTATUS_WAIT_MUTEX: 186 case THREADSTATUS_WAIT_MUTEX:
187 case THREADSTATUS_WAIT_ARB:
182 break; 188 break;
183 189
184 case THREADSTATUS_READY: 190 case THREADSTATUS_READY:
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 1d2da6d50..f1e759802 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -45,6 +45,7 @@ enum ThreadStatus {
45 THREADSTATUS_WAIT_SYNCH_ANY, ///< Waiting due to WaitSynch1 or WaitSynchN with wait_all = false 45 THREADSTATUS_WAIT_SYNCH_ANY, ///< Waiting due to WaitSynch1 or WaitSynchN with wait_all = false
46 THREADSTATUS_WAIT_SYNCH_ALL, ///< Waiting due to WaitSynchronizationN with wait_all = true 46 THREADSTATUS_WAIT_SYNCH_ALL, ///< Waiting due to WaitSynchronizationN with wait_all = true
47 THREADSTATUS_WAIT_MUTEX, ///< Waiting due to an ArbitrateLock/WaitProcessWideKey svc 47 THREADSTATUS_WAIT_MUTEX, ///< Waiting due to an ArbitrateLock/WaitProcessWideKey svc
48 THREADSTATUS_WAIT_ARB, ///< Waiting due to a SignalToAddress/WaitForAddress svc
48 THREADSTATUS_DORMANT, ///< Created but not yet made ready 49 THREADSTATUS_DORMANT, ///< Created but not yet made ready
49 THREADSTATUS_DEAD ///< Run to completion, or forcefully terminated 50 THREADSTATUS_DEAD ///< Run to completion, or forcefully terminated
50}; 51};
@@ -230,6 +231,9 @@ public:
230 VAddr mutex_wait_address; ///< If waiting on a Mutex, this is the mutex address 231 VAddr mutex_wait_address; ///< If waiting on a Mutex, this is the mutex address
231 Handle wait_handle; ///< The handle used to wait for the mutex. 232 Handle wait_handle; ///< The handle used to wait for the mutex.
232 233
234 // If waiting for an AddressArbiter, this is the address being waited on.
235 VAddr arb_wait_address{0};
236
233 std::string name; 237 std::string name;
234 238
235 /// Handle used by guest emulated application to access this thread 239 /// Handle used by guest emulated application to access this thread
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 6e8002bc9..3dfb3fb52 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -17,7 +17,8 @@ constexpr u64 audio_ticks{static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / 200)};
17 17
18class IAudioRenderer final : public ServiceFramework<IAudioRenderer> { 18class IAudioRenderer final : public ServiceFramework<IAudioRenderer> {
19public: 19public:
20 IAudioRenderer() : ServiceFramework("IAudioRenderer") { 20 IAudioRenderer(AudioRendererParameter audren_params)
21 : ServiceFramework("IAudioRenderer"), worker_params(audren_params) {
21 static const FunctionInfo functions[] = { 22 static const FunctionInfo functions[] = {
22 {0, nullptr, "GetAudioRendererSampleRate"}, 23 {0, nullptr, "GetAudioRendererSampleRate"},
23 {1, nullptr, "GetAudioRendererSampleCount"}, 24 {1, nullptr, "GetAudioRendererSampleCount"},
@@ -57,27 +58,37 @@ private:
57 } 58 }
58 59
59 void RequestUpdateAudioRenderer(Kernel::HLERequestContext& ctx) { 60 void RequestUpdateAudioRenderer(Kernel::HLERequestContext& ctx) {
60 NGLOG_DEBUG(Service_Audio, "{}", ctx.Description()); 61 UpdateDataHeader config{};
61 AudioRendererResponseData response_data{}; 62 auto buf = ctx.ReadBuffer();
62 63 std::memcpy(&config, buf.data(), sizeof(UpdateDataHeader));
63 response_data.section_0_size = 64 u32 memory_pool_count = worker_params.effect_count + (worker_params.voice_count * 4);
64 static_cast<u32>(response_data.state_entries.size() * sizeof(AudioRendererStateEntry)); 65
65 response_data.section_1_size = static_cast<u32>(response_data.section_1.size()); 66 std::vector<MemoryPoolInfo> mem_pool_info(memory_pool_count);
66 response_data.section_2_size = static_cast<u32>(response_data.section_2.size()); 67 std::memcpy(mem_pool_info.data(),
67 response_data.section_3_size = static_cast<u32>(response_data.section_3.size()); 68 buf.data() + sizeof(UpdateDataHeader) + config.behavior_size,
68 response_data.section_4_size = static_cast<u32>(response_data.section_4.size()); 69 memory_pool_count * sizeof(MemoryPoolInfo));
69 response_data.section_5_size = static_cast<u32>(response_data.section_5.size()); 70
70 response_data.total_size = sizeof(AudioRendererResponseData); 71 UpdateDataHeader response_data{worker_params};
71 72
72 for (unsigned i = 0; i < response_data.state_entries.size(); i++) { 73 ASSERT(ctx.GetWriteBufferSize() == response_data.total_size);
73 // 4 = Busy and 5 = Ready? 74
74 response_data.state_entries[i].state = 5; 75 std::vector<u8> output(response_data.total_size);
76 std::memcpy(output.data(), &response_data, sizeof(UpdateDataHeader));
77 std::vector<MemoryPoolEntry> memory_pool(memory_pool_count);
78 for (unsigned i = 0; i < memory_pool.size(); i++) {
79 if (mem_pool_info[i].pool_state == MemoryPoolStates::RequestAttach)
80 memory_pool[i].state = MemoryPoolStates::Attached;
81 else if (mem_pool_info[i].pool_state == MemoryPoolStates::RequestDetach)
82 memory_pool[i].state = MemoryPoolStates::Detached;
83 else
84 memory_pool[i].state = mem_pool_info[i].pool_state;
75 } 85 }
86 std::memcpy(output.data() + sizeof(UpdateDataHeader), memory_pool.data(),
87 response_data.memory_pools_size);
76 88
77 ctx.WriteBuffer(&response_data, response_data.total_size); 89 ctx.WriteBuffer(output);
78 90
79 IPC::ResponseBuilder rb{ctx, 2}; 91 IPC::ResponseBuilder rb{ctx, 2};
80
81 rb.Push(RESULT_SUCCESS); 92 rb.Push(RESULT_SUCCESS);
82 93
83 NGLOG_WARNING(Service_Audio, "(STUBBED) called"); 94 NGLOG_WARNING(Service_Audio, "(STUBBED) called");
@@ -109,48 +120,66 @@ private:
109 NGLOG_WARNING(Service_Audio, "(STUBBED) called"); 120 NGLOG_WARNING(Service_Audio, "(STUBBED) called");
110 } 121 }
111 122
112 struct AudioRendererStateEntry { 123 enum class MemoryPoolStates : u32 { // Should be LE
113 u32_le state; 124 Invalid = 0x0,
125 Unknown = 0x1,
126 RequestDetach = 0x2,
127 Detached = 0x3,
128 RequestAttach = 0x4,
129 Attached = 0x5,
130 Released = 0x6,
131 };
132
133 struct MemoryPoolEntry {
134 MemoryPoolStates state;
114 u32_le unknown_4; 135 u32_le unknown_4;
115 u32_le unknown_8; 136 u32_le unknown_8;
116 u32_le unknown_c; 137 u32_le unknown_c;
117 }; 138 };
118 static_assert(sizeof(AudioRendererStateEntry) == 0x10, 139 static_assert(sizeof(MemoryPoolEntry) == 0x10, "MemoryPoolEntry has wrong size");
119 "AudioRendererStateEntry has wrong size");
120
121 struct AudioRendererResponseData {
122 u32_le unknown_0;
123 u32_le section_5_size;
124 u32_le section_0_size;
125 u32_le section_1_size;
126 u32_le unknown_10;
127 u32_le section_2_size;
128 u32_le unknown_18;
129 u32_le section_3_size;
130 u32_le section_4_size;
131 u32_le unknown_24;
132 u32_le unknown_28;
133 u32_le unknown_2c;
134 u32_le unknown_30;
135 u32_le unknown_34;
136 u32_le unknown_38;
137 u32_le total_size;
138 140
139 std::array<AudioRendererStateEntry, 0x18e> state_entries; 141 struct MemoryPoolInfo {
142 u64_le pool_address;
143 u64_le pool_size;
144 MemoryPoolStates pool_state;
145 INSERT_PADDING_WORDS(3); // Unknown
146 };
147 static_assert(sizeof(MemoryPoolInfo) == 0x20, "MemoryPoolInfo has wrong size");
148
149 struct UpdateDataHeader {
150 UpdateDataHeader() {}
151
152 UpdateDataHeader(const AudioRendererParameter& config) {
153 revision = Common::MakeMagic('R', 'E', 'V', '4'); // 5.1.0 Revision
154 behavior_size = 0xb0;
155 memory_pools_size = (config.effect_count + (config.voice_count * 4)) * 0x10;
156 voices_size = config.voice_count * 0x10;
157 effects_size = config.effect_count * 0x10;
158 sinks_size = config.sink_count * 0x20;
159 performance_manager_size = 0x10;
160 total_size = sizeof(UpdateDataHeader) + behavior_size + memory_pools_size +
161 voices_size + effects_size + sinks_size + performance_manager_size;
162 }
140 163
141 std::array<u8, 0x600> section_1; 164 u32_le revision;
142 std::array<u8, 0xe0> section_2; 165 u32_le behavior_size;
143 std::array<u8, 0x20> section_3; 166 u32_le memory_pools_size;
144 std::array<u8, 0x10> section_4; 167 u32_le voices_size;
145 std::array<u8, 0xb0> section_5; 168 u32_le voice_resource_size;
169 u32_le effects_size;
170 u32_le mixes_size;
171 u32_le sinks_size;
172 u32_le performance_manager_size;
173 INSERT_PADDING_WORDS(6);
174 u32_le total_size;
146 }; 175 };
147 static_assert(sizeof(AudioRendererResponseData) == 0x20e0, 176 static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size");
148 "AudioRendererResponseData has wrong size");
149 177
150 /// This is used to trigger the audio event callback. 178 /// This is used to trigger the audio event callback.
151 CoreTiming::EventType* audio_event; 179 CoreTiming::EventType* audio_event;
152 180
153 Kernel::SharedPtr<Kernel::Event> system_event; 181 Kernel::SharedPtr<Kernel::Event> system_event;
182 AudioRendererParameter worker_params;
154}; 183};
155 184
156class IAudioDevice final : public ServiceFramework<IAudioDevice> { 185class IAudioDevice final : public ServiceFramework<IAudioDevice> {
@@ -248,31 +277,33 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") {
248} 277}
249 278
250void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) { 279void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
280 IPC::RequestParser rp{ctx};
281 auto params = rp.PopRaw<AudioRendererParameter>();
251 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 282 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
252 283
253 rb.Push(RESULT_SUCCESS); 284 rb.Push(RESULT_SUCCESS);
254 rb.PushIpcInterface<Audio::IAudioRenderer>(); 285 rb.PushIpcInterface<Audio::IAudioRenderer>(std::move(params));
255 286
256 NGLOG_DEBUG(Service_Audio, "called"); 287 NGLOG_DEBUG(Service_Audio, "called");
257} 288}
258 289
259void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { 290void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
260 IPC::RequestParser rp{ctx}; 291 IPC::RequestParser rp{ctx};
261 auto params = rp.PopRaw<WorkerBufferParameters>(); 292 auto params = rp.PopRaw<AudioRendererParameter>();
262 293
263 u64 buffer_sz = Common::AlignUp(4 * params.unknown8, 0x40); 294 u64 buffer_sz = Common::AlignUp(4 * params.unknown_8, 0x40);
264 buffer_sz += params.unknownC * 1024; 295 buffer_sz += params.unknown_c * 1024;
265 buffer_sz += 0x940 * (params.unknownC + 1); 296 buffer_sz += 0x940 * (params.unknown_c + 1);
266 buffer_sz += 0x3F0 * params.voice_count; 297 buffer_sz += 0x3F0 * params.voice_count;
267 buffer_sz += Common::AlignUp(8 * (params.unknownC + 1), 0x10); 298 buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10);
268 buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10); 299 buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
269 buffer_sz += 300 buffer_sz +=
270 Common::AlignUp((0x3C0 * (params.sink_count + params.unknownC) + 4 * params.sample_count) * 301 Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) *
271 (params.unknown8 + 6), 302 (params.unknown_8 + 6),
272 0x40); 303 0x40);
273 304
274 if (IsFeatureSupported(AudioFeatures::Splitter, params.magic)) { 305 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
275 u32 count = params.unknownC + 1; 306 u32 count = params.unknown_c + 1;
276 u64 node_count = Common::AlignUp(count, 0x40); 307 u64 node_count = Common::AlignUp(count, 0x40);
277 u64 node_state_buffer_sz = 308 u64 node_state_buffer_sz =
278 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8); 309 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
@@ -287,20 +318,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
287 } 318 }
288 319
289 buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50; 320 buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
290 if (IsFeatureSupported(AudioFeatures::Splitter, params.magic)) { 321 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
291 buffer_sz += 0xE0 * params.unknown2c; 322 buffer_sz += 0xE0 * params.unknown_2c;
292 buffer_sz += 0x20 * params.splitter_count; 323 buffer_sz += 0x20 * params.splitter_count;
293 buffer_sz += Common::AlignUp(4 * params.unknown2c, 0x10); 324 buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10);
294 } 325 }
295 buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count; 326 buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
296 u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count + 327 u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
297 ((params.voice_count * 256) | 0x40); 328 ((params.voice_count * 256) | 0x40);
298 329
299 if (params.unknown1c >= 1) { 330 if (params.unknown_1c >= 1) {
300 output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count + 331 output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
301 16 * params.voice_count + 16) + 332 16 * params.voice_count + 16) +
302 0x658) * 333 0x658) *
303 (params.unknown1c + 1) + 334 (params.unknown_1c + 1) +
304 0xc0, 335 0xc0,
305 0x40) + 336 0x40) +
306 output_sz; 337 output_sz;
@@ -328,7 +359,7 @@ bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
328 u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap 359 u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
329 switch (feature) { 360 switch (feature) {
330 case AudioFeatures::Splitter: 361 case AudioFeatures::Splitter:
331 return version_num >= 2; 362 return version_num >= 2u;
332 default: 363 default:
333 return false; 364 return false;
334 } 365 }
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index fe53de4ce..b9b81db4f 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -12,6 +12,24 @@ class HLERequestContext;
12 12
13namespace Service::Audio { 13namespace Service::Audio {
14 14
15struct AudioRendererParameter {
16 u32_le sample_rate;
17 u32_le sample_count;
18 u32_le unknown_8;
19 u32_le unknown_c;
20 u32_le voice_count;
21 u32_le sink_count;
22 u32_le effect_count;
23 u32_le unknown_1c;
24 u8 unknown_20;
25 INSERT_PADDING_BYTES(3);
26 u32_le splitter_count;
27 u32_le unknown_2c;
28 INSERT_PADDING_WORDS(1);
29 u32_le revision;
30};
31static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size");
32
15class AudRenU final : public ServiceFramework<AudRenU> { 33class AudRenU final : public ServiceFramework<AudRenU> {
16public: 34public:
17 explicit AudRenU(); 35 explicit AudRenU();
@@ -22,25 +40,6 @@ private:
22 void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx); 40 void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
23 void GetAudioDevice(Kernel::HLERequestContext& ctx); 41 void GetAudioDevice(Kernel::HLERequestContext& ctx);
24 42
25 struct WorkerBufferParameters {
26 u32_le sample_rate;
27 u32_le sample_count;
28 u32_le unknown8;
29 u32_le unknownC;
30 u32_le voice_count;
31 u32_le sink_count;
32 u32_le effect_count;
33 u32_le unknown1c;
34 u8 unknown20;
35 u8 padding1[3];
36 u32_le splitter_count;
37 u32_le unknown2c;
38 u8 padding2[4];
39 u32_le magic;
40 };
41 static_assert(sizeof(WorkerBufferParameters) == 52,
42 "WorkerBufferParameters is an invalid size");
43
44 enum class AudioFeatures : u32 { 43 enum class AudioFeatures : u32 {
45 Splitter, 44 Splitter,
46 }; 45 };
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 00c5308ba..2696a8bf0 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -84,6 +84,10 @@ private:
84 84
85 for (size_t controller = 0; controller < mem.controllers.size(); controller++) { 85 for (size_t controller = 0; controller < mem.controllers.size(); controller++) {
86 for (int index = 0; index < HID_NUM_LAYOUTS; index++) { 86 for (int index = 0; index < HID_NUM_LAYOUTS; index++) {
87 // TODO(DarkLordZach): Is this layout/controller config actually invalid?
88 if (controller == Controller_Handheld && index == Layout_Single)
89 continue;
90
87 ControllerLayout& layout = mem.controllers[controller].layouts[index]; 91 ControllerLayout& layout = mem.controllers[controller].layouts[index];
88 layout.header.num_entries = HID_NUM_ENTRIES; 92 layout.header.num_entries = HID_NUM_ENTRIES;
89 layout.header.max_entry_index = HID_NUM_ENTRIES - 1; 93 layout.header.max_entry_index = HID_NUM_ENTRIES - 1;
@@ -94,7 +98,6 @@ private:
94 layout.header.latest_entry = (layout.header.latest_entry + 1) % HID_NUM_ENTRIES; 98 layout.header.latest_entry = (layout.header.latest_entry + 1) % HID_NUM_ENTRIES;
95 99
96 ControllerInputEntry& entry = layout.entries[layout.header.latest_entry]; 100 ControllerInputEntry& entry = layout.entries[layout.header.latest_entry];
97 entry.connection_state = ConnectionState_Connected | ConnectionState_Wired;
98 entry.timestamp++; 101 entry.timestamp++;
99 // TODO(shinyquagsire23): Is this always identical to timestamp? 102 // TODO(shinyquagsire23): Is this always identical to timestamp?
100 entry.timestamp_2++; 103 entry.timestamp_2++;
@@ -103,6 +106,8 @@ private:
103 if (controller != Controller_Handheld) 106 if (controller != Controller_Handheld)
104 continue; 107 continue;
105 108
109 entry.connection_state = ConnectionState_Connected | ConnectionState_Wired;
110
106 // TODO(shinyquagsire23): Set up some LUTs for each layout mapping in the future? 111 // TODO(shinyquagsire23): Set up some LUTs for each layout mapping in the future?
107 // For now everything is just the default handheld layout, but split Joy-Con will 112 // For now everything is just the default handheld layout, but split Joy-Con will
108 // rotate the face buttons and directions for certain layouts. 113 // rotate the face buttons and directions for certain layouts.
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 15eee8f01..b499308d6 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -12,7 +12,7 @@ namespace Service::HID {
12// Begin enums and output structs 12// Begin enums and output structs
13 13
14constexpr u32 HID_NUM_ENTRIES = 17; 14constexpr u32 HID_NUM_ENTRIES = 17;
15constexpr u32 HID_NUM_LAYOUTS = 2; 15constexpr u32 HID_NUM_LAYOUTS = 7;
16constexpr s32 HID_JOYSTICK_MAX = 0x8000; 16constexpr s32 HID_JOYSTICK_MAX = 0x8000;
17constexpr s32 HID_JOYSTICK_MIN = -0x8000; 17constexpr s32 HID_JOYSTICK_MIN = -0x8000;
18 18
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 79aab87f9..ed7b6dc03 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -121,8 +121,9 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
121} 121}
122 122
123u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { 123u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
124 if (input.size() < sizeof(IoctlSubmitGpfifo)) 124 if (input.size() < sizeof(IoctlSubmitGpfifo)) {
125 UNIMPLEMENTED(); 125 UNIMPLEMENTED();
126 }
126 IoctlSubmitGpfifo params{}; 127 IoctlSubmitGpfifo params{};
127 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); 128 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
128 NGLOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", 129 NGLOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp
index f0572bed6..baeecb0ec 100644
--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -12,9 +12,6 @@
12namespace Service::Set { 12namespace Service::Set {
13 13
14void SET::GetAvailableLanguageCodes(Kernel::HLERequestContext& ctx) { 14void SET::GetAvailableLanguageCodes(Kernel::HLERequestContext& ctx) {
15 IPC::RequestParser rp{ctx};
16 u32 id = rp.Pop<u32>();
17
18 static constexpr std::array<LanguageCode, 17> available_language_codes = {{ 15 static constexpr std::array<LanguageCode, 17> available_language_codes = {{
19 LanguageCode::JA, 16 LanguageCode::JA,
20 LanguageCode::EN_US, 17 LanguageCode::EN_US,
@@ -50,7 +47,7 @@ SET::SET() : ServiceFramework("set") {
50 {2, nullptr, "MakeLanguageCode"}, 47 {2, nullptr, "MakeLanguageCode"},
51 {3, nullptr, "GetAvailableLanguageCodeCount"}, 48 {3, nullptr, "GetAvailableLanguageCodeCount"},
52 {4, nullptr, "GetRegionCode"}, 49 {4, nullptr, "GetRegionCode"},
53 {5, nullptr, "GetAvailableLanguageCodes2"}, 50 {5, &SET::GetAvailableLanguageCodes, "GetAvailableLanguageCodes2"},
54 {6, nullptr, "GetAvailableLanguageCodeCount2"}, 51 {6, nullptr, "GetAvailableLanguageCodeCount2"},
55 {7, nullptr, "GetKeyCodeMap"}, 52 {7, nullptr, "GetKeyCodeMap"},
56 {8, nullptr, "GetQuestFlag"}, 53 {8, nullptr, "GetQuestFlag"},
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp
index 6a4fd38cb..20cc0bac0 100644
--- a/src/core/loader/loader.cpp
+++ b/src/core/loader/loader.cpp
@@ -9,6 +9,7 @@
9#include "core/hle/kernel/process.h" 9#include "core/hle/kernel/process.h"
10#include "core/loader/deconstructed_rom_directory.h" 10#include "core/loader/deconstructed_rom_directory.h"
11#include "core/loader/elf.h" 11#include "core/loader/elf.h"
12#include "core/loader/nca.h"
12#include "core/loader/nro.h" 13#include "core/loader/nro.h"
13#include "core/loader/nso.h" 14#include "core/loader/nso.h"
14 15
@@ -32,6 +33,7 @@ FileType IdentifyFile(FileUtil::IOFile& file, const std::string& filepath) {
32 CHECK_TYPE(ELF) 33 CHECK_TYPE(ELF)
33 CHECK_TYPE(NSO) 34 CHECK_TYPE(NSO)
34 CHECK_TYPE(NRO) 35 CHECK_TYPE(NRO)
36 CHECK_TYPE(NCA)
35 37
36#undef CHECK_TYPE 38#undef CHECK_TYPE
37 39
@@ -57,6 +59,8 @@ FileType GuessFromExtension(const std::string& extension_) {
57 return FileType::NRO; 59 return FileType::NRO;
58 else if (extension == ".nso") 60 else if (extension == ".nso")
59 return FileType::NSO; 61 return FileType::NSO;
62 else if (extension == ".nca")
63 return FileType::NCA;
60 64
61 return FileType::Unknown; 65 return FileType::Unknown;
62} 66}
@@ -69,6 +73,8 @@ const char* GetFileTypeString(FileType type) {
69 return "NRO"; 73 return "NRO";
70 case FileType::NSO: 74 case FileType::NSO:
71 return "NSO"; 75 return "NSO";
76 case FileType::NCA:
77 return "NCA";
72 case FileType::DeconstructedRomDirectory: 78 case FileType::DeconstructedRomDirectory:
73 return "Directory"; 79 return "Directory";
74 case FileType::Error: 80 case FileType::Error:
@@ -104,6 +110,10 @@ static std::unique_ptr<AppLoader> GetFileLoader(FileUtil::IOFile&& file, FileTyp
104 case FileType::NRO: 110 case FileType::NRO:
105 return std::make_unique<AppLoader_NRO>(std::move(file), filepath); 111 return std::make_unique<AppLoader_NRO>(std::move(file), filepath);
106 112
113 // NX NCA file format.
114 case FileType::NCA:
115 return std::make_unique<AppLoader_NCA>(std::move(file), filepath);
116
107 // NX deconstructed ROM directory. 117 // NX deconstructed ROM directory.
108 case FileType::DeconstructedRomDirectory: 118 case FileType::DeconstructedRomDirectory:
109 return std::make_unique<AppLoader_DeconstructedRomDirectory>(std::move(file), filepath); 119 return std::make_unique<AppLoader_DeconstructedRomDirectory>(std::move(file), filepath);
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h
index b1aabb1cb..b76f7b13d 100644
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -29,6 +29,7 @@ enum class FileType {
29 ELF, 29 ELF,
30 NSO, 30 NSO,
31 NRO, 31 NRO,
32 NCA,
32 DeconstructedRomDirectory, 33 DeconstructedRomDirectory,
33}; 34};
34 35
diff --git a/src/core/loader/nca.cpp b/src/core/loader/nca.cpp
new file mode 100644
index 000000000..067945d46
--- /dev/null
+++ b/src/core/loader/nca.cpp
@@ -0,0 +1,303 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <vector>
6
7#include "common/common_funcs.h"
8#include "common/file_util.h"
9#include "common/logging/log.h"
10#include "common/swap.h"
11#include "core/core.h"
12#include "core/file_sys/program_metadata.h"
13#include "core/file_sys/romfs_factory.h"
14#include "core/hle/kernel/process.h"
15#include "core/hle/kernel/resource_limit.h"
16#include "core/hle/service/filesystem/filesystem.h"
17#include "core/loader/nca.h"
18#include "core/loader/nso.h"
19#include "core/memory.h"
20
21namespace Loader {
22
23// Media offsets in headers are stored divided by 512. Mult. by this to get real offset.
24constexpr u64 MEDIA_OFFSET_MULTIPLIER = 0x200;
25
26constexpr u64 SECTION_HEADER_SIZE = 0x200;
27constexpr u64 SECTION_HEADER_OFFSET = 0x400;
28
29enum class NcaContentType : u8 { Program = 0, Meta = 1, Control = 2, Manual = 3, Data = 4 };
30
31enum class NcaSectionFilesystemType : u8 { PFS0 = 0x2, ROMFS = 0x3 };
32
33struct NcaSectionTableEntry {
34 u32_le media_offset;
35 u32_le media_end_offset;
36 INSERT_PADDING_BYTES(0x8);
37};
38static_assert(sizeof(NcaSectionTableEntry) == 0x10, "NcaSectionTableEntry has incorrect size.");
39
40struct NcaHeader {
41 std::array<u8, 0x100> rsa_signature_1;
42 std::array<u8, 0x100> rsa_signature_2;
43 u32_le magic;
44 u8 is_system;
45 NcaContentType content_type;
46 u8 crypto_type;
47 u8 key_index;
48 u64_le size;
49 u64_le title_id;
50 INSERT_PADDING_BYTES(0x4);
51 u32_le sdk_version;
52 u8 crypto_type_2;
53 INSERT_PADDING_BYTES(15);
54 std::array<u8, 0x10> rights_id;
55 std::array<NcaSectionTableEntry, 0x4> section_tables;
56 std::array<std::array<u8, 0x20>, 0x4> hash_tables;
57 std::array<std::array<u8, 0x10>, 0x4> key_area;
58 INSERT_PADDING_BYTES(0xC0);
59};
60static_assert(sizeof(NcaHeader) == 0x400, "NcaHeader has incorrect size.");
61
62struct NcaSectionHeaderBlock {
63 INSERT_PADDING_BYTES(3);
64 NcaSectionFilesystemType filesystem_type;
65 u8 crypto_type;
66 INSERT_PADDING_BYTES(3);
67};
68static_assert(sizeof(NcaSectionHeaderBlock) == 0x8, "NcaSectionHeaderBlock has incorrect size.");
69
70struct Pfs0Superblock {
71 NcaSectionHeaderBlock header_block;
72 std::array<u8, 0x20> hash;
73 u32_le size;
74 INSERT_PADDING_BYTES(4);
75 u64_le hash_table_offset;
76 u64_le hash_table_size;
77 u64_le pfs0_header_offset;
78 u64_le pfs0_size;
79 INSERT_PADDING_BYTES(432);
80};
81static_assert(sizeof(Pfs0Superblock) == 0x200, "Pfs0Superblock has incorrect size.");
82
83static bool IsValidNca(const NcaHeader& header) {
84 return header.magic == Common::MakeMagic('N', 'C', 'A', '2') ||
85 header.magic == Common::MakeMagic('N', 'C', 'A', '3');
86}
87
88// TODO(DarkLordZach): Add support for encrypted.
89class Nca final {
90 std::vector<FileSys::PartitionFilesystem> pfs;
91 std::vector<u64> pfs_offset;
92
93 u64 romfs_offset = 0;
94 u64 romfs_size = 0;
95
96 boost::optional<u8> exefs_id = boost::none;
97
98 FileUtil::IOFile file;
99 std::string path;
100
101 u64 GetExeFsFileOffset(const std::string& file_name) const;
102 u64 GetExeFsFileSize(const std::string& file_name) const;
103
104public:
105 ResultStatus Load(FileUtil::IOFile&& file, std::string path);
106
107 FileSys::PartitionFilesystem GetPfs(u8 id) const;
108
109 u64 GetRomFsOffset() const;
110 u64 GetRomFsSize() const;
111
112 std::vector<u8> GetExeFsFile(const std::string& file_name);
113};
114
115static bool IsPfsExeFs(const FileSys::PartitionFilesystem& pfs) {
116 // According to switchbrew, an exefs must only contain these two files:
117 return pfs.GetFileSize("main") > 0 && pfs.GetFileSize("main.npdm") > 0;
118}
119
120ResultStatus Nca::Load(FileUtil::IOFile&& in_file, std::string in_path) {
121 file = std::move(in_file);
122 path = in_path;
123 file.Seek(0, SEEK_SET);
124 std::array<u8, sizeof(NcaHeader)> header_array{};
125 if (sizeof(NcaHeader) != file.ReadBytes(header_array.data(), sizeof(NcaHeader)))
126 NGLOG_CRITICAL(Loader, "File reader errored out during header read.");
127
128 NcaHeader header{};
129 std::memcpy(&header, header_array.data(), sizeof(NcaHeader));
130 if (!IsValidNca(header))
131 return ResultStatus::ErrorInvalidFormat;
132
133 int number_sections =
134 std::count_if(std::begin(header.section_tables), std::end(header.section_tables),
135 [](NcaSectionTableEntry entry) { return entry.media_offset > 0; });
136
137 for (int i = 0; i < number_sections; ++i) {
138 // Seek to beginning of this section.
139 file.Seek(SECTION_HEADER_OFFSET + i * SECTION_HEADER_SIZE, SEEK_SET);
140 std::array<u8, sizeof(NcaSectionHeaderBlock)> array{};
141 if (sizeof(NcaSectionHeaderBlock) !=
142 file.ReadBytes(array.data(), sizeof(NcaSectionHeaderBlock)))
143 NGLOG_CRITICAL(Loader, "File reader errored out during header read.");
144
145 NcaSectionHeaderBlock block{};
146 std::memcpy(&block, array.data(), sizeof(NcaSectionHeaderBlock));
147
148 if (block.filesystem_type == NcaSectionFilesystemType::ROMFS) {
149 romfs_offset = header.section_tables[i].media_offset * MEDIA_OFFSET_MULTIPLIER;
150 romfs_size =
151 header.section_tables[i].media_end_offset * MEDIA_OFFSET_MULTIPLIER - romfs_offset;
152 } else if (block.filesystem_type == NcaSectionFilesystemType::PFS0) {
153 Pfs0Superblock sb{};
154 // Seek back to beginning of this section.
155 file.Seek(SECTION_HEADER_OFFSET + i * SECTION_HEADER_SIZE, SEEK_SET);
156 if (sizeof(Pfs0Superblock) != file.ReadBytes(&sb, sizeof(Pfs0Superblock)))
157 NGLOG_CRITICAL(Loader, "File reader errored out during header read.");
158
159 u64 offset = (static_cast<u64>(header.section_tables[i].media_offset) *
160 MEDIA_OFFSET_MULTIPLIER) +
161 sb.pfs0_header_offset;
162 FileSys::PartitionFilesystem npfs{};
163 ResultStatus status = npfs.Load(path, offset);
164
165 if (status == ResultStatus::Success) {
166 pfs.emplace_back(std::move(npfs));
167 pfs_offset.emplace_back(offset);
168 }
169 }
170 }
171
172 for (size_t i = 0; i < pfs.size(); ++i) {
173 if (IsPfsExeFs(pfs[i]))
174 exefs_id = i;
175 }
176
177 return ResultStatus::Success;
178}
179
180FileSys::PartitionFilesystem Nca::GetPfs(u8 id) const {
181 return pfs[id];
182}
183
184u64 Nca::GetExeFsFileOffset(const std::string& file_name) const {
185 if (exefs_id == boost::none)
186 return 0;
187 return pfs[*exefs_id].GetFileOffset(file_name) + pfs_offset[*exefs_id];
188}
189
190u64 Nca::GetExeFsFileSize(const std::string& file_name) const {
191 if (exefs_id == boost::none)
192 return 0;
193 return pfs[*exefs_id].GetFileSize(file_name);
194}
195
196u64 Nca::GetRomFsOffset() const {
197 return romfs_offset;
198}
199
200u64 Nca::GetRomFsSize() const {
201 return romfs_size;
202}
203
204std::vector<u8> Nca::GetExeFsFile(const std::string& file_name) {
205 std::vector<u8> out(GetExeFsFileSize(file_name));
206 file.Seek(GetExeFsFileOffset(file_name), SEEK_SET);
207 file.ReadBytes(out.data(), GetExeFsFileSize(file_name));
208 return out;
209}
210
211AppLoader_NCA::AppLoader_NCA(FileUtil::IOFile&& file, std::string filepath)
212 : AppLoader(std::move(file)), filepath(std::move(filepath)) {}
213
214FileType AppLoader_NCA::IdentifyType(FileUtil::IOFile& file, const std::string&) {
215 file.Seek(0, SEEK_SET);
216 std::array<u8, 0x400> header_enc_array{};
217 if (0x400 != file.ReadBytes(header_enc_array.data(), 0x400))
218 return FileType::Error;
219
220 // TODO(DarkLordZach): Assuming everything is decrypted. Add crypto support.
221 NcaHeader header{};
222 std::memcpy(&header, header_enc_array.data(), sizeof(NcaHeader));
223
224 if (IsValidNca(header) && header.content_type == NcaContentType::Program)
225 return FileType::NCA;
226
227 return FileType::Error;
228}
229
230ResultStatus AppLoader_NCA::Load(Kernel::SharedPtr<Kernel::Process>& process) {
231 if (is_loaded) {
232 return ResultStatus::ErrorAlreadyLoaded;
233 }
234 if (!file.IsOpen()) {
235 return ResultStatus::Error;
236 }
237
238 nca = std::make_unique<Nca>();
239 ResultStatus result = nca->Load(std::move(file), filepath);
240 if (result != ResultStatus::Success) {
241 return result;
242 }
243
244 result = metadata.Load(nca->GetExeFsFile("main.npdm"));
245 if (result != ResultStatus::Success) {
246 return result;
247 }
248 metadata.Print();
249
250 const FileSys::ProgramAddressSpaceType arch_bits{metadata.GetAddressSpaceType()};
251 if (arch_bits == FileSys::ProgramAddressSpaceType::Is32Bit) {
252 return ResultStatus::ErrorUnsupportedArch;
253 }
254
255 VAddr next_load_addr{Memory::PROCESS_IMAGE_VADDR};
256 for (const auto& module : {"rtld", "main", "subsdk0", "subsdk1", "subsdk2", "subsdk3",
257 "subsdk4", "subsdk5", "subsdk6", "subsdk7", "sdk"}) {
258 const VAddr load_addr = next_load_addr;
259 next_load_addr = AppLoader_NSO::LoadModule(module, nca->GetExeFsFile(module), load_addr);
260 if (next_load_addr) {
261 NGLOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", module, load_addr);
262 } else {
263 next_load_addr = load_addr;
264 }
265 }
266
267 process->program_id = metadata.GetTitleID();
268 process->svc_access_mask.set();
269 process->address_mappings = default_address_mappings;
270 process->resource_limit =
271 Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION);
272 process->Run(Memory::PROCESS_IMAGE_VADDR, metadata.GetMainThreadPriority(),
273 metadata.GetMainThreadStackSize());
274
275 if (nca->GetRomFsSize() > 0)
276 Service::FileSystem::RegisterFileSystem(std::make_unique<FileSys::RomFS_Factory>(*this),
277 Service::FileSystem::Type::RomFS);
278
279 is_loaded = true;
280 return ResultStatus::Success;
281}
282
283ResultStatus AppLoader_NCA::ReadRomFS(std::shared_ptr<FileUtil::IOFile>& romfs_file, u64& offset,
284 u64& size) {
285 if (nca->GetRomFsSize() == 0) {
286 NGLOG_DEBUG(Loader, "No RomFS available");
287 return ResultStatus::ErrorNotUsed;
288 }
289
290 romfs_file = std::make_shared<FileUtil::IOFile>(filepath, "rb");
291
292 offset = nca->GetRomFsOffset();
293 size = nca->GetRomFsSize();
294
295 NGLOG_DEBUG(Loader, "RomFS offset: 0x{:016X}", offset);
296 NGLOG_DEBUG(Loader, "RomFS size: 0x{:016X}", size);
297
298 return ResultStatus::Success;
299}
300
301AppLoader_NCA::~AppLoader_NCA() = default;
302
303} // namespace Loader
diff --git a/src/core/loader/nca.h b/src/core/loader/nca.h
new file mode 100644
index 000000000..3b6c451d0
--- /dev/null
+++ b/src/core/loader/nca.h
@@ -0,0 +1,49 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include "common/common_types.h"
9#include "core/file_sys/partition_filesystem.h"
10#include "core/file_sys/program_metadata.h"
11#include "core/hle/kernel/kernel.h"
12#include "core/loader/loader.h"
13
14namespace Loader {
15
16class Nca;
17
18/// Loads an NCA file
19class AppLoader_NCA final : public AppLoader {
20public:
21 AppLoader_NCA(FileUtil::IOFile&& file, std::string filepath);
22
23 /**
24 * Returns the type of the file
25 * @param file FileUtil::IOFile open file
26 * @param filepath Path of the file that we are opening.
27 * @return FileType found, or FileType::Error if this loader doesn't know it
28 */
29 static FileType IdentifyType(FileUtil::IOFile& file, const std::string& filepath);
30
31 FileType GetFileType() override {
32 return IdentifyType(file, filepath);
33 }
34
35 ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) override;
36
37 ResultStatus ReadRomFS(std::shared_ptr<FileUtil::IOFile>& romfs_file, u64& offset,
38 u64& size) override;
39
40 ~AppLoader_NCA();
41
42private:
43 std::string filepath;
44 FileSys::ProgramMetadata metadata;
45
46 std::unique_ptr<Nca> nca;
47};
48
49} // namespace Loader
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 01be9e217..845ed7e90 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -66,8 +66,22 @@ FileType AppLoader_NSO::IdentifyType(FileUtil::IOFile& file, const std::string&)
66 return FileType::Error; 66 return FileType::Error;
67} 67}
68 68
69static std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
70 const NsoSegmentHeader& header) {
71 std::vector<u8> uncompressed_data;
72 uncompressed_data.resize(header.size);
73 const int bytes_uncompressed = LZ4_decompress_safe(
74 reinterpret_cast<const char*>(compressed_data.data()),
75 reinterpret_cast<char*>(uncompressed_data.data()), compressed_data.size(), header.size);
76
77 ASSERT_MSG(bytes_uncompressed == header.size && bytes_uncompressed == uncompressed_data.size(),
78 "{} != {} != {}", bytes_uncompressed, header.size, uncompressed_data.size());
79
80 return uncompressed_data;
81}
82
69static std::vector<u8> ReadSegment(FileUtil::IOFile& file, const NsoSegmentHeader& header, 83static std::vector<u8> ReadSegment(FileUtil::IOFile& file, const NsoSegmentHeader& header,
70 int compressed_size) { 84 size_t compressed_size) {
71 std::vector<u8> compressed_data; 85 std::vector<u8> compressed_data;
72 compressed_data.resize(compressed_size); 86 compressed_data.resize(compressed_size);
73 87
@@ -77,22 +91,65 @@ static std::vector<u8> ReadSegment(FileUtil::IOFile& file, const NsoSegmentHeade
77 return {}; 91 return {};
78 } 92 }
79 93
80 std::vector<u8> uncompressed_data; 94 return DecompressSegment(compressed_data, header);
81 uncompressed_data.resize(header.size);
82 const int bytes_uncompressed = LZ4_decompress_safe(
83 reinterpret_cast<const char*>(compressed_data.data()),
84 reinterpret_cast<char*>(uncompressed_data.data()), compressed_size, header.size);
85
86 ASSERT_MSG(bytes_uncompressed == header.size && bytes_uncompressed == uncompressed_data.size(),
87 "{} != {} != {}", bytes_uncompressed, header.size, uncompressed_data.size());
88
89 return uncompressed_data;
90} 95}
91 96
92static constexpr u32 PageAlignSize(u32 size) { 97static constexpr u32 PageAlignSize(u32 size) {
93 return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK; 98 return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
94} 99}
95 100
101VAddr AppLoader_NSO::LoadModule(const std::string& name, const std::vector<u8>& file_data,
102 VAddr load_base) {
103 if (file_data.size() < sizeof(NsoHeader))
104 return {};
105
106 NsoHeader nso_header;
107 std::memcpy(&nso_header, file_data.data(), sizeof(NsoHeader));
108
109 if (nso_header.magic != Common::MakeMagic('N', 'S', 'O', '0'))
110 return {};
111
112 // Build program image
113 Kernel::SharedPtr<Kernel::CodeSet> codeset = Kernel::CodeSet::Create("");
114 std::vector<u8> program_image;
115 for (int i = 0; i < nso_header.segments.size(); ++i) {
116 std::vector<u8> compressed_data(nso_header.segments_compressed_size[i]);
117 for (int j = 0; j < nso_header.segments_compressed_size[i]; ++j)
118 compressed_data[j] = file_data[nso_header.segments[i].offset + j];
119 std::vector<u8> data = DecompressSegment(compressed_data, nso_header.segments[i]);
120 program_image.resize(nso_header.segments[i].location);
121 program_image.insert(program_image.end(), data.begin(), data.end());
122 codeset->segments[i].addr = nso_header.segments[i].location;
123 codeset->segments[i].offset = nso_header.segments[i].location;
124 codeset->segments[i].size = PageAlignSize(static_cast<u32>(data.size()));
125 }
126
127 // MOD header pointer is at .text offset + 4
128 u32 module_offset;
129 std::memcpy(&module_offset, program_image.data() + 4, sizeof(u32));
130
131 // Read MOD header
132 ModHeader mod_header{};
133 // Default .bss to size in segment header if MOD0 section doesn't exist
134 u32 bss_size{PageAlignSize(nso_header.segments[2].bss_size)};
135 std::memcpy(&mod_header, program_image.data() + module_offset, sizeof(ModHeader));
136 const bool has_mod_header{mod_header.magic == Common::MakeMagic('M', 'O', 'D', '0')};
137 if (has_mod_header) {
138 // Resize program image to include .bss section and page align each section
139 bss_size = PageAlignSize(mod_header.bss_end_offset - mod_header.bss_start_offset);
140 }
141 codeset->data.size += bss_size;
142 const u32 image_size{PageAlignSize(static_cast<u32>(program_image.size()) + bss_size)};
143 program_image.resize(image_size);
144
145 // Load codeset for current process
146 codeset->name = name;
147 codeset->memory = std::make_shared<std::vector<u8>>(std::move(program_image));
148 Core::CurrentProcess()->LoadModule(codeset, load_base);
149
150 return load_base + image_size;
151}
152
96VAddr AppLoader_NSO::LoadModule(const std::string& path, VAddr load_base) { 153VAddr AppLoader_NSO::LoadModule(const std::string& path, VAddr load_base) {
97 FileUtil::IOFile file(path, "rb"); 154 FileUtil::IOFile file(path, "rb");
98 if (!file.IsOpen()) { 155 if (!file.IsOpen()) {
diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h
index 1ae30a824..386f4d39a 100644
--- a/src/core/loader/nso.h
+++ b/src/core/loader/nso.h
@@ -29,6 +29,9 @@ public:
29 return IdentifyType(file, filepath); 29 return IdentifyType(file, filepath);
30 } 30 }
31 31
32 static VAddr LoadModule(const std::string& name, const std::vector<u8>& file_data,
33 VAddr load_base);
34
32 static VAddr LoadModule(const std::string& path, VAddr load_base); 35 static VAddr LoadModule(const std::string& path, VAddr load_base);
33 36
34 ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) override; 37 ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) override;
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 3b81acd63..f070dee7d 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -241,6 +241,10 @@ bool IsValidVirtualAddress(const VAddr vaddr) {
241 return IsValidVirtualAddress(*Core::CurrentProcess(), vaddr); 241 return IsValidVirtualAddress(*Core::CurrentProcess(), vaddr);
242} 242}
243 243
244bool IsKernelVirtualAddress(const VAddr vaddr) {
245 return KERNEL_REGION_VADDR <= vaddr && vaddr < KERNEL_REGION_END;
246}
247
244bool IsValidPhysicalAddress(const PAddr paddr) { 248bool IsValidPhysicalAddress(const PAddr paddr) {
245 return GetPhysicalPointer(paddr) != nullptr; 249 return GetPhysicalPointer(paddr) != nullptr;
246} 250}
diff --git a/src/core/memory.h b/src/core/memory.h
index 3f56a2c6a..8d5d017a4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -188,6 +188,11 @@ enum : VAddr {
188 MAP_REGION_VADDR = NEW_MAP_REGION_VADDR_END, 188 MAP_REGION_VADDR = NEW_MAP_REGION_VADDR_END,
189 MAP_REGION_SIZE = 0x1000000000, 189 MAP_REGION_SIZE = 0x1000000000,
190 MAP_REGION_VADDR_END = MAP_REGION_VADDR + MAP_REGION_SIZE, 190 MAP_REGION_VADDR_END = MAP_REGION_VADDR + MAP_REGION_SIZE,
191
192 /// Kernel Virtual Address Range
193 KERNEL_REGION_VADDR = 0xFFFFFF8000000000,
194 KERNEL_REGION_SIZE = 0x7FFFE00000,
195 KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE,
191}; 196};
192 197
193/// Currently active page table 198/// Currently active page table
@@ -197,6 +202,8 @@ PageTable* GetCurrentPageTable();
197/// Determines if the given VAddr is valid for the specified process. 202/// Determines if the given VAddr is valid for the specified process.
198bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr); 203bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr);
199bool IsValidVirtualAddress(const VAddr addr); 204bool IsValidVirtualAddress(const VAddr addr);
205/// Determines if the given VAddr is a kernel address
206bool IsKernelVirtualAddress(const VAddr addr);
200 207
201bool IsValidPhysicalAddress(const PAddr addr); 208bool IsValidPhysicalAddress(const PAddr addr);
202 209
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 281810357..c6431e722 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -9,6 +9,8 @@ add_library(video_core STATIC
9 engines/maxwell_3d.h 9 engines/maxwell_3d.h
10 engines/maxwell_compute.cpp 10 engines/maxwell_compute.cpp
11 engines/maxwell_compute.h 11 engines/maxwell_compute.h
12 engines/maxwell_dma.cpp
13 engines/maxwell_dma.h
12 engines/shader_bytecode.h 14 engines/shader_bytecode.h
13 gpu.cpp 15 gpu.cpp
14 gpu.h 16 gpu.h
@@ -39,6 +41,8 @@ add_library(video_core STATIC
39 renderer_opengl/maxwell_to_gl.h 41 renderer_opengl/maxwell_to_gl.h
40 renderer_opengl/renderer_opengl.cpp 42 renderer_opengl/renderer_opengl.cpp
41 renderer_opengl/renderer_opengl.h 43 renderer_opengl/renderer_opengl.h
44 textures/astc.cpp
45 textures/astc.h
42 textures/decoders.cpp 46 textures/decoders.cpp
43 textures/decoders.h 47 textures/decoders.h
44 textures/texture.h 48 textures/texture.h
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index d72d6f760..cec9cb9f3 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -16,6 +16,7 @@
16#include "video_core/engines/fermi_2d.h" 16#include "video_core/engines/fermi_2d.h"
17#include "video_core/engines/maxwell_3d.h" 17#include "video_core/engines/maxwell_3d.h"
18#include "video_core/engines/maxwell_compute.h" 18#include "video_core/engines/maxwell_compute.h"
19#include "video_core/engines/maxwell_dma.h"
19#include "video_core/gpu.h" 20#include "video_core/gpu.h"
20#include "video_core/renderer_base.h" 21#include "video_core/renderer_base.h"
21#include "video_core/video_core.h" 22#include "video_core/video_core.h"
@@ -60,8 +61,11 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params)
60 case EngineID::MAXWELL_COMPUTE_B: 61 case EngineID::MAXWELL_COMPUTE_B:
61 maxwell_compute->WriteReg(method, value); 62 maxwell_compute->WriteReg(method, value);
62 break; 63 break;
64 case EngineID::MAXWELL_DMA_COPY_A:
65 maxwell_dma->WriteReg(method, value);
66 break;
63 default: 67 default:
64 UNIMPLEMENTED(); 68 UNIMPLEMENTED_MSG("Unimplemented engine");
65 } 69 }
66} 70}
67 71
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index bbba8e380..9382a75e5 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -55,8 +55,10 @@ public:
55 virtual ~BreakPointObserver() { 55 virtual ~BreakPointObserver() {
56 auto context = context_weak.lock(); 56 auto context = context_weak.lock();
57 if (context) { 57 if (context) {
58 std::unique_lock<std::mutex> lock(context->breakpoint_mutex); 58 {
59 context->breakpoint_observers.remove(this); 59 std::unique_lock<std::mutex> lock(context->breakpoint_mutex);
60 context->breakpoint_observers.remove(this);
61 }
60 62
61 // If we are the last observer to be destroyed, tell the debugger context that 63 // If we are the last observer to be destroyed, tell the debugger context that
62 // it is free to continue. In particular, this is required for a proper yuzu 64 // it is free to continue. In particular, this is required for a proper yuzu
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 6b9382f06..998b7c843 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -47,6 +47,7 @@ void Fermi2D::HandleSurfaceCopy() {
47 47
48 if (regs.src.linear == regs.dst.linear) { 48 if (regs.src.linear == regs.dst.linear) {
49 // If the input layout and the output layout are the same, just perform a raw copy. 49 // If the input layout and the output layout are the same, just perform a raw copy.
50 ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
50 Memory::CopyBlock(dest_cpu, source_cpu, 51 Memory::CopyBlock(dest_cpu, source_cpu,
51 src_bytes_per_pixel * regs.dst.width * regs.dst.height); 52 src_bytes_per_pixel * regs.dst.width * regs.dst.height);
52 return; 53 return;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 86e9dc998..93c43c8cb 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -328,8 +328,9 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
328 328
329 Texture::FullTextureInfo tex_info{}; 329 Texture::FullTextureInfo tex_info{};
330 // TODO(Subv): Use the shader to determine which textures are actually accessed. 330 // TODO(Subv): Use the shader to determine which textures are actually accessed.
331 tex_info.index = (current_texture - tex_info_buffer.address - TextureInfoOffset) / 331 tex_info.index =
332 sizeof(Texture::TextureHandle); 332 static_cast<u32>(current_texture - tex_info_buffer.address - TextureInfoOffset) /
333 sizeof(Texture::TextureHandle);
333 334
334 // Load the TIC data. 335 // Load the TIC data.
335 if (tex_handle.tic_id != 0) { 336 if (tex_handle.tic_id != 0) {
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
new file mode 100644
index 000000000..442138988
--- /dev/null
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -0,0 +1,69 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/memory.h"
6#include "video_core/engines/maxwell_dma.h"
7#include "video_core/textures/decoders.h"
8
9namespace Tegra {
10namespace Engines {
11
12MaxwellDMA::MaxwellDMA(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
13
14void MaxwellDMA::WriteReg(u32 method, u32 value) {
15 ASSERT_MSG(method < Regs::NUM_REGS,
16 "Invalid MaxwellDMA register, increase the size of the Regs structure");
17
18 regs.reg_array[method] = value;
19
20#define MAXWELLDMA_REG_INDEX(field_name) \
21 (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32))
22
23 switch (method) {
24 case MAXWELLDMA_REG_INDEX(exec): {
25 HandleCopy();
26 break;
27 }
28 }
29
30#undef MAXWELLDMA_REG_INDEX
31}
32
33void MaxwellDMA::HandleCopy() {
34 NGLOG_WARNING(HW_GPU, "Requested a DMA copy");
35
36 const GPUVAddr source = regs.src_address.Address();
37 const GPUVAddr dest = regs.dst_address.Address();
38
39 const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
40 const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
41
42 // TODO(Subv): Perform more research and implement all features of this engine.
43 ASSERT(regs.exec.enable_swizzle == 0);
44 ASSERT(regs.exec.enable_2d == 1);
45 ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
46 ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
47 ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
48 ASSERT(regs.src_params.pos_x == 0);
49 ASSERT(regs.src_params.pos_y == 0);
50 ASSERT(regs.dst_params.pos_x == 0);
51 ASSERT(regs.dst_params.pos_y == 0);
52 ASSERT(regs.exec.is_dst_linear != regs.exec.is_src_linear);
53
54 u8* src_buffer = Memory::GetPointer(source_cpu);
55 u8* dst_buffer = Memory::GetPointer(dest_cpu);
56
57 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
58 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
59 Texture::CopySwizzledData(regs.src_params.size_x, regs.src_params.size_y, 1, 1, src_buffer,
60 dst_buffer, true, regs.src_params.BlockHeight());
61 } else {
62 // If the input is linear and the output is tiled, swizzle the input and copy it over.
63 Texture::CopySwizzledData(regs.dst_params.size_x, regs.dst_params.size_y, 1, 1, dst_buffer,
64 src_buffer, false, regs.dst_params.BlockHeight());
65 }
66}
67
68} // namespace Engines
69} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
new file mode 100644
index 000000000..905749bde
--- /dev/null
+++ b/src/video_core/engines/maxwell_dma.h
@@ -0,0 +1,155 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include "common/assert.h"
9#include "common/bit_field.h"
10#include "common/common_funcs.h"
11#include "common/common_types.h"
12#include "video_core/gpu.h"
13#include "video_core/memory_manager.h"
14
15namespace Tegra {
16namespace Engines {
17
18class MaxwellDMA final {
19public:
20 explicit MaxwellDMA(MemoryManager& memory_manager);
21 ~MaxwellDMA() = default;
22
23 /// Write the value to the register identified by method.
24 void WriteReg(u32 method, u32 value);
25
26 struct Regs {
27 static constexpr size_t NUM_REGS = 0x1D6;
28
29 struct Parameters {
30 union {
31 BitField<0, 4, u32> block_depth;
32 BitField<4, 4, u32> block_height;
33 BitField<8, 4, u32> block_width;
34 };
35 u32 size_x;
36 u32 size_y;
37 u32 size_z;
38 u32 pos_z;
39 union {
40 BitField<0, 16, u32> pos_x;
41 BitField<16, 16, u32> pos_y;
42 };
43
44 u32 BlockHeight() const {
45 return 1 << block_height;
46 }
47 };
48
49 static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
50
51 enum class CopyMode : u32 {
52 None = 0,
53 Unk1 = 1,
54 Unk2 = 2,
55 };
56
57 enum class QueryMode : u32 {
58 None = 0,
59 Short = 1,
60 Long = 2,
61 };
62
63 enum class QueryIntr : u32 {
64 None = 0,
65 Block = 1,
66 NonBlock = 2,
67 };
68
69 union {
70 struct {
71 INSERT_PADDING_WORDS(0xC0);
72
73 struct {
74 union {
75 BitField<0, 2, CopyMode> copy_mode;
76 BitField<2, 1, u32> flush;
77
78 BitField<3, 2, QueryMode> query_mode;
79 BitField<5, 2, QueryIntr> query_intr;
80
81 BitField<7, 1, u32> is_src_linear;
82 BitField<8, 1, u32> is_dst_linear;
83
84 BitField<9, 1, u32> enable_2d;
85 BitField<10, 1, u32> enable_swizzle;
86 };
87 } exec;
88
89 INSERT_PADDING_WORDS(0x3F);
90
91 struct {
92 u32 address_high;
93 u32 address_low;
94
95 GPUVAddr Address() const {
96 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
97 address_low);
98 }
99 } src_address;
100
101 struct {
102 u32 address_high;
103 u32 address_low;
104
105 GPUVAddr Address() const {
106 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
107 address_low);
108 }
109 } dst_address;
110
111 u32 src_pitch;
112 u32 dst_pitch;
113 u32 x_count;
114 u32 y_count;
115
116 INSERT_PADDING_WORDS(0xBB);
117
118 Parameters dst_params;
119
120 INSERT_PADDING_WORDS(1);
121
122 Parameters src_params;
123
124 INSERT_PADDING_WORDS(0x13);
125 };
126 std::array<u32, NUM_REGS> reg_array;
127 };
128 } regs{};
129
130 MemoryManager& memory_manager;
131
132private:
133 /// Performs the copy from the source buffer to the destination buffer as configured in the
134 /// registers.
135 void HandleCopy();
136};
137
138#define ASSERT_REG_POSITION(field_name, position) \
139 static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \
140 "Field " #field_name " has invalid position")
141
142ASSERT_REG_POSITION(exec, 0xC0);
143ASSERT_REG_POSITION(src_address, 0x100);
144ASSERT_REG_POSITION(dst_address, 0x102);
145ASSERT_REG_POSITION(src_pitch, 0x104);
146ASSERT_REG_POSITION(dst_pitch, 0x105);
147ASSERT_REG_POSITION(x_count, 0x106);
148ASSERT_REG_POSITION(y_count, 0x107);
149ASSERT_REG_POSITION(dst_params, 0x1C3);
150ASSERT_REG_POSITION(src_params, 0x1CA);
151
152#undef ASSERT_REG_POSITION
153
154} // namespace Engines
155} // namespace Tegra
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d748026b8..cb4db0679 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -213,16 +213,15 @@ union Instruction {
213 BitField<28, 8, Register> gpr28; 213 BitField<28, 8, Register> gpr28;
214 BitField<39, 8, Register> gpr39; 214 BitField<39, 8, Register> gpr39;
215 BitField<48, 16, u64> opcode; 215 BitField<48, 16, u64> opcode;
216 BitField<50, 1, u64> saturate_a;
217 216
218 union { 217 union {
219 BitField<20, 19, u64> imm20_19; 218 BitField<20, 19, u64> imm20_19;
220 BitField<20, 32, u64> imm20_32; 219 BitField<20, 32, s64> imm20_32;
221 BitField<45, 1, u64> negate_b; 220 BitField<45, 1, u64> negate_b;
222 BitField<46, 1, u64> abs_a; 221 BitField<46, 1, u64> abs_a;
223 BitField<48, 1, u64> negate_a; 222 BitField<48, 1, u64> negate_a;
224 BitField<49, 1, u64> abs_b; 223 BitField<49, 1, u64> abs_b;
225 BitField<50, 1, u64> abs_d; 224 BitField<50, 1, u64> saturate_d;
226 BitField<56, 1, u64> negate_imm; 225 BitField<56, 1, u64> negate_imm;
227 226
228 union { 227 union {
@@ -231,10 +230,18 @@ union Instruction {
231 } fmnmx; 230 } fmnmx;
232 231
233 union { 232 union {
233 BitField<39, 1, u64> invert_a;
234 BitField<40, 1, u64> invert_b;
235 BitField<41, 2, LogicOperation> operation;
236 BitField<44, 2, u64> unk44;
237 BitField<48, 3, Pred> pred48;
238 } lop;
239
240 union {
234 BitField<53, 2, LogicOperation> operation; 241 BitField<53, 2, LogicOperation> operation;
235 BitField<55, 1, u64> invert_a; 242 BitField<55, 1, u64> invert_a;
236 BitField<56, 1, u64> invert_b; 243 BitField<56, 1, u64> invert_b;
237 } lop; 244 } lop32i;
238 245
239 float GetImm20_19() const { 246 float GetImm20_19() const {
240 float result{}; 247 float result{};
@@ -247,7 +254,7 @@ union Instruction {
247 254
248 float GetImm20_32() const { 255 float GetImm20_32() const {
249 float result{}; 256 float result{};
250 u32 imm{static_cast<u32>(imm20_32)}; 257 s32 imm{static_cast<s32>(imm20_32)};
251 std::memcpy(&result, &imm, sizeof(imm)); 258 std::memcpy(&result, &imm, sizeof(imm));
252 return result; 259 return result;
253 } 260 }
@@ -271,6 +278,11 @@ union Instruction {
271 } alu_integer; 278 } alu_integer;
272 279
273 union { 280 union {
281 BitField<54, 1, u64> saturate;
282 BitField<56, 1, u64> negate_a;
283 } iadd32i;
284
285 union {
274 BitField<20, 8, u64> shift_position; 286 BitField<20, 8, u64> shift_position;
275 BitField<28, 8, u64> shift_length; 287 BitField<28, 8, u64> shift_length;
276 BitField<48, 1, u64> negate_b; 288 BitField<48, 1, u64> negate_b;
@@ -330,7 +342,17 @@ union Instruction {
330 } fset; 342 } fset;
331 343
332 union { 344 union {
333 BitField<10, 2, Register::Size> size; 345 BitField<39, 3, u64> pred39;
346 BitField<42, 1, u64> neg_pred;
347 BitField<44, 1, u64> bf;
348 BitField<45, 2, PredOperation> op;
349 BitField<48, 1, u64> is_signed;
350 BitField<49, 3, PredCondition> cond;
351 } iset;
352
353 union {
354 BitField<8, 2, Register::Size> dest_size;
355 BitField<10, 2, Register::Size> src_size;
334 BitField<12, 1, u64> is_output_signed; 356 BitField<12, 1, u64> is_output_signed;
335 BitField<13, 1, u64> is_input_signed; 357 BitField<13, 1, u64> is_input_signed;
336 BitField<41, 2, u64> selector; 358 BitField<41, 2, u64> selector;
@@ -350,7 +372,7 @@ union Instruction {
350 BitField<31, 4, u64> component_mask; 372 BitField<31, 4, u64> component_mask;
351 373
352 bool IsComponentEnabled(size_t component) const { 374 bool IsComponentEnabled(size_t component) const {
353 return ((1 << component) & component_mask) != 0; 375 return ((1ull << component) & component_mask) != 0;
354 } 376 }
355 } tex; 377 } tex;
356 378
@@ -369,7 +391,7 @@ union Instruction {
369 391
370 ASSERT(component_mask_selector < mask.size()); 392 ASSERT(component_mask_selector < mask.size());
371 393
372 return ((1 << component) & mask[component_mask_selector]) != 0; 394 return ((1ull << component) & mask[component_mask_selector]) != 0;
373 } 395 }
374 } texs; 396 } texs;
375 397
@@ -442,6 +464,7 @@ public:
442 IADD_C, 464 IADD_C,
443 IADD_R, 465 IADD_R,
444 IADD_IMM, 466 IADD_IMM,
467 IADD32I,
445 ISCADD_C, // Scale and Add 468 ISCADD_C, // Scale and Add
446 ISCADD_R, 469 ISCADD_R,
447 ISCADD_IMM, 470 ISCADD_IMM,
@@ -461,6 +484,9 @@ public:
461 I2I_C, 484 I2I_C,
462 I2I_R, 485 I2I_R,
463 I2I_IMM, 486 I2I_IMM,
487 LOP_C,
488 LOP_R,
489 LOP_IMM,
464 LOP32I, 490 LOP32I,
465 MOV_C, 491 MOV_C,
466 MOV_R, 492 MOV_R,
@@ -487,6 +513,9 @@ public:
487 ISETP_C, 513 ISETP_C,
488 ISETP_IMM, 514 ISETP_IMM,
489 ISETP_R, 515 ISETP_R,
516 ISET_R,
517 ISET_C,
518 ISET_IMM,
490 PSETP, 519 PSETP,
491 XMAD_IMM, 520 XMAD_IMM,
492 XMAD_CR, 521 XMAD_CR,
@@ -497,15 +526,17 @@ public:
497 enum class Type { 526 enum class Type {
498 Trivial, 527 Trivial,
499 Arithmetic, 528 Arithmetic,
529 ArithmeticImmediate,
500 ArithmeticInteger, 530 ArithmeticInteger,
531 ArithmeticIntegerImmediate,
501 Bfe, 532 Bfe,
502 Logic,
503 Shift, 533 Shift,
504 Ffma, 534 Ffma,
505 Flow, 535 Flow,
506 Memory, 536 Memory,
507 FloatSet, 537 FloatSet,
508 FloatSetPredicate, 538 FloatSetPredicate,
539 IntegerSet,
509 IntegerSetPredicate, 540 IntegerSetPredicate,
510 PredicateSetPredicate, 541 PredicateSetPredicate,
511 Conversion, 542 Conversion,
@@ -625,10 +656,11 @@ private:
625 INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"), 656 INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"),
626 INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"), 657 INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
627 INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"), 658 INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
628 INST("00011110--------", Id::FMUL32_IMM, Type::Arithmetic, "FMUL32_IMM"), 659 INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"),
629 INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"), 660 INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"),
630 INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"), 661 INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"),
631 INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"), 662 INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"),
663 INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"),
632 INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"), 664 INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
633 INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"), 665 INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
634 INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"), 666 INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"),
@@ -645,7 +677,7 @@ private:
645 INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), 677 INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
646 INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), 678 INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
647 INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), 679 INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
648 INST("000000010000----", Id::MOV32_IMM, Type::Arithmetic, "MOV32_IMM"), 680 INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
649 INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), 681 INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
650 INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), 682 INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
651 INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"), 683 INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
@@ -655,7 +687,10 @@ private:
655 INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"), 687 INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
656 INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"), 688 INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
657 INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"), 689 INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
658 INST("000001----------", Id::LOP32I, Type::Logic, "LOP32I"), 690 INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
691 INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
692 INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
693 INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
659 INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"), 694 INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
660 INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"), 695 INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
661 INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"), 696 INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),
@@ -677,6 +712,9 @@ private:
677 INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"), 712 INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"),
678 INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"), 713 INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"),
679 INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"), 714 INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"),
715 INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"),
716 INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
717 INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
680 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), 718 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
681 INST("0011011-00------", Id::XMAD_IMM, Type::Arithmetic, "XMAD_IMM"), 719 INST("0011011-00------", Id::XMAD_IMM, Type::Arithmetic, "XMAD_IMM"),
682 INST("0100111---------", Id::XMAD_CR, Type::Arithmetic, "XMAD_CR"), 720 INST("0100111---------", Id::XMAD_CR, Type::Arithmetic, "XMAD_CR"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 66351fe6e..e36483145 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -5,6 +5,7 @@
5#include "video_core/engines/fermi_2d.h" 5#include "video_core/engines/fermi_2d.h"
6#include "video_core/engines/maxwell_3d.h" 6#include "video_core/engines/maxwell_3d.h"
7#include "video_core/engines/maxwell_compute.h" 7#include "video_core/engines/maxwell_compute.h"
8#include "video_core/engines/maxwell_dma.h"
8#include "video_core/gpu.h" 9#include "video_core/gpu.h"
9 10
10namespace Tegra { 11namespace Tegra {
@@ -14,6 +15,7 @@ GPU::GPU() {
14 maxwell_3d = std::make_unique<Engines::Maxwell3D>(*memory_manager); 15 maxwell_3d = std::make_unique<Engines::Maxwell3D>(*memory_manager);
15 fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager); 16 fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
16 maxwell_compute = std::make_unique<Engines::MaxwellCompute>(); 17 maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
18 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(*memory_manager);
17} 19}
18 20
19GPU::~GPU() = default; 21GPU::~GPU() = default;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 5852b9619..7b4e9b842 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -63,6 +63,7 @@ namespace Engines {
63class Fermi2D; 63class Fermi2D;
64class Maxwell3D; 64class Maxwell3D;
65class MaxwellCompute; 65class MaxwellCompute;
66class MaxwellDMA;
66} // namespace Engines 67} // namespace Engines
67 68
68enum class EngineID { 69enum class EngineID {
@@ -103,6 +104,8 @@ private:
103 std::unique_ptr<Engines::Fermi2D> fermi_2d; 104 std::unique_ptr<Engines::Fermi2D> fermi_2d;
104 /// Compute engine 105 /// Compute engine
105 std::unique_ptr<Engines::MaxwellCompute> maxwell_compute; 106 std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
107 /// DMA engine
108 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
106}; 109};
107 110
108} // namespace Tegra 111} // namespace Tegra
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e04966849..0f6dec60b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -639,7 +639,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
639 639
640 state.Apply(); 640 state.Apply();
641 641
642 return current_bindpoint + entries.size(); 642 return current_bindpoint + static_cast<u32>(entries.size());
643} 643}
644 644
645u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit, 645u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit,
@@ -685,7 +685,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,
685 685
686 state.Apply(); 686 state.Apply();
687 687
688 return current_unit + entries.size(); 688 return current_unit + static_cast<u32>(entries.size());
689} 689}
690 690
691void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, 691void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
@@ -746,7 +746,6 @@ void RasterizerOpenGL::SyncDepthOffset() {
746 746
747void RasterizerOpenGL::SyncBlendState() { 747void RasterizerOpenGL::SyncBlendState() {
748 const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; 748 const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
749 ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented");
750 749
751 // TODO(Subv): Support more than just render target 0. 750 // TODO(Subv): Support more than just render target 0.
752 state.blend.enabled = regs.blend.enable[0] != 0; 751 state.blend.enabled = regs.blend.enable[0] != 0;
@@ -754,6 +753,7 @@ void RasterizerOpenGL::SyncBlendState() {
754 if (!state.blend.enabled) 753 if (!state.blend.enabled)
755 return; 754 return;
756 755
756 ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented");
757 ASSERT_MSG(!regs.independent_blend[0].separate_alpha, "Unimplemented"); 757 ASSERT_MSG(!regs.independent_blend[0].separate_alpha, "Unimplemented");
758 state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_rgb); 758 state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_rgb);
759 state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_rgb); 759 state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_rgb);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index ff48a2669..61d670dcb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -28,6 +28,7 @@
28#include "video_core/engines/maxwell_3d.h" 28#include "video_core/engines/maxwell_3d.h"
29#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 29#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
30#include "video_core/renderer_opengl/gl_state.h" 30#include "video_core/renderer_opengl/gl_state.h"
31#include "video_core/textures/astc.h"
31#include "video_core/textures/decoders.h" 32#include "video_core/textures/decoders.h"
32#include "video_core/utils.h" 33#include "video_core/utils.h"
33#include "video_core/video_core.h" 34#include "video_core/video_core.h"
@@ -55,6 +56,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
55 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 56 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
56 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 57 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
57 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1 58 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1
59 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4
58}}; 60}};
59 61
60static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { 62static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
@@ -86,6 +88,23 @@ static u16 GetResolutionScaleFactor() {
86 : Settings::values.resolution_factor); 88 : Settings::values.resolution_factor);
87} 89}
88 90
91static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 width, u32 height) {
92 u32 block_width{};
93 u32 block_height{};
94
95 switch (format) {
96 case PixelFormat::ASTC_2D_4X4:
97 block_width = 4;
98 block_height = 4;
99 break;
100 default:
101 NGLOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
102 UNREACHABLE();
103 }
104
105 data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height);
106}
107
89template <bool morton_to_gl, PixelFormat format> 108template <bool morton_to_gl, PixelFormat format>
90void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base, 109void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base,
91 Tegra::GPUVAddr start, Tegra::GPUVAddr end) { 110 Tegra::GPUVAddr start, Tegra::GPUVAddr end) {
@@ -97,6 +116,12 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::
97 auto data = Tegra::Texture::UnswizzleTexture( 116 auto data = Tegra::Texture::UnswizzleTexture(
98 *gpu.memory_manager->GpuToCpuAddress(base), 117 *gpu.memory_manager->GpuToCpuAddress(base),
99 SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); 118 SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height);
119
120 if (SurfaceParams::IsFormatASTC(format)) {
121 // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this
122 ConvertASTCToRGBA8(data, format, stride, height);
123 }
124
100 std::memcpy(gl_buffer, data.data(), data.size()); 125 std::memcpy(gl_buffer, data.data(), data.size());
101 } else { 126 } else {
102 // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check 127 // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
@@ -118,7 +143,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
118 MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>, 143 MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>,
119 MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::DXT1>, 144 MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::DXT1>,
120 MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>, 145 MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>,
121 MortonCopy<true, PixelFormat::DXN1>, 146 MortonCopy<true, PixelFormat::DXN1>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
122}; 147};
123 148
124static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, 149static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr,
@@ -137,6 +162,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
137 nullptr, 162 nullptr,
138 nullptr, 163 nullptr,
139 nullptr, 164 nullptr,
165 MortonCopy<false, PixelFormat::ABGR8>,
140}; 166};
141 167
142// Allocate an uninitialized texture of appropriate size and format for the surface 168// Allocate an uninitialized texture of appropriate size and format for the surface
@@ -549,7 +575,7 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint
549 glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, 575 glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format,
550 static_cast<GLsizei>(rect.GetWidth() * GetCompresssionFactor()), 576 static_cast<GLsizei>(rect.GetWidth() * GetCompresssionFactor()),
551 static_cast<GLsizei>(rect.GetHeight() * GetCompresssionFactor()), 0, 577 static_cast<GLsizei>(rect.GetHeight() * GetCompresssionFactor()), 0,
552 size, &gl_buffer[buffer_offset]); 578 static_cast<GLsizei>(size), &gl_buffer[buffer_offset]);
553 } else { 579 } else {
554 glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()), 580 glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
555 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, 581 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
@@ -888,9 +914,6 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc
888 // Use GetSurfaceSubRect instead 914 // Use GetSurfaceSubRect instead
889 ASSERT(params.width == params.stride); 915 ASSERT(params.width == params.stride);
890 916
891 ASSERT(!params.is_tiled ||
892 (params.GetActualWidth() % 8 == 0 && params.GetActualHeight() % 8 == 0));
893
894 // Check for an exact match in existing surfaces 917 // Check for an exact match in existing surfaces
895 Surface surface = 918 Surface surface =
896 FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale); 919 FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale);
@@ -1048,8 +1071,13 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
1048 1071
1049 if (config.tic.IsTiled()) { 1072 if (config.tic.IsTiled()) {
1050 params.block_height = config.tic.BlockHeight(); 1073 params.block_height = config.tic.BlockHeight();
1051 params.width = Common::AlignUp(params.width, params.block_height); 1074
1052 params.height = Common::AlignUp(params.height, params.block_height); 1075 // TODO(bunnei): The below align up is a hack. This is here because some compressed textures
1076 // are not a multiple of their own compression factor, and so this accounts for that. This
1077 // could potentially result in an extra row of 4px being decoded if a texture is not a
1078 // multiple of 4.
1079 params.width = Common::AlignUp(params.width, 4);
1080 params.height = Common::AlignUp(params.height, 4);
1053 } else { 1081 } else {
1054 // Use the texture-provided stride value if the texture isn't tiled. 1082 // Use the texture-provided stride value if the texture isn't tiled.
1055 params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch())); 1083 params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch()));
@@ -1057,26 +1085,6 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
1057 1085
1058 params.UpdateParams(); 1086 params.UpdateParams();
1059 1087
1060 if (params.GetActualWidth() % 8 != 0 || params.GetActualHeight() % 8 != 0 ||
1061 params.stride != params.width) {
1062 Surface src_surface;
1063 MathUtil::Rectangle<u32> rect;
1064 std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true);
1065
1066 rect = rect.Scale(params.GetCompresssionFactor());
1067
1068 params.res_scale = src_surface->res_scale;
1069 Surface tmp_surface = CreateSurface(params);
1070
1071 auto dst_rect = tmp_surface->GetScaledRect().Scale(params.GetCompresssionFactor());
1072 BlitTextures(src_surface->texture.handle, rect, tmp_surface->texture.handle, dst_rect,
1073 SurfaceParams::GetFormatType(params.pixel_format), read_framebuffer.handle,
1074 draw_framebuffer.handle);
1075
1076 remove_surfaces.emplace(tmp_surface);
1077 return tmp_surface;
1078 }
1079
1080 return GetSurface(params, ScaleMatch::Ignore, true); 1088 return GetSurface(params, ScaleMatch::Ignore, true);
1081} 1089}
1082 1090
@@ -1251,7 +1259,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, Tegra::GPUVA
1251 1259
1252 const auto interval = *it & validate_interval; 1260 const auto interval = *it & validate_interval;
1253 // Look for a valid surface to copy from 1261 // Look for a valid surface to copy from
1254 SurfaceParams params = surface->FromInterval(interval); 1262 SurfaceParams params = *surface;
1255 1263
1256 Surface copy_surface = 1264 Surface copy_surface =
1257 FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval); 1265 FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 0f43e863d..9da945e19 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -65,6 +65,7 @@ struct SurfaceParams {
65 DXT23 = 8, 65 DXT23 = 8,
66 DXT45 = 9, 66 DXT45 = 9,
67 DXN1 = 10, // This is also known as BC4 67 DXN1 = 10, // This is also known as BC4
68 ASTC_2D_4X4 = 11,
68 69
69 Max, 70 Max,
70 Invalid = 255, 71 Invalid = 255,
@@ -111,6 +112,7 @@ struct SurfaceParams {
111 4, // DXT23 112 4, // DXT23
112 4, // DXT45 113 4, // DXT45
113 4, // DXN1 114 4, // DXN1
115 1, // ASTC_2D_4X4
114 }}; 116 }};
115 117
116 ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); 118 ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -136,6 +138,7 @@ struct SurfaceParams {
136 128, // DXT23 138 128, // DXT23
137 128, // DXT45 139 128, // DXT45
138 64, // DXN1 140 64, // DXN1
141 32, // ASTC_2D_4X4
139 }}; 142 }};
140 143
141 ASSERT(static_cast<size_t>(format) < bpp_table.size()); 144 ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -162,6 +165,15 @@ struct SurfaceParams {
162 } 165 }
163 } 166 }
164 167
168 static bool IsFormatASTC(PixelFormat format) {
169 switch (format) {
170 case PixelFormat::ASTC_2D_4X4:
171 return true;
172 default:
173 return false;
174 }
175 }
176
165 static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { 177 static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
166 switch (format) { 178 switch (format) {
167 case Tegra::FramebufferConfig::PixelFormat::ABGR8: 179 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
@@ -197,6 +209,8 @@ struct SurfaceParams {
197 return PixelFormat::DXT45; 209 return PixelFormat::DXT45;
198 case Tegra::Texture::TextureFormat::DXN1: 210 case Tegra::Texture::TextureFormat::DXN1:
199 return PixelFormat::DXN1; 211 return PixelFormat::DXN1;
212 case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
213 return PixelFormat::ASTC_2D_4X4;
200 default: 214 default:
201 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); 215 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
202 UNREACHABLE(); 216 UNREACHABLE();
@@ -228,6 +242,8 @@ struct SurfaceParams {
228 return Tegra::Texture::TextureFormat::DXT45; 242 return Tegra::Texture::TextureFormat::DXT45;
229 case PixelFormat::DXN1: 243 case PixelFormat::DXN1:
230 return Tegra::Texture::TextureFormat::DXN1; 244 return Tegra::Texture::TextureFormat::DXN1;
245 case PixelFormat::ASTC_2D_4X4:
246 return Tegra::Texture::TextureFormat::ASTC_2D_4X4;
231 default: 247 default:
232 UNREACHABLE(); 248 UNREACHABLE();
233 } 249 }
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 87ae47ac9..cd7569e2f 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -17,6 +17,7 @@ namespace Decompiler {
17 17
18using Tegra::Shader::Attribute; 18using Tegra::Shader::Attribute;
19using Tegra::Shader::Instruction; 19using Tegra::Shader::Instruction;
20using Tegra::Shader::LogicOperation;
20using Tegra::Shader::OpCode; 21using Tegra::Shader::OpCode;
21using Tegra::Shader::Register; 22using Tegra::Shader::Register;
22using Tegra::Shader::Sampler; 23using Tegra::Shader::Sampler;
@@ -267,6 +268,27 @@ public:
267 } 268 }
268 269
269 /** 270 /**
271 * Returns code that does an integer size conversion for the specified size.
272 * @param value Value to perform integer size conversion on.
273 * @param size Register size to use for conversion instructions.
274 * @returns GLSL string corresponding to the value converted to the specified size.
275 */
276 static std::string ConvertIntegerSize(const std::string& value, Register::Size size) {
277 switch (size) {
278 case Register::Size::Byte:
279 return "((" + value + " << 24) >> 24)";
280 case Register::Size::Short:
281 return "((" + value + " << 16) >> 16)";
282 case Register::Size::Word:
283 // Default - do nothing
284 return value;
285 default:
286 NGLOG_CRITICAL(HW_GPU, "Unimplemented conversion size {}", static_cast<u32>(size));
287 UNREACHABLE();
288 }
289 }
290
291 /**
270 * Gets a register as an float. 292 * Gets a register as an float.
271 * @param reg The register to get. 293 * @param reg The register to get.
272 * @param elem The element to use for the operation. 294 * @param elem The element to use for the operation.
@@ -282,15 +304,18 @@ public:
282 * @param reg The register to get. 304 * @param reg The register to get.
283 * @param elem The element to use for the operation. 305 * @param elem The element to use for the operation.
284 * @param is_signed Whether to get the register as a signed (or unsigned) integer. 306 * @param is_signed Whether to get the register as a signed (or unsigned) integer.
307 * @param size Register size to use for conversion instructions.
285 * @returns GLSL string corresponding to the register as an integer. 308 * @returns GLSL string corresponding to the register as an integer.
286 */ 309 */
287 std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0, 310 std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0, bool is_signed = true,
288 bool is_signed = true) { 311 Register::Size size = Register::Size::Word) {
289 const std::string func = GetGLSLConversionFunc( 312 const std::string func = GetGLSLConversionFunc(
290 GLSLRegister::Type::Float, 313 GLSLRegister::Type::Float,
291 is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger); 314 is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger);
292 315
293 return func + '(' + GetRegister(reg, elem) + ')'; 316 std::string value = func + '(' + GetRegister(reg, elem) + ')';
317
318 return ConvertIntegerSize(value, size);
294 } 319 }
295 320
296 /** 321 /**
@@ -300,13 +325,15 @@ public:
300 * @param value The code representing the value to assign. 325 * @param value The code representing the value to assign.
301 * @param dest_num_components Number of components in the destination. 326 * @param dest_num_components Number of components in the destination.
302 * @param value_num_components Number of components in the value. 327 * @param value_num_components Number of components in the value.
303 * @param is_abs Optional, when True, applies absolute value to output. 328 * @param is_saturated Optional, when True, saturates the provided value.
304 * @param dest_elem Optional, the destination element to use for the operation. 329 * @param dest_elem Optional, the destination element to use for the operation.
305 */ 330 */
306 void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, 331 void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,
307 u64 dest_num_components, u64 value_num_components, bool is_abs = false, 332 u64 dest_num_components, u64 value_num_components,
308 u64 dest_elem = 0) { 333 bool is_saturated = false, u64 dest_elem = 0) {
309 SetRegister(reg, elem, value, dest_num_components, value_num_components, is_abs, dest_elem); 334
335 SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value,
336 dest_num_components, value_num_components, dest_elem);
310 } 337 }
311 338
312 /** 339 /**
@@ -316,18 +343,22 @@ public:
316 * @param value The code representing the value to assign. 343 * @param value The code representing the value to assign.
317 * @param dest_num_components Number of components in the destination. 344 * @param dest_num_components Number of components in the destination.
318 * @param value_num_components Number of components in the value. 345 * @param value_num_components Number of components in the value.
319 * @param is_abs Optional, when True, applies absolute value to output. 346 * @param is_saturated Optional, when True, saturates the provided value.
320 * @param dest_elem Optional, the destination element to use for the operation. 347 * @param dest_elem Optional, the destination element to use for the operation.
348 * @param size Register size to use for conversion instructions.
321 */ 349 */
322 void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem, 350 void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem,
323 const std::string& value, u64 dest_num_components, 351 const std::string& value, u64 dest_num_components,
324 u64 value_num_components, bool is_abs = false, u64 dest_elem = 0) { 352 u64 value_num_components, bool is_saturated = false,
353 u64 dest_elem = 0, Register::Size size = Register::Size::Word) {
354 ASSERT_MSG(!is_saturated, "Unimplemented");
355
325 const std::string func = GetGLSLConversionFunc( 356 const std::string func = GetGLSLConversionFunc(
326 is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger, 357 is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger,
327 GLSLRegister::Type::Float); 358 GLSLRegister::Type::Float);
328 359
329 SetRegister(reg, elem, func + '(' + value + ')', dest_num_components, value_num_components, 360 SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
330 is_abs, dest_elem); 361 dest_num_components, value_num_components, dest_elem);
331 } 362 }
332 363
333 /** 364 /**
@@ -507,13 +538,11 @@ private:
507 * @param value The code representing the value to assign. 538 * @param value The code representing the value to assign.
508 * @param dest_num_components Number of components in the destination. 539 * @param dest_num_components Number of components in the destination.
509 * @param value_num_components Number of components in the value. 540 * @param value_num_components Number of components in the value.
510 * @param is_abs Optional, when True, applies absolute value to output.
511 * @param dest_elem Optional, the destination element to use for the operation. 541 * @param dest_elem Optional, the destination element to use for the operation.
512 */ 542 */
513 void SetRegister(const Register& reg, u64 elem, const std::string& value, 543 void SetRegister(const Register& reg, u64 elem, const std::string& value,
514 u64 dest_num_components, u64 value_num_components, bool is_abs, 544 u64 dest_num_components, u64 value_num_components, u64 dest_elem) {
515 u64 dest_elem) { 545 std::string dest = GetRegister(reg, static_cast<u32>(dest_elem));
516 std::string dest = GetRegister(reg, dest_elem);
517 if (dest_num_components > 1) { 546 if (dest_num_components > 1) {
518 dest += GetSwizzle(elem); 547 dest += GetSwizzle(elem);
519 } 548 }
@@ -523,8 +552,6 @@ private:
523 src += GetSwizzle(elem); 552 src += GetSwizzle(elem);
524 } 553 }
525 554
526 src = is_abs ? "abs(" + src + ')' : src;
527
528 shader.AddLine(dest + " = " + src + ';'); 555 shader.AddLine(dest + " = " + src + ';');
529 } 556 }
530 557
@@ -545,7 +572,7 @@ private:
545 // vertex shader, and what's the value of the fourth element when inside a Tess Eval 572 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
546 // shader. 573 // shader.
547 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); 574 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
548 return "vec4(0, 0, gl_InstanceID, gl_VertexID)"; 575 return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))";
549 default: 576 default:
550 const u32 index{static_cast<u32>(attribute) - 577 const u32 index{static_cast<u32>(attribute) -
551 static_cast<u32>(Attribute::Index::Attribute_0)}; 578 static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -740,6 +767,31 @@ private:
740 return (absolute_offset % SchedPeriod) == 0; 767 return (absolute_offset % SchedPeriod) == 0;
741 } 768 }
742 769
770 void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a,
771 const std::string& op_b) {
772 switch (logic_op) {
773 case LogicOperation::And: {
774 regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " & " + op_b + ')', 1, 1);
775 break;
776 }
777 case LogicOperation::Or: {
778 regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " | " + op_b + ')', 1, 1);
779 break;
780 }
781 case LogicOperation::Xor: {
782 regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " ^ " + op_b + ')', 1, 1);
783 break;
784 }
785 case LogicOperation::PassB: {
786 regs.SetRegisterToInteger(dest, true, 0, op_b, 1, 1);
787 break;
788 }
789 default:
790 NGLOG_CRITICAL(HW_GPU, "Unimplemented logic operation: {}", static_cast<u32>(logic_op));
791 UNREACHABLE();
792 }
793 }
794
743 /** 795 /**
744 * Compiles a single instruction from Tegra to GLSL. 796 * Compiles a single instruction from Tegra to GLSL.
745 * @param offset the offset of the Tegra shader instruction. 797 * @param offset the offset of the Tegra shader instruction.
@@ -777,22 +829,25 @@ private:
777 829
778 switch (opcode->GetType()) { 830 switch (opcode->GetType()) {
779 case OpCode::Type::Arithmetic: { 831 case OpCode::Type::Arithmetic: {
780 std::string op_a = instr.alu.negate_a ? "-" : ""; 832 std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
781 op_a += regs.GetRegisterAsFloat(instr.gpr8);
782 if (instr.alu.abs_a) { 833 if (instr.alu.abs_a) {
783 op_a = "abs(" + op_a + ')'; 834 op_a = "abs(" + op_a + ')';
784 } 835 }
785 836
786 std::string op_b = instr.alu.negate_b ? "-" : ""; 837 if (instr.alu.negate_a) {
838 op_a = "-(" + op_a + ')';
839 }
840
841 std::string op_b;
787 842
788 if (instr.is_b_imm) { 843 if (instr.is_b_imm) {
789 op_b += GetImmediate19(instr); 844 op_b = GetImmediate19(instr);
790 } else { 845 } else {
791 if (instr.is_b_gpr) { 846 if (instr.is_b_gpr) {
792 op_b += regs.GetRegisterAsFloat(instr.gpr20); 847 op_b = regs.GetRegisterAsFloat(instr.gpr20);
793 } else { 848 } else {
794 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, 849 op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
795 GLSLRegister::Type::Float); 850 GLSLRegister::Type::Float);
796 } 851 }
797 } 852 }
798 853
@@ -800,6 +855,10 @@ private:
800 op_b = "abs(" + op_b + ')'; 855 op_b = "abs(" + op_b + ')';
801 } 856 }
802 857
858 if (instr.alu.negate_b) {
859 op_b = "-(" + op_b + ')';
860 }
861
803 switch (opcode->GetId()) { 862 switch (opcode->GetId()) {
804 case OpCode::Id::MOV_C: 863 case OpCode::Id::MOV_C:
805 case OpCode::Id::MOV_R: { 864 case OpCode::Id::MOV_R: {
@@ -807,64 +866,49 @@ private:
807 break; 866 break;
808 } 867 }
809 868
810 case OpCode::Id::MOV32_IMM: {
811 // mov32i doesn't have abs or neg bits.
812 regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1);
813 break;
814 }
815 case OpCode::Id::FMUL_C: 869 case OpCode::Id::FMUL_C:
816 case OpCode::Id::FMUL_R: 870 case OpCode::Id::FMUL_R:
817 case OpCode::Id::FMUL_IMM: { 871 case OpCode::Id::FMUL_IMM: {
818 ASSERT_MSG(!instr.saturate_a, "Unimplemented"); 872 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
819 873 instr.alu.saturate_d);
820 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, instr.alu.abs_d);
821 break;
822 }
823 case OpCode::Id::FMUL32_IMM: {
824 // fmul32i doesn't have abs or neg bits.
825 regs.SetRegisterToFloat(
826 instr.gpr0, 0,
827 regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1);
828 break; 874 break;
829 } 875 }
830 case OpCode::Id::FADD_C: 876 case OpCode::Id::FADD_C:
831 case OpCode::Id::FADD_R: 877 case OpCode::Id::FADD_R:
832 case OpCode::Id::FADD_IMM: { 878 case OpCode::Id::FADD_IMM: {
833 ASSERT_MSG(!instr.saturate_a, "Unimplemented"); 879 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
834 880 instr.alu.saturate_d);
835 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, instr.alu.abs_d);
836 break; 881 break;
837 } 882 }
838 case OpCode::Id::MUFU: { 883 case OpCode::Id::MUFU: {
839 ASSERT_MSG(!instr.saturate_a, "Unimplemented");
840
841 switch (instr.sub_op) { 884 switch (instr.sub_op) {
842 case SubOp::Cos: 885 case SubOp::Cos:
843 regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, 886 regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
844 instr.alu.abs_d); 887 instr.alu.saturate_d);
845 break; 888 break;
846 case SubOp::Sin: 889 case SubOp::Sin:
847 regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, 890 regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1,
848 instr.alu.abs_d); 891 instr.alu.saturate_d);
849 break; 892 break;
850 case SubOp::Ex2: 893 case SubOp::Ex2:
851 regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, 894 regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1,
852 instr.alu.abs_d); 895 instr.alu.saturate_d);
853 break; 896 break;
854 case SubOp::Lg2: 897 case SubOp::Lg2:
855 regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, 898 regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1,
856 instr.alu.abs_d); 899 instr.alu.saturate_d);
857 break; 900 break;
858 case SubOp::Rcp: 901 case SubOp::Rcp:
859 regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, instr.alu.abs_d); 902 regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1,
903 instr.alu.saturate_d);
860 break; 904 break;
861 case SubOp::Rsq: 905 case SubOp::Rsq:
862 regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, 906 regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
863 instr.alu.abs_d); 907 instr.alu.saturate_d);
864 break; 908 break;
865 case SubOp::Min: 909 case SubOp::Min:
866 regs.SetRegisterToFloat(instr.gpr0, 0, "min(" + op_a + "," + op_b + ')', 1, 1, 910 regs.SetRegisterToFloat(instr.gpr0, 0, "min(" + op_a + "," + op_b + ')', 1, 1,
867 instr.alu.abs_d); 911 instr.alu.saturate_d);
868 break; 912 break;
869 default: 913 default:
870 NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", 914 NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}",
@@ -901,6 +945,21 @@ private:
901 } 945 }
902 break; 946 break;
903 } 947 }
948 case OpCode::Type::ArithmeticImmediate: {
949 switch (opcode->GetId()) {
950 case OpCode::Id::MOV32_IMM: {
951 regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1);
952 break;
953 }
954 case OpCode::Id::FMUL32_IMM: {
955 regs.SetRegisterToFloat(
956 instr.gpr0, 0,
957 regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1);
958 break;
959 }
960 }
961 break;
962 }
904 case OpCode::Type::Bfe: { 963 case OpCode::Type::Bfe: {
905 ASSERT_MSG(!instr.bfe.negate_b, "Unimplemented"); 964 ASSERT_MSG(!instr.bfe.negate_b, "Unimplemented");
906 965
@@ -926,49 +985,6 @@ private:
926 985
927 break; 986 break;
928 } 987 }
929 case OpCode::Type::Logic: {
930 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
931
932 if (instr.alu.lop.invert_a)
933 op_a = "~(" + op_a + ')';
934
935 switch (opcode->GetId()) {
936 case OpCode::Id::LOP32I: {
937 u32 imm = static_cast<u32>(instr.alu.imm20_32.Value());
938
939 if (instr.alu.lop.invert_b)
940 imm = ~imm;
941
942 switch (instr.alu.lop.operation) {
943 case Tegra::Shader::LogicOperation::And: {
944 regs.SetRegisterToInteger(instr.gpr0, true, 0,
945 '(' + op_a + " & " + std::to_string(imm) + ')', 1, 1);
946 break;
947 }
948 case Tegra::Shader::LogicOperation::Or: {
949 regs.SetRegisterToInteger(instr.gpr0, true, 0,
950 '(' + op_a + " | " + std::to_string(imm) + ')', 1, 1);
951 break;
952 }
953 case Tegra::Shader::LogicOperation::Xor: {
954 regs.SetRegisterToInteger(instr.gpr0, true, 0,
955 '(' + op_a + " ^ " + std::to_string(imm) + ')', 1, 1);
956 break;
957 }
958 default:
959 NGLOG_CRITICAL(HW_GPU, "Unimplemented lop32i operation: {}",
960 static_cast<u32>(instr.alu.lop.operation.Value()));
961 UNREACHABLE();
962 }
963 break;
964 }
965 default: {
966 NGLOG_CRITICAL(HW_GPU, "Unhandled logic instruction: {}", opcode->GetName());
967 UNREACHABLE();
968 }
969 }
970 break;
971 }
972 988
973 case OpCode::Type::Shift: { 989 case OpCode::Type::Shift: {
974 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true); 990 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
@@ -1012,14 +1028,39 @@ private:
1012 break; 1028 break;
1013 } 1029 }
1014 1030
1015 case OpCode::Type::ArithmeticInteger: { 1031 case OpCode::Type::ArithmeticIntegerImmediate: {
1016 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); 1032 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
1033 std::string op_b = std::to_string(instr.alu.imm20_32.Value());
1034
1035 switch (opcode->GetId()) {
1036 case OpCode::Id::IADD32I:
1037 if (instr.iadd32i.negate_a)
1038 op_a = "-(" + op_a + ')';
1017 1039
1018 if (instr.alu_integer.negate_a) 1040 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
1019 op_a = '-' + op_a; 1041 instr.iadd32i.saturate != 0);
1042 break;
1043 case OpCode::Id::LOP32I: {
1044 if (instr.alu.lop32i.invert_a)
1045 op_a = "~(" + op_a + ')';
1020 1046
1021 std::string op_b = instr.alu_integer.negate_b ? "-" : ""; 1047 if (instr.alu.lop32i.invert_b)
1048 op_b = "~(" + op_b + ')';
1022 1049
1050 WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b);
1051 break;
1052 }
1053 default: {
1054 NGLOG_CRITICAL(HW_GPU, "Unhandled ArithmeticIntegerImmediate instruction: {}",
1055 opcode->GetName());
1056 UNREACHABLE();
1057 }
1058 }
1059 break;
1060 }
1061 case OpCode::Type::ArithmeticInteger: {
1062 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
1063 std::string op_b;
1023 if (instr.is_b_imm) { 1064 if (instr.is_b_imm) {
1024 op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')'; 1065 op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
1025 } else { 1066 } else {
@@ -1035,19 +1076,46 @@ private:
1035 case OpCode::Id::IADD_C: 1076 case OpCode::Id::IADD_C:
1036 case OpCode::Id::IADD_R: 1077 case OpCode::Id::IADD_R:
1037 case OpCode::Id::IADD_IMM: { 1078 case OpCode::Id::IADD_IMM: {
1038 ASSERT_MSG(!instr.saturate_a, "Unimplemented"); 1079 if (instr.alu_integer.negate_a)
1039 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1); 1080 op_a = "-(" + op_a + ')';
1081
1082 if (instr.alu_integer.negate_b)
1083 op_b = "-(" + op_b + ')';
1084
1085 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
1086 instr.alu.saturate_d);
1040 break; 1087 break;
1041 } 1088 }
1042 case OpCode::Id::ISCADD_C: 1089 case OpCode::Id::ISCADD_C:
1043 case OpCode::Id::ISCADD_R: 1090 case OpCode::Id::ISCADD_R:
1044 case OpCode::Id::ISCADD_IMM: { 1091 case OpCode::Id::ISCADD_IMM: {
1092 if (instr.alu_integer.negate_a)
1093 op_a = "-(" + op_a + ')';
1094
1095 if (instr.alu_integer.negate_b)
1096 op_b = "-(" + op_b + ')';
1097
1045 std::string shift = std::to_string(instr.alu_integer.shift_amount.Value()); 1098 std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
1046 1099
1047 regs.SetRegisterToInteger(instr.gpr0, true, 0, 1100 regs.SetRegisterToInteger(instr.gpr0, true, 0,
1048 "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1); 1101 "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
1049 break; 1102 break;
1050 } 1103 }
1104 case OpCode::Id::LOP_C:
1105 case OpCode::Id::LOP_R:
1106 case OpCode::Id::LOP_IMM: {
1107 ASSERT_MSG(!instr.alu.lop.unk44, "Unimplemented");
1108 ASSERT_MSG(instr.alu.lop.pred48 == Pred::UnusedIndex, "Unimplemented");
1109
1110 if (instr.alu.lop.invert_a)
1111 op_a = "~(" + op_a + ')';
1112
1113 if (instr.alu.lop.invert_b)
1114 op_b = "~(" + op_b + ')';
1115
1116 WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b);
1117 break;
1118 }
1051 default: { 1119 default: {
1052 NGLOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}", 1120 NGLOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}",
1053 opcode->GetName()); 1121 opcode->GetName());
@@ -1058,8 +1126,6 @@ private:
1058 break; 1126 break;
1059 } 1127 }
1060 case OpCode::Type::Ffma: { 1128 case OpCode::Type::Ffma: {
1061 ASSERT_MSG(!instr.saturate_a, "Unimplemented");
1062
1063 std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); 1129 std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
1064 std::string op_b = instr.ffma.negate_b ? "-" : ""; 1130 std::string op_b = instr.ffma.negate_b ? "-" : "";
1065 std::string op_c = instr.ffma.negate_c ? "-" : ""; 1131 std::string op_c = instr.ffma.negate_c ? "-" : "";
@@ -1093,33 +1159,33 @@ private:
1093 } 1159 }
1094 } 1160 }
1095 1161
1096 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1); 1162 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1,
1163 instr.alu.saturate_d);
1097 break; 1164 break;
1098 } 1165 }
1099 case OpCode::Type::Conversion: { 1166 case OpCode::Type::Conversion: {
1100 ASSERT_MSG(instr.conversion.size == Register::Size::Word, "Unimplemented");
1101 ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented"); 1167 ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented");
1102 ASSERT_MSG(!instr.saturate_a, "Unimplemented");
1103 1168
1104 switch (opcode->GetId()) { 1169 switch (opcode->GetId()) {
1105 case OpCode::Id::I2I_R: { 1170 case OpCode::Id::I2I_R: {
1106 ASSERT_MSG(!instr.conversion.selector, "Unimplemented"); 1171 ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
1107 1172
1108 std::string op_a = 1173 std::string op_a = regs.GetRegisterAsInteger(
1109 regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed); 1174 instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size);
1110 1175
1111 if (instr.conversion.abs_a) { 1176 if (instr.conversion.abs_a) {
1112 op_a = "abs(" + op_a + ')'; 1177 op_a = "abs(" + op_a + ')';
1113 } 1178 }
1114 1179
1115 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, 1180 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
1116 1); 1181 1, instr.alu.saturate_d, 0, instr.conversion.dest_size);
1117 break; 1182 break;
1118 } 1183 }
1119 case OpCode::Id::I2F_R: { 1184 case OpCode::Id::I2F_R: {
1185 ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented");
1120 ASSERT_MSG(!instr.conversion.selector, "Unimplemented"); 1186 ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
1121 std::string op_a = 1187 std::string op_a = regs.GetRegisterAsInteger(
1122 regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed); 1188 instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size);
1123 1189
1124 if (instr.conversion.abs_a) { 1190 if (instr.conversion.abs_a) {
1125 op_a = "abs(" + op_a + ')'; 1191 op_a = "abs(" + op_a + ')';
@@ -1129,8 +1195,8 @@ private:
1129 break; 1195 break;
1130 } 1196 }
1131 case OpCode::Id::F2F_R: { 1197 case OpCode::Id::F2F_R: {
1132 ASSERT_MSG(!instr.saturate_a, "Unimplemented"); 1198 ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented");
1133 1199 ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented");
1134 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); 1200 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
1135 1201
1136 switch (instr.conversion.f2f.rounding) { 1202 switch (instr.conversion.f2f.rounding) {
@@ -1156,10 +1222,11 @@ private:
1156 op_a = "abs(" + op_a + ')'; 1222 op_a = "abs(" + op_a + ')';
1157 } 1223 }
1158 1224
1159 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); 1225 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d);
1160 break; 1226 break;
1161 } 1227 }
1162 case OpCode::Id::F2I_R: { 1228 case OpCode::Id::F2I_R: {
1229 ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented");
1163 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); 1230 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
1164 1231
1165 if (instr.conversion.abs_a) { 1232 if (instr.conversion.abs_a) {
@@ -1192,7 +1259,7 @@ private:
1192 } 1259 }
1193 1260
1194 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, 1261 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
1195 1); 1262 1, false, 0, instr.conversion.dest_size);
1196 break; 1263 break;
1197 } 1264 }
1198 default: { 1265 default: {
@@ -1430,8 +1497,8 @@ private:
1430 op_b = "abs(" + op_b + ')'; 1497 op_b = "abs(" + op_b + ')';
1431 } 1498 }
1432 1499
1433 // The fset instruction sets a register to 1.0 if the condition is true, and to 0 1500 // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
1434 // otherwise. 1501 // condition is true, and to 0 otherwise.
1435 std::string second_pred = 1502 std::string second_pred =
1436 GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0); 1503 GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);
1437 1504
@@ -1449,6 +1516,41 @@ private:
1449 } 1516 }
1450 break; 1517 break;
1451 } 1518 }
1519 case OpCode::Type::IntegerSet: {
1520 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed);
1521
1522 std::string op_b;
1523
1524 if (instr.is_b_imm) {
1525 op_b = std::to_string(instr.alu.GetSignedImm20_20());
1526 } else {
1527 if (instr.is_b_gpr) {
1528 op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, instr.iset.is_signed);
1529 } else {
1530 op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1531 GLSLRegister::Type::Integer);
1532 }
1533 }
1534
1535 // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
1536 // condition is true, and to 0 otherwise.
1537 std::string second_pred =
1538 GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0);
1539
1540 std::string comparator = GetPredicateComparison(instr.iset.cond);
1541 std::string combiner = GetPredicateCombiner(instr.iset.op);
1542
1543 std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " +
1544 combiner + " (" + second_pred + "))";
1545
1546 if (instr.iset.bf) {
1547 regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
1548 } else {
1549 regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1,
1550 1);
1551 }
1552 break;
1553 }
1452 default: { 1554 default: {
1453 switch (opcode->GetId()) { 1555 switch (opcode->GetId()) {
1454 case OpCode::Id::EXIT: { 1556 case OpCode::Id::EXIT: {
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index b88d592b7..c1e6fac9f 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -39,6 +39,10 @@ void main() {
39 // Viewport can be flipped, which is unsupported by glViewport 39 // Viewport can be flipped, which is unsupported by glViewport
40 position.xy *= viewport_flip.xy; 40 position.xy *= viewport_flip.xy;
41 gl_Position = position; 41 gl_Position = position;
42
43 // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0
44 // For now, this is here to bring order in lieu of proper emulation
45 position.w = 1.0;
42} 46}
43)"; 47)";
44 out += program.first; 48 out += program.first;
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 7c00beb33..d7167b298 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -38,8 +38,8 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
38 const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; 38 const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
39 39
40 // TODO(bunnei): Support more than one viewport 40 // TODO(bunnei): Support more than one viewport
41 viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0 : 1.0; 41 viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
42 viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0 : 1.0; 42 viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
43} 43}
44 44
45} // namespace GLShader 45} // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 443ce3f2b..6e5f9a789 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -196,13 +196,13 @@ void OpenGLState::Apply() const {
196 } 196 }
197 197
198 // Textures 198 // Textures
199 for (size_t i = 0; i < std::size(texture_units); ++i) { 199 for (int i = 0; i < std::size(texture_units); ++i) {
200 if (texture_units[i].texture_2d != cur_state.texture_units[i].texture_2d) { 200 if (texture_units[i].texture_2d != cur_state.texture_units[i].texture_2d) {
201 glActiveTexture(TextureUnits::MaxwellTexture(i).Enum()); 201 glActiveTexture(TextureUnits::MaxwellTexture(i).Enum());
202 glBindTexture(GL_TEXTURE_2D, texture_units[i].texture_2d); 202 glBindTexture(GL_TEXTURE_2D, texture_units[i].texture_2d);
203 } 203 }
204 if (texture_units[i].sampler != cur_state.texture_units[i].sampler) { 204 if (texture_units[i].sampler != cur_state.texture_units[i].sampler) {
205 glBindSampler(i, texture_units[i].sampler); 205 glBindSampler(static_cast<GLuint>(i), texture_units[i].sampler);
206 } 206 }
207 // Update the texture swizzle 207 // Update the texture swizzle
208 if (texture_units[i].swizzle.r != cur_state.texture_units[i].swizzle.r || 208 if (texture_units[i].swizzle.r != cur_state.texture_units[i].swizzle.r ||
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
new file mode 100644
index 000000000..3c4ad1c9d
--- /dev/null
+++ b/src/video_core/textures/astc.cpp
@@ -0,0 +1,1646 @@
1// Copyright 2016 The University of North Carolina at Chapel Hill
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Please send all BUG REPORTS to <pavel@cs.unc.edu>.
16// <http://gamma.cs.unc.edu/FasTC/>
17
18#include <algorithm>
19#include <cassert>
20#include <cstdint>
21#include <cstring>
22#include <vector>
23
24#include "video_core/textures/astc.h"
25
// Sequential bit reader/writer over a caller-owned byte buffer.
//
// Bits are consumed and produced least-significant-bit first within each
// byte. The stream never owns or bounds-checks the buffer for reads; writes
// stop (silently) once the bit budget given at construction has been reached.
class BitStream {
public:
    // ptr          - first byte of the buffer to read from / write to.
    // nBits        - write budget; WriteBit becomes a no-op once the number of
    //                written bits is >= nBits (checked after each write, so a
    //                budget of 0 still lets exactly one bit through).
    // start_offset - only (start_offset % 8) is used, as the bit position
    //                inside *ptr at which to start; ptr itself is not advanced.
    BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
        : m_BitsWritten(0), m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8),
          m_BitsRead(0), done(false) {}

    // Number of bits actually stored by the Write* functions so far.
    int GetBitsWritten() const {
        return m_BitsWritten;
    }

    // Writes the low nBits of val, most significant of those bits first.
    void WriteBitsR(unsigned int val, unsigned int nBits) {
        for (unsigned int i = 0; i < nBits; i++) {
            WriteBit((val >> (nBits - i - 1)) & 1);
        }
    }

    // Writes the low nBits of val, least significant bit first.
    void WriteBits(unsigned int val, unsigned int nBits) {
        for (unsigned int i = 0; i < nBits; i++) {
            WriteBit((val >> i) & 1);
        }
    }

    // Number of bits consumed by ReadBit/ReadBits so far.
    int GetBitsRead() const {
        return m_BitsRead;
    }

    // Returns the next bit (0 or 1) and advances the cursor.
    int ReadBit() {
        const int bit = (*m_CurByte >> m_NextBit++) & 1;
        while (m_NextBit >= 8) {
            m_NextBit -= 8;
            m_CurByte++;
        }

        m_BitsRead++;
        return bit;
    }

    // Reads nBits bits; the first bit read becomes bit 0 of the result.
    // nBits must not exceed the width of unsigned int.
    unsigned int ReadBits(unsigned int nBits) {
        unsigned int ret = 0;
        for (unsigned int i = 0; i < nBits; i++) {
            // Shift as unsigned: shifting a signed 1 into bit 31 is undefined.
            ret |= static_cast<unsigned int>(ReadBit()) << i;
        }
        return ret;
    }

private:
    // Stores a single bit at the cursor unless the write budget is exhausted.
    void WriteBit(int b) {
        if (done)
            return;

        const unsigned int mask = 1u << m_NextBit++;

        // Clear the target bit, then set it again if requested.
        *m_CurByte &= ~mask;
        if (b)
            *m_CurByte |= mask;

        // Move on to the next byte once the current one is full.
        if (m_NextBit >= 8) {
            m_CurByte += 1;
            m_NextBit = 0;
        }

        done = done || ++m_BitsWritten >= m_NumBits;
    }

    // Declaration order matches the constructor's initializer list.
    int m_BitsWritten;
    const int m_NumBits;
    unsigned char* m_CurByte;
    int m_NextBit;
    int m_BitsRead;

    bool done;
};
106
// Read-only view over an integer that allows extracting single bits or
// inclusive bit ranges. The viewed integer must outlive the view.
template <typename IntType>
class Bits {
private:
    const IntType& m_Bits;

public:
    explicit Bits(IntType& v) : m_Bits(v) {}

    // A view without a target makes no sense, and copying is not supported.
    Bits() = delete;
    Bits(const Bits&) = delete;
    Bits& operator=(const Bits&) = delete;

    // Returns bit number bitPos (0 = least significant) as 0 or 1.
    uint8_t operator[](uint32_t bitPos) const {
        return static_cast<uint8_t>((m_Bits >> bitPos) & 1);
    }

    // Returns the bits in the inclusive range [start, end], shifted down so
    // that the lower of the two positions lands in bit 0. The bounds may be
    // given in either order.
    IntType operator()(uint32_t start, uint32_t end) const {
        if (start == end) {
            return (*this)[start];
        }
        if (start > end) {
            const uint32_t t = start;
            start = end;
            end = t;
        }

        // Build the mask in 64 bits; a plain `1 << width` is an int shift and
        // is undefined for fields of 31 bits or more.
        const uint32_t width = end - start + 1;
        const uint64_t mask = width >= 64 ? ~uint64_t{0} : (uint64_t{1} << width) - 1;
        return static_cast<IntType>((m_Bits >> start) & mask);
    }
};
137
// Selects how an integer-sequence element is stored: as raw bits only, or as
// raw bits combined with a quint or a trit value.
enum EIntegerEncoding {
    eIntegerEncoding_JustBits = 0,
    eIntegerEncoding_Quint = 1,
    eIntegerEncoding_Trit = 2,
};
139
140class IntegerEncodedValue {
141private:
142 const EIntegerEncoding m_Encoding;
143 const uint32_t m_NumBits;
144 uint32_t m_BitValue;
145 union {
146 uint32_t m_QuintValue;
147 uint32_t m_TritValue;
148 };
149
150public:
151 // Jank, but we're not doing any heavy lifting in this class, so it's
152 // probably OK. It allows us to use these in std::vectors...
153 IntegerEncodedValue& operator=(const IntegerEncodedValue& other) {
154 new (this) IntegerEncodedValue(other);
155 return *this;
156 }
157
158 IntegerEncodedValue(EIntegerEncoding encoding, uint32_t numBits)
159 : m_Encoding(encoding), m_NumBits(numBits) {}
160
161 EIntegerEncoding GetEncoding() const {
162 return m_Encoding;
163 }
164 uint32_t BaseBitLength() const {
165 return m_NumBits;
166 }
167
168 uint32_t GetBitValue() const {
169 return m_BitValue;
170 }
171 void SetBitValue(uint32_t val) {
172 m_BitValue = val;
173 }
174
175 uint32_t GetTritValue() const {
176 return m_TritValue;
177 }
178 void SetTritValue(uint32_t val) {
179 m_TritValue = val;
180 }
181
182 uint32_t GetQuintValue() const {
183 return m_QuintValue;
184 }
185 void SetQuintValue(uint32_t val) {
186 m_QuintValue = val;
187 }
188
189 bool MatchesEncoding(const IntegerEncodedValue& other) {
190 return m_Encoding == other.m_Encoding && m_NumBits == other.m_NumBits;
191 }
192
193 // Returns the number of bits required to encode nVals values.
194 uint32_t GetBitLength(uint32_t nVals) {
195 uint32_t totalBits = m_NumBits * nVals;
196 if (m_Encoding == eIntegerEncoding_Trit) {
197 totalBits += (nVals * 8 + 4) / 5;
198 } else if (m_Encoding == eIntegerEncoding_Quint) {
199 totalBits += (nVals * 7 + 2) / 3;
200 }
201 return totalBits;
202 }
203
204 // Count the number of bits set in a number.
205 static inline uint32_t Popcnt(uint32_t n) {
206 uint32_t c;
207 for (c = 0; n; c++) {
208 n &= n - 1;
209 }
210 return c;
211 }
212
213 // Returns a new instance of this struct that corresponds to the
214 // can take no more than maxval values
215 static IntegerEncodedValue CreateEncoding(uint32_t maxVal) {
216 while (maxVal > 0) {
217 uint32_t check = maxVal + 1;
218
219 // Is maxVal a power of two?
220 if (!(check & (check - 1))) {
221 return IntegerEncodedValue(eIntegerEncoding_JustBits, Popcnt(maxVal));
222 }
223
224 // Is maxVal of the type 3*2^n - 1?
225 if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
226 return IntegerEncodedValue(eIntegerEncoding_Trit, Popcnt(check / 3 - 1));
227 }
228
229 // Is maxVal of the type 5*2^n - 1?
230 if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
231 return IntegerEncodedValue(eIntegerEncoding_Quint, Popcnt(check / 5 - 1));
232 }
233
234 // Apparently it can't be represented with a bounded integer sequence...
235 // just iterate.
236 maxVal--;
237 }
238 return IntegerEncodedValue(eIntegerEncoding_JustBits, 0);
239 }
240
241 // Fills result with the values that are encoded in the given
242 // bitstream. We must know beforehand what the maximum possible
243 // value is, and how many values we're decoding.
244 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits,
245 uint32_t maxRange, uint32_t nValues) {
246 // Determine encoding parameters
247 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
248
249 // Start decoding
250 uint32_t nValsDecoded = 0;
251 while (nValsDecoded < nValues) {
252 switch (val.GetEncoding()) {
253 case eIntegerEncoding_Quint:
254 DecodeQuintBlock(bits, result, val.BaseBitLength());
255 nValsDecoded += 3;
256 break;
257
258 case eIntegerEncoding_Trit:
259 DecodeTritBlock(bits, result, val.BaseBitLength());
260 nValsDecoded += 5;
261 break;
262
263 case eIntegerEncoding_JustBits:
264 val.SetBitValue(bits.ReadBits(val.BaseBitLength()));
265 result.push_back(val);
266 nValsDecoded++;
267 break;
268 }
269 }
270 }
271
272private:
273 static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
274 uint32_t nBitsPerValue) {
275 // Implement the algorithm in section C.2.12
276 uint32_t m[5];
277 uint32_t t[5];
278 uint32_t T;
279
280 // Read the trit encoded block according to
281 // table C.2.14
282 m[0] = bits.ReadBits(nBitsPerValue);
283 T = bits.ReadBits(2);
284 m[1] = bits.ReadBits(nBitsPerValue);
285 T |= bits.ReadBits(2) << 2;
286 m[2] = bits.ReadBits(nBitsPerValue);
287 T |= bits.ReadBit() << 4;
288 m[3] = bits.ReadBits(nBitsPerValue);
289 T |= bits.ReadBits(2) << 5;
290 m[4] = bits.ReadBits(nBitsPerValue);
291 T |= bits.ReadBit() << 7;
292
293 uint32_t C = 0;
294
295 Bits<uint32_t> Tb(T);
296 if (Tb(2, 4) == 7) {
297 C = (Tb(5, 7) << 2) | Tb(0, 1);
298 t[4] = t[3] = 2;
299 } else {
300 C = Tb(0, 4);
301 if (Tb(5, 6) == 3) {
302 t[4] = 2;
303 t[3] = Tb[7];
304 } else {
305 t[4] = Tb[7];
306 t[3] = Tb(5, 6);
307 }
308 }
309
310 Bits<uint32_t> Cb(C);
311 if (Cb(0, 1) == 3) {
312 t[2] = 2;
313 t[1] = Cb[4];
314 t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]);
315 } else if (Cb(2, 3) == 3) {
316 t[2] = 2;
317 t[1] = 2;
318 t[0] = Cb(0, 1);
319 } else {
320 t[2] = Cb[4];
321 t[1] = Cb(2, 3);
322 t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]);
323 }
324
325 for (uint32_t i = 0; i < 5; i++) {
326 IntegerEncodedValue val(eIntegerEncoding_Trit, nBitsPerValue);
327 val.SetBitValue(m[i]);
328 val.SetTritValue(t[i]);
329 result.push_back(val);
330 }
331 }
332
333 static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
334 uint32_t nBitsPerValue) {
335 // Implement the algorithm in section C.2.12
336 uint32_t m[3];
337 uint32_t q[3];
338 uint32_t Q;
339
340 // Read the trit encoded block according to
341 // table C.2.15
342 m[0] = bits.ReadBits(nBitsPerValue);
343 Q = bits.ReadBits(3);
344 m[1] = bits.ReadBits(nBitsPerValue);
345 Q |= bits.ReadBits(2) << 3;
346 m[2] = bits.ReadBits(nBitsPerValue);
347 Q |= bits.ReadBits(2) << 5;
348
349 Bits<uint32_t> Qb(Q);
350 if (Qb(1, 2) == 3 && Qb(5, 6) == 0) {
351 q[0] = q[1] = 4;
352 q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]);
353 } else {
354 uint32_t C = 0;
355 if (Qb(1, 2) == 3) {
356 q[2] = 4;
357 C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0];
358 } else {
359 q[2] = Qb(5, 6);
360 C = Qb(0, 4);
361 }
362
363 Bits<uint32_t> Cb(C);
364 if (Cb(0, 2) == 5) {
365 q[1] = 4;
366 q[0] = Cb(3, 4);
367 } else {
368 q[1] = Cb(3, 4);
369 q[0] = Cb(0, 2);
370 }
371 }
372
373 for (uint32_t i = 0; i < 3; i++) {
374 IntegerEncodedValue val(eIntegerEncoding_Quint, nBitsPerValue);
375 val.m_BitValue = m[i];
376 val.m_QuintValue = q[i];
377 result.push_back(val);
378 }
379 }
380};
381
382namespace ASTCC {
383
384struct TexelWeightParams {
385 uint32_t m_Width;
386 uint32_t m_Height;
387 bool m_bDualPlane;
388 uint32_t m_MaxWeight;
389 bool m_bError;
390 bool m_bVoidExtentLDR;
391 bool m_bVoidExtentHDR;
392
393 TexelWeightParams() {
394 memset(this, 0, sizeof(*this));
395 }
396
397 uint32_t GetPackedBitSize() {
398 // How many indices do we have?
399 uint32_t nIdxs = m_Height * m_Width;
400 if (m_bDualPlane) {
401 nIdxs *= 2;
402 }
403
404 return IntegerEncodedValue::CreateEncoding(m_MaxWeight).GetBitLength(nIdxs);
405 }
406
407 uint32_t GetNumWeightValues() const {
408 uint32_t ret = m_Width * m_Height;
409 if (m_bDualPlane) {
410 ret *= 2;
411 }
412 return ret;
413 }
414};
415
416TexelWeightParams DecodeBlockInfo(BitStream& strm) {
417 TexelWeightParams params;
418
419 // Read the entire block mode all at once
420 uint16_t modeBits = strm.ReadBits(11);
421
422 // Does this match the void extent block mode?
423 if ((modeBits & 0x01FF) == 0x1FC) {
424 if (modeBits & 0x200) {
425 params.m_bVoidExtentHDR = true;
426 } else {
427 params.m_bVoidExtentLDR = true;
428 }
429
430 // Next two bits must be one.
431 if (!(modeBits & 0x400) || !strm.ReadBit()) {
432 params.m_bError = true;
433 }
434
435 return params;
436 }
437
438 // First check if the last four bits are zero
439 if ((modeBits & 0xF) == 0) {
440 params.m_bError = true;
441 return params;
442 }
443
444 // If the last two bits are zero, then if bits
445 // [6-8] are all ones, this is also reserved.
446 if ((modeBits & 0x3) == 0 && (modeBits & 0x1C0) == 0x1C0) {
447 params.m_bError = true;
448 return params;
449 }
450
451 // Otherwise, there is no error... Figure out the layout
452 // of the block mode. Layout is determined by a number
453 // between 0 and 9 corresponding to table C.2.8 of the
454 // ASTC spec.
455 uint32_t layout = 0;
456
457 if ((modeBits & 0x1) || (modeBits & 0x2)) {
458 // layout is in [0-4]
459 if (modeBits & 0x8) {
460 // layout is in [2-4]
461 if (modeBits & 0x4) {
462 // layout is in [3-4]
463 if (modeBits & 0x100) {
464 layout = 4;
465 } else {
466 layout = 3;
467 }
468 } else {
469 layout = 2;
470 }
471 } else {
472 // layout is in [0-1]
473 if (modeBits & 0x4) {
474 layout = 1;
475 } else {
476 layout = 0;
477 }
478 }
479 } else {
480 // layout is in [5-9]
481 if (modeBits & 0x100) {
482 // layout is in [7-9]
483 if (modeBits & 0x80) {
484 // layout is in [7-8]
485 assert((modeBits & 0x40) == 0U);
486 if (modeBits & 0x20) {
487 layout = 8;
488 } else {
489 layout = 7;
490 }
491 } else {
492 layout = 9;
493 }
494 } else {
495 // layout is in [5-6]
496 if (modeBits & 0x80) {
497 layout = 6;
498 } else {
499 layout = 5;
500 }
501 }
502 }
503
504 assert(layout < 10);
505
506 // Determine R
507 uint32_t R = !!(modeBits & 0x10);
508 if (layout < 5) {
509 R |= (modeBits & 0x3) << 1;
510 } else {
511 R |= (modeBits & 0xC) >> 1;
512 }
513 assert(2 <= R && R <= 7);
514
515 // Determine width & height
516 switch (layout) {
517 case 0: {
518 uint32_t A = (modeBits >> 5) & 0x3;
519 uint32_t B = (modeBits >> 7) & 0x3;
520 params.m_Width = B + 4;
521 params.m_Height = A + 2;
522 break;
523 }
524
525 case 1: {
526 uint32_t A = (modeBits >> 5) & 0x3;
527 uint32_t B = (modeBits >> 7) & 0x3;
528 params.m_Width = B + 8;
529 params.m_Height = A + 2;
530 break;
531 }
532
533 case 2: {
534 uint32_t A = (modeBits >> 5) & 0x3;
535 uint32_t B = (modeBits >> 7) & 0x3;
536 params.m_Width = A + 2;
537 params.m_Height = B + 8;
538 break;
539 }
540
541 case 3: {
542 uint32_t A = (modeBits >> 5) & 0x3;
543 uint32_t B = (modeBits >> 7) & 0x1;
544 params.m_Width = A + 2;
545 params.m_Height = B + 6;
546 break;
547 }
548
549 case 4: {
550 uint32_t A = (modeBits >> 5) & 0x3;
551 uint32_t B = (modeBits >> 7) & 0x1;
552 params.m_Width = B + 2;
553 params.m_Height = A + 2;
554 break;
555 }
556
557 case 5: {
558 uint32_t A = (modeBits >> 5) & 0x3;
559 params.m_Width = 12;
560 params.m_Height = A + 2;
561 break;
562 }
563
564 case 6: {
565 uint32_t A = (modeBits >> 5) & 0x3;
566 params.m_Width = A + 2;
567 params.m_Height = 12;
568 break;
569 }
570
571 case 7: {
572 params.m_Width = 6;
573 params.m_Height = 10;
574 break;
575 }
576
577 case 8: {
578 params.m_Width = 10;
579 params.m_Height = 6;
580 break;
581 }
582
583 case 9: {
584 uint32_t A = (modeBits >> 5) & 0x3;
585 uint32_t B = (modeBits >> 9) & 0x3;
586 params.m_Width = A + 6;
587 params.m_Height = B + 6;
588 break;
589 }
590
591 default:
592 assert(!"Don't know this layout...");
593 params.m_bError = true;
594 break;
595 }
596
597 // Determine whether or not we're using dual planes
598 // and/or high precision layouts.
599 bool D = (layout != 9) && (modeBits & 0x400);
600 bool H = (layout != 9) && (modeBits & 0x200);
601
602 if (H) {
603 const uint32_t maxWeights[6] = {9, 11, 15, 19, 23, 31};
604 params.m_MaxWeight = maxWeights[R - 2];
605 } else {
606 const uint32_t maxWeights[6] = {1, 2, 3, 4, 5, 7};
607 params.m_MaxWeight = maxWeights[R - 2];
608 }
609
610 params.m_bDualPlane = D;
611
612 return params;
613}
614
615void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
616 uint32_t blockHeight) {
617 // Don't actually care about the void extent, just read the bits...
618 for (int i = 0; i < 4; ++i) {
619 strm.ReadBits(13);
620 }
621
622 // Decode the RGBA components and renormalize them to the range [0, 255]
623 uint16_t r = strm.ReadBits(16);
624 uint16_t g = strm.ReadBits(16);
625 uint16_t b = strm.ReadBits(16);
626 uint16_t a = strm.ReadBits(16);
627
628 uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 |
629 (static_cast<uint32_t>(a) & 0xFF00) << 16;
630
631 for (uint32_t j = 0; j < blockHeight; j++)
632 for (uint32_t i = 0; i < blockWidth; i++) {
633 outBuf[j * blockWidth + i] = rgba;
634 }
635}
636
// Marks a whole blockWidth x blockHeight block as invalid by writing the
// sentinel value 0xFFFF00FF to each of its texels (row-major layout).
void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeight) {
    const uint32_t errorTexel = 0xFFFF00FF;
    for (uint32_t row = 0; row < blockHeight; ++row) {
        for (uint32_t col = 0; col < blockWidth; ++col) {
            outBuf[row * blockWidth + col] = errorTexel;
        }
    }
}
643
// Expands the low numBits bits of val to a toBit-bit wide value by repeating
// the bit pattern from the most significant end downwards; the final repeat
// is truncated to its leading bits when toBit is not a multiple of numBits.
// Returns 0 when numBits or toBit is 0. When toBit <= numBits the masked
// input is returned unchanged.
template <typename IntType>
IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) {
    if (numBits == 0 || toBit == 0)
        return 0;

    // Build the mask in 64 bits: a plain `1 << numBits` is an int shift and
    // breaks for numBits >= 31 or for 64-bit IntType.
    const uint64_t mask = numBits >= 64 ? ~uint64_t{0} : (uint64_t{1} << numBits) - 1;
    const IntType v = static_cast<IntType>(static_cast<uint64_t>(val) & mask);

    IntType res = v;
    uint32_t reslen = numBits;
    while (reslen < toBit) {
        // comp = number of low pattern bits to drop when only part of the
        // pattern still fits into the remaining space.
        uint32_t comp = 0;
        if (numBits > toBit - reslen) {
            const uint32_t newshift = toBit - reslen;
            comp = numBits - newshift;
            numBits = newshift;
        }
        res <<= numBits;
        res |= v >> comp;
        reslen += numBits;
    }
    return res;
}
668
669class Pixel {
670protected:
671 typedef int16_t ChannelType;
672 uint8_t m_BitDepth[4];
673 int16_t color[4];
674
675public:
676 Pixel() {
677 for (int i = 0; i < 4; i++) {
678 m_BitDepth[i] = 8;
679 color[i] = 0;
680 }
681 }
682
683 Pixel(ChannelType a, ChannelType r, ChannelType g, ChannelType b, unsigned bitDepth = 8) {
684 for (int i = 0; i < 4; i++)
685 m_BitDepth[i] = bitDepth;
686
687 color[0] = a;
688 color[1] = r;
689 color[2] = g;
690 color[3] = b;
691 }
692
693 // Changes the depth of each pixel. This scales the values to
694 // the appropriate bit depth by either truncating the least
695 // significant bits when going from larger to smaller bit depth
696 // or by repeating the most significant bits when going from
697 // smaller to larger bit depths.
698 void ChangeBitDepth(const uint8_t (&depth)[4]) {
699 for (uint32_t i = 0; i < 4; i++) {
700 Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]);
701 m_BitDepth[i] = depth[i];
702 }
703 }
704
705 template <typename IntType>
706 static float ConvertChannelToFloat(IntType channel, uint8_t bitDepth) {
707 float denominator = static_cast<float>((1 << bitDepth) - 1);
708 return static_cast<float>(channel) / denominator;
709 }
710
711 // Changes the bit depth of a single component. See the comment
712 // above for how we do this.
713 static ChannelType ChangeBitDepth(Pixel::ChannelType val, uint8_t oldDepth, uint8_t newDepth) {
714 assert(newDepth <= 8);
715 assert(oldDepth <= 8);
716
717 if (oldDepth == newDepth) {
718 // Do nothing
719 return val;
720 } else if (oldDepth == 0 && newDepth != 0) {
721 return (1 << newDepth) - 1;
722 } else if (newDepth > oldDepth) {
723 return Replicate(val, oldDepth, newDepth);
724 } else {
725 // oldDepth > newDepth
726 if (newDepth == 0) {
727 return 0xFF;
728 } else {
729 uint8_t bitsWasted = oldDepth - newDepth;
730 uint16_t v = static_cast<uint16_t>(val);
731 v = (v + (1 << (bitsWasted - 1))) >> bitsWasted;
732 v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), (1 << newDepth) - 1);
733 return static_cast<uint8_t>(v);
734 }
735 }
736
737 assert(!"We shouldn't get here.");
738 return 0;
739 }
740
741 const ChannelType& A() const {
742 return color[0];
743 }
744 ChannelType& A() {
745 return color[0];
746 }
747 const ChannelType& R() const {
748 return color[1];
749 }
750 ChannelType& R() {
751 return color[1];
752 }
753 const ChannelType& G() const {
754 return color[2];
755 }
756 ChannelType& G() {
757 return color[2];
758 }
759 const ChannelType& B() const {
760 return color[3];
761 }
762 ChannelType& B() {
763 return color[3];
764 }
765 const ChannelType& Component(uint32_t idx) const {
766 return color[idx];
767 }
768 ChannelType& Component(uint32_t idx) {
769 return color[idx];
770 }
771
772 void GetBitDepth(uint8_t (&outDepth)[4]) const {
773 for (int i = 0; i < 4; i++) {
774 outDepth[i] = m_BitDepth[i];
775 }
776 }
777
778 // Take all of the components, transform them to their 8-bit variants,
779 // and then pack each channel into an R8G8B8A8 32-bit integer. We assume
780 // that the architecture is little-endian, so the alpha channel will end
781 // up in the most-significant byte.
782 uint32_t Pack() const {
783 Pixel eightBit(*this);
784 const uint8_t eightBitDepth[4] = {8, 8, 8, 8};
785 eightBit.ChangeBitDepth(eightBitDepth);
786
787 uint32_t r = 0;
788 r |= eightBit.A();
789 r <<= 8;
790 r |= eightBit.B();
791 r <<= 8;
792 r |= eightBit.G();
793 r <<= 8;
794 r |= eightBit.R();
795 return r;
796 }
797
798 // Clamps the pixel to the range [0,255]
799 void ClampByte() {
800 for (uint32_t i = 0; i < 4; i++) {
801 color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]);
802 }
803 }
804
805 void MakeOpaque() {
806 A() = 255;
807 }
808};
809
// Decodes and dequantizes the color endpoint values of one block.
//
// out               - receives one dequantized value (0-255) per decoded color value.
// data              - raw bytes holding the integer-encoded color data.
// modes             - color endpoint mode of each partition; (mode >> 2) + 1 value
//                     pairs are read for each partition.
// nPartitions       - number of entries of `modes` to consider.
// nBitsForColorData - number of bits available for the encoded color values.
810void DecodeColorValues(uint32_t* out, uint8_t* data, uint32_t* modes, const uint32_t nPartitions,
811 const uint32_t nBitsForColorData) {
812 // First figure out how many color values we have
813 uint32_t nValues = 0;
814 for (uint32_t i = 0; i < nPartitions; i++) {
815 nValues += ((modes[i] >> 2) + 1) << 1;
816 }
817
818 // Then based on the number of values and the remaining number of bits,
819 // figure out the max value for each of them...
// The search walks downwards from 255 and stops at the first (largest) range
// whose encoding of nValues values fits into nBitsForColorData bits.
820 uint32_t range = 256;
821 while (--range > 0) {
822 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(range);
823 uint32_t bitLength = val.GetBitLength(nValues);
824 if (bitLength <= nBitsForColorData) {
825 // Find the smallest possible range that matches the given encoding
826 while (--range > 0) {
827 IntegerEncodedValue newval = IntegerEncodedValue::CreateEncoding(range);
828 if (!newval.MatchesEncoding(val)) {
829 break;
830 }
831 }
832
833 // Return to last matching range.
834 range++;
835 break;
836 }
837 }
838
839 // We now have enough to decode our integer sequence.
840 std::vector<IntegerEncodedValue> decodedColorValues;
841 BitStream colorStream(data);
842 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
843
844 // Once we have the decoded values, we need to dequantize them to the 0-255 range
845 // This procedure is outlined in ASTC spec C.2.13
846 uint32_t outIdx = 0;
847 std::vector<IntegerEncodedValue>::const_iterator itr;
848 for (itr = decodedColorValues.begin(); itr != decodedColorValues.end(); itr++) {
849 // Have we already decoded all that we need?
850 if (outIdx >= nValues) {
851 break;
852 }
853
854 const IntegerEncodedValue& val = *itr;
855 uint32_t bitlen = val.BaseBitLength();
856 uint32_t bitval = val.GetBitValue();
857
858 assert(bitlen >= 1);
859
// For trit/quint encodings the result is built from four terms: A (the
// replicated lsb), B (a bit pattern made from the remaining bits), C (a
// constant chosen by bit length) and D (the trit or quint digit).
860 uint32_t A = 0, B = 0, C = 0, D = 0;
861 // A is just the lsb replicated 9 times.
862 A = Replicate(bitval & 1, 1, 9);
863
864 switch (val.GetEncoding()) {
865 // Replicate bits
866 case eIntegerEncoding_JustBits:
867 out[outIdx++] = Replicate(bitval, bitlen, 8);
868 break;
869
870 // Use algorithm in C.2.13
871 case eIntegerEncoding_Trit: {
872
873 D = val.GetTritValue();
874
875 switch (bitlen) {
876 case 1: {
877 C = 204;
878 } break;
879
880 case 2: {
881 C = 93;
882 // B = b000b0bb0
883 uint32_t b = (bitval >> 1) & 1;
884 B = (b << 8) | (b << 4) | (b << 2) | (b << 1);
885 } break;
886
887 case 3: {
888 C = 44;
889 // B = cb000cbcb
890 uint32_t cb = (bitval >> 1) & 3;
891 B = (cb << 7) | (cb << 2) | cb;
892 } break;
893
894 case 4: {
895 C = 22;
896 // B = dcb000dcb
897 uint32_t dcb = (bitval >> 1) & 7;
898 B = (dcb << 6) | dcb;
899 } break;
900
901 case 5: {
902 C = 11;
903 // B = edcb000ed
904 uint32_t edcb = (bitval >> 1) & 0xF;
905 B = (edcb << 5) | (edcb >> 2);
906 } break;
907
908 case 6: {
909 C = 5;
910 // B = fedcb000f
911 uint32_t fedcb = (bitval >> 1) & 0x1F;
912 B = (fedcb << 4) | (fedcb >> 4);
913 } break;
914
915 default:
916 assert(!"Unsupported trit encoding for color values!");
917 break;
918 } // switch(bitlen)
919 } // case eIntegerEncoding_Trit
920 break;
921
922 case eIntegerEncoding_Quint: {
923
924 D = val.GetQuintValue();
925
926 switch (bitlen) {
927 case 1: {
928 C = 113;
929 } break;
930
931 case 2: {
932 C = 54;
933 // B = b0000bb00
934 uint32_t b = (bitval >> 1) & 1;
935 B = (b << 8) | (b << 3) | (b << 2);
936 } break;
937
938 case 3: {
939 C = 26;
940 // B = cb0000cbc
941 uint32_t cb = (bitval >> 1) & 3;
942 B = (cb << 7) | (cb << 1) | (cb >> 1);
943 } break;
944
945 case 4: {
946 C = 13;
947 // B = dcb0000dc
948 uint32_t dcb = (bitval >> 1) & 7;
949 B = (dcb << 6) | (dcb >> 1);
950 } break;
951
952 case 5: {
953 C = 6;
954 // B = edcb0000e
955 uint32_t edcb = (bitval >> 1) & 0xF;
956 B = (edcb << 5) | (edcb >> 3);
957 } break;
958
959 default:
960 assert(!"Unsupported quint encoding for color values!");
961 break;
962 } // switch(bitlen)
963 } // case eIntegerEncoding_Quint
964 break;
965 } // switch(val.GetEncoding())
966
// Combine the terms for trit/quint values: the 9-bit intermediate is XORed
// with A, reduced to 8 bits, and bit 7 is taken from A.
967 if (val.GetEncoding() != eIntegerEncoding_JustBits) {
968 uint32_t T = D * C + B;
969 T ^= A;
970 T = (A & 0x80) | (T >> 2);
971 out[outIdx++] = T;
972 }
973 }
974
975 // Make sure that each of our values is in the proper range...
976 for (uint32_t i = 0; i < nValues; i++) {
977 assert(out[i] <= 255);
978 }
979}
980
981uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) {
982 uint32_t bitval = val.GetBitValue();
983 uint32_t bitlen = val.BaseBitLength();
984
985 uint32_t A = Replicate(bitval & 1, 1, 7);
986 uint32_t B = 0, C = 0, D = 0;
987
988 uint32_t result = 0;
989 switch (val.GetEncoding()) {
990 case eIntegerEncoding_JustBits:
991 result = Replicate(bitval, bitlen, 6);
992 break;
993
994 case eIntegerEncoding_Trit: {
995 D = val.GetTritValue();
996 assert(D < 3);
997
998 switch (bitlen) {
999 case 0: {
1000 uint32_t results[3] = {0, 32, 63};
1001 result = results[D];
1002 } break;
1003
1004 case 1: {
1005 C = 50;
1006 } break;
1007
1008 case 2: {
1009 C = 23;
1010 uint32_t b = (bitval >> 1) & 1;
1011 B = (b << 6) | (b << 2) | b;
1012 } break;
1013
1014 case 3: {
1015 C = 11;
1016 uint32_t cb = (bitval >> 1) & 3;
1017 B = (cb << 5) | cb;
1018 } break;
1019
1020 default:
1021 assert(!"Invalid trit encoding for texel weight");
1022 break;
1023 }
1024 } break;
1025
1026 case eIntegerEncoding_Quint: {
1027 D = val.GetQuintValue();
1028 assert(D < 5);
1029
1030 switch (bitlen) {
1031 case 0: {
1032 uint32_t results[5] = {0, 16, 32, 47, 63};
1033 result = results[D];
1034 } break;
1035
1036 case 1: {
1037 C = 28;
1038 } break;
1039
1040 case 2: {
1041 C = 13;
1042 uint32_t b = (bitval >> 1) & 1;
1043 B = (b << 6) | (b << 1);
1044 } break;
1045
1046 default:
1047 assert(!"Invalid quint encoding for texel weight");
1048 break;
1049 }
1050 } break;
1051 }
1052
1053 if (val.GetEncoding() != eIntegerEncoding_JustBits && bitlen > 0) {
1054 // Decode the value...
1055 result = D * C + B;
1056 result ^= A;
1057 result = (A & 0x20) | (result >> 2);
1058 }
1059
1060 assert(result < 64);
1061
1062 // Change from [0,63] to [0,64]
1063 if (result > 32) {
1064 result += 1;
1065 }
1066
1067 return result;
1068}
1069
1070void UnquantizeTexelWeights(uint32_t out[2][144], std::vector<IntegerEncodedValue>& weights,
1071 const TexelWeightParams& params, const uint32_t blockWidth,
1072 const uint32_t blockHeight) {
1073 uint32_t weightIdx = 0;
1074 uint32_t unquantized[2][144];
1075 std::vector<IntegerEncodedValue>::const_iterator itr;
1076 for (itr = weights.begin(); itr != weights.end(); itr++) {
1077 unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr);
1078
1079 if (params.m_bDualPlane) {
1080 itr++;
1081 unquantized[1][weightIdx] = UnquantizeTexelWeight(*itr);
1082 if (itr == weights.end()) {
1083 break;
1084 }
1085 }
1086
1087 if (++weightIdx >= (params.m_Width * params.m_Height))
1088 break;
1089 }
1090
1091 // Do infill if necessary (Section C.2.18) ...
1092 uint32_t Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1);
1093 uint32_t Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1);
1094
1095 const uint32_t kPlaneScale = params.m_bDualPlane ? 2U : 1U;
1096 for (uint32_t plane = 0; plane < kPlaneScale; plane++)
1097 for (uint32_t t = 0; t < blockHeight; t++)
1098 for (uint32_t s = 0; s < blockWidth; s++) {
1099 uint32_t cs = Ds * s;
1100 uint32_t ct = Dt * t;
1101
1102 uint32_t gs = (cs * (params.m_Width - 1) + 32) >> 6;
1103 uint32_t gt = (ct * (params.m_Height - 1) + 32) >> 6;
1104
1105 uint32_t js = gs >> 4;
1106 uint32_t fs = gs & 0xF;
1107
1108 uint32_t jt = gt >> 4;
1109 uint32_t ft = gt & 0x0F;
1110
1111 uint32_t w11 = (fs * ft + 8) >> 4;
1112 uint32_t w10 = ft - w11;
1113 uint32_t w01 = fs - w11;
1114 uint32_t w00 = 16 - fs - ft + w11;
1115
1116 uint32_t v0 = js + jt * params.m_Width;
1117
1118#define FIND_TEXEL(tidx, bidx) \
1119 uint32_t p##bidx = 0; \
1120 do { \
1121 if ((tidx) < (params.m_Width * params.m_Height)) { \
1122 p##bidx = unquantized[plane][(tidx)]; \
1123 } \
1124 } while (0)
1125
1126 FIND_TEXEL(v0, 00);
1127 FIND_TEXEL(v0 + 1, 01);
1128 FIND_TEXEL(v0 + params.m_Width, 10);
1129 FIND_TEXEL(v0 + params.m_Width + 1, 11);
1130
1131#undef FIND_TEXEL
1132
1133 out[plane][t * blockWidth + s] =
1134 (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4;
1135 }
1136}
1137
// Bit transfer as described in ASTC spec section C.2.14: bit 7 of `a` becomes
// bit 7 of `b` (after `b` is shifted right by one), and `a` is reduced to a
// sign-extended 6-bit value in the range [-32, 31].
static inline void BitTransferSigned(int32_t& a, int32_t& b) {
    b = b >> 1;
    b = b | (a & 0x80);

    a = (a >> 1) & 0x3F;
    // Bit 5 is the sign bit of the 6-bit value.
    if (a >= 0x20) {
        a -= 0x40;
    }
}
1147
1148// Adds more precision to the blue channel as described
1149// in C.2.14
1150static inline Pixel BlueContract(int32_t a, int32_t r, int32_t g, int32_t b) {
1151 return Pixel(static_cast<int16_t>(a), static_cast<int16_t>((r + b) >> 1),
1152 static_cast<int16_t>((g + b) >> 1), static_cast<int16_t>(b));
1153}
1154
// Partition selection functions as specified in
// C.2.21

// Integer hash used to scramble the partition seed. All arithmetic is
// unsigned 32-bit and wraps around on overflow.
static inline uint32_t hash52(uint32_t p) {
    p = p ^ (p >> 15);
    p = p - (p << 17);
    p = p + (p << 7);
    p = p + (p << 4);
    p = p ^ (p >> 5);
    p = p + (p << 16);
    p = p ^ (p >> 7);
    p = p ^ (p >> 3);
    p = p ^ (p << 6);
    p = p ^ (p >> 17);
    return p;
}
1170
1171static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
1172 int32_t partitionCount, int32_t smallBlock) {
1173 if (1 == partitionCount)
1174 return 0;
1175
1176 if (smallBlock) {
1177 x <<= 1;
1178 y <<= 1;
1179 z <<= 1;
1180 }
1181
1182 seed += (partitionCount - 1) * 1024;
1183
1184 uint32_t rnum = hash52(static_cast<uint32_t>(seed));
1185 uint8_t seed1 = static_cast<uint8_t>(rnum & 0xF);
1186 uint8_t seed2 = static_cast<uint8_t>((rnum >> 4) & 0xF);
1187 uint8_t seed3 = static_cast<uint8_t>((rnum >> 8) & 0xF);
1188 uint8_t seed4 = static_cast<uint8_t>((rnum >> 12) & 0xF);
1189 uint8_t seed5 = static_cast<uint8_t>((rnum >> 16) & 0xF);
1190 uint8_t seed6 = static_cast<uint8_t>((rnum >> 20) & 0xF);
1191 uint8_t seed7 = static_cast<uint8_t>((rnum >> 24) & 0xF);
1192 uint8_t seed8 = static_cast<uint8_t>((rnum >> 28) & 0xF);
1193 uint8_t seed9 = static_cast<uint8_t>((rnum >> 18) & 0xF);
1194 uint8_t seed10 = static_cast<uint8_t>((rnum >> 22) & 0xF);
1195 uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF);
1196 uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF);
1197
1198 seed1 *= seed1;
1199 seed2 *= seed2;
1200 seed3 *= seed3;
1201 seed4 *= seed4;
1202 seed5 *= seed5;
1203 seed6 *= seed6;
1204 seed7 *= seed7;
1205 seed8 *= seed8;
1206 seed9 *= seed9;
1207 seed10 *= seed10;
1208 seed11 *= seed11;
1209 seed12 *= seed12;
1210
1211 int32_t sh1, sh2, sh3;
1212 if (seed & 1) {
1213 sh1 = (seed & 2) ? 4 : 5;
1214 sh2 = (partitionCount == 3) ? 6 : 5;
1215 } else {
1216 sh1 = (partitionCount == 3) ? 6 : 5;
1217 sh2 = (seed & 2) ? 4 : 5;
1218 }
1219 sh3 = (seed & 0x10) ? sh1 : sh2;
1220
1221 seed1 >>= sh1;
1222 seed2 >>= sh2;
1223 seed3 >>= sh1;
1224 seed4 >>= sh2;
1225 seed5 >>= sh1;
1226 seed6 >>= sh2;
1227 seed7 >>= sh1;
1228 seed8 >>= sh2;
1229 seed9 >>= sh3;
1230 seed10 >>= sh3;
1231 seed11 >>= sh3;
1232 seed12 >>= sh3;
1233
1234 int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
1235 int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
1236 int32_t c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
1237 int32_t d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
1238
1239 a &= 0x3F;
1240 b &= 0x3F;
1241 c &= 0x3F;
1242 d &= 0x3F;
1243
1244 if (partitionCount < 4)
1245 d = 0;
1246 if (partitionCount < 3)
1247 c = 0;
1248
1249 if (a >= b && a >= c && a >= d)
1250 return 0;
1251 else if (b >= c && b >= d)
1252 return 1;
1253 else if (c >= d)
1254 return 2;
1255 return 3;
1256}
1257
1258static inline uint32_t Select2DPartition(int32_t seed, int32_t x, int32_t y, int32_t partitionCount,
1259 int32_t smallBlock) {
1260 return SelectPartition(seed, x, y, 0, partitionCount, smallBlock);
1261}
1262
1263// Section C.2.14
1264void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValues,
1265 uint32_t colorEndpointMode) {
1266#define READ_UINT_VALUES(N) \
1267 uint32_t v[N]; \
1268 for (uint32_t i = 0; i < N; i++) { \
1269 v[i] = *(colorValues++); \
1270 }
1271
1272#define READ_INT_VALUES(N) \
1273 int32_t v[N]; \
1274 for (uint32_t i = 0; i < N; i++) { \
1275 v[i] = static_cast<int32_t>(*(colorValues++)); \
1276 }
1277
1278 switch (colorEndpointMode) {
1279 case 0: {
1280 READ_UINT_VALUES(2)
1281 ep1 = Pixel(0xFF, v[0], v[0], v[0]);
1282 ep2 = Pixel(0xFF, v[1], v[1], v[1]);
1283 } break;
1284
1285 case 1: {
1286 READ_UINT_VALUES(2)
1287 uint32_t L0 = (v[0] >> 2) | (v[1] & 0xC0);
1288 uint32_t L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU);
1289 ep1 = Pixel(0xFF, L0, L0, L0);
1290 ep2 = Pixel(0xFF, L1, L1, L1);
1291 } break;
1292
1293 case 4: {
1294 READ_UINT_VALUES(4)
1295 ep1 = Pixel(v[2], v[0], v[0], v[0]);
1296 ep2 = Pixel(v[3], v[1], v[1], v[1]);
1297 } break;
1298
1299 case 5: {
1300 READ_INT_VALUES(4)
1301 BitTransferSigned(v[1], v[0]);
1302 BitTransferSigned(v[3], v[2]);
1303 ep1 = Pixel(v[2], v[0], v[0], v[0]);
1304 ep2 = Pixel(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1]);
1305 ep1.ClampByte();
1306 ep2.ClampByte();
1307 } break;
1308
1309 case 6: {
1310 READ_UINT_VALUES(4)
1311 ep1 = Pixel(0xFF, v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8);
1312 ep2 = Pixel(0xFF, v[0], v[1], v[2]);
1313 } break;
1314
1315 case 8: {
1316 READ_UINT_VALUES(6)
1317 if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) {
1318 ep1 = Pixel(0xFF, v[0], v[2], v[4]);
1319 ep2 = Pixel(0xFF, v[1], v[3], v[5]);
1320 } else {
1321 ep1 = BlueContract(0xFF, v[1], v[3], v[5]);
1322 ep2 = BlueContract(0xFF, v[0], v[2], v[4]);
1323 }
1324 } break;
1325
1326 case 9: {
1327 READ_INT_VALUES(6)
1328 BitTransferSigned(v[1], v[0]);
1329 BitTransferSigned(v[3], v[2]);
1330 BitTransferSigned(v[5], v[4]);
1331 if (v[1] + v[3] + v[5] >= 0) {
1332 ep1 = Pixel(0xFF, v[0], v[2], v[4]);
1333 ep2 = Pixel(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]);
1334 } else {
1335 ep1 = BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]);
1336 ep2 = BlueContract(0xFF, v[0], v[2], v[4]);
1337 }
1338 ep1.ClampByte();
1339 ep2.ClampByte();
1340 } break;
1341
1342 case 10: {
1343 READ_UINT_VALUES(6)
1344 ep1 = Pixel(v[4], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8);
1345 ep2 = Pixel(v[5], v[0], v[1], v[2]);
1346 } break;
1347
1348 case 12: {
1349 READ_UINT_VALUES(8)
1350 if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) {
1351 ep1 = Pixel(v[6], v[0], v[2], v[4]);
1352 ep2 = Pixel(v[7], v[1], v[3], v[5]);
1353 } else {
1354 ep1 = BlueContract(v[7], v[1], v[3], v[5]);
1355 ep2 = BlueContract(v[6], v[0], v[2], v[4]);
1356 }
1357 } break;
1358
1359 case 13: {
1360 READ_INT_VALUES(8)
1361 BitTransferSigned(v[1], v[0]);
1362 BitTransferSigned(v[3], v[2]);
1363 BitTransferSigned(v[5], v[4]);
1364 BitTransferSigned(v[7], v[6]);
1365 if (v[1] + v[3] + v[5] >= 0) {
1366 ep1 = Pixel(v[6], v[0], v[2], v[4]);
1367 ep2 = Pixel(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5]);
1368 } else {
1369 ep1 = BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5]);
1370 ep2 = BlueContract(v[6], v[0], v[2], v[4]);
1371 }
1372 ep1.ClampByte();
1373 ep2.ClampByte();
1374 } break;
1375
1376 default:
1377 assert(!"Unsupported color endpoint mode (is it HDR?)");
1378 break;
1379 }
1380
1381#undef READ_UINT_VALUES
1382#undef READ_INT_VALUES
1383}
1384
// Decodes a single 16-byte ASTC block into packed 32-bit pixels.
//
// inBuf       - the 16 bytes of the compressed block.
// blockWidth  - block footprint width in texels.
// blockHeight - block footprint height in texels.
// outBuf      - receives blockWidth * blockHeight pixels, row-major with a
//               stride of blockWidth.
//
// Blocks that cannot be decoded (invalid block mode, HDR void extent, weight
// grid larger than the block, dual plane with four partitions) trip an assert
// and are then filled with the error color via FillError.
1385void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, const uint32_t blockHeight,
1386 uint32_t* outBuf) {
1387 BitStream strm(inBuf);
1388 TexelWeightParams weightParams = DecodeBlockInfo(strm);
1389
1390 // Was there an error?
1391 if (weightParams.m_bError) {
1392 assert(!"Invalid block mode");
1393 FillError(outBuf, blockWidth, blockHeight);
1394 return;
1395 }
1396
1397 if (weightParams.m_bVoidExtentLDR) {
1398 FillVoidExtentLDR(strm, outBuf, blockWidth, blockHeight);
1399 return;
1400 }
1401
1402 if (weightParams.m_bVoidExtentHDR) {
1403 assert(!"HDR void extent blocks are unsupported!");
1404 FillError(outBuf, blockWidth, blockHeight);
1405 return;
1406 }
1407
1408 if (weightParams.m_Width > blockWidth) {
1409 assert(!"Texel weight grid width should be smaller than block width");
1410 FillError(outBuf, blockWidth, blockHeight);
1411 return;
1412 }
1413
1414 if (weightParams.m_Height > blockHeight) {
1415 assert(!"Texel weight grid height should be smaller than block height");
1416 FillError(outBuf, blockWidth, blockHeight);
1417 return;
1418 }
1419
1420 // Read num partitions
1421 uint32_t nPartitions = strm.ReadBits(2) + 1;
1422 assert(nPartitions <= 4);
1423
1424 if (nPartitions == 4 && weightParams.m_bDualPlane) {
1425 assert(!"Dual plane mode is incompatible with four partition blocks");
1426 FillError(outBuf, blockWidth, blockHeight);
1427 return;
1428 }
1429
1430 // Based on the number of partitions, read the color endpoint mode for
1431 // each partition.
1432
1433 // Determine partitions, partition index, and color endpoint modes
1434 int32_t planeIdx = -1;
1435 uint32_t partitionIndex;
1436 uint32_t colorEndpointMode[4] = {0, 0, 0, 0};
1437
1438 // Define color data.
1439 uint8_t colorEndpointData[16];
1440 memset(colorEndpointData, 0, sizeof(colorEndpointData));
1441 BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
1442
1443 // Read extra config data...
// Single-partition blocks carry one 4-bit endpoint mode; multi-partition
// blocks carry a 10-bit partition index followed by 6 bits of base mode data.
1444 uint32_t baseCEM = 0;
1445 if (nPartitions == 1) {
1446 colorEndpointMode[0] = strm.ReadBits(4);
1447 partitionIndex = 0;
1448 } else {
1449 partitionIndex = strm.ReadBits(10);
1450 baseCEM = strm.ReadBits(6);
1451 }
1452 uint32_t baseMode = (baseCEM & 3);
1453
1454 // Remaining bits are color endpoint data...
1455 uint32_t nWeightBits = weightParams.GetPackedBitSize();
1456 int32_t remainingBits = 128 - nWeightBits - strm.GetBitsRead();
1457
1458 // Consider extra bits prior to texel data...
1459 uint32_t extraCEMbits = 0;
1460 if (baseMode) {
1461 switch (nPartitions) {
1462 case 2:
1463 extraCEMbits += 2;
1464 break;
1465 case 3:
1466 extraCEMbits += 5;
1467 break;
1468 case 4:
1469 extraCEMbits += 8;
1470 break;
1471 default:
1472 assert(false);
1473 break;
1474 }
1475 }
1476 remainingBits -= extraCEMbits;
1477
1478 // Do we have a dual plane situation?
1479 uint32_t planeSelectorBits = 0;
1480 if (weightParams.m_bDualPlane) {
1481 planeSelectorBits = 2;
1482 }
1483 remainingBits -= planeSelectorBits;
1484
1485 // Read color data...
// The color bits are copied, at most 8 at a time, into colorEndpointStream so
// that they can be decoded later from the start of colorEndpointData.
1486 uint32_t colorDataBits = remainingBits;
1487 while (remainingBits > 0) {
1488 uint32_t nb = std::min(remainingBits, 8);
1489 uint32_t b = strm.ReadBits(nb);
1490 colorEndpointStream.WriteBits(b, nb);
1491 remainingBits -= 8;
1492 }
1493
1494 // Read the plane selection bits
1495 planeIdx = strm.ReadBits(planeSelectorBits);
1496
1497 // Read the rest of the CEM
// With a non-zero base mode each partition gets its own mode, assembled from
// a one-bit class selector C[i] and a two-bit value M[i]. Otherwise all
// partitions share the mode stored in the upper bits of baseCEM.
1498 if (baseMode) {
1499 uint32_t extraCEM = strm.ReadBits(extraCEMbits);
1500 uint32_t CEM = (extraCEM << 6) | baseCEM;
1501 CEM >>= 2;
1502
1503 bool C[4] = {0};
1504 for (uint32_t i = 0; i < nPartitions; i++) {
1505 C[i] = CEM & 1;
1506 CEM >>= 1;
1507 }
1508
1509 uint8_t M[4] = {0};
1510 for (uint32_t i = 0; i < nPartitions; i++) {
1511 M[i] = CEM & 3;
1512 CEM >>= 2;
1513 assert(M[i] <= 3);
1514 }
1515
1516 for (uint32_t i = 0; i < nPartitions; i++) {
1517 colorEndpointMode[i] = baseMode;
1518 if (!(C[i]))
1519 colorEndpointMode[i] -= 1;
1520 colorEndpointMode[i] <<= 2;
1521 colorEndpointMode[i] |= M[i];
1522 }
1523 } else if (nPartitions > 1) {
1524 uint32_t CEM = baseCEM >> 2;
1525 for (uint32_t i = 0; i < nPartitions; i++) {
1526 colorEndpointMode[i] = CEM;
1527 }
1528 }
1529
1530 // Make sure everything up till here is sane.
1531 for (uint32_t i = 0; i < nPartitions; i++) {
1532 assert(colorEndpointMode[i] < 16);
1533 }
1534 assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128);
1535
1536 // Decode both color data and texel weight data
1537 uint32_t colorValues[32]; // Four values, two endpoints, four maximum partitions
1538 DecodeColorValues(colorValues, colorEndpointData, colorEndpointMode, nPartitions,
1539 colorDataBits);
1540
// ComputeEndpoints advances colorValuesPtr, so each partition continues where
// the previous one stopped.
1541 Pixel endpoints[4][2];
1542 const uint32_t* colorValuesPtr = colorValues;
1543 for (uint32_t i = 0; i < nPartitions; i++) {
1544 ComputeEndpoints(endpoints[i][0], endpoints[i][1], colorValuesPtr, colorEndpointMode[i]);
1545 }
1546
1547 // Read the texel weight data..
1548 uint8_t texelWeightData[16];
1549 memcpy(texelWeightData, inBuf, sizeof(texelWeightData));
1550
1551 // Reverse everything
// Byte order and the bit order inside each byte are both reversed, which
// reverses the bit order of the whole 128-bit block copy. Presumably the
// weights are stored starting from the end of the block - confirm against
// the ASTC spec.
1552 for (uint32_t i = 0; i < 8; i++) {
1553// Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits
1554#define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32
1555 unsigned char a = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[i]));
1556 unsigned char b = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[15 - i]));
1557#undef REVERSE_BYTE
1558
1559 texelWeightData[i] = b;
1560 texelWeightData[15 - i] = a;
1561 }
1562
1563 // Make sure that higher non-texel bits are set to zero
1564 const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
1565 texelWeightData[clearByteStart - 1] &= (1 << (weightParams.GetPackedBitSize() % 8)) - 1;
1566 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
1567
1568 std::vector<IntegerEncodedValue> texelWeightValues;
1569 BitStream weightStream(texelWeightData);
1570
1571 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
1572 weightParams.m_MaxWeight,
1573 weightParams.GetNumWeightValues());
1574
1575 // Blocks can be at most 12x12, so we can have as many as 144 weights
1576 uint32_t weights[2][144];
1577 UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight);
1578
1579 // Now that we have endpoints and weights, we can interpolate and generate
1580 // the proper decoding...
1581 for (uint32_t j = 0; j < blockHeight; j++)
1582 for (uint32_t i = 0; i < blockWidth; i++) {
1583 uint32_t partition = Select2DPartition(partitionIndex, i, j, nPartitions,
1584 (blockHeight * blockWidth) < 32);
1585 assert(partition < nPartitions);
1586
1587 Pixel p;
1588 for (uint32_t c = 0; c < 4; c++) {
// Endpoints are widened from 8 to 16 bits before blending.
1589 uint32_t C0 = endpoints[partition][0].Component(c);
1590 C0 = Replicate(C0, 8, 16);
1591 uint32_t C1 = endpoints[partition][1].Component(c);
1592 C1 = Replicate(C1, 8, 16);
1593
// In dual plane mode the component selected by planeIdx takes its weight
// from the second plane.
1594 uint32_t plane = 0;
1595 if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) {
1596 plane = 1;
1597 }
1598
1599 uint32_t weight = weights[plane][j * blockWidth + i];
1600 uint32_t C = (C0 * (64 - weight) + C1 * weight + 32) / 64;
// Scale the 16-bit result back to 8 bits; 65535 maps directly to 255.
1601 if (C == 65535) {
1602 p.Component(c) = 255;
1603 } else {
1604 double Cf = static_cast<double>(C);
1605 p.Component(c) = static_cast<uint16_t>(255.0 * (Cf / 65536.0) + 0.5);
1606 }
1607 }
1608
1609 outBuf[j * blockWidth + i] = p.Pack();
1610 }
1611}
1612
1613} // namespace ASTCC
1614
1615namespace Tegra::Texture::ASTC {
1616
1617std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
1618 uint32_t block_width, uint32_t block_height) {
1619 uint32_t blockIdx = 0;
1620 std::vector<uint8_t> outData;
1621 outData.resize(height * width * 4);
1622 for (uint32_t j = 0; j < height; j += block_height) {
1623 for (uint32_t i = 0; i < width; i += block_width) {
1624
1625 uint8_t* blockPtr = data.data() + blockIdx * 16;
1626
1627 // Blocks can be at most 12x12
1628 uint32_t uncompData[144];
1629 ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
1630
1631 uint32_t decompWidth = std::min(block_width, width - i);
1632 uint32_t decompHeight = std::min(block_height, height - j);
1633
1634 uint8_t* outRow = outData.data() + (j * width + i) * 4;
1635 for (uint32_t jj = 0; jj < decompHeight; jj++) {
1636 memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
1637 }
1638
1639 blockIdx++;
1640 }
1641 }
1642
1643 return outData;
1644}
1645
1646} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
new file mode 100644
index 000000000..f0d7c0e56
--- /dev/null
+++ b/src/video_core/textures/astc.h
@@ -0,0 +1,15 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstdint>
8#include <vector>
9
10namespace Tegra::Texture::ASTC {
11
// Decompresses ASTC-compressed texture data made of consecutive 16-byte blocks.
// width and height give the image size in texels; block_width and block_height
// give the footprint of each block in texels. Returns the decoded image with
// four bytes per texel (width * height * 4 bytes in total).
12std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
13 uint32_t block_width, uint32_t block_height);
14
15} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 7bf9c4c4b..0db4367f1 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -53,6 +53,7 @@ u32 BytesPerPixel(TextureFormat format) {
53 case TextureFormat::DXT45: 53 case TextureFormat::DXT45:
54 // In this case a 'pixel' actually refers to a 4x4 tile. 54 // In this case a 'pixel' actually refers to a 4x4 tile.
55 return 16; 55 return 16;
56 case TextureFormat::ASTC_2D_4X4:
56 case TextureFormat::A8R8G8B8: 57 case TextureFormat::A8R8G8B8:
57 case TextureFormat::A2B10G10R10: 58 case TextureFormat::A2B10G10R10:
58 case TextureFormat::BF10GF11RF11: 59 case TextureFormat::BF10GF11RF11:
@@ -94,6 +95,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
94 case TextureFormat::R8: 95 case TextureFormat::R8:
95 case TextureFormat::R16_G16_B16_A16: 96 case TextureFormat::R16_G16_B16_A16:
96 case TextureFormat::BF10GF11RF11: 97 case TextureFormat::BF10GF11RF11:
98 case TextureFormat::ASTC_2D_4X4:
97 CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, 99 CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
98 unswizzled_data.data(), true, block_height); 100 unswizzled_data.data(), true, block_height);
99 break; 101 break;
@@ -115,6 +117,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
115 case TextureFormat::DXT23: 117 case TextureFormat::DXT23:
116 case TextureFormat::DXT45: 118 case TextureFormat::DXT45:
117 case TextureFormat::DXN1: 119 case TextureFormat::DXN1:
120 case TextureFormat::ASTC_2D_4X4:
118 case TextureFormat::A8R8G8B8: 121 case TextureFormat::A8R8G8B8:
119 case TextureFormat::A2B10G10R10: 122 case TextureFormat::A2B10G10R10:
120 case TextureFormat::A1B5G5R5: 123 case TextureFormat::A1B5G5R5:
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 5af3154d7..c662570d2 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -32,8 +32,6 @@ add_executable(yuzu
32 debugger/graphics/graphics_surface.h 32 debugger/graphics/graphics_surface.h
33 debugger/profiler.cpp 33 debugger/profiler.cpp
34 debugger/profiler.h 34 debugger/profiler.h
35 debugger/registers.cpp
36 debugger/registers.h
37 debugger/wait_tree.cpp 35 debugger/wait_tree.cpp
38 debugger/wait_tree.h 36 debugger/wait_tree.h
39 game_list.cpp 37 game_list.cpp
@@ -60,7 +58,6 @@ set(UIS
60 configuration/configure_graphics.ui 58 configuration/configure_graphics.ui
61 configuration/configure_input.ui 59 configuration/configure_input.ui
62 configuration/configure_system.ui 60 configuration/configure_system.ui
63 debugger/registers.ui
64 hotkeys.ui 61 hotkeys.ui
65 main.ui 62 main.ui
66) 63)
diff --git a/src/yuzu/debugger/registers.cpp b/src/yuzu/debugger/registers.cpp
deleted file mode 100644
index 178cc65a7..000000000
--- a/src/yuzu/debugger/registers.cpp
+++ /dev/null
@@ -1,190 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <QTreeWidgetItem>
6#include "core/arm/arm_interface.h"
7#include "core/core.h"
8#include "yuzu/debugger/registers.h"
9#include "yuzu/util/util.h"
10
11RegistersWidget::RegistersWidget(QWidget* parent) : QDockWidget(parent) {
12 cpu_regs_ui.setupUi(this);
13
14 tree = cpu_regs_ui.treeWidget;
15 tree->addTopLevelItem(core_registers = new QTreeWidgetItem(QStringList(tr("Registers"))));
16 tree->addTopLevelItem(vfp_registers = new QTreeWidgetItem(QStringList(tr("VFP Registers"))));
17 tree->addTopLevelItem(vfp_system_registers =
18 new QTreeWidgetItem(QStringList(tr("VFP System Registers"))));
19 tree->addTopLevelItem(cpsr = new QTreeWidgetItem(QStringList("CPSR")));
20
21 for (int i = 0; i < 16; ++i) {
22 QTreeWidgetItem* child = new QTreeWidgetItem(QStringList(QString("R[%1]").arg(i)));
23 core_registers->addChild(child);
24 }
25
26 for (int i = 0; i < 32; ++i) {
27 QTreeWidgetItem* child = new QTreeWidgetItem(QStringList(QString("S[%1]").arg(i)));
28 vfp_registers->addChild(child);
29 }
30
31 QFont font = GetMonospaceFont();
32
33 CreateCPSRChildren();
34 CreateVFPSystemRegisterChildren();
35
36 // Set Registers to display in monospace font
37 for (int i = 0; i < core_registers->childCount(); ++i)
38 core_registers->child(i)->setFont(1, font);
39
40 for (int i = 0; i < vfp_registers->childCount(); ++i)
41 vfp_registers->child(i)->setFont(1, font);
42
43 for (int i = 0; i < vfp_system_registers->childCount(); ++i) {
44 vfp_system_registers->child(i)->setFont(1, font);
45 for (int x = 0; x < vfp_system_registers->child(i)->childCount(); ++x) {
46 vfp_system_registers->child(i)->child(x)->setFont(1, font);
47 }
48 }
49 // Set CSPR to display in monospace font
50 cpsr->setFont(1, font);
51 for (int i = 0; i < cpsr->childCount(); ++i) {
52 cpsr->child(i)->setFont(1, font);
53 for (int x = 0; x < cpsr->child(i)->childCount(); ++x) {
54 cpsr->child(i)->child(x)->setFont(1, font);
55 }
56 }
57 setEnabled(false);
58}
59
60void RegistersWidget::OnDebugModeEntered() {
61 if (!Core::System::GetInstance().IsPoweredOn())
62 return;
63
64 for (int i = 0; i < core_registers->childCount(); ++i)
65 core_registers->child(i)->setText(
66 1, QString("0x%1").arg(Core::CurrentArmInterface().GetReg(i), 8, 16, QLatin1Char('0')));
67
68 UpdateCPSRValues();
69}
70
71void RegistersWidget::OnDebugModeLeft() {}
72
73void RegistersWidget::OnEmulationStarting(EmuThread* emu_thread) {
74 setEnabled(true);
75}
76
77void RegistersWidget::OnEmulationStopping() {
78 // Reset widget text
79 for (int i = 0; i < core_registers->childCount(); ++i)
80 core_registers->child(i)->setText(1, QString(""));
81
82 for (int i = 0; i < vfp_registers->childCount(); ++i)
83 vfp_registers->child(i)->setText(1, QString(""));
84
85 for (int i = 0; i < cpsr->childCount(); ++i)
86 cpsr->child(i)->setText(1, QString(""));
87
88 cpsr->setText(1, QString(""));
89
90 // FPSCR
91 for (int i = 0; i < vfp_system_registers->child(0)->childCount(); ++i)
92 vfp_system_registers->child(0)->child(i)->setText(1, QString(""));
93
94 // FPEXC
95 for (int i = 0; i < vfp_system_registers->child(1)->childCount(); ++i)
96 vfp_system_registers->child(1)->child(i)->setText(1, QString(""));
97
98 vfp_system_registers->child(0)->setText(1, QString(""));
99 vfp_system_registers->child(1)->setText(1, QString(""));
100 vfp_system_registers->child(2)->setText(1, QString(""));
101 vfp_system_registers->child(3)->setText(1, QString(""));
102
103 setEnabled(false);
104}
105
106void RegistersWidget::CreateCPSRChildren() {
107 cpsr->addChild(new QTreeWidgetItem(QStringList("M")));
108 cpsr->addChild(new QTreeWidgetItem(QStringList("T")));
109 cpsr->addChild(new QTreeWidgetItem(QStringList("F")));
110 cpsr->addChild(new QTreeWidgetItem(QStringList("I")));
111 cpsr->addChild(new QTreeWidgetItem(QStringList("A")));
112 cpsr->addChild(new QTreeWidgetItem(QStringList("E")));
113 cpsr->addChild(new QTreeWidgetItem(QStringList("IT")));
114 cpsr->addChild(new QTreeWidgetItem(QStringList("GE")));
115 cpsr->addChild(new QTreeWidgetItem(QStringList("DNM")));
116 cpsr->addChild(new QTreeWidgetItem(QStringList("J")));
117 cpsr->addChild(new QTreeWidgetItem(QStringList("Q")));
118 cpsr->addChild(new QTreeWidgetItem(QStringList("V")));
119 cpsr->addChild(new QTreeWidgetItem(QStringList("C")));
120 cpsr->addChild(new QTreeWidgetItem(QStringList("Z")));
121 cpsr->addChild(new QTreeWidgetItem(QStringList("N")));
122}
123
124void RegistersWidget::UpdateCPSRValues() {
125 const u32 cpsr_val = Core::CurrentArmInterface().GetCPSR();
126
127 cpsr->setText(1, QString("0x%1").arg(cpsr_val, 8, 16, QLatin1Char('0')));
128 cpsr->child(0)->setText(
129 1, QString("b%1").arg(cpsr_val & 0x1F, 5, 2, QLatin1Char('0'))); // M - Mode
130 cpsr->child(1)->setText(1, QString::number((cpsr_val >> 5) & 1)); // T - State
131 cpsr->child(2)->setText(1, QString::number((cpsr_val >> 6) & 1)); // F - FIQ disable
132 cpsr->child(3)->setText(1, QString::number((cpsr_val >> 7) & 1)); // I - IRQ disable
133 cpsr->child(4)->setText(1, QString::number((cpsr_val >> 8) & 1)); // A - Imprecise abort
134 cpsr->child(5)->setText(1, QString::number((cpsr_val >> 9) & 1)); // E - Data endianness
135 cpsr->child(6)->setText(1,
136 QString::number((cpsr_val >> 10) & 0x3F)); // IT - If-Then state (DNM)
137 cpsr->child(7)->setText(1,
138 QString::number((cpsr_val >> 16) & 0xF)); // GE - Greater-than-or-Equal
139 cpsr->child(8)->setText(1, QString::number((cpsr_val >> 20) & 0xF)); // DNM - Do not modify
140 cpsr->child(9)->setText(1, QString::number((cpsr_val >> 24) & 1)); // J - Jazelle
141 cpsr->child(10)->setText(1, QString::number((cpsr_val >> 27) & 1)); // Q - Saturation
142 cpsr->child(11)->setText(1, QString::number((cpsr_val >> 28) & 1)); // V - Overflow
143 cpsr->child(12)->setText(1, QString::number((cpsr_val >> 29) & 1)); // C - Carry/Borrow/Extend
144 cpsr->child(13)->setText(1, QString::number((cpsr_val >> 30) & 1)); // Z - Zero
145 cpsr->child(14)->setText(1, QString::number((cpsr_val >> 31) & 1)); // N - Negative/Less than
146}
147
148void RegistersWidget::CreateVFPSystemRegisterChildren() {
149 QTreeWidgetItem* const fpscr = new QTreeWidgetItem(QStringList("FPSCR"));
150 fpscr->addChild(new QTreeWidgetItem(QStringList("IOC")));
151 fpscr->addChild(new QTreeWidgetItem(QStringList("DZC")));
152 fpscr->addChild(new QTreeWidgetItem(QStringList("OFC")));
153 fpscr->addChild(new QTreeWidgetItem(QStringList("UFC")));
154 fpscr->addChild(new QTreeWidgetItem(QStringList("IXC")));
155 fpscr->addChild(new QTreeWidgetItem(QStringList("IDC")));
156 fpscr->addChild(new QTreeWidgetItem(QStringList("IOE")));
157 fpscr->addChild(new QTreeWidgetItem(QStringList("DZE")));
158 fpscr->addChild(new QTreeWidgetItem(QStringList("OFE")));
159 fpscr->addChild(new QTreeWidgetItem(QStringList("UFE")));
160 fpscr->addChild(new QTreeWidgetItem(QStringList("IXE")));
161 fpscr->addChild(new QTreeWidgetItem(QStringList("IDE")));
162 fpscr->addChild(new QTreeWidgetItem(QStringList(tr("Vector Length"))));
163 fpscr->addChild(new QTreeWidgetItem(QStringList(tr("Vector Stride"))));
164 fpscr->addChild(new QTreeWidgetItem(QStringList(tr("Rounding Mode"))));
165 fpscr->addChild(new QTreeWidgetItem(QStringList("FZ")));
166 fpscr->addChild(new QTreeWidgetItem(QStringList("DN")));
167 fpscr->addChild(new QTreeWidgetItem(QStringList("V")));
168 fpscr->addChild(new QTreeWidgetItem(QStringList("C")));
169 fpscr->addChild(new QTreeWidgetItem(QStringList("Z")));
170 fpscr->addChild(new QTreeWidgetItem(QStringList("N")));
171
172 QTreeWidgetItem* const fpexc = new QTreeWidgetItem(QStringList("FPEXC"));
173 fpexc->addChild(new QTreeWidgetItem(QStringList("IOC")));
174 fpexc->addChild(new QTreeWidgetItem(QStringList("OFC")));
175 fpexc->addChild(new QTreeWidgetItem(QStringList("UFC")));
176 fpexc->addChild(new QTreeWidgetItem(QStringList("INV")));
177 fpexc->addChild(new QTreeWidgetItem(QStringList(tr("Vector Iteration Count"))));
178 fpexc->addChild(new QTreeWidgetItem(QStringList("FP2V")));
179 fpexc->addChild(new QTreeWidgetItem(QStringList("EN")));
180 fpexc->addChild(new QTreeWidgetItem(QStringList("EX")));
181
182 vfp_system_registers->addChild(fpscr);
183 vfp_system_registers->addChild(fpexc);
184 vfp_system_registers->addChild(new QTreeWidgetItem(QStringList("FPINST")));
185 vfp_system_registers->addChild(new QTreeWidgetItem(QStringList("FPINST2")));
186}
187
188void RegistersWidget::UpdateVFPSystemRegisterValues() {
189 UNIMPLEMENTED();
190}
diff --git a/src/yuzu/debugger/registers.h b/src/yuzu/debugger/registers.h
deleted file mode 100644
index 55bda5b59..000000000
--- a/src/yuzu/debugger/registers.h
+++ /dev/null
@@ -1,42 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <QDockWidget>
8#include "ui_registers.h"
9
10class QTreeWidget;
11class QTreeWidgetItem;
12class EmuThread;
13
14class RegistersWidget : public QDockWidget {
15 Q_OBJECT
16
17public:
18 explicit RegistersWidget(QWidget* parent = nullptr);
19
20public slots:
21 void OnDebugModeEntered();
22 void OnDebugModeLeft();
23
24 void OnEmulationStarting(EmuThread* emu_thread);
25 void OnEmulationStopping();
26
27private:
28 void CreateCPSRChildren();
29 void UpdateCPSRValues();
30
31 void CreateVFPSystemRegisterChildren();
32 void UpdateVFPSystemRegisterValues();
33
34 Ui::ARMRegisters cpu_regs_ui;
35
36 QTreeWidget* tree;
37
38 QTreeWidgetItem* core_registers;
39 QTreeWidgetItem* vfp_registers;
40 QTreeWidgetItem* vfp_system_registers;
41 QTreeWidgetItem* cpsr;
42};
diff --git a/src/yuzu/debugger/registers.ui b/src/yuzu/debugger/registers.ui
deleted file mode 100644
index c81ae03f9..000000000
--- a/src/yuzu/debugger/registers.ui
+++ /dev/null
@@ -1,40 +0,0 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<ui version="4.0">
3 <class>ARMRegisters</class>
4 <widget class="QDockWidget" name="ARMRegisters">
5 <property name="geometry">
6 <rect>
7 <x>0</x>
8 <y>0</y>
9 <width>400</width>
10 <height>300</height>
11 </rect>
12 </property>
13 <property name="windowTitle">
14 <string>ARM Registers</string>
15 </property>
16 <widget class="QWidget" name="dockWidgetContents">
17 <layout class="QVBoxLayout" name="verticalLayout">
18 <item>
19 <widget class="QTreeWidget" name="treeWidget">
20 <property name="alternatingRowColors">
21 <bool>true</bool>
22 </property>
23 <column>
24 <property name="text">
25 <string>Register</string>
26 </property>
27 </column>
28 <column>
29 <property name="text">
30 <string>Value</string>
31 </property>
32 </column>
33 </widget>
34 </item>
35 </layout>
36 </widget>
37 </widget>
38 <resources/>
39 <connections/>
40</ui>
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 017bef13c..7101b381e 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -213,6 +213,9 @@ QString WaitTreeThread::GetText() const {
213 case THREADSTATUS_WAIT_MUTEX: 213 case THREADSTATUS_WAIT_MUTEX:
214 status = tr("waiting for mutex"); 214 status = tr("waiting for mutex");
215 break; 215 break;
216 case THREADSTATUS_WAIT_ARB:
217 status = tr("waiting for address arbiter");
218 break;
216 case THREADSTATUS_DORMANT: 219 case THREADSTATUS_DORMANT:
217 status = tr("dormant"); 220 status = tr("dormant");
218 break; 221 break;
@@ -240,6 +243,7 @@ QColor WaitTreeThread::GetColor() const {
240 case THREADSTATUS_WAIT_SYNCH_ALL: 243 case THREADSTATUS_WAIT_SYNCH_ALL:
241 case THREADSTATUS_WAIT_SYNCH_ANY: 244 case THREADSTATUS_WAIT_SYNCH_ANY:
242 case THREADSTATUS_WAIT_MUTEX: 245 case THREADSTATUS_WAIT_MUTEX:
246 case THREADSTATUS_WAIT_ARB:
243 return QColor(Qt::GlobalColor::red); 247 return QColor(Qt::GlobalColor::red);
244 case THREADSTATUS_DORMANT: 248 case THREADSTATUS_DORMANT:
245 return QColor(Qt::GlobalColor::darkCyan); 249 return QColor(Qt::GlobalColor::darkCyan);
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index bbd681eae..55dce6d47 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <QApplication> 5#include <QApplication>
6#include <QDir>
6#include <QFileInfo> 7#include <QFileInfo>
7#include <QHeaderView> 8#include <QHeaderView>
8#include <QKeyEvent> 9#include <QKeyEvent>
@@ -264,8 +265,17 @@ void GameList::ValidateEntry(const QModelIndex& item) {
264 if (file_path.isEmpty()) 265 if (file_path.isEmpty())
265 return; 266 return;
266 std::string std_file_path(file_path.toStdString()); 267 std::string std_file_path(file_path.toStdString());
267 if (!FileUtil::Exists(std_file_path) || FileUtil::IsDirectory(std_file_path)) 268 if (!FileUtil::Exists(std_file_path))
268 return; 269 return;
270 if (FileUtil::IsDirectory(std_file_path)) {
271 QDir dir(std_file_path.c_str());
272 QStringList matching_main = dir.entryList(QStringList("main"), QDir::Files);
273 if (matching_main.size() == 1) {
274 emit GameChosen(dir.path() + DIR_SEP + matching_main[0]);
275 }
276 return;
277 }
278
269 // Users usually want to run a diffrent game after closing one 279 // Users usually want to run a diffrent game after closing one
270 search_field->clear(); 280 search_field->clear();
271 emit GameChosen(file_path); 281 emit GameChosen(file_path);
@@ -356,13 +366,26 @@ void GameList::LoadInterfaceLayout() {
356 item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder()); 366 item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder());
357} 367}
358 368
359const QStringList GameList::supported_file_extensions = {"nso", "nro"}; 369const QStringList GameList::supported_file_extensions = {"nso", "nro", "nca"};
360 370
361static bool HasSupportedFileExtension(const std::string& file_name) { 371static bool HasSupportedFileExtension(const std::string& file_name) {
362 QFileInfo file = QFileInfo(file_name.c_str()); 372 QFileInfo file = QFileInfo(file_name.c_str());
363 return GameList::supported_file_extensions.contains(file.suffix(), Qt::CaseInsensitive); 373 return GameList::supported_file_extensions.contains(file.suffix(), Qt::CaseInsensitive);
364} 374}
365 375
376static bool IsExtractedNCAMain(const std::string& file_name) {
377 return QFileInfo(file_name.c_str()).fileName() == "main";
378}
379
380static QString FormatGameName(const std::string& physical_name) {
381 QFileInfo file_info(physical_name.c_str());
382 if (IsExtractedNCAMain(physical_name)) {
383 return file_info.dir().path();
384 } else {
385 return QString::fromStdString(physical_name);
386 }
387}
388
366void GameList::RefreshGameDirectory() { 389void GameList::RefreshGameDirectory() {
367 if (!UISettings::values.gamedir.isEmpty() && current_worker != nullptr) { 390 if (!UISettings::values.gamedir.isEmpty() && current_worker != nullptr) {
368 NGLOG_INFO(Frontend, "Change detected in the games directory. Reloading game list."); 391 NGLOG_INFO(Frontend, "Change detected in the games directory. Reloading game list.");
@@ -380,7 +403,8 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsign
380 return false; // Breaks the callback loop. 403 return false; // Breaks the callback loop.
381 404
382 bool is_dir = FileUtil::IsDirectory(physical_name); 405 bool is_dir = FileUtil::IsDirectory(physical_name);
383 if (!is_dir && HasSupportedFileExtension(physical_name)) { 406 if (!is_dir &&
407 (HasSupportedFileExtension(physical_name) || IsExtractedNCAMain(physical_name))) {
384 std::unique_ptr<Loader::AppLoader> loader = Loader::GetLoader(physical_name); 408 std::unique_ptr<Loader::AppLoader> loader = Loader::GetLoader(physical_name);
385 if (!loader) 409 if (!loader)
386 return true; 410 return true;
@@ -392,7 +416,7 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsign
392 loader->ReadProgramId(program_id); 416 loader->ReadProgramId(program_id);
393 417
394 emit EntryReady({ 418 emit EntryReady({
395 new GameListItemPath(QString::fromStdString(physical_name), smdh, program_id), 419 new GameListItemPath(FormatGameName(physical_name), smdh, program_id),
396 new GameListItem( 420 new GameListItem(
397 QString::fromStdString(Loader::GetFileTypeString(loader->GetFileType()))), 421 QString::fromStdString(Loader::GetFileTypeString(loader->GetFileType()))),
398 new GameListItemSize(FileUtil::GetSize(physical_name)), 422 new GameListItemSize(FileUtil::GetSize(physical_name)),
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 3038bd6da..97be548d7 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -13,6 +13,7 @@
13#include <QMessageBox> 13#include <QMessageBox>
14#include <QtGui> 14#include <QtGui>
15#include <QtWidgets> 15#include <QtWidgets>
16#include "common/common_paths.h"
16#include "common/logging/backend.h" 17#include "common/logging/backend.h"
17#include "common/logging/filter.h" 18#include "common/logging/filter.h"
18#include "common/logging/log.h" 19#include "common/logging/log.h"
@@ -33,7 +34,6 @@
33#include "yuzu/debugger/graphics/graphics_breakpoints.h" 34#include "yuzu/debugger/graphics/graphics_breakpoints.h"
34#include "yuzu/debugger/graphics/graphics_surface.h" 35#include "yuzu/debugger/graphics/graphics_surface.h"
35#include "yuzu/debugger/profiler.h" 36#include "yuzu/debugger/profiler.h"
36#include "yuzu/debugger/registers.h"
37#include "yuzu/debugger/wait_tree.h" 37#include "yuzu/debugger/wait_tree.h"
38#include "yuzu/game_list.h" 38#include "yuzu/game_list.h"
39#include "yuzu/hotkeys.h" 39#include "yuzu/hotkeys.h"
@@ -169,15 +169,6 @@ void GMainWindow::InitializeDebugWidgets() {
169 debug_menu->addAction(microProfileDialog->toggleViewAction()); 169 debug_menu->addAction(microProfileDialog->toggleViewAction());
170#endif 170#endif
171 171
172 registersWidget = new RegistersWidget(this);
173 addDockWidget(Qt::RightDockWidgetArea, registersWidget);
174 registersWidget->hide();
175 debug_menu->addAction(registersWidget->toggleViewAction());
176 connect(this, &GMainWindow::EmulationStarting, registersWidget,
177 &RegistersWidget::OnEmulationStarting);
178 connect(this, &GMainWindow::EmulationStopping, registersWidget,
179 &RegistersWidget::OnEmulationStopping);
180
181 graphicsBreakpointsWidget = new GraphicsBreakPointsWidget(debug_context, this); 172 graphicsBreakpointsWidget = new GraphicsBreakPointsWidget(debug_context, this);
182 addDockWidget(Qt::RightDockWidgetArea, graphicsBreakpointsWidget); 173 addDockWidget(Qt::RightDockWidgetArea, graphicsBreakpointsWidget);
183 graphicsBreakpointsWidget->hide(); 174 graphicsBreakpointsWidget->hide();
@@ -288,6 +279,7 @@ void GMainWindow::ConnectWidgetEvents() {
288void GMainWindow::ConnectMenuEvents() { 279void GMainWindow::ConnectMenuEvents() {
289 // File 280 // File
290 connect(ui.action_Load_File, &QAction::triggered, this, &GMainWindow::OnMenuLoadFile); 281 connect(ui.action_Load_File, &QAction::triggered, this, &GMainWindow::OnMenuLoadFile);
282 connect(ui.action_Load_Folder, &QAction::triggered, this, &GMainWindow::OnMenuLoadFolder);
291 connect(ui.action_Select_Game_List_Root, &QAction::triggered, this, 283 connect(ui.action_Select_Game_List_Root, &QAction::triggered, this,
292 &GMainWindow::OnMenuSelectGameListRoot); 284 &GMainWindow::OnMenuSelectGameListRoot);
293 connect(ui.action_Exit, &QAction::triggered, this, &QMainWindow::close); 285 connect(ui.action_Exit, &QAction::triggered, this, &QMainWindow::close);
@@ -460,17 +452,12 @@ void GMainWindow::BootGame(const QString& filename) {
460 connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame); 452 connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame);
461 // BlockingQueuedConnection is important here, it makes sure we've finished refreshing our views 453 // BlockingQueuedConnection is important here, it makes sure we've finished refreshing our views
462 // before the CPU continues 454 // before the CPU continues
463 connect(emu_thread.get(), &EmuThread::DebugModeEntered, registersWidget,
464 &RegistersWidget::OnDebugModeEntered, Qt::BlockingQueuedConnection);
465 connect(emu_thread.get(), &EmuThread::DebugModeEntered, waitTreeWidget, 455 connect(emu_thread.get(), &EmuThread::DebugModeEntered, waitTreeWidget,
466 &WaitTreeWidget::OnDebugModeEntered, Qt::BlockingQueuedConnection); 456 &WaitTreeWidget::OnDebugModeEntered, Qt::BlockingQueuedConnection);
467 connect(emu_thread.get(), &EmuThread::DebugModeLeft, registersWidget,
468 &RegistersWidget::OnDebugModeLeft, Qt::BlockingQueuedConnection);
469 connect(emu_thread.get(), &EmuThread::DebugModeLeft, waitTreeWidget, 457 connect(emu_thread.get(), &EmuThread::DebugModeLeft, waitTreeWidget,
470 &WaitTreeWidget::OnDebugModeLeft, Qt::BlockingQueuedConnection); 458 &WaitTreeWidget::OnDebugModeLeft, Qt::BlockingQueuedConnection);
471 459
472 // Update the GUI 460 // Update the GUI
473 registersWidget->OnDebugModeEntered();
474 if (ui.action_Single_Window_Mode->isChecked()) { 461 if (ui.action_Single_Window_Mode->isChecked()) {
475 game_list->hide(); 462 game_list->hide();
476 } 463 }
@@ -565,6 +552,8 @@ void GMainWindow::OnMenuLoadFile() {
565 for (const auto& piece : game_list->supported_file_extensions) 552 for (const auto& piece : game_list->supported_file_extensions)
566 extensions += "*." + piece + " "; 553 extensions += "*." + piece + " ";
567 554
555 extensions += "main ";
556
568 QString file_filter = tr("Switch Executable") + " (" + extensions + ")"; 557 QString file_filter = tr("Switch Executable") + " (" + extensions + ")";
569 file_filter += ";;" + tr("All Files (*.*)"); 558 file_filter += ";;" + tr("All Files (*.*)");
570 559
@@ -577,6 +566,18 @@ void GMainWindow::OnMenuLoadFile() {
577 } 566 }
578} 567}
579 568
569void GMainWindow::OnMenuLoadFolder() {
570 QDir dir = QFileDialog::getExistingDirectory(this, tr("Open Extracted ROM Directory"));
571
572 QStringList matching_main = dir.entryList(QStringList("main"), QDir::Files);
573 if (matching_main.size() == 1) {
574 BootGame(dir.path() + DIR_SEP + matching_main[0]);
575 } else {
576 QMessageBox::warning(this, tr("Invalid Directory Selected"),
577 tr("The directory you have selected does not contain a 'main' file."));
578 }
579}
580
580void GMainWindow::OnMenuSelectGameListRoot() { 581void GMainWindow::OnMenuSelectGameListRoot() {
581 QString dir_path = QFileDialog::getExistingDirectory(this, tr("Select Directory")); 582 QString dir_path = QFileDialog::getExistingDirectory(this, tr("Select Directory"));
582 if (!dir_path.isEmpty()) { 583 if (!dir_path.isEmpty()) {
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index ac3024d8a..074bba3f9 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -19,7 +19,6 @@ class GraphicsSurfaceWidget;
19class GRenderWindow; 19class GRenderWindow;
20class MicroProfileDialog; 20class MicroProfileDialog;
21class ProfilerWidget; 21class ProfilerWidget;
22class RegistersWidget;
23class WaitTreeWidget; 22class WaitTreeWidget;
24 23
25namespace Tegra { 24namespace Tegra {
@@ -124,6 +123,7 @@ private slots:
124 void OnGameListLoadFile(QString game_path); 123 void OnGameListLoadFile(QString game_path);
125 void OnGameListOpenSaveFolder(u64 program_id); 124 void OnGameListOpenSaveFolder(u64 program_id);
126 void OnMenuLoadFile(); 125 void OnMenuLoadFile();
126 void OnMenuLoadFolder();
127 /// Called whenever a user selects the "File->Select Game List Root" menu item 127 /// Called whenever a user selects the "File->Select Game List Root" menu item
128 void OnMenuSelectGameListRoot(); 128 void OnMenuSelectGameListRoot();
129 void OnMenuRecentFile(); 129 void OnMenuRecentFile();
@@ -163,7 +163,6 @@ private:
163 // Debugger panes 163 // Debugger panes
164 ProfilerWidget* profilerWidget; 164 ProfilerWidget* profilerWidget;
165 MicroProfileDialog* microProfileDialog; 165 MicroProfileDialog* microProfileDialog;
166 RegistersWidget* registersWidget;
167 GraphicsBreakPointsWidget* graphicsBreakpointsWidget; 166 GraphicsBreakPointsWidget* graphicsBreakpointsWidget;
168 GraphicsSurfaceWidget* graphicsSurfaceWidget; 167 GraphicsSurfaceWidget* graphicsSurfaceWidget;
169 WaitTreeWidget* waitTreeWidget; 168 WaitTreeWidget* waitTreeWidget;
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index 0fcd93cc2..22c4cad08 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -58,6 +58,7 @@
58 </property> 58 </property>
59 </widget> 59 </widget>
60 <addaction name="action_Load_File"/> 60 <addaction name="action_Load_File"/>
61 <addaction name="action_Load_Folder"/>
61 <addaction name="separator"/> 62 <addaction name="separator"/>
62 <addaction name="action_Select_Game_List_Root"/> 63 <addaction name="action_Select_Game_List_Root"/>
63 <addaction name="menu_recent_files"/> 64 <addaction name="menu_recent_files"/>
@@ -106,6 +107,11 @@
106 <string>Load File...</string> 107 <string>Load File...</string>
107 </property> 108 </property>
108 </action> 109 </action>
110 <action name="action_Load_Folder">
111 <property name="text">
112 <string>Load Folder...</string>
113 </property>
114 </action>
109 <action name="action_Load_Symbol_Map"> 115 <action name="action_Load_Symbol_Map">
110 <property name="text"> 116 <property name="text">
111 <string>Load Symbol Map...</string> 117 <string>Load Symbol Map...</string>