summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/logging/backend.cpp1
-rw-r--r--src/common/logging/log.h1
-rw-r--r--src/common/string_util.cpp4
-rw-r--r--src/common/string_util.h2
-rw-r--r--src/core/CMakeLists.txt6
-rw-r--r--src/core/arm/unicorn/arm_unicorn.cpp38
-rw-r--r--src/core/arm/unicorn/arm_unicorn.h4
-rw-r--r--src/core/file_sys/partition_filesystem.cpp24
-rw-r--r--src/core/file_sys/partition_filesystem.h8
-rw-r--r--src/core/gdbstub/gdbstub.cpp170
-rw-r--r--src/core/gdbstub/gdbstub.h9
-rw-r--r--src/core/hle/kernel/address_arbiter.cpp173
-rw-r--r--src/core/hle/kernel/address_arbiter.h32
-rw-r--r--src/core/hle/kernel/errors.h12
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp5
-rw-r--r--src/core/hle/kernel/mutex.cpp4
-rw-r--r--src/core/hle/kernel/svc.cpp68
-rw-r--r--src/core/hle/kernel/svc_wrap.h14
-rw-r--r--src/core/hle/kernel/thread.cpp6
-rw-r--r--src/core/hle/kernel/thread.h4
-rw-r--r--src/core/hle/service/am/am.cpp13
-rw-r--r--src/core/hle/service/am/am.h1
-rw-r--r--src/core/hle/service/audio/audren_u.cpp105
-rw-r--r--src/core/hle/service/audio/audren_u.h6
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.cpp31
-rw-r--r--src/core/hle/service/hid/hid.cpp7
-rw-r--r--src/core/hle/service/hid/hid.h2
-rw-r--r--src/core/hle/service/mm/mm_u.cpp50
-rw-r--r--src/core/hle/service/mm/mm_u.h29
-rw-r--r--src/core/hle/service/nfp/nfp.cpp108
-rw-r--r--src/core/hle/service/nifm/nifm.cpp11
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp22
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h31
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp3
-rw-r--r--src/core/hle/service/service.cpp2
-rw-r--r--src/core/hle/service/set/set.cpp5
-rw-r--r--src/core/loader/loader.cpp10
-rw-r--r--src/core/loader/loader.h1
-rw-r--r--src/core/loader/nca.cpp303
-rw-r--r--src/core/loader/nca.h49
-rw-r--r--src/core/loader/nso.cpp79
-rw-r--r--src/core/loader/nso.h3
-rw-r--r--src/core/memory.cpp4
-rw-r--r--src/core/memory.h7
-rw-r--r--src/video_core/CMakeLists.txt4
-rw-r--r--src/video_core/command_processor.cpp6
-rw-r--r--src/video_core/engines/fermi_2d.cpp1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp39
-rw-r--r--src/video_core/engines/maxwell_3d.h34
-rw-r--r--src/video_core/engines/maxwell_dma.cpp69
-rw-r--r--src/video_core/engines/maxwell_dma.h155
-rw-r--r--src/video_core/engines/shader_bytecode.h181
-rw-r--r--src/video_core/gpu.cpp6
-rw-r--r--src/video_core/gpu.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp167
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h24
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp139
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h36
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp581
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h58
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp23
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h1
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_state.h6
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h23
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp1
-rw-r--r--src/video_core/textures/astc.cpp1646
-rw-r--r--src/video_core/textures/astc.h15
-rw-r--r--src/video_core/textures/decoders.cpp6
-rw-r--r--src/video_core/textures/texture.h16
-rw-r--r--src/yuzu/CMakeLists.txt3
-rw-r--r--src/yuzu/debugger/registers.cpp190
-rw-r--r--src/yuzu/debugger/registers.h42
-rw-r--r--src/yuzu/debugger/registers.ui40
-rw-r--r--src/yuzu/debugger/wait_tree.cpp4
-rw-r--r--src/yuzu/game_list.cpp32
-rw-r--r--src/yuzu/main.cpp31
-rw-r--r--src/yuzu/main.h3
-rw-r--r--src/yuzu/main.ui6
80 files changed, 4260 insertions, 823 deletions
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 3e31a74f2..c26b20062 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -41,6 +41,7 @@ namespace Log {
41 SUB(Service, FS) \ 41 SUB(Service, FS) \
42 SUB(Service, HID) \ 42 SUB(Service, HID) \
43 SUB(Service, LM) \ 43 SUB(Service, LM) \
44 SUB(Service, MM) \
44 SUB(Service, NFP) \ 45 SUB(Service, NFP) \
45 SUB(Service, NIFM) \ 46 SUB(Service, NIFM) \
46 SUB(Service, NS) \ 47 SUB(Service, NS) \
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index 43e572ebe..c5015531c 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -61,6 +61,7 @@ enum class Class : ClassType {
61 Service_FS, ///< The FS (Filesystem) service 61 Service_FS, ///< The FS (Filesystem) service
62 Service_HID, ///< The HID (Human interface device) service 62 Service_HID, ///< The HID (Human interface device) service
63 Service_LM, ///< The LM (Logger) service 63 Service_LM, ///< The LM (Logger) service
64 Service_MM, ///< The MM (Multimedia) service
64 Service_NFP, ///< The NFP service 65 Service_NFP, ///< The NFP service
65 Service_NIFM, ///< The NIFM (Network interface) service 66 Service_NIFM, ///< The NIFM (Network interface) service
66 Service_NS, ///< The NS services 67 Service_NS, ///< The NS services
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp
index 1d952874d..646400db0 100644
--- a/src/common/string_util.cpp
+++ b/src/common/string_util.cpp
@@ -64,6 +64,10 @@ std::string ArrayToString(const u8* data, size_t size, int line_len, bool spaces
64 return oss.str(); 64 return oss.str();
65} 65}
66 66
67std::string StringFromBuffer(const std::vector<u8>& data) {
68 return std::string(data.begin(), std::find(data.begin(), data.end(), '\0'));
69}
70
67// Turns " hej " into "hej". Also handles tabs. 71// Turns " hej " into "hej". Also handles tabs.
68std::string StripSpaces(const std::string& str) { 72std::string StripSpaces(const std::string& str) {
69 const size_t s = str.find_first_not_of(" \t\r\n"); 73 const size_t s = str.find_first_not_of(" \t\r\n");
diff --git a/src/common/string_util.h b/src/common/string_util.h
index 65e4ea5d3..1f5a383cb 100644
--- a/src/common/string_util.h
+++ b/src/common/string_util.h
@@ -21,6 +21,8 @@ std::string ToUpper(std::string str);
21 21
22std::string ArrayToString(const u8* data, size_t size, int line_len = 20, bool spaces = true); 22std::string ArrayToString(const u8* data, size_t size, int line_len = 20, bool spaces = true);
23 23
24std::string StringFromBuffer(const std::vector<u8>& data);
25
24std::string StripSpaces(const std::string& s); 26std::string StripSpaces(const std::string& s);
25std::string StripQuotes(const std::string& s); 27std::string StripQuotes(const std::string& s);
26 28
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index aff1d2180..51e4088d2 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -40,6 +40,8 @@ add_library(core STATIC
40 hle/config_mem.h 40 hle/config_mem.h
41 hle/ipc.h 41 hle/ipc.h
42 hle/ipc_helpers.h 42 hle/ipc_helpers.h
43 hle/kernel/address_arbiter.cpp
44 hle/kernel/address_arbiter.h
43 hle/kernel/client_port.cpp 45 hle/kernel/client_port.cpp
44 hle/kernel/client_port.h 46 hle/kernel/client_port.h
45 hle/kernel/client_session.cpp 47 hle/kernel/client_session.cpp
@@ -148,6 +150,8 @@ add_library(core STATIC
148 hle/service/hid/hid.h 150 hle/service/hid/hid.h
149 hle/service/lm/lm.cpp 151 hle/service/lm/lm.cpp
150 hle/service/lm/lm.h 152 hle/service/lm/lm.h
153 hle/service/mm/mm_u.cpp
154 hle/service/mm/mm_u.h
151 hle/service/nifm/nifm.cpp 155 hle/service/nifm/nifm.cpp
152 hle/service/nifm/nifm.h 156 hle/service/nifm/nifm.h
153 hle/service/nifm/nifm_a.cpp 157 hle/service/nifm/nifm_a.cpp
@@ -255,6 +259,8 @@ add_library(core STATIC
255 loader/linker.h 259 loader/linker.h
256 loader/loader.cpp 260 loader/loader.cpp
257 loader/loader.h 261 loader/loader.h
262 loader/nca.cpp
263 loader/nca.h
258 loader/nro.cpp 264 loader/nro.cpp
259 loader/nro.h 265 loader/nro.h
260 loader/nso.cpp 266 loader/nso.cpp
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index c0cc62f03..ce6c5616d 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -35,6 +35,17 @@ LoadDll LoadDll::g_load_dll;
35 } \ 35 } \
36 } while (0) 36 } while (0)
37 37
38static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_data) {
39 GDBStub::BreakpointAddress bkpt =
40 GDBStub::GetNextBreakpointFromAddress(address, GDBStub::BreakpointType::Execute);
41 if (GDBStub::IsMemoryBreak() ||
42 (bkpt.type != GDBStub::BreakpointType::None && address == bkpt.address)) {
43 auto core = static_cast<ARM_Unicorn*>(user_data);
44 core->RecordBreak(bkpt);
45 uc_emu_stop(uc);
46 }
47}
48
38static void InterruptHook(uc_engine* uc, u32 intNo, void* user_data) { 49static void InterruptHook(uc_engine* uc, u32 intNo, void* user_data) {
39 u32 esr{}; 50 u32 esr{};
40 CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr)); 51 CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
@@ -67,6 +78,10 @@ ARM_Unicorn::ARM_Unicorn() {
67 uc_hook hook{}; 78 uc_hook hook{};
68 CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1)); 79 CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1));
69 CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1)); 80 CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1));
81 if (GDBStub::IsServerEnabled()) {
82 CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1));
83 last_bkpt_hit = false;
84 }
70} 85}
71 86
72ARM_Unicorn::~ARM_Unicorn() { 87ARM_Unicorn::~ARM_Unicorn() {
@@ -155,7 +170,11 @@ void ARM_Unicorn::SetTlsAddress(VAddr base) {
155} 170}
156 171
157void ARM_Unicorn::Run() { 172void ARM_Unicorn::Run() {
158 ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0)); 173 if (GDBStub::IsServerEnabled()) {
174 ExecuteInstructions(std::max(4000000, 0));
175 } else {
176 ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0));
177 }
159} 178}
160 179
161void ARM_Unicorn::Step() { 180void ARM_Unicorn::Step() {
@@ -168,6 +187,18 @@ void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
168 MICROPROFILE_SCOPE(ARM_Jit); 187 MICROPROFILE_SCOPE(ARM_Jit);
169 CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions)); 188 CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
170 CoreTiming::AddTicks(num_instructions); 189 CoreTiming::AddTicks(num_instructions);
190 if (GDBStub::IsServerEnabled()) {
191 if (last_bkpt_hit) {
192 uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
193 }
194 Kernel::Thread* thread = Kernel::GetCurrentThread();
195 SaveContext(thread->context);
196 if (last_bkpt_hit) {
197 last_bkpt_hit = false;
198 GDBStub::Break();
199 }
200 GDBStub::SendTrap(thread, 5);
201 }
171} 202}
172 203
173void ARM_Unicorn::SaveContext(ARM_Interface::ThreadContext& ctx) { 204void ARM_Unicorn::SaveContext(ARM_Interface::ThreadContext& ctx) {
@@ -233,3 +264,8 @@ void ARM_Unicorn::PrepareReschedule() {
233} 264}
234 265
235void ARM_Unicorn::ClearInstructionCache() {} 266void ARM_Unicorn::ClearInstructionCache() {}
267
268void ARM_Unicorn::RecordBreak(GDBStub::BreakpointAddress bkpt) {
269 last_bkpt = bkpt;
270 last_bkpt_hit = true;
271}
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index b99b58e4c..a482a2aa3 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -7,6 +7,7 @@
7#include <unicorn/unicorn.h> 7#include <unicorn/unicorn.h>
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "core/arm/arm_interface.h" 9#include "core/arm/arm_interface.h"
10#include "core/gdbstub/gdbstub.h"
10 11
11class ARM_Unicorn final : public ARM_Interface { 12class ARM_Unicorn final : public ARM_Interface {
12public: 13public:
@@ -35,7 +36,10 @@ public:
35 void Step() override; 36 void Step() override;
36 void ClearInstructionCache() override; 37 void ClearInstructionCache() override;
37 void PageTableChanged() override{}; 38 void PageTableChanged() override{};
39 void RecordBreak(GDBStub::BreakpointAddress bkpt);
38 40
39private: 41private:
40 uc_engine* uc{}; 42 uc_engine* uc{};
43 GDBStub::BreakpointAddress last_bkpt{};
44 bool last_bkpt_hit;
41}; 45};
diff --git a/src/core/file_sys/partition_filesystem.cpp b/src/core/file_sys/partition_filesystem.cpp
index 808254ecc..874b9e23b 100644
--- a/src/core/file_sys/partition_filesystem.cpp
+++ b/src/core/file_sys/partition_filesystem.cpp
@@ -19,13 +19,20 @@ Loader::ResultStatus PartitionFilesystem::Load(const std::string& file_path, siz
19 if (file.GetSize() < sizeof(Header)) 19 if (file.GetSize() < sizeof(Header))
20 return Loader::ResultStatus::Error; 20 return Loader::ResultStatus::Error;
21 21
22 file.Seek(offset, SEEK_SET);
22 // For cartridges, HFSs can get very large, so we need to calculate the size up to 23 // For cartridges, HFSs can get very large, so we need to calculate the size up to
23 // the actual content itself instead of just blindly reading in the entire file. 24 // the actual content itself instead of just blindly reading in the entire file.
24 Header pfs_header; 25 Header pfs_header;
25 if (!file.ReadBytes(&pfs_header, sizeof(Header))) 26 if (!file.ReadBytes(&pfs_header, sizeof(Header)))
26 return Loader::ResultStatus::Error; 27 return Loader::ResultStatus::Error;
27 28
28 bool is_hfs = (memcmp(pfs_header.magic.data(), "HFS", 3) == 0); 29 if (pfs_header.magic != Common::MakeMagic('H', 'F', 'S', '0') &&
30 pfs_header.magic != Common::MakeMagic('P', 'F', 'S', '0')) {
31 return Loader::ResultStatus::ErrorInvalidFormat;
32 }
33
34 bool is_hfs = pfs_header.magic == Common::MakeMagic('H', 'F', 'S', '0');
35
29 size_t entry_size = is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry); 36 size_t entry_size = is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry);
30 size_t metadata_size = 37 size_t metadata_size =
31 sizeof(Header) + (pfs_header.num_entries * entry_size) + pfs_header.strtab_size; 38 sizeof(Header) + (pfs_header.num_entries * entry_size) + pfs_header.strtab_size;
@@ -50,7 +57,12 @@ Loader::ResultStatus PartitionFilesystem::Load(const std::vector<u8>& file_data,
50 return Loader::ResultStatus::Error; 57 return Loader::ResultStatus::Error;
51 58
52 memcpy(&pfs_header, &file_data[offset], sizeof(Header)); 59 memcpy(&pfs_header, &file_data[offset], sizeof(Header));
53 is_hfs = (memcmp(pfs_header.magic.data(), "HFS", 3) == 0); 60 if (pfs_header.magic != Common::MakeMagic('H', 'F', 'S', '0') &&
61 pfs_header.magic != Common::MakeMagic('P', 'F', 'S', '0')) {
62 return Loader::ResultStatus::ErrorInvalidFormat;
63 }
64
65 is_hfs = pfs_header.magic == Common::MakeMagic('H', 'F', 'S', '0');
54 66
55 size_t entries_offset = offset + sizeof(Header); 67 size_t entries_offset = offset + sizeof(Header);
56 size_t entry_size = is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry); 68 size_t entry_size = is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry);
@@ -73,21 +85,21 @@ u32 PartitionFilesystem::GetNumEntries() const {
73 return pfs_header.num_entries; 85 return pfs_header.num_entries;
74} 86}
75 87
76u64 PartitionFilesystem::GetEntryOffset(int index) const { 88u64 PartitionFilesystem::GetEntryOffset(u32 index) const {
77 if (index > GetNumEntries()) 89 if (index > GetNumEntries())
78 return 0; 90 return 0;
79 91
80 return content_offset + pfs_entries[index].fs_entry.offset; 92 return content_offset + pfs_entries[index].fs_entry.offset;
81} 93}
82 94
83u64 PartitionFilesystem::GetEntrySize(int index) const { 95u64 PartitionFilesystem::GetEntrySize(u32 index) const {
84 if (index > GetNumEntries()) 96 if (index > GetNumEntries())
85 return 0; 97 return 0;
86 98
87 return pfs_entries[index].fs_entry.size; 99 return pfs_entries[index].fs_entry.size;
88} 100}
89 101
90std::string PartitionFilesystem::GetEntryName(int index) const { 102std::string PartitionFilesystem::GetEntryName(u32 index) const {
91 if (index > GetNumEntries()) 103 if (index > GetNumEntries())
92 return ""; 104 return "";
93 105
@@ -113,7 +125,7 @@ u64 PartitionFilesystem::GetFileSize(const std::string& name) const {
113} 125}
114 126
115void PartitionFilesystem::Print() const { 127void PartitionFilesystem::Print() const {
116 NGLOG_DEBUG(Service_FS, "Magic: {:.4}", pfs_header.magic.data()); 128 NGLOG_DEBUG(Service_FS, "Magic: {}", pfs_header.magic);
117 NGLOG_DEBUG(Service_FS, "Files: {}", pfs_header.num_entries); 129 NGLOG_DEBUG(Service_FS, "Files: {}", pfs_header.num_entries);
118 for (u32 i = 0; i < pfs_header.num_entries; i++) { 130 for (u32 i = 0; i < pfs_header.num_entries; i++) {
119 NGLOG_DEBUG(Service_FS, " > File {}: {} (0x{:X} bytes, at 0x{:X})", i, 131 NGLOG_DEBUG(Service_FS, " > File {}: {} (0x{:X} bytes, at 0x{:X})", i,
diff --git a/src/core/file_sys/partition_filesystem.h b/src/core/file_sys/partition_filesystem.h
index 573c90057..9c5810cf1 100644
--- a/src/core/file_sys/partition_filesystem.h
+++ b/src/core/file_sys/partition_filesystem.h
@@ -27,9 +27,9 @@ public:
27 Loader::ResultStatus Load(const std::vector<u8>& file_data, size_t offset = 0); 27 Loader::ResultStatus Load(const std::vector<u8>& file_data, size_t offset = 0);
28 28
29 u32 GetNumEntries() const; 29 u32 GetNumEntries() const;
30 u64 GetEntryOffset(int index) const; 30 u64 GetEntryOffset(u32 index) const;
31 u64 GetEntrySize(int index) const; 31 u64 GetEntrySize(u32 index) const;
32 std::string GetEntryName(int index) const; 32 std::string GetEntryName(u32 index) const;
33 u64 GetFileOffset(const std::string& name) const; 33 u64 GetFileOffset(const std::string& name) const;
34 u64 GetFileSize(const std::string& name) const; 34 u64 GetFileSize(const std::string& name) const;
35 35
@@ -37,7 +37,7 @@ public:
37 37
38private: 38private:
39 struct Header { 39 struct Header {
40 std::array<char, 4> magic; 40 u32_le magic;
41 u32_le num_entries; 41 u32_le num_entries;
42 u32_le strtab_size; 42 u32_le strtab_size;
43 INSERT_PADDING_BYTES(0x4); 43 INSERT_PADDING_BYTES(0x4);
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 6c5a40ba8..2603192fe 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -32,9 +32,13 @@
32 32
33#include "common/logging/log.h" 33#include "common/logging/log.h"
34#include "common/string_util.h" 34#include "common/string_util.h"
35#include "common/swap.h"
35#include "core/arm/arm_interface.h" 36#include "core/arm/arm_interface.h"
36#include "core/core.h" 37#include "core/core.h"
38#include "core/core_cpu.h"
37#include "core/gdbstub/gdbstub.h" 39#include "core/gdbstub/gdbstub.h"
40#include "core/hle/kernel/kernel.h"
41#include "core/hle/kernel/scheduler.h"
38#include "core/loader/loader.h" 42#include "core/loader/loader.h"
39#include "core/memory.h" 43#include "core/memory.h"
40 44
@@ -137,15 +141,17 @@ static u8 command_buffer[GDB_BUFFER_SIZE];
137static u32 command_length; 141static u32 command_length;
138 142
139static u32 latest_signal = 0; 143static u32 latest_signal = 0;
140static bool step_break = false;
141static bool memory_break = false; 144static bool memory_break = false;
142 145
146static Kernel::Thread* current_thread = nullptr;
147
143// Binding to a port within the reserved ports range (0-1023) requires root permissions, 148// Binding to a port within the reserved ports range (0-1023) requires root permissions,
144// so default to a port outside of that range. 149// so default to a port outside of that range.
145static u16 gdbstub_port = 24689; 150static u16 gdbstub_port = 24689;
146 151
147static bool halt_loop = true; 152static bool halt_loop = true;
148static bool step_loop = false; 153static bool step_loop = false;
154static bool send_trap = false;
149 155
150// If set to false, the server will never be started and no 156// If set to false, the server will never be started and no
151// gdbstub-related functions will be executed. 157// gdbstub-related functions will be executed.
@@ -165,6 +171,53 @@ static std::map<u64, Breakpoint> breakpoints_execute;
165static std::map<u64, Breakpoint> breakpoints_read; 171static std::map<u64, Breakpoint> breakpoints_read;
166static std::map<u64, Breakpoint> breakpoints_write; 172static std::map<u64, Breakpoint> breakpoints_write;
167 173
174static Kernel::Thread* FindThreadById(int id) {
175 for (int core = 0; core < Core::NUM_CPU_CORES; core++) {
176 auto threads = Core::System::GetInstance().Scheduler(core)->GetThreadList();
177 for (auto thread : threads) {
178 if (thread->GetThreadId() == id) {
179 current_thread = thread.get();
180 return current_thread;
181 }
182 }
183 }
184 return nullptr;
185}
186
187static u64 RegRead(int id, Kernel::Thread* thread = nullptr) {
188 if (!thread) {
189 return 0;
190 }
191
192 if (id < SP_REGISTER) {
193 return thread->context.cpu_registers[id];
194 } else if (id == SP_REGISTER) {
195 return thread->context.sp;
196 } else if (id == PC_REGISTER) {
197 return thread->context.pc;
198 } else if (id == CPSR_REGISTER) {
199 return thread->context.cpsr;
200 } else {
201 return 0;
202 }
203}
204
205static void RegWrite(int id, u64 val, Kernel::Thread* thread = nullptr) {
206 if (!thread) {
207 return;
208 }
209
210 if (id < SP_REGISTER) {
211 thread->context.cpu_registers[id] = val;
212 } else if (id == SP_REGISTER) {
213 thread->context.sp = val;
214 } else if (id == PC_REGISTER) {
215 thread->context.pc = val;
216 } else if (id == CPSR_REGISTER) {
217 thread->context.cpsr = val;
218 }
219}
220
168/** 221/**
169 * Turns hex string character into the equivalent byte. 222 * Turns hex string character into the equivalent byte.
170 * 223 *
@@ -193,7 +246,7 @@ static u8 NibbleToHex(u8 n) {
193 if (n < 0xA) { 246 if (n < 0xA) {
194 return '0' + n; 247 return '0' + n;
195 } else { 248 } else {
196 return 'A' + n - 0xA; 249 return 'a' + n - 0xA;
197 } 250 }
198} 251}
199 252
@@ -439,6 +492,8 @@ static void SendReply(const char* reply) {
439 return; 492 return;
440 } 493 }
441 494
495 NGLOG_DEBUG(Debug_GDBStub, "Reply: {}", reply);
496
442 memset(command_buffer, 0, sizeof(command_buffer)); 497 memset(command_buffer, 0, sizeof(command_buffer));
443 498
444 command_length = static_cast<u32>(strlen(reply)); 499 command_length = static_cast<u32>(strlen(reply));
@@ -483,6 +538,22 @@ static void HandleQuery() {
483 } else if (strncmp(query, "Xfer:features:read:target.xml:", 538 } else if (strncmp(query, "Xfer:features:read:target.xml:",
484 strlen("Xfer:features:read:target.xml:")) == 0) { 539 strlen("Xfer:features:read:target.xml:")) == 0) {
485 SendReply(target_xml); 540 SendReply(target_xml);
541 } else if (strncmp(query, "Offsets", strlen("Offsets")) == 0) {
542 std::string buffer = fmt::format("TextSeg={:0x}", Memory::PROCESS_IMAGE_VADDR);
543 SendReply(buffer.c_str());
544 } else if (strncmp(query, "fThreadInfo", strlen("fThreadInfo")) == 0) {
545 std::string val = "m";
546 for (int core = 0; core < Core::NUM_CPU_CORES; core++) {
547 auto threads = Core::System::GetInstance().Scheduler(core)->GetThreadList();
548 for (auto thread : threads) {
549 val += fmt::format("{:x}", thread->GetThreadId());
550 val += ",";
551 }
552 }
553 val.pop_back();
554 SendReply(val.c_str());
555 } else if (strncmp(query, "sThreadInfo", strlen("sThreadInfo")) == 0) {
556 SendReply("l");
486 } else { 557 } else {
487 SendReply(""); 558 SendReply("");
488 } 559 }
@@ -490,11 +561,40 @@ static void HandleQuery() {
490 561
491/// Handle set thread command from gdb client. 562/// Handle set thread command from gdb client.
492static void HandleSetThread() { 563static void HandleSetThread() {
493 if (memcmp(command_buffer, "Hg0", 3) == 0 || memcmp(command_buffer, "Hc-1", 4) == 0 || 564 if (memcmp(command_buffer, "Hc", 2) == 0 || memcmp(command_buffer, "Hg", 2) == 0) {
494 memcmp(command_buffer, "Hc0", 4) == 0 || memcmp(command_buffer, "Hc1", 4) == 0) { 565 int thread_id = -1;
495 return SendReply("OK"); 566 if (command_buffer[2] != '-') {
567 thread_id = static_cast<int>(HexToInt(
568 command_buffer + 2,
569 command_length - 2 /*strlen(reinterpret_cast<char*>(command_buffer) + 2)*/));
570 }
571 if (thread_id >= 1) {
572 current_thread = FindThreadById(thread_id);
573 }
574 if (!current_thread) {
575 thread_id = 1;
576 current_thread = FindThreadById(thread_id);
577 }
578 if (current_thread) {
579 SendReply("OK");
580 return;
581 }
496 } 582 }
583 SendReply("E01");
584}
497 585
586/// Handle thread alive command from gdb client.
587static void HandleThreadAlive() {
588 int thread_id = static_cast<int>(
589 HexToInt(command_buffer + 1,
590 command_length - 1 /*strlen(reinterpret_cast<char*>(command_buffer) + 1)*/));
591 if (thread_id == 0) {
592 thread_id = 1;
593 }
594 if (FindThreadById(thread_id)) {
595 SendReply("OK");
596 return;
597 }
498 SendReply("E01"); 598 SendReply("E01");
499} 599}
500 600
@@ -503,15 +603,24 @@ static void HandleSetThread() {
503 * 603 *
504 * @param signal Signal to be sent to client. 604 * @param signal Signal to be sent to client.
505 */ 605 */
506static void SendSignal(u32 signal) { 606static void SendSignal(Kernel::Thread* thread, u32 signal, bool full = true) {
507 if (gdbserver_socket == -1) { 607 if (gdbserver_socket == -1) {
508 return; 608 return;
509 } 609 }
510 610
511 latest_signal = signal; 611 latest_signal = signal;
512 612
513 std::string buffer = fmt::format("T{:02x}", latest_signal); 613 std::string buffer;
514 NGLOG_DEBUG(Debug_GDBStub, "Response: {}", buffer); 614 if (full) {
615 buffer = fmt::format("T{:02x}{:02x}:{:016x};{:02x}:{:016x};", latest_signal, PC_REGISTER,
616 Common::swap64(RegRead(PC_REGISTER, thread)), SP_REGISTER,
617 Common::swap64(RegRead(SP_REGISTER, thread)));
618 } else {
619 buffer = fmt::format("T{:02x};", latest_signal);
620 }
621
622 buffer += fmt::format("thread:{:x};", thread->GetThreadId());
623
515 SendReply(buffer.c_str()); 624 SendReply(buffer.c_str());
516} 625}
517 626
@@ -527,7 +636,7 @@ static void ReadCommand() {
527 } else if (c == 0x03) { 636 } else if (c == 0x03) {
528 NGLOG_INFO(Debug_GDBStub, "gdb: found break command"); 637 NGLOG_INFO(Debug_GDBStub, "gdb: found break command");
529 halt_loop = true; 638 halt_loop = true;
530 SendSignal(SIGTRAP); 639 SendSignal(current_thread, SIGTRAP);
531 return; 640 return;
532 } else if (c != GDB_STUB_START) { 641 } else if (c != GDB_STUB_START) {
533 NGLOG_DEBUG(Debug_GDBStub, "gdb: read invalid byte {:02X}", c); 642 NGLOG_DEBUG(Debug_GDBStub, "gdb: read invalid byte {:02X}", c);
@@ -598,11 +707,11 @@ static void ReadRegister() {
598 } 707 }
599 708
600 if (id <= SP_REGISTER) { 709 if (id <= SP_REGISTER) {
601 LongToGdbHex(reply, Core::CurrentArmInterface().GetReg(static_cast<int>(id))); 710 LongToGdbHex(reply, RegRead(id, current_thread));
602 } else if (id == PC_REGISTER) { 711 } else if (id == PC_REGISTER) {
603 LongToGdbHex(reply, Core::CurrentArmInterface().GetPC()); 712 LongToGdbHex(reply, RegRead(id, current_thread));
604 } else if (id == CPSR_REGISTER) { 713 } else if (id == CPSR_REGISTER) {
605 IntToGdbHex(reply, Core::CurrentArmInterface().GetCPSR()); 714 IntToGdbHex(reply, (u32)RegRead(id, current_thread));
606 } else { 715 } else {
607 return SendReply("E01"); 716 return SendReply("E01");
608 } 717 }
@@ -618,16 +727,16 @@ static void ReadRegisters() {
618 u8* bufptr = buffer; 727 u8* bufptr = buffer;
619 728
620 for (int reg = 0; reg <= SP_REGISTER; reg++) { 729 for (int reg = 0; reg <= SP_REGISTER; reg++) {
621 LongToGdbHex(bufptr + reg * 16, Core::CurrentArmInterface().GetReg(reg)); 730 LongToGdbHex(bufptr + reg * 16, RegRead(reg, current_thread));
622 } 731 }
623 732
624 bufptr += (32 * 16); 733 bufptr += (32 * 16);
625 734
626 LongToGdbHex(bufptr, Core::CurrentArmInterface().GetPC()); 735 LongToGdbHex(bufptr, RegRead(PC_REGISTER, current_thread));
627 736
628 bufptr += 16; 737 bufptr += 16;
629 738
630 IntToGdbHex(bufptr, Core::CurrentArmInterface().GetCPSR()); 739 IntToGdbHex(bufptr, (u32)RegRead(CPSR_REGISTER, current_thread));
631 740
632 bufptr += 8; 741 bufptr += 8;
633 742
@@ -646,11 +755,11 @@ static void WriteRegister() {
646 } 755 }
647 756
648 if (id <= SP_REGISTER) { 757 if (id <= SP_REGISTER) {
649 Core::CurrentArmInterface().SetReg(id, GdbHexToLong(buffer_ptr)); 758 RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
650 } else if (id == PC_REGISTER) { 759 } else if (id == PC_REGISTER) {
651 Core::CurrentArmInterface().SetPC(GdbHexToLong(buffer_ptr)); 760 RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
652 } else if (id == CPSR_REGISTER) { 761 } else if (id == CPSR_REGISTER) {
653 Core::CurrentArmInterface().SetCPSR(GdbHexToInt(buffer_ptr)); 762 RegWrite(id, GdbHexToInt(buffer_ptr), current_thread);
654 } else { 763 } else {
655 return SendReply("E01"); 764 return SendReply("E01");
656 } 765 }
@@ -667,11 +776,11 @@ static void WriteRegisters() {
667 776
668 for (int i = 0, reg = 0; reg <= CPSR_REGISTER; i++, reg++) { 777 for (int i = 0, reg = 0; reg <= CPSR_REGISTER; i++, reg++) {
669 if (reg <= SP_REGISTER) { 778 if (reg <= SP_REGISTER) {
670 Core::CurrentArmInterface().SetReg(reg, GdbHexToLong(buffer_ptr + i * 16)); 779 RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread);
671 } else if (reg == PC_REGISTER) { 780 } else if (reg == PC_REGISTER) {
672 Core::CurrentArmInterface().SetPC(GdbHexToLong(buffer_ptr + i * 16)); 781 RegWrite(PC_REGISTER, GdbHexToLong(buffer_ptr + i * 16), current_thread);
673 } else if (reg == CPSR_REGISTER) { 782 } else if (reg == CPSR_REGISTER) {
674 Core::CurrentArmInterface().SetCPSR(GdbHexToInt(buffer_ptr + i * 16)); 783 RegWrite(CPSR_REGISTER, GdbHexToInt(buffer_ptr + i * 16), current_thread);
675 } else { 784 } else {
676 UNIMPLEMENTED(); 785 UNIMPLEMENTED();
677 } 786 }
@@ -734,7 +843,7 @@ static void WriteMemory() {
734void Break(bool is_memory_break) { 843void Break(bool is_memory_break) {
735 if (!halt_loop) { 844 if (!halt_loop) {
736 halt_loop = true; 845 halt_loop = true;
737 SendSignal(SIGTRAP); 846 send_trap = true;
738 } 847 }
739 848
740 memory_break = is_memory_break; 849 memory_break = is_memory_break;
@@ -744,10 +853,10 @@ void Break(bool is_memory_break) {
744static void Step() { 853static void Step() {
745 step_loop = true; 854 step_loop = true;
746 halt_loop = true; 855 halt_loop = true;
747 step_break = true; 856 send_trap = true;
748 SendSignal(SIGTRAP);
749} 857}
750 858
859/// Tell the CPU if we hit a memory breakpoint.
751bool IsMemoryBreak() { 860bool IsMemoryBreak() {
752 if (IsConnected()) { 861 if (IsConnected()) {
753 return false; 862 return false;
@@ -759,7 +868,6 @@ bool IsMemoryBreak() {
759/// Tell the CPU to continue executing. 868/// Tell the CPU to continue executing.
760static void Continue() { 869static void Continue() {
761 memory_break = false; 870 memory_break = false;
762 step_break = false;
763 step_loop = false; 871 step_loop = false;
764 halt_loop = false; 872 halt_loop = false;
765} 873}
@@ -898,7 +1006,7 @@ void HandlePacket() {
898 HandleSetThread(); 1006 HandleSetThread();
899 break; 1007 break;
900 case '?': 1008 case '?':
901 SendSignal(latest_signal); 1009 SendSignal(current_thread, latest_signal);
902 break; 1010 break;
903 case 'k': 1011 case 'k':
904 Shutdown(); 1012 Shutdown();
@@ -935,6 +1043,9 @@ void HandlePacket() {
935 case 'Z': 1043 case 'Z':
936 AddBreakpoint(); 1044 AddBreakpoint();
937 break; 1045 break;
1046 case 'T':
1047 HandleThreadAlive();
1048 break;
938 default: 1049 default:
939 SendReply(""); 1050 SendReply("");
940 break; 1051 break;
@@ -1079,4 +1190,11 @@ bool GetCpuStepFlag() {
1079void SetCpuStepFlag(bool is_step) { 1190void SetCpuStepFlag(bool is_step) {
1080 step_loop = is_step; 1191 step_loop = is_step;
1081} 1192}
1193
1194void SendTrap(Kernel::Thread* thread, int trap) {
1195 if (send_trap) {
1196 send_trap = false;
1197 SendSignal(thread, trap);
1198 }
1199}
1082}; // namespace GDBStub 1200}; // namespace GDBStub
diff --git a/src/core/gdbstub/gdbstub.h b/src/core/gdbstub/gdbstub.h
index 201fca095..f2418c9e4 100644
--- a/src/core/gdbstub/gdbstub.h
+++ b/src/core/gdbstub/gdbstub.h
@@ -7,6 +7,7 @@
7#pragma once 7#pragma once
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/hle/kernel/thread.h"
10 11
11namespace GDBStub { 12namespace GDBStub {
12 13
@@ -91,4 +92,12 @@ bool GetCpuStepFlag();
91 * @param is_step 92 * @param is_step
92 */ 93 */
93void SetCpuStepFlag(bool is_step); 94void SetCpuStepFlag(bool is_step);
95
96/**
97 * Send trap signal from thread back to the gdbstub server.
98 *
99 * @param thread Sending thread.
100 * @param trap Trap no.
101 */
102void SendTrap(Kernel::Thread* thread, int trap);
94} // namespace GDBStub 103} // namespace GDBStub
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
new file mode 100644
index 000000000..e9c8369d7
--- /dev/null
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -0,0 +1,173 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
#include <algorithm>
#include <vector>

#include "common/assert.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "core/core.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/lock.h"
#include "core/memory.h"
15
16namespace Kernel {
17namespace AddressArbiter {
18
19// Performs actual address waiting logic.
20static ResultCode WaitForAddress(VAddr address, s64 timeout) {
21 SharedPtr<Thread> current_thread = GetCurrentThread();
22 current_thread->arb_wait_address = address;
23 current_thread->status = THREADSTATUS_WAIT_ARB;
24 current_thread->wakeup_callback = nullptr;
25
26 current_thread->WakeAfterDelay(timeout);
27
28 Core::System::GetInstance().CpuCore(current_thread->processor_id).PrepareReschedule();
29 return RESULT_TIMEOUT;
30}
31
32// Gets the threads waiting on an address.
33static void GetThreadsWaitingOnAddress(std::vector<SharedPtr<Thread>>& waiting_threads,
34 VAddr address) {
35 auto RetrieveWaitingThreads =
36 [](size_t core_index, std::vector<SharedPtr<Thread>>& waiting_threads, VAddr arb_addr) {
37 const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
38 auto& thread_list = scheduler->GetThreadList();
39
40 for (auto& thread : thread_list) {
41 if (thread->arb_wait_address == arb_addr)
42 waiting_threads.push_back(thread);
43 }
44 };
45
46 // Retrieve a list of all threads that are waiting for this address.
47 RetrieveWaitingThreads(0, waiting_threads, address);
48 RetrieveWaitingThreads(1, waiting_threads, address);
49 RetrieveWaitingThreads(2, waiting_threads, address);
50 RetrieveWaitingThreads(3, waiting_threads, address);
51 // Sort them by priority, such that the highest priority ones come first.
52 std::sort(waiting_threads.begin(), waiting_threads.end(),
53 [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
54 return lhs->current_priority < rhs->current_priority;
55 });
56}
57
58// Wake up num_to_wake (or all) threads in a vector.
59static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
60 // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
61 // them all.
62 size_t last = waiting_threads.size();
63 if (num_to_wake > 0)
64 last = num_to_wake;
65
66 // Signal the waiting threads.
67 for (size_t i = 0; i < last; i++) {
68 ASSERT(waiting_threads[i]->status = THREADSTATUS_WAIT_ARB);
69 waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
70 waiting_threads[i]->arb_wait_address = 0;
71 waiting_threads[i]->ResumeFromWait();
72 }
73}
74
75// Signals an address being waited on.
76ResultCode SignalToAddress(VAddr address, s32 num_to_wake) {
77 // Get threads waiting on the address.
78 std::vector<SharedPtr<Thread>> waiting_threads;
79 GetThreadsWaitingOnAddress(waiting_threads, address);
80
81 WakeThreads(waiting_threads, num_to_wake);
82 return RESULT_SUCCESS;
83}
84
85// Signals an address being waited on and increments its value if equal to the value argument.
86ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) {
87 // Ensure that we can write to the address.
88 if (!Memory::IsValidVirtualAddress(address)) {
89 return ERR_INVALID_ADDRESS_STATE;
90 }
91
92 if (static_cast<s32>(Memory::Read32(address)) == value) {
93 Memory::Write32(address, static_cast<u32>(value + 1));
94 } else {
95 return ERR_INVALID_STATE;
96 }
97
98 return SignalToAddress(address, num_to_wake);
99}
100
101// Signals an address being waited on and modifies its value based on waiting thread count if equal
102// to the value argument.
103ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
104 s32 num_to_wake) {
105 // Ensure that we can write to the address.
106 if (!Memory::IsValidVirtualAddress(address)) {
107 return ERR_INVALID_ADDRESS_STATE;
108 }
109
110 // Get threads waiting on the address.
111 std::vector<SharedPtr<Thread>> waiting_threads;
112 GetThreadsWaitingOnAddress(waiting_threads, address);
113
114 // Determine the modified value depending on the waiting count.
115 s32 updated_value;
116 if (waiting_threads.size() == 0) {
117 updated_value = value - 1;
118 } else if (num_to_wake <= 0 || waiting_threads.size() <= num_to_wake) {
119 updated_value = value + 1;
120 } else {
121 updated_value = value;
122 }
123
124 if (static_cast<s32>(Memory::Read32(address)) == value) {
125 Memory::Write32(address, static_cast<u32>(updated_value));
126 } else {
127 return ERR_INVALID_STATE;
128 }
129
130 WakeThreads(waiting_threads, num_to_wake);
131 return RESULT_SUCCESS;
132}
133
134// Waits on an address if the value passed is less than the argument value, optionally decrementing.
135ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) {
136 // Ensure that we can read the address.
137 if (!Memory::IsValidVirtualAddress(address)) {
138 return ERR_INVALID_ADDRESS_STATE;
139 }
140
141 s32 cur_value = static_cast<s32>(Memory::Read32(address));
142 if (cur_value < value) {
143 Memory::Write32(address, static_cast<u32>(cur_value - 1));
144 } else {
145 return ERR_INVALID_STATE;
146 }
147 // Short-circuit without rescheduling, if timeout is zero.
148 if (timeout == 0) {
149 return RESULT_TIMEOUT;
150 }
151
152 return WaitForAddress(address, timeout);
153}
154
155// Waits on an address if the value passed is equal to the argument value.
156ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
157 // Ensure that we can read the address.
158 if (!Memory::IsValidVirtualAddress(address)) {
159 return ERR_INVALID_ADDRESS_STATE;
160 }
161 // Only wait for the address if equal.
162 if (static_cast<s32>(Memory::Read32(address)) != value) {
163 return ERR_INVALID_STATE;
164 }
165 // Short-circuit without rescheduling, if timeout is zero.
166 if (timeout == 0) {
167 return RESULT_TIMEOUT;
168 }
169
170 return WaitForAddress(address, timeout);
171}
172} // namespace AddressArbiter
173} // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
new file mode 100644
index 000000000..f20f3dbc0
--- /dev/null
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -0,0 +1,32 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/result.h"
8
9namespace Kernel {
10
11namespace AddressArbiter {
12enum class ArbitrationType {
13 WaitIfLessThan = 0,
14 DecrementAndWaitIfLessThan = 1,
15 WaitIfEqual = 2,
16};
17
18enum class SignalType {
19 Signal = 0,
20 IncrementAndSignalIfEqual = 1,
21 ModifyByWaitingCountAndSignalIfEqual = 2,
22};
23
24ResultCode SignalToAddress(VAddr address, s32 num_to_wake);
25ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
26ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
27
28ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement);
29ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
30} // namespace AddressArbiter
31
32} // namespace Kernel
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index e1b5430bf..221cb1bb5 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -20,13 +20,16 @@ enum {
20 MaxConnectionsReached = 52, 20 MaxConnectionsReached = 52,
21 21
22 // Confirmed Switch OS error codes 22 // Confirmed Switch OS error codes
23 MisalignedAddress = 102, 23 InvalidAddress = 102,
24 InvalidMemoryState = 106,
24 InvalidProcessorId = 113, 25 InvalidProcessorId = 113,
25 InvalidHandle = 114, 26 InvalidHandle = 114,
26 InvalidCombination = 116, 27 InvalidCombination = 116,
27 Timeout = 117, 28 Timeout = 117,
28 SynchronizationCanceled = 118, 29 SynchronizationCanceled = 118,
29 TooLarge = 119, 30 TooLarge = 119,
31 InvalidEnumValue = 120,
32 InvalidState = 125,
30}; 33};
31} 34}
32 35
@@ -39,14 +42,15 @@ constexpr ResultCode ERR_SESSION_CLOSED_BY_REMOTE(-1);
39constexpr ResultCode ERR_PORT_NAME_TOO_LONG(-1); 42constexpr ResultCode ERR_PORT_NAME_TOO_LONG(-1);
40constexpr ResultCode ERR_WRONG_PERMISSION(-1); 43constexpr ResultCode ERR_WRONG_PERMISSION(-1);
41constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED(-1); 44constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED(-1);
42constexpr ResultCode ERR_INVALID_ENUM_VALUE(-1); 45constexpr ResultCode ERR_INVALID_ENUM_VALUE(ErrorModule::Kernel, ErrCodes::InvalidEnumValue);
43constexpr ResultCode ERR_INVALID_ENUM_VALUE_FND(-1); 46constexpr ResultCode ERR_INVALID_ENUM_VALUE_FND(-1);
44constexpr ResultCode ERR_INVALID_COMBINATION(-1); 47constexpr ResultCode ERR_INVALID_COMBINATION(-1);
45constexpr ResultCode ERR_INVALID_COMBINATION_KERNEL(-1); 48constexpr ResultCode ERR_INVALID_COMBINATION_KERNEL(-1);
46constexpr ResultCode ERR_OUT_OF_MEMORY(-1); 49constexpr ResultCode ERR_OUT_OF_MEMORY(-1);
47constexpr ResultCode ERR_INVALID_ADDRESS(-1); 50constexpr ResultCode ERR_INVALID_ADDRESS(ErrorModule::Kernel, ErrCodes::InvalidAddress);
48constexpr ResultCode ERR_INVALID_ADDRESS_STATE(-1); 51constexpr ResultCode ERR_INVALID_ADDRESS_STATE(ErrorModule::Kernel, ErrCodes::InvalidMemoryState);
49constexpr ResultCode ERR_INVALID_HANDLE(ErrorModule::Kernel, ErrCodes::InvalidHandle); 52constexpr ResultCode ERR_INVALID_HANDLE(ErrorModule::Kernel, ErrCodes::InvalidHandle);
53constexpr ResultCode ERR_INVALID_STATE(ErrorModule::Kernel, ErrCodes::InvalidState);
50constexpr ResultCode ERR_INVALID_POINTER(-1); 54constexpr ResultCode ERR_INVALID_POINTER(-1);
51constexpr ResultCode ERR_INVALID_OBJECT_ADDR(-1); 55constexpr ResultCode ERR_INVALID_OBJECT_ADDR(-1);
52constexpr ResultCode ERR_NOT_AUTHORIZED(-1); 56constexpr ResultCode ERR_NOT_AUTHORIZED(-1);
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 01904467e..b0d83f401 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -271,6 +271,11 @@ std::vector<u8> HLERequestContext::ReadBuffer(int buffer_index) const {
271} 271}
272 272
273size_t HLERequestContext::WriteBuffer(const void* buffer, size_t size, int buffer_index) const { 273size_t HLERequestContext::WriteBuffer(const void* buffer, size_t size, int buffer_index) const {
274 if (size == 0) {
275 NGLOG_WARNING(Core, "skip empty buffer write");
276 return 0;
277 }
278
274 const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()}; 279 const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()};
275 const size_t buffer_size{GetWriteBufferSize(buffer_index)}; 280 const size_t buffer_size{GetWriteBufferSize(buffer_index)};
276 if (size > buffer_size) { 281 if (size > buffer_size) {
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index bc144f3de..65560226d 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -59,7 +59,7 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
59 Handle requesting_thread_handle) { 59 Handle requesting_thread_handle) {
60 // The mutex address must be 4-byte aligned 60 // The mutex address must be 4-byte aligned
61 if ((address % sizeof(u32)) != 0) { 61 if ((address % sizeof(u32)) != 0) {
62 return ResultCode(ErrorModule::Kernel, ErrCodes::MisalignedAddress); 62 return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidAddress);
63 } 63 }
64 64
65 SharedPtr<Thread> holding_thread = g_handle_table.Get<Thread>(holding_thread_handle); 65 SharedPtr<Thread> holding_thread = g_handle_table.Get<Thread>(holding_thread_handle);
@@ -97,7 +97,7 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
97ResultCode Mutex::Release(VAddr address) { 97ResultCode Mutex::Release(VAddr address) {
98 // The mutex address must be 4-byte aligned 98 // The mutex address must be 4-byte aligned
99 if ((address % sizeof(u32)) != 0) { 99 if ((address % sizeof(u32)) != 0) {
100 return ResultCode(ErrorModule::Kernel, ErrCodes::MisalignedAddress); 100 return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidAddress);
101 } 101 }
102 102
103 auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(GetCurrentThread(), address); 103 auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(GetCurrentThread(), address);
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index ec3601e8b..1a36e0d02 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -11,6 +11,7 @@
11#include "common/string_util.h" 11#include "common/string_util.h"
12#include "core/core.h" 12#include "core/core.h"
13#include "core/core_timing.h" 13#include "core/core_timing.h"
14#include "core/hle/kernel/address_arbiter.h"
14#include "core/hle/kernel/client_port.h" 15#include "core/hle/kernel/client_port.h"
15#include "core/hle/kernel/client_session.h" 16#include "core/hle/kernel/client_session.h"
16#include "core/hle/kernel/event.h" 17#include "core/hle/kernel/event.h"
@@ -316,6 +317,11 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
316 "(STUBBED) Attempted to query privileged process id bounds, returned 0"); 317 "(STUBBED) Attempted to query privileged process id bounds, returned 0");
317 *result = 0; 318 *result = 0;
318 break; 319 break;
320 case GetInfoType::UserExceptionContextAddr:
321 NGLOG_WARNING(Kernel_SVC,
322 "(STUBBED) Attempted to query user exception context address, returned 0");
323 *result = 0;
324 break;
319 default: 325 default:
320 UNIMPLEMENTED(); 326 UNIMPLEMENTED();
321 } 327 }
@@ -575,7 +581,7 @@ static void SleepThread(s64 nanoseconds) {
575 Core::System::GetInstance().PrepareReschedule(); 581 Core::System::GetInstance().PrepareReschedule();
576} 582}
577 583
578/// Signal process wide key atomic 584/// Wait process wide key atomic
579static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_variable_addr, 585static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_variable_addr,
580 Handle thread_handle, s64 nano_seconds) { 586 Handle thread_handle, s64 nano_seconds) {
581 NGLOG_TRACE( 587 NGLOG_TRACE(
@@ -684,6 +690,58 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
684 return RESULT_SUCCESS; 690 return RESULT_SUCCESS;
685} 691}
686 692
693// Wait for an address (via Address Arbiter)
694static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout) {
695 NGLOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}",
696 address, type, value, timeout);
697 // If the passed address is a kernel virtual address, return invalid memory state.
698 if (Memory::IsKernelVirtualAddress(address)) {
699 return ERR_INVALID_ADDRESS_STATE;
700 }
701 // If the address is not properly aligned to 4 bytes, return invalid address.
702 if (address % sizeof(u32) != 0) {
703 return ERR_INVALID_ADDRESS;
704 }
705
706 switch (static_cast<AddressArbiter::ArbitrationType>(type)) {
707 case AddressArbiter::ArbitrationType::WaitIfLessThan:
708 return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false);
709 case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan:
710 return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true);
711 case AddressArbiter::ArbitrationType::WaitIfEqual:
712 return AddressArbiter::WaitForAddressIfEqual(address, value, timeout);
713 default:
714 return ERR_INVALID_ENUM_VALUE;
715 }
716}
717
718// Signals to an address (via Address Arbiter)
719static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to_wake) {
720 NGLOG_WARNING(Kernel_SVC,
721 "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}", address,
722 type, value, num_to_wake);
723 // If the passed address is a kernel virtual address, return invalid memory state.
724 if (Memory::IsKernelVirtualAddress(address)) {
725 return ERR_INVALID_ADDRESS_STATE;
726 }
727 // If the address is not properly aligned to 4 bytes, return invalid address.
728 if (address % sizeof(u32) != 0) {
729 return ERR_INVALID_ADDRESS;
730 }
731
732 switch (static_cast<AddressArbiter::SignalType>(type)) {
733 case AddressArbiter::SignalType::Signal:
734 return AddressArbiter::SignalToAddress(address, num_to_wake);
735 case AddressArbiter::SignalType::IncrementAndSignalIfEqual:
736 return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
737 case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual:
738 return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
739 num_to_wake);
740 default:
741 return ERR_INVALID_ENUM_VALUE;
742 }
743}
744
687/// This returns the total CPU ticks elapsed since the CPU was powered-on 745/// This returns the total CPU ticks elapsed since the CPU was powered-on
688static u64 GetSystemTick() { 746static u64 GetSystemTick() {
689 const u64 result{CoreTiming::GetTicks()}; 747 const u64 result{CoreTiming::GetTicks()};
@@ -744,7 +802,7 @@ static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
744 ASSERT(thread->owner_process->ideal_processor != THREADPROCESSORID_DEFAULT); 802 ASSERT(thread->owner_process->ideal_processor != THREADPROCESSORID_DEFAULT);
745 // Set the target CPU to the one specified in the process' exheader. 803 // Set the target CPU to the one specified in the process' exheader.
746 core = thread->owner_process->ideal_processor; 804 core = thread->owner_process->ideal_processor;
747 mask = 1 << core; 805 mask = 1ull << core;
748 } 806 }
749 807
750 if (mask == 0) { 808 if (mask == 0) {
@@ -761,7 +819,7 @@ static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
761 } 819 }
762 820
763 // Error out if the input core isn't enabled in the input mask. 821 // Error out if the input core isn't enabled in the input mask.
764 if (core < Core::NUM_CPU_CORES && (mask & (1 << core)) == 0) { 822 if (core < Core::NUM_CPU_CORES && (mask & (1ull << core)) == 0) {
765 return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidCombination); 823 return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidCombination);
766 } 824 }
767 825
@@ -856,8 +914,8 @@ static const FunctionDef SVC_Table[] = {
856 {0x31, nullptr, "GetResourceLimitCurrentValue"}, 914 {0x31, nullptr, "GetResourceLimitCurrentValue"},
857 {0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"}, 915 {0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"},
858 {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"}, 916 {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"},
859 {0x34, nullptr, "WaitForAddress"}, 917 {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"},
860 {0x35, nullptr, "SignalToAddress"}, 918 {0x35, SvcWrap<SignalToAddress>, "SignalToAddress"},
861 {0x36, nullptr, "Unknown"}, 919 {0x36, nullptr, "Unknown"},
862 {0x37, nullptr, "Unknown"}, 920 {0x37, nullptr, "Unknown"},
863 {0x38, nullptr, "Unknown"}, 921 {0x38, nullptr, "Unknown"},
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 40aa88cc1..79c3fe31b 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -179,6 +179,20 @@ void SvcWrap() {
179 FuncReturn(retval); 179 FuncReturn(retval);
180} 180}
181 181
182template <ResultCode func(u64, u32, s32, s64)>
183void SvcWrap() {
184 FuncReturn(
185 func(PARAM(0), (u32)(PARAM(1) & 0xFFFFFFFF), (s32)(PARAM(2) & 0xFFFFFFFF), (s64)PARAM(3))
186 .raw);
187}
188
189template <ResultCode func(u64, u32, s32, s32)>
190void SvcWrap() {
191 FuncReturn(func(PARAM(0), (u32)(PARAM(1) & 0xFFFFFFFF), (s32)(PARAM(2) & 0xFFFFFFFF),
192 (s32)(PARAM(3) & 0xFFFFFFFF))
193 .raw);
194}
195
182//////////////////////////////////////////////////////////////////////////////////////////////////// 196////////////////////////////////////////////////////////////////////////////////////////////////////
183// Function wrappers that return type u32 197// Function wrappers that return type u32
184 198
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index cffa7ca83..2f333ec34 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -140,6 +140,11 @@ static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) {
140 } 140 }
141 } 141 }
142 142
143 if (thread->arb_wait_address != 0) {
144 ASSERT(thread->status == THREADSTATUS_WAIT_ARB);
145 thread->arb_wait_address = 0;
146 }
147
143 if (resume) 148 if (resume)
144 thread->ResumeFromWait(); 149 thread->ResumeFromWait();
145} 150}
@@ -179,6 +184,7 @@ void Thread::ResumeFromWait() {
179 case THREADSTATUS_WAIT_SLEEP: 184 case THREADSTATUS_WAIT_SLEEP:
180 case THREADSTATUS_WAIT_IPC: 185 case THREADSTATUS_WAIT_IPC:
181 case THREADSTATUS_WAIT_MUTEX: 186 case THREADSTATUS_WAIT_MUTEX:
187 case THREADSTATUS_WAIT_ARB:
182 break; 188 break;
183 189
184 case THREADSTATUS_READY: 190 case THREADSTATUS_READY:
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 1d2da6d50..f1e759802 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -45,6 +45,7 @@ enum ThreadStatus {
45 THREADSTATUS_WAIT_SYNCH_ANY, ///< Waiting due to WaitSynch1 or WaitSynchN with wait_all = false 45 THREADSTATUS_WAIT_SYNCH_ANY, ///< Waiting due to WaitSynch1 or WaitSynchN with wait_all = false
46 THREADSTATUS_WAIT_SYNCH_ALL, ///< Waiting due to WaitSynchronizationN with wait_all = true 46 THREADSTATUS_WAIT_SYNCH_ALL, ///< Waiting due to WaitSynchronizationN with wait_all = true
47 THREADSTATUS_WAIT_MUTEX, ///< Waiting due to an ArbitrateLock/WaitProcessWideKey svc 47 THREADSTATUS_WAIT_MUTEX, ///< Waiting due to an ArbitrateLock/WaitProcessWideKey svc
48 THREADSTATUS_WAIT_ARB, ///< Waiting due to a SignalToAddress/WaitForAddress svc
48 THREADSTATUS_DORMANT, ///< Created but not yet made ready 49 THREADSTATUS_DORMANT, ///< Created but not yet made ready
49 THREADSTATUS_DEAD ///< Run to completion, or forcefully terminated 50 THREADSTATUS_DEAD ///< Run to completion, or forcefully terminated
50}; 51};
@@ -230,6 +231,9 @@ public:
230 VAddr mutex_wait_address; ///< If waiting on a Mutex, this is the mutex address 231 VAddr mutex_wait_address; ///< If waiting on a Mutex, this is the mutex address
231 Handle wait_handle; ///< The handle used to wait for the mutex. 232 Handle wait_handle; ///< The handle used to wait for the mutex.
232 233
234 // If waiting for an AddressArbiter, this is the address being waited on.
235 VAddr arb_wait_address{0};
236
233 std::string name; 237 std::string name;
234 238
235 /// Handle used by guest emulated application to access this thread 239 /// Handle used by guest emulated application to access this thread
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 12954556d..b8d6b8d4d 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -561,7 +561,7 @@ IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationF
561 {32, nullptr, "BeginBlockingHomeButton"}, 561 {32, nullptr, "BeginBlockingHomeButton"},
562 {33, nullptr, "EndBlockingHomeButton"}, 562 {33, nullptr, "EndBlockingHomeButton"},
563 {40, &IApplicationFunctions::NotifyRunning, "NotifyRunning"}, 563 {40, &IApplicationFunctions::NotifyRunning, "NotifyRunning"},
564 {50, nullptr, "GetPseudoDeviceId"}, 564 {50, &IApplicationFunctions::GetPseudoDeviceId, "GetPseudoDeviceId"},
565 {60, nullptr, "SetMediaPlaybackStateForApplication"}, 565 {60, nullptr, "SetMediaPlaybackStateForApplication"},
566 {65, nullptr, "IsGamePlayRecordingSupported"}, 566 {65, nullptr, "IsGamePlayRecordingSupported"},
567 {66, &IApplicationFunctions::InitializeGamePlayRecording, "InitializeGamePlayRecording"}, 567 {66, &IApplicationFunctions::InitializeGamePlayRecording, "InitializeGamePlayRecording"},
@@ -684,6 +684,17 @@ void IApplicationFunctions::NotifyRunning(Kernel::HLERequestContext& ctx) {
684 NGLOG_WARNING(Service_AM, "(STUBBED) called"); 684 NGLOG_WARNING(Service_AM, "(STUBBED) called");
685} 685}
686 686
687void IApplicationFunctions::GetPseudoDeviceId(Kernel::HLERequestContext& ctx) {
688 IPC::ResponseBuilder rb{ctx, 6};
689 rb.Push(RESULT_SUCCESS);
690
691 // Returns a 128-bit UUID
692 rb.Push<u64>(0);
693 rb.Push<u64>(0);
694
695 NGLOG_WARNING(Service_AM, "(STUBBED) called");
696}
697
687void InstallInterfaces(SM::ServiceManager& service_manager, 698void InstallInterfaces(SM::ServiceManager& service_manager,
688 std::shared_ptr<NVFlinger::NVFlinger> nvflinger) { 699 std::shared_ptr<NVFlinger::NVFlinger> nvflinger) {
689 std::make_shared<AppletAE>(nvflinger)->InstallAsService(service_manager); 700 std::make_shared<AppletAE>(nvflinger)->InstallAsService(service_manager);
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index 301a6c798..1da79fd01 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -138,6 +138,7 @@ private:
138 void InitializeGamePlayRecording(Kernel::HLERequestContext& ctx); 138 void InitializeGamePlayRecording(Kernel::HLERequestContext& ctx);
139 void SetGamePlayRecordingState(Kernel::HLERequestContext& ctx); 139 void SetGamePlayRecordingState(Kernel::HLERequestContext& ctx);
140 void NotifyRunning(Kernel::HLERequestContext& ctx); 140 void NotifyRunning(Kernel::HLERequestContext& ctx);
141 void GetPseudoDeviceId(Kernel::HLERequestContext& ctx);
141}; 142};
142 143
143class IHomeMenuFunctions final : public ServiceFramework<IHomeMenuFunctions> { 144class IHomeMenuFunctions final : public ServiceFramework<IHomeMenuFunctions> {
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 6e8002bc9..44b7ef216 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -57,27 +57,26 @@ private:
57 } 57 }
58 58
59 void RequestUpdateAudioRenderer(Kernel::HLERequestContext& ctx) { 59 void RequestUpdateAudioRenderer(Kernel::HLERequestContext& ctx) {
60 NGLOG_DEBUG(Service_Audio, "{}", ctx.Description()); 60 AudioRendererConfig config;
61 AudioRendererResponseData response_data{}; 61 auto buf = ctx.ReadBuffer();
62 62 std::memcpy(&config, buf.data(), sizeof(AudioRendererConfig));
63 response_data.section_0_size = 63
64 static_cast<u32>(response_data.state_entries.size() * sizeof(AudioRendererStateEntry)); 64 AudioRendererResponse response_data{config};
65 response_data.section_1_size = static_cast<u32>(response_data.section_1.size()); 65
66 response_data.section_2_size = static_cast<u32>(response_data.section_2.size()); 66 ASSERT(ctx.GetWriteBufferSize() == response_data.total_size);
67 response_data.section_3_size = static_cast<u32>(response_data.section_3.size()); 67
68 response_data.section_4_size = static_cast<u32>(response_data.section_4.size()); 68 std::vector<u8> output(response_data.total_size);
69 response_data.section_5_size = static_cast<u32>(response_data.section_5.size()); 69 std::memcpy(output.data(), &response_data, sizeof(AudioRendererResponse));
70 response_data.total_size = sizeof(AudioRendererResponseData); 70 std::vector<MemoryPoolEntry> memory_pool(config.memory_pools_size / 0x20);
71 71 for (auto& entry : memory_pool) {
72 for (unsigned i = 0; i < response_data.state_entries.size(); i++) { 72 entry.state = 5;
73 // 4 = Busy and 5 = Ready?
74 response_data.state_entries[i].state = 5;
75 } 73 }
74 std::memcpy(output.data() + sizeof(AudioRendererResponse), memory_pool.data(),
75 response_data.memory_pools_size);
76 76
77 ctx.WriteBuffer(&response_data, response_data.total_size); 77 ctx.WriteBuffer(output);
78 78
79 IPC::ResponseBuilder rb{ctx, 2}; 79 IPC::ResponseBuilder rb{ctx, 2};
80
81 rb.Push(RESULT_SUCCESS); 80 rb.Push(RESULT_SUCCESS);
82 81
83 NGLOG_WARNING(Service_Audio, "(STUBBED) called"); 82 NGLOG_WARNING(Service_Audio, "(STUBBED) called");
@@ -109,43 +108,55 @@ private:
109 NGLOG_WARNING(Service_Audio, "(STUBBED) called"); 108 NGLOG_WARNING(Service_Audio, "(STUBBED) called");
110 } 109 }
111 110
112 struct AudioRendererStateEntry { 111 struct MemoryPoolEntry {
113 u32_le state; 112 u32_le state;
114 u32_le unknown_4; 113 u32_le unknown_4;
115 u32_le unknown_8; 114 u32_le unknown_8;
116 u32_le unknown_c; 115 u32_le unknown_c;
117 }; 116 };
118 static_assert(sizeof(AudioRendererStateEntry) == 0x10, 117 static_assert(sizeof(MemoryPoolEntry) == 0x10, "MemoryPoolEntry has wrong size");
119 "AudioRendererStateEntry has wrong size"); 118
120 119 struct AudioRendererConfig {
121 struct AudioRendererResponseData { 120 u32 revision;
122 u32_le unknown_0; 121 u32 behavior_size;
123 u32_le section_5_size; 122 u32 memory_pools_size;
124 u32_le section_0_size; 123 u32 voices_size;
125 u32_le section_1_size; 124 u32 voice_resource_size;
125 u32 effects_size;
126 u32 mixes_size;
127 u32 sinks_size;
128 u32 performance_buffer_size;
129 INSERT_PADDING_WORDS(6);
130 u32 total_size;
131 };
132 static_assert(sizeof(AudioRendererConfig) == 0x40, "AudioRendererConfig has wrong size");
133
134 struct AudioRendererResponse {
135 AudioRendererResponse(const AudioRendererConfig& config) {
136 revision = config.revision;
137 error_info_size = 0xb0;
138 memory_pools_size = (config.memory_pools_size / 0x20) * 0x10;
139 voices_size = (config.voices_size / 0x170) * 0x10;
140 effects_size = (config.effects_size / 0xC0) * 0x10;
141 sinks_size = (config.sinks_size / 0x140) * 0x20;
142 performance_manager_size = 0x10;
143 total_size = sizeof(AudioRendererResponse) + error_info_size + memory_pools_size +
144 voices_size + effects_size + sinks_size + performance_manager_size;
145 }
146
147 u32_le revision;
148 u32_le error_info_size;
149 u32_le memory_pools_size;
150 u32_le voices_size;
126 u32_le unknown_10; 151 u32_le unknown_10;
127 u32_le section_2_size; 152 u32_le effects_size;
128 u32_le unknown_18; 153 u32_le unknown_18;
129 u32_le section_3_size; 154 u32_le sinks_size;
130 u32_le section_4_size; 155 u32_le performance_manager_size;
131 u32_le unknown_24; 156 INSERT_PADDING_WORDS(6);
132 u32_le unknown_28;
133 u32_le unknown_2c;
134 u32_le unknown_30;
135 u32_le unknown_34;
136 u32_le unknown_38;
137 u32_le total_size; 157 u32_le total_size;
138
139 std::array<AudioRendererStateEntry, 0x18e> state_entries;
140
141 std::array<u8, 0x600> section_1;
142 std::array<u8, 0xe0> section_2;
143 std::array<u8, 0x20> section_3;
144 std::array<u8, 0x10> section_4;
145 std::array<u8, 0xb0> section_5;
146 }; 158 };
147 static_assert(sizeof(AudioRendererResponseData) == 0x20e0, 159 static_assert(sizeof(AudioRendererResponse) == 0x40, "AudioRendererResponse has wrong size");
148 "AudioRendererResponseData has wrong size");
149 160
150 /// This is used to trigger the audio event callback. 161 /// This is used to trigger the audio event callback.
151 CoreTiming::EventType* audio_event; 162 CoreTiming::EventType* audio_event;
@@ -258,7 +269,7 @@ void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
258 269
259void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { 270void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
260 IPC::RequestParser rp{ctx}; 271 IPC::RequestParser rp{ctx};
261 auto params = rp.PopRaw<WorkerBufferParameters>(); 272 auto params = rp.PopRaw<AudioRendererParameters>();
262 273
263 u64 buffer_sz = Common::AlignUp(4 * params.unknown8, 0x40); 274 u64 buffer_sz = Common::AlignUp(4 * params.unknown8, 0x40);
264 buffer_sz += params.unknownC * 1024; 275 buffer_sz += params.unknownC * 1024;
@@ -328,7 +339,7 @@ bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
328 u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap 339 u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
329 switch (feature) { 340 switch (feature) {
330 case AudioFeatures::Splitter: 341 case AudioFeatures::Splitter:
331 return version_num >= 2; 342 return version_num >= 2u;
332 default: 343 default:
333 return false; 344 return false;
334 } 345 }
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index fe53de4ce..7dbd9b74d 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -22,7 +22,7 @@ private:
22 void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx); 22 void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
23 void GetAudioDevice(Kernel::HLERequestContext& ctx); 23 void GetAudioDevice(Kernel::HLERequestContext& ctx);
24 24
25 struct WorkerBufferParameters { 25 struct AudioRendererParameters {
26 u32_le sample_rate; 26 u32_le sample_rate;
27 u32_le sample_count; 27 u32_le sample_count;
28 u32_le unknown8; 28 u32_le unknown8;
@@ -38,8 +38,8 @@ private:
38 u8 padding2[4]; 38 u8 padding2[4];
39 u32_le magic; 39 u32_le magic;
40 }; 40 };
41 static_assert(sizeof(WorkerBufferParameters) == 52, 41 static_assert(sizeof(AudioRendererParameters) == 52,
42 "WorkerBufferParameters is an invalid size"); 42 "AudioRendererParameters is an invalid size");
43 43
44 enum class AudioFeatures : u32 { 44 enum class AudioFeatures : u32 {
45 Splitter, 45 Splitter,
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index 8a47bb7af..1cf97e876 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -4,6 +4,7 @@
4 4
5#include <cinttypes> 5#include <cinttypes>
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "common/string_util.h"
7#include "core/core.h" 8#include "core/core.h"
8#include "core/file_sys/directory.h" 9#include "core/file_sys/directory.h"
9#include "core/file_sys/filesystem.h" 10#include "core/file_sys/filesystem.h"
@@ -258,9 +259,7 @@ public:
258 IPC::RequestParser rp{ctx}; 259 IPC::RequestParser rp{ctx};
259 260
260 auto file_buffer = ctx.ReadBuffer(); 261 auto file_buffer = ctx.ReadBuffer();
261 auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0'); 262 std::string name = Common::StringFromBuffer(file_buffer);
262
263 std::string name(file_buffer.begin(), end);
264 263
265 u64 mode = rp.Pop<u64>(); 264 u64 mode = rp.Pop<u64>();
266 u32 size = rp.Pop<u32>(); 265 u32 size = rp.Pop<u32>();
@@ -275,9 +274,7 @@ public:
275 IPC::RequestParser rp{ctx}; 274 IPC::RequestParser rp{ctx};
276 275
277 auto file_buffer = ctx.ReadBuffer(); 276 auto file_buffer = ctx.ReadBuffer();
278 auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0'); 277 std::string name = Common::StringFromBuffer(file_buffer);
279
280 std::string name(file_buffer.begin(), end);
281 278
282 NGLOG_DEBUG(Service_FS, "called file {}", name); 279 NGLOG_DEBUG(Service_FS, "called file {}", name);
283 280
@@ -289,9 +286,7 @@ public:
289 IPC::RequestParser rp{ctx}; 286 IPC::RequestParser rp{ctx};
290 287
291 auto file_buffer = ctx.ReadBuffer(); 288 auto file_buffer = ctx.ReadBuffer();
292 auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0'); 289 std::string name = Common::StringFromBuffer(file_buffer);
293
294 std::string name(file_buffer.begin(), end);
295 290
296 NGLOG_DEBUG(Service_FS, "called directory {}", name); 291 NGLOG_DEBUG(Service_FS, "called directory {}", name);
297 292
@@ -305,13 +300,11 @@ public:
305 std::vector<u8> buffer; 300 std::vector<u8> buffer;
306 buffer.resize(ctx.BufferDescriptorX()[0].Size()); 301 buffer.resize(ctx.BufferDescriptorX()[0].Size());
307 Memory::ReadBlock(ctx.BufferDescriptorX()[0].Address(), buffer.data(), buffer.size()); 302 Memory::ReadBlock(ctx.BufferDescriptorX()[0].Address(), buffer.data(), buffer.size());
308 auto end = std::find(buffer.begin(), buffer.end(), '\0'); 303 std::string src_name = Common::StringFromBuffer(buffer);
309 std::string src_name(buffer.begin(), end);
310 304
311 buffer.resize(ctx.BufferDescriptorX()[1].Size()); 305 buffer.resize(ctx.BufferDescriptorX()[1].Size());
312 Memory::ReadBlock(ctx.BufferDescriptorX()[1].Address(), buffer.data(), buffer.size()); 306 Memory::ReadBlock(ctx.BufferDescriptorX()[1].Address(), buffer.data(), buffer.size());
313 end = std::find(buffer.begin(), buffer.end(), '\0'); 307 std::string dst_name = Common::StringFromBuffer(buffer);
314 std::string dst_name(buffer.begin(), end);
315 308
316 NGLOG_DEBUG(Service_FS, "called file '{}' to file '{}'", src_name, dst_name); 309 NGLOG_DEBUG(Service_FS, "called file '{}' to file '{}'", src_name, dst_name);
317 310
@@ -323,9 +316,7 @@ public:
323 IPC::RequestParser rp{ctx}; 316 IPC::RequestParser rp{ctx};
324 317
325 auto file_buffer = ctx.ReadBuffer(); 318 auto file_buffer = ctx.ReadBuffer();
326 auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0'); 319 std::string name = Common::StringFromBuffer(file_buffer);
327
328 std::string name(file_buffer.begin(), end);
329 320
330 auto mode = static_cast<FileSys::Mode>(rp.Pop<u32>()); 321 auto mode = static_cast<FileSys::Mode>(rp.Pop<u32>());
331 322
@@ -349,9 +340,7 @@ public:
349 IPC::RequestParser rp{ctx}; 340 IPC::RequestParser rp{ctx};
350 341
351 auto file_buffer = ctx.ReadBuffer(); 342 auto file_buffer = ctx.ReadBuffer();
352 auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0'); 343 std::string name = Common::StringFromBuffer(file_buffer);
353
354 std::string name(file_buffer.begin(), end);
355 344
356 // TODO(Subv): Implement this filter. 345 // TODO(Subv): Implement this filter.
357 u32 filter_flags = rp.Pop<u32>(); 346 u32 filter_flags = rp.Pop<u32>();
@@ -376,9 +365,7 @@ public:
376 IPC::RequestParser rp{ctx}; 365 IPC::RequestParser rp{ctx};
377 366
378 auto file_buffer = ctx.ReadBuffer(); 367 auto file_buffer = ctx.ReadBuffer();
379 auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0'); 368 std::string name = Common::StringFromBuffer(file_buffer);
380
381 std::string name(file_buffer.begin(), end);
382 369
383 NGLOG_DEBUG(Service_FS, "called file {}", name); 370 NGLOG_DEBUG(Service_FS, "called file {}", name);
384 371
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 00c5308ba..2696a8bf0 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -84,6 +84,10 @@ private:
84 84
85 for (size_t controller = 0; controller < mem.controllers.size(); controller++) { 85 for (size_t controller = 0; controller < mem.controllers.size(); controller++) {
86 for (int index = 0; index < HID_NUM_LAYOUTS; index++) { 86 for (int index = 0; index < HID_NUM_LAYOUTS; index++) {
87 // TODO(DarkLordZach): Is this layout/controller config actually invalid?
88 if (controller == Controller_Handheld && index == Layout_Single)
89 continue;
90
87 ControllerLayout& layout = mem.controllers[controller].layouts[index]; 91 ControllerLayout& layout = mem.controllers[controller].layouts[index];
88 layout.header.num_entries = HID_NUM_ENTRIES; 92 layout.header.num_entries = HID_NUM_ENTRIES;
89 layout.header.max_entry_index = HID_NUM_ENTRIES - 1; 93 layout.header.max_entry_index = HID_NUM_ENTRIES - 1;
@@ -94,7 +98,6 @@ private:
94 layout.header.latest_entry = (layout.header.latest_entry + 1) % HID_NUM_ENTRIES; 98 layout.header.latest_entry = (layout.header.latest_entry + 1) % HID_NUM_ENTRIES;
95 99
96 ControllerInputEntry& entry = layout.entries[layout.header.latest_entry]; 100 ControllerInputEntry& entry = layout.entries[layout.header.latest_entry];
97 entry.connection_state = ConnectionState_Connected | ConnectionState_Wired;
98 entry.timestamp++; 101 entry.timestamp++;
99 // TODO(shinyquagsire23): Is this always identical to timestamp? 102 // TODO(shinyquagsire23): Is this always identical to timestamp?
100 entry.timestamp_2++; 103 entry.timestamp_2++;
@@ -103,6 +106,8 @@ private:
103 if (controller != Controller_Handheld) 106 if (controller != Controller_Handheld)
104 continue; 107 continue;
105 108
109 entry.connection_state = ConnectionState_Connected | ConnectionState_Wired;
110
106 // TODO(shinyquagsire23): Set up some LUTs for each layout mapping in the future? 111 // TODO(shinyquagsire23): Set up some LUTs for each layout mapping in the future?
107 // For now everything is just the default handheld layout, but split Joy-Con will 112 // For now everything is just the default handheld layout, but split Joy-Con will
108 // rotate the face buttons and directions for certain layouts. 113 // rotate the face buttons and directions for certain layouts.
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 15eee8f01..b499308d6 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -12,7 +12,7 @@ namespace Service::HID {
12// Begin enums and output structs 12// Begin enums and output structs
13 13
14constexpr u32 HID_NUM_ENTRIES = 17; 14constexpr u32 HID_NUM_ENTRIES = 17;
15constexpr u32 HID_NUM_LAYOUTS = 2; 15constexpr u32 HID_NUM_LAYOUTS = 7;
16constexpr s32 HID_JOYSTICK_MAX = 0x8000; 16constexpr s32 HID_JOYSTICK_MAX = 0x8000;
17constexpr s32 HID_JOYSTICK_MIN = -0x8000; 17constexpr s32 HID_JOYSTICK_MIN = -0x8000;
18 18
diff --git a/src/core/hle/service/mm/mm_u.cpp b/src/core/hle/service/mm/mm_u.cpp
new file mode 100644
index 000000000..b3a85b818
--- /dev/null
+++ b/src/core/hle/service/mm/mm_u.cpp
@@ -0,0 +1,50 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/logging/log.h"
6#include "core/hle/ipc_helpers.h"
7#include "core/hle/kernel/client_session.h"
8#include "core/hle/service/mm/mm_u.h"
9
10namespace Service::MM {
11
12void InstallInterfaces(SM::ServiceManager& service_manager) {
13 std::make_shared<MM_U>()->InstallAsService(service_manager);
14}
15
16void MM_U::Initialize(Kernel::HLERequestContext& ctx) {
17 NGLOG_WARNING(Service_MM, "(STUBBED) called");
18 IPC::ResponseBuilder rb{ctx, 2};
19 rb.Push(RESULT_SUCCESS);
20}
21
22void MM_U::SetAndWait(Kernel::HLERequestContext& ctx) {
23 IPC::RequestParser rp{ctx};
24 min = rp.Pop<u32>();
25 max = rp.Pop<u32>();
26 current = min;
27
28 NGLOG_WARNING(Service_MM, "(STUBBED) called, min=0x{:X}, max=0x{:X}", min, max);
29 IPC::ResponseBuilder rb{ctx, 2};
30 rb.Push(RESULT_SUCCESS);
31}
32
33void MM_U::Get(Kernel::HLERequestContext& ctx) {
34 NGLOG_WARNING(Service_MM, "(STUBBED) called");
35 IPC::ResponseBuilder rb{ctx, 3};
36 rb.Push(RESULT_SUCCESS);
37 rb.Push(current);
38}
39
40MM_U::MM_U() : ServiceFramework("mm:u") {
41 static const FunctionInfo functions[] = {
42 {0, nullptr, "InitializeOld"}, {1, nullptr, "FinalizeOld"},
43 {2, nullptr, "SetAndWaitOld"}, {3, nullptr, "GetOld"},
44 {4, &MM_U::Initialize, "Initialize"}, {5, nullptr, "Finalize"},
45 {6, &MM_U::SetAndWait, "SetAndWait"}, {7, &MM_U::Get, "Get"},
46 };
47 RegisterHandlers(functions);
48}
49
50} // namespace Service::MM
diff --git a/src/core/hle/service/mm/mm_u.h b/src/core/hle/service/mm/mm_u.h
new file mode 100644
index 000000000..79eeedf9c
--- /dev/null
+++ b/src/core/hle/service/mm/mm_u.h
@@ -0,0 +1,29 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/service/service.h"
8
9namespace Service::MM {
10
11class MM_U final : public ServiceFramework<MM_U> {
12public:
13 MM_U();
14 ~MM_U() = default;
15
16private:
17 void Initialize(Kernel::HLERequestContext& ctx);
18 void SetAndWait(Kernel::HLERequestContext& ctx);
19 void Get(Kernel::HLERequestContext& ctx);
20
21 u32 min{0};
22 u32 max{0};
23 u32 current{0};
24};
25
26/// Registers all MM services with the specified service manager.
27void InstallInterfaces(SM::ServiceManager& service_manager);
28
29} // namespace Service::MM
diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp
index 2af4465de..2a9f84037 100644
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -4,6 +4,8 @@
4 4
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "core/hle/ipc_helpers.h" 6#include "core/hle/ipc_helpers.h"
7#include "core/hle/kernel/event.h"
8#include "core/hle/service/hid/hid.h"
7#include "core/hle/service/nfp/nfp.h" 9#include "core/hle/service/nfp/nfp.h"
8#include "core/hle/service/nfp/nfp_user.h" 10#include "core/hle/service/nfp/nfp_user.h"
9 11
@@ -18,7 +20,7 @@ public:
18 static const FunctionInfo functions[] = { 20 static const FunctionInfo functions[] = {
19 {0, &IUser::Initialize, "Initialize"}, 21 {0, &IUser::Initialize, "Initialize"},
20 {1, nullptr, "Finalize"}, 22 {1, nullptr, "Finalize"},
21 {2, nullptr, "ListDevices"}, 23 {2, &IUser::ListDevices, "ListDevices"},
22 {3, nullptr, "StartDetection"}, 24 {3, nullptr, "StartDetection"},
23 {4, nullptr, "StopDetection"}, 25 {4, nullptr, "StopDetection"},
24 {5, nullptr, "Mount"}, 26 {5, nullptr, "Mount"},
@@ -33,24 +35,116 @@ public:
33 {14, nullptr, "GetRegisterInfo"}, 35 {14, nullptr, "GetRegisterInfo"},
34 {15, nullptr, "GetCommonInfo"}, 36 {15, nullptr, "GetCommonInfo"},
35 {16, nullptr, "GetModelInfo"}, 37 {16, nullptr, "GetModelInfo"},
36 {17, nullptr, "AttachActivateEvent"}, 38 {17, &IUser::AttachActivateEvent, "AttachActivateEvent"},
37 {18, nullptr, "AttachDeactivateEvent"}, 39 {18, &IUser::AttachDeactivateEvent, "AttachDeactivateEvent"},
38 {19, nullptr, "GetState"}, 40 {19, &IUser::GetState, "GetState"},
39 {20, nullptr, "GetDeviceState"}, 41 {20, &IUser::GetDeviceState, "GetDeviceState"},
40 {21, nullptr, "GetNpadId"}, 42 {21, &IUser::GetNpadId, "GetNpadId"},
41 {22, nullptr, "GetApplicationArea2"}, 43 {22, nullptr, "GetApplicationArea2"},
42 {23, nullptr, "AttachAvailabilityChangeEvent"}, 44 {23, &IUser::AttachAvailabilityChangeEvent, "AttachAvailabilityChangeEvent"},
43 {24, nullptr, "RecreateApplicationArea"}, 45 {24, nullptr, "RecreateApplicationArea"},
44 }; 46 };
45 RegisterHandlers(functions); 47 RegisterHandlers(functions);
48
49 activate_event = Kernel::Event::Create(Kernel::ResetType::OneShot, "IUser:ActivateEvent");
50 deactivate_event =
51 Kernel::Event::Create(Kernel::ResetType::OneShot, "IUser:DeactivateEvent");
52 availability_change_event =
53 Kernel::Event::Create(Kernel::ResetType::OneShot, "IUser:AvailabilityChangeEvent");
46 } 54 }
47 55
48private: 56private:
57 enum class State : u32 {
58 NonInitialized = 0,
59 Initialized = 1,
60 };
61
62 enum class DeviceState : u32 {
63 Initialized = 0,
64 };
65
49 void Initialize(Kernel::HLERequestContext& ctx) { 66 void Initialize(Kernel::HLERequestContext& ctx) {
50 NGLOG_WARNING(Service_NFP, "(STUBBED) called"); 67 NGLOG_WARNING(Service_NFP, "(STUBBED) called");
68
69 state = State::Initialized;
70
51 IPC::ResponseBuilder rb{ctx, 2}; 71 IPC::ResponseBuilder rb{ctx, 2};
52 rb.Push(RESULT_SUCCESS); 72 rb.Push(RESULT_SUCCESS);
53 } 73 }
74
75 void ListDevices(Kernel::HLERequestContext& ctx) {
76 IPC::RequestParser rp{ctx};
77 const u32 array_size = rp.Pop<u32>();
78
79 ctx.WriteBuffer(&device_handle, sizeof(device_handle));
80
81 NGLOG_WARNING(Service_NFP, "(STUBBED) called, array_size={}", array_size);
82
83 IPC::ResponseBuilder rb{ctx, 3};
84 rb.Push(RESULT_SUCCESS);
85 rb.Push<u32>(0);
86 }
87
88 void AttachActivateEvent(Kernel::HLERequestContext& ctx) {
89 IPC::RequestParser rp{ctx};
90 const u64 dev_handle = rp.Pop<u64>();
91 NGLOG_WARNING(Service_NFP, "(STUBBED) called, dev_handle=0x{:X}", dev_handle);
92
93 IPC::ResponseBuilder rb{ctx, 2, 1};
94 rb.Push(RESULT_SUCCESS);
95 rb.PushCopyObjects(activate_event);
96 }
97
98 void AttachDeactivateEvent(Kernel::HLERequestContext& ctx) {
99 IPC::RequestParser rp{ctx};
100 const u64 dev_handle = rp.Pop<u64>();
101 NGLOG_WARNING(Service_NFP, "(STUBBED) called, dev_handle=0x{:X}", dev_handle);
102
103 IPC::ResponseBuilder rb{ctx, 2, 1};
104 rb.Push(RESULT_SUCCESS);
105 rb.PushCopyObjects(deactivate_event);
106 }
107
108 void GetState(Kernel::HLERequestContext& ctx) {
109 NGLOG_WARNING(Service_NFP, "(STUBBED) called");
110 IPC::ResponseBuilder rb{ctx, 3};
111 rb.Push(RESULT_SUCCESS);
112 rb.Push<u32>(static_cast<u32>(state));
113 }
114
115 void GetDeviceState(Kernel::HLERequestContext& ctx) {
116 NGLOG_WARNING(Service_NFP, "(STUBBED) called");
117 IPC::ResponseBuilder rb{ctx, 3};
118 rb.Push(RESULT_SUCCESS);
119 rb.Push<u32>(static_cast<u32>(device_state));
120 }
121
122 void GetNpadId(Kernel::HLERequestContext& ctx) {
123 IPC::RequestParser rp{ctx};
124 const u64 dev_handle = rp.Pop<u64>();
125 NGLOG_WARNING(Service_NFP, "(STUBBED) called, dev_handle=0x{:X}", dev_handle);
126 IPC::ResponseBuilder rb{ctx, 3};
127 rb.Push(RESULT_SUCCESS);
128 rb.Push<u32>(npad_id);
129 }
130
131 void AttachAvailabilityChangeEvent(Kernel::HLERequestContext& ctx) {
132 IPC::RequestParser rp{ctx};
133 const u64 dev_handle = rp.Pop<u64>();
134 NGLOG_WARNING(Service_NFP, "(STUBBED) called, dev_handle=0x{:X}", dev_handle);
135
136 IPC::ResponseBuilder rb{ctx, 2, 1};
137 rb.Push(RESULT_SUCCESS);
138 rb.PushCopyObjects(availability_change_event);
139 }
140
141 const u64 device_handle{0xDEAD};
142 const HID::ControllerID npad_id{HID::Controller_Player1};
143 State state{State::NonInitialized};
144 DeviceState device_state{DeviceState::Initialized};
145 Kernel::SharedPtr<Kernel::Event> activate_event;
146 Kernel::SharedPtr<Kernel::Event> deactivate_event;
147 Kernel::SharedPtr<Kernel::Event> availability_change_event;
54}; 148};
55 149
56void Module::Interface::CreateUserInterface(Kernel::HLERequestContext& ctx) { 150void Module::Interface::CreateUserInterface(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp
index eee92cfcd..62489c7fe 100644
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -38,7 +38,7 @@ public:
38 {8, nullptr, "SetPriority"}, 38 {8, nullptr, "SetPriority"},
39 {9, nullptr, "SetNetworkProfileId"}, 39 {9, nullptr, "SetNetworkProfileId"},
40 {10, nullptr, "SetRejectable"}, 40 {10, nullptr, "SetRejectable"},
41 {11, nullptr, "SetConnectionConfirmationOption"}, 41 {11, &IRequest::SetConnectionConfirmationOption, "SetConnectionConfirmationOption"},
42 {12, nullptr, "SetPersistent"}, 42 {12, nullptr, "SetPersistent"},
43 {13, nullptr, "SetInstant"}, 43 {13, nullptr, "SetInstant"},
44 {14, nullptr, "SetSustainable"}, 44 {14, nullptr, "SetSustainable"},
@@ -67,23 +67,32 @@ private:
67 rb.Push(RESULT_SUCCESS); 67 rb.Push(RESULT_SUCCESS);
68 rb.Push<u32>(0); 68 rb.Push<u32>(0);
69 } 69 }
70
70 void GetResult(Kernel::HLERequestContext& ctx) { 71 void GetResult(Kernel::HLERequestContext& ctx) {
71 NGLOG_WARNING(Service_NIFM, "(STUBBED) called"); 72 NGLOG_WARNING(Service_NIFM, "(STUBBED) called");
72 IPC::ResponseBuilder rb{ctx, 2}; 73 IPC::ResponseBuilder rb{ctx, 2};
73 rb.Push(RESULT_SUCCESS); 74 rb.Push(RESULT_SUCCESS);
74 } 75 }
76
75 void GetSystemEventReadableHandles(Kernel::HLERequestContext& ctx) { 77 void GetSystemEventReadableHandles(Kernel::HLERequestContext& ctx) {
76 NGLOG_WARNING(Service_NIFM, "(STUBBED) called"); 78 NGLOG_WARNING(Service_NIFM, "(STUBBED) called");
77 IPC::ResponseBuilder rb{ctx, 2, 2}; 79 IPC::ResponseBuilder rb{ctx, 2, 2};
78 rb.Push(RESULT_SUCCESS); 80 rb.Push(RESULT_SUCCESS);
79 rb.PushCopyObjects(event1, event2); 81 rb.PushCopyObjects(event1, event2);
80 } 82 }
83
81 void Cancel(Kernel::HLERequestContext& ctx) { 84 void Cancel(Kernel::HLERequestContext& ctx) {
82 NGLOG_WARNING(Service_NIFM, "(STUBBED) called"); 85 NGLOG_WARNING(Service_NIFM, "(STUBBED) called");
83 IPC::ResponseBuilder rb{ctx, 2}; 86 IPC::ResponseBuilder rb{ctx, 2};
84 rb.Push(RESULT_SUCCESS); 87 rb.Push(RESULT_SUCCESS);
85 } 88 }
86 89
90 void SetConnectionConfirmationOption(Kernel::HLERequestContext& ctx) {
91 NGLOG_WARNING(Service_NIFM, "(STUBBED) called");
92 IPC::ResponseBuilder rb{ctx, 2};
93 rb.Push(RESULT_SUCCESS);
94 }
95
87 Kernel::SharedPtr<Kernel::Event> event1, event2; 96 Kernel::SharedPtr<Kernel::Event> event1, event2;
88}; 97};
89 98
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index a9538ff43..0abc0de83 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -26,6 +26,10 @@ u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vec
26 return ZCullGetInfo(input, output); 26 return ZCullGetInfo(input, output);
27 case IoctlCommand::IocZbcSetTable: 27 case IoctlCommand::IocZbcSetTable:
28 return ZBCSetTable(input, output); 28 return ZBCSetTable(input, output);
29 case IoctlCommand::IocZbcQueryTable:
30 return ZBCQueryTable(input, output);
31 case IoctlCommand::IocFlushL2:
32 return FlushL2(input, output);
29 } 33 }
30 UNIMPLEMENTED_MSG("Unimplemented ioctl"); 34 UNIMPLEMENTED_MSG("Unimplemented ioctl");
31 return 0; 35 return 0;
@@ -136,4 +140,22 @@ u32 nvhost_ctrl_gpu::ZBCSetTable(const std::vector<u8>& input, std::vector<u8>&
136 return 0; 140 return 0;
137} 141}
138 142
143u32 nvhost_ctrl_gpu::ZBCQueryTable(const std::vector<u8>& input, std::vector<u8>& output) {
144 NGLOG_WARNING(Service_NVDRV, "(STUBBED) called");
145 IoctlZbcQueryTable params{};
146 std::memcpy(&params, input.data(), input.size());
147 // TODO : To implement properly
148 std::memcpy(output.data(), &params, output.size());
149 return 0;
150}
151
152u32 nvhost_ctrl_gpu::FlushL2(const std::vector<u8>& input, std::vector<u8>& output) {
153 NGLOG_WARNING(Service_NVDRV, "(STUBBED) called");
154 IoctlFlushL2 params{};
155 std::memcpy(&params, input.data(), input.size());
156 // TODO : To implement properly
157 std::memcpy(output.data(), &params, output.size());
158 return 0;
159}
160
139} // namespace Service::Nvidia::Devices 161} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index 1d5ba2e67..f09113e67 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -26,6 +26,18 @@ private:
26 IocZcullGetCtxSizeCommand = 0x80044701, 26 IocZcullGetCtxSizeCommand = 0x80044701,
27 IocZcullGetInfo = 0x80284702, 27 IocZcullGetInfo = 0x80284702,
28 IocZbcSetTable = 0x402C4703, 28 IocZbcSetTable = 0x402C4703,
29 IocZbcQueryTable = 0xC0344704,
30 IocFlushL2 = 0x40084707,
31 IocInvalICache = 0x4008470D,
32 IocSetMmudebugMode = 0x4008470E,
33 IocSetSmDebugMode = 0x4010470F,
34 IocWaitForPause = 0xC0084710,
35 IocGetTcpExceptionEnStatus = 0x80084711,
36 IocNumVsms = 0x80084712,
37 IocVsmsMapping = 0xC0044713,
38 IocGetErrorChannelUserData = 0xC008471B,
39 IocGetGpuTime = 0xC010471C,
40 IocGetCpuTimeCorrelationInfo = 0xC108471D,
29 }; 41 };
30 42
31 struct IoctlGpuCharacteristics { 43 struct IoctlGpuCharacteristics {
@@ -127,12 +139,31 @@ private:
127 }; 139 };
128 static_assert(sizeof(IoctlZbcSetTable) == 44, "IoctlZbcSetTable is incorrect size"); 140 static_assert(sizeof(IoctlZbcSetTable) == 44, "IoctlZbcSetTable is incorrect size");
129 141
142 struct IoctlZbcQueryTable {
143 u32_le color_ds[4];
144 u32_le color_l2[4];
145 u32_le depth;
146 u32_le ref_cnt;
147 u32_le format;
148 u32_le type;
149 u32_le index_size;
150 };
151 static_assert(sizeof(IoctlZbcQueryTable) == 52, "IoctlZbcQueryTable is incorrect size");
152
153 struct IoctlFlushL2 {
154 u32_le flush; // l2_flush | l2_invalidate << 1 | fb_flush << 2
155 u32_le reserved;
156 };
157 static_assert(sizeof(IoctlFlushL2) == 8, "IoctlFlushL2 is incorrect size");
158
130 u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output); 159 u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output);
131 u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output); 160 u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output);
132 u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output); 161 u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
133 u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output); 162 u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
134 u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output); 163 u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
135 u32 ZBCSetTable(const std::vector<u8>& input, std::vector<u8>& output); 164 u32 ZBCSetTable(const std::vector<u8>& input, std::vector<u8>& output);
165 u32 ZBCQueryTable(const std::vector<u8>& input, std::vector<u8>& output);
166 u32 FlushL2(const std::vector<u8>& input, std::vector<u8>& output);
136}; 167};
137 168
138} // namespace Service::Nvidia::Devices 169} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 79aab87f9..ed7b6dc03 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -121,8 +121,9 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
121} 121}
122 122
123u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { 123u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
124 if (input.size() < sizeof(IoctlSubmitGpfifo)) 124 if (input.size() < sizeof(IoctlSubmitGpfifo)) {
125 UNIMPLEMENTED(); 125 UNIMPLEMENTED();
126 }
126 IoctlSubmitGpfifo params{}; 127 IoctlSubmitGpfifo params{};
127 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); 128 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
128 NGLOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", 129 NGLOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index 409fec470..bdd9eb5a5 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -26,6 +26,7 @@
26#include "core/hle/service/friend/friend.h" 26#include "core/hle/service/friend/friend.h"
27#include "core/hle/service/hid/hid.h" 27#include "core/hle/service/hid/hid.h"
28#include "core/hle/service/lm/lm.h" 28#include "core/hle/service/lm/lm.h"
29#include "core/hle/service/mm/mm_u.h"
29#include "core/hle/service/nfp/nfp.h" 30#include "core/hle/service/nfp/nfp.h"
30#include "core/hle/service/nifm/nifm.h" 31#include "core/hle/service/nifm/nifm.h"
31#include "core/hle/service/ns/ns.h" 32#include "core/hle/service/ns/ns.h"
@@ -191,6 +192,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm) {
191 Friend::InstallInterfaces(*sm); 192 Friend::InstallInterfaces(*sm);
192 HID::InstallInterfaces(*sm); 193 HID::InstallInterfaces(*sm);
193 LM::InstallInterfaces(*sm); 194 LM::InstallInterfaces(*sm);
195 MM::InstallInterfaces(*sm);
194 NFP::InstallInterfaces(*sm); 196 NFP::InstallInterfaces(*sm);
195 NIFM::InstallInterfaces(*sm); 197 NIFM::InstallInterfaces(*sm);
196 NS::InstallInterfaces(*sm); 198 NS::InstallInterfaces(*sm);
diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp
index f0572bed6..baeecb0ec 100644
--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -12,9 +12,6 @@
12namespace Service::Set { 12namespace Service::Set {
13 13
14void SET::GetAvailableLanguageCodes(Kernel::HLERequestContext& ctx) { 14void SET::GetAvailableLanguageCodes(Kernel::HLERequestContext& ctx) {
15 IPC::RequestParser rp{ctx};
16 u32 id = rp.Pop<u32>();
17
18 static constexpr std::array<LanguageCode, 17> available_language_codes = {{ 15 static constexpr std::array<LanguageCode, 17> available_language_codes = {{
19 LanguageCode::JA, 16 LanguageCode::JA,
20 LanguageCode::EN_US, 17 LanguageCode::EN_US,
@@ -50,7 +47,7 @@ SET::SET() : ServiceFramework("set") {
50 {2, nullptr, "MakeLanguageCode"}, 47 {2, nullptr, "MakeLanguageCode"},
51 {3, nullptr, "GetAvailableLanguageCodeCount"}, 48 {3, nullptr, "GetAvailableLanguageCodeCount"},
52 {4, nullptr, "GetRegionCode"}, 49 {4, nullptr, "GetRegionCode"},
53 {5, nullptr, "GetAvailableLanguageCodes2"}, 50 {5, &SET::GetAvailableLanguageCodes, "GetAvailableLanguageCodes2"},
54 {6, nullptr, "GetAvailableLanguageCodeCount2"}, 51 {6, nullptr, "GetAvailableLanguageCodeCount2"},
55 {7, nullptr, "GetKeyCodeMap"}, 52 {7, nullptr, "GetKeyCodeMap"},
56 {8, nullptr, "GetQuestFlag"}, 53 {8, nullptr, "GetQuestFlag"},
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp
index 6a4fd38cb..20cc0bac0 100644
--- a/src/core/loader/loader.cpp
+++ b/src/core/loader/loader.cpp
@@ -9,6 +9,7 @@
9#include "core/hle/kernel/process.h" 9#include "core/hle/kernel/process.h"
10#include "core/loader/deconstructed_rom_directory.h" 10#include "core/loader/deconstructed_rom_directory.h"
11#include "core/loader/elf.h" 11#include "core/loader/elf.h"
12#include "core/loader/nca.h"
12#include "core/loader/nro.h" 13#include "core/loader/nro.h"
13#include "core/loader/nso.h" 14#include "core/loader/nso.h"
14 15
@@ -32,6 +33,7 @@ FileType IdentifyFile(FileUtil::IOFile& file, const std::string& filepath) {
32 CHECK_TYPE(ELF) 33 CHECK_TYPE(ELF)
33 CHECK_TYPE(NSO) 34 CHECK_TYPE(NSO)
34 CHECK_TYPE(NRO) 35 CHECK_TYPE(NRO)
36 CHECK_TYPE(NCA)
35 37
36#undef CHECK_TYPE 38#undef CHECK_TYPE
37 39
@@ -57,6 +59,8 @@ FileType GuessFromExtension(const std::string& extension_) {
57 return FileType::NRO; 59 return FileType::NRO;
58 else if (extension == ".nso") 60 else if (extension == ".nso")
59 return FileType::NSO; 61 return FileType::NSO;
62 else if (extension == ".nca")
63 return FileType::NCA;
60 64
61 return FileType::Unknown; 65 return FileType::Unknown;
62} 66}
@@ -69,6 +73,8 @@ const char* GetFileTypeString(FileType type) {
69 return "NRO"; 73 return "NRO";
70 case FileType::NSO: 74 case FileType::NSO:
71 return "NSO"; 75 return "NSO";
76 case FileType::NCA:
77 return "NCA";
72 case FileType::DeconstructedRomDirectory: 78 case FileType::DeconstructedRomDirectory:
73 return "Directory"; 79 return "Directory";
74 case FileType::Error: 80 case FileType::Error:
@@ -104,6 +110,10 @@ static std::unique_ptr<AppLoader> GetFileLoader(FileUtil::IOFile&& file, FileTyp
104 case FileType::NRO: 110 case FileType::NRO:
105 return std::make_unique<AppLoader_NRO>(std::move(file), filepath); 111 return std::make_unique<AppLoader_NRO>(std::move(file), filepath);
106 112
113 // NX NCA file format.
114 case FileType::NCA:
115 return std::make_unique<AppLoader_NCA>(std::move(file), filepath);
116
107 // NX deconstructed ROM directory. 117 // NX deconstructed ROM directory.
108 case FileType::DeconstructedRomDirectory: 118 case FileType::DeconstructedRomDirectory:
109 return std::make_unique<AppLoader_DeconstructedRomDirectory>(std::move(file), filepath); 119 return std::make_unique<AppLoader_DeconstructedRomDirectory>(std::move(file), filepath);
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h
index b1aabb1cb..b76f7b13d 100644
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -29,6 +29,7 @@ enum class FileType {
29 ELF, 29 ELF,
30 NSO, 30 NSO,
31 NRO, 31 NRO,
32 NCA,
32 DeconstructedRomDirectory, 33 DeconstructedRomDirectory,
33}; 34};
34 35
diff --git a/src/core/loader/nca.cpp b/src/core/loader/nca.cpp
new file mode 100644
index 000000000..067945d46
--- /dev/null
+++ b/src/core/loader/nca.cpp
@@ -0,0 +1,303 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <vector>
6
7#include "common/common_funcs.h"
8#include "common/file_util.h"
9#include "common/logging/log.h"
10#include "common/swap.h"
11#include "core/core.h"
12#include "core/file_sys/program_metadata.h"
13#include "core/file_sys/romfs_factory.h"
14#include "core/hle/kernel/process.h"
15#include "core/hle/kernel/resource_limit.h"
16#include "core/hle/service/filesystem/filesystem.h"
17#include "core/loader/nca.h"
18#include "core/loader/nso.h"
19#include "core/memory.h"
20
21namespace Loader {
22
23// Media offsets in headers are stored divided by 512. Mult. by this to get real offset.
// (0x200 == 512; applied to NcaSectionTableEntry::media_offset / media_end_offset.)
24constexpr u64 MEDIA_OFFSET_MULTIPLIER = 0x200;
25
// Section header i is read from SECTION_HEADER_OFFSET + i * SECTION_HEADER_SIZE (see Nca::Load).
// SECTION_HEADER_OFFSET equals sizeof(NcaHeader), i.e. the section headers follow the main header.
26constexpr u64 SECTION_HEADER_SIZE = 0x200;
27constexpr u64 SECTION_HEADER_OFFSET = 0x400;
28
// Values of NcaHeader::content_type. AppLoader_NCA::IdentifyType only accepts Program.
29enum class NcaContentType : u8 { Program = 0, Meta = 1, Control = 2, Manual = 3, Data = 4 };
30
// Values of NcaSectionHeaderBlock::filesystem_type; Nca::Load dispatches on these two values and
// ignores any other.
31enum class NcaSectionFilesystemType : u8 { PFS0 = 0x2, ROMFS = 0x3 };
32
// One slot of NcaHeader::section_tables. Both offsets are stored in media units; multiply by
// MEDIA_OFFSET_MULTIPLIER to obtain byte offsets. Nca::Load treats media_offset > 0 as "in use".
33struct NcaSectionTableEntry {
34 u32_le media_offset;
35 u32_le media_end_offset;
36 INSERT_PADDING_BYTES(0x8);
37};
// The struct is read straight from the file, so its size must match the on-disk entry size.
38static_assert(sizeof(NcaSectionTableEntry) == 0x10, "NcaSectionTableEntry has incorrect size.");
39
// On-disk NCA header. Nca::Load and AppLoader_NCA::IdentifyType copy the first 0x400 bytes of the
// file into this struct without any decryption step, so the layout must match the file exactly.
40struct NcaHeader {
41 std::array<u8, 0x100> rsa_signature_1;
42 std::array<u8, 0x100> rsa_signature_2;
// Compared against 'NCA2' / 'NCA3' by IsValidNca.
43 u32_le magic;
44 u8 is_system;
// Must be NcaContentType::Program for IdentifyType to report FileType::NCA.
45 NcaContentType content_type;
46 u8 crypto_type;
47 u8 key_index;
48 u64_le size;
49 u64_le title_id;
50 INSERT_PADDING_BYTES(0x4);
51 u32_le sdk_version;
52 u8 crypto_type_2;
53 INSERT_PADDING_BYTES(15);
54 std::array<u8, 0x10> rights_id;
// Up to four sections; see NcaSectionTableEntry for how entries are interpreted.
55 std::array<NcaSectionTableEntry, 0x4> section_tables;
56 std::array<std::array<u8, 0x20>, 0x4> hash_tables;
57 std::array<std::array<u8, 0x10>, 0x4> key_area;
58 INSERT_PADDING_BYTES(0xC0);
59};
// Guards the raw memcpy/read into this struct.
60static_assert(sizeof(NcaHeader) == 0x400, "NcaHeader has incorrect size.");
61
// Leading 8 bytes of each section header. Nca::Load reads this first to learn the section's
// filesystem type before deciding how to parse the rest.
62struct NcaSectionHeaderBlock {
63 INSERT_PADDING_BYTES(3);
64 NcaSectionFilesystemType filesystem_type;
65 u8 crypto_type;
66 INSERT_PADDING_BYTES(3);
67};
68static_assert(sizeof(NcaSectionHeaderBlock) == 0x8, "NcaSectionHeaderBlock has incorrect size.");
69
// Full section header of a PFS0 section. It is read from the same file position as the
// NcaSectionHeaderBlock it begins with, and padded so that it spans one whole section header.
70struct Pfs0Superblock {
71 NcaSectionHeaderBlock header_block;
72 std::array<u8, 0x20> hash;
73 u32_le size;
74 INSERT_PADDING_BYTES(4);
75 u64_le hash_table_offset;
76 u64_le hash_table_size;
// Added to the section's byte offset by Nca::Load to locate the partition filesystem.
77 u64_le pfs0_header_offset;
78 u64_le pfs0_size;
79 INSERT_PADDING_BYTES(432);
80};
81static_assert(sizeof(Pfs0Superblock) == 0x200, "Pfs0Superblock has incorrect size.");
82
83static bool IsValidNca(const NcaHeader& header) {
84 return header.magic == Common::MakeMagic('N', 'C', 'A', '2') ||
85 header.magic == Common::MakeMagic('N', 'C', 'A', '3');
86}
87
88// TODO(DarkLordZach): Add support for encrypted.
89class Nca final {
90 std::vector<FileSys::PartitionFilesystem> pfs;
91 std::vector<u64> pfs_offset;
92
93 u64 romfs_offset = 0;
94 u64 romfs_size = 0;
95
96 boost::optional<u8> exefs_id = boost::none;
97
98 FileUtil::IOFile file;
99 std::string path;
100
101 u64 GetExeFsFileOffset(const std::string& file_name) const;
102 u64 GetExeFsFileSize(const std::string& file_name) const;
103
104public:
105 ResultStatus Load(FileUtil::IOFile&& file, std::string path);
106
107 FileSys::PartitionFilesystem GetPfs(u8 id) const;
108
109 u64 GetRomFsOffset() const;
110 u64 GetRomFsSize() const;
111
112 std::vector<u8> GetExeFsFile(const std::string& file_name);
113};
114
115static bool IsPfsExeFs(const FileSys::PartitionFilesystem& pfs) {
116 // According to switchbrew, an exefs must only contain these two files:
117 return pfs.GetFileSize("main") > 0 && pfs.GetFileSize("main.npdm") > 0;
118}
119
120ResultStatus Nca::Load(FileUtil::IOFile&& in_file, std::string in_path) {
121 file = std::move(in_file);
122 path = in_path;
123 file.Seek(0, SEEK_SET);
124 std::array<u8, sizeof(NcaHeader)> header_array{};
125 if (sizeof(NcaHeader) != file.ReadBytes(header_array.data(), sizeof(NcaHeader)))
126 NGLOG_CRITICAL(Loader, "File reader errored out during header read.");
127
128 NcaHeader header{};
129 std::memcpy(&header, header_array.data(), sizeof(NcaHeader));
130 if (!IsValidNca(header))
131 return ResultStatus::ErrorInvalidFormat;
132
133 int number_sections =
134 std::count_if(std::begin(header.section_tables), std::end(header.section_tables),
135 [](NcaSectionTableEntry entry) { return entry.media_offset > 0; });
136
137 for (int i = 0; i < number_sections; ++i) {
138 // Seek to beginning of this section.
139 file.Seek(SECTION_HEADER_OFFSET + i * SECTION_HEADER_SIZE, SEEK_SET);
140 std::array<u8, sizeof(NcaSectionHeaderBlock)> array{};
141 if (sizeof(NcaSectionHeaderBlock) !=
142 file.ReadBytes(array.data(), sizeof(NcaSectionHeaderBlock)))
143 NGLOG_CRITICAL(Loader, "File reader errored out during header read.");
144
145 NcaSectionHeaderBlock block{};
146 std::memcpy(&block, array.data(), sizeof(NcaSectionHeaderBlock));
147
148 if (block.filesystem_type == NcaSectionFilesystemType::ROMFS) {
149 romfs_offset = header.section_tables[i].media_offset * MEDIA_OFFSET_MULTIPLIER;
150 romfs_size =
151 header.section_tables[i].media_end_offset * MEDIA_OFFSET_MULTIPLIER - romfs_offset;
152 } else if (block.filesystem_type == NcaSectionFilesystemType::PFS0) {
153 Pfs0Superblock sb{};
154 // Seek back to beginning of this section.
155 file.Seek(SECTION_HEADER_OFFSET + i * SECTION_HEADER_SIZE, SEEK_SET);
156 if (sizeof(Pfs0Superblock) != file.ReadBytes(&sb, sizeof(Pfs0Superblock)))
157 NGLOG_CRITICAL(Loader, "File reader errored out during header read.");
158
159 u64 offset = (static_cast<u64>(header.section_tables[i].media_offset) *
160 MEDIA_OFFSET_MULTIPLIER) +
161 sb.pfs0_header_offset;
162 FileSys::PartitionFilesystem npfs{};
163 ResultStatus status = npfs.Load(path, offset);
164
165 if (status == ResultStatus::Success) {
166 pfs.emplace_back(std::move(npfs));
167 pfs_offset.emplace_back(offset);
168 }
169 }
170 }
171
172 for (size_t i = 0; i < pfs.size(); ++i) {
173 if (IsPfsExeFs(pfs[i]))
174 exefs_id = i;
175 }
176
177 return ResultStatus::Success;
178}
179
180FileSys::PartitionFilesystem Nca::GetPfs(u8 id) const {
181 return pfs[id];
182}
183
184u64 Nca::GetExeFsFileOffset(const std::string& file_name) const {
185 if (exefs_id == boost::none)
186 return 0;
187 return pfs[*exefs_id].GetFileOffset(file_name) + pfs_offset[*exefs_id];
188}
189
190u64 Nca::GetExeFsFileSize(const std::string& file_name) const {
191 if (exefs_id == boost::none)
192 return 0;
193 return pfs[*exefs_id].GetFileSize(file_name);
194}
195
// Byte offset of the RomFS section inside the NCA file; stays 0 if Load found no RomFS section.
196u64 Nca::GetRomFsOffset() const {
197 return romfs_offset;
198}
199
// Byte size of the RomFS section; 0 doubles as "no RomFS" for callers such as ReadRomFS.
200u64 Nca::GetRomFsSize() const {
201 return romfs_size;
202}
203
204std::vector<u8> Nca::GetExeFsFile(const std::string& file_name) {
205 std::vector<u8> out(GetExeFsFileSize(file_name));
206 file.Seek(GetExeFsFileOffset(file_name), SEEK_SET);
207 file.ReadBytes(out.data(), GetExeFsFileSize(file_name));
208 return out;
209}
210
// Hands the opened file to the AppLoader base and keeps the path, which Load passes on to the NCA
// parser and ReadRomFS uses to reopen the file.
211AppLoader_NCA::AppLoader_NCA(FileUtil::IOFile&& file, std::string filepath)
212 : AppLoader(std::move(file)), filepath(std::move(filepath)) {}
213
214FileType AppLoader_NCA::IdentifyType(FileUtil::IOFile& file, const std::string&) {
215 file.Seek(0, SEEK_SET);
216 std::array<u8, 0x400> header_enc_array{};
217 if (0x400 != file.ReadBytes(header_enc_array.data(), 0x400))
218 return FileType::Error;
219
220 // TODO(DarkLordZach): Assuming everything is decrypted. Add crypto support.
221 NcaHeader header{};
222 std::memcpy(&header, header_enc_array.data(), sizeof(NcaHeader));
223
224 if (IsValidNca(header) && header.content_type == NcaContentType::Program)
225 return FileType::NCA;
226
227 return FileType::Error;
228}
229
230ResultStatus AppLoader_NCA::Load(Kernel::SharedPtr<Kernel::Process>& process) {
231 if (is_loaded) {
232 return ResultStatus::ErrorAlreadyLoaded;
233 }
234 if (!file.IsOpen()) {
235 return ResultStatus::Error;
236 }
237
238 nca = std::make_unique<Nca>();
239 ResultStatus result = nca->Load(std::move(file), filepath);
240 if (result != ResultStatus::Success) {
241 return result;
242 }
243
244 result = metadata.Load(nca->GetExeFsFile("main.npdm"));
245 if (result != ResultStatus::Success) {
246 return result;
247 }
248 metadata.Print();
249
250 const FileSys::ProgramAddressSpaceType arch_bits{metadata.GetAddressSpaceType()};
251 if (arch_bits == FileSys::ProgramAddressSpaceType::Is32Bit) {
252 return ResultStatus::ErrorUnsupportedArch;
253 }
254
255 VAddr next_load_addr{Memory::PROCESS_IMAGE_VADDR};
256 for (const auto& module : {"rtld", "main", "subsdk0", "subsdk1", "subsdk2", "subsdk3",
257 "subsdk4", "subsdk5", "subsdk6", "subsdk7", "sdk"}) {
258 const VAddr load_addr = next_load_addr;
259 next_load_addr = AppLoader_NSO::LoadModule(module, nca->GetExeFsFile(module), load_addr);
260 if (next_load_addr) {
261 NGLOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", module, load_addr);
262 } else {
263 next_load_addr = load_addr;
264 }
265 }
266
267 process->program_id = metadata.GetTitleID();
268 process->svc_access_mask.set();
269 process->address_mappings = default_address_mappings;
270 process->resource_limit =
271 Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION);
272 process->Run(Memory::PROCESS_IMAGE_VADDR, metadata.GetMainThreadPriority(),
273 metadata.GetMainThreadStackSize());
274
275 if (nca->GetRomFsSize() > 0)
276 Service::FileSystem::RegisterFileSystem(std::make_unique<FileSys::RomFS_Factory>(*this),
277 Service::FileSystem::Type::RomFS);
278
279 is_loaded = true;
280 return ResultStatus::Success;
281}
282
283ResultStatus AppLoader_NCA::ReadRomFS(std::shared_ptr<FileUtil::IOFile>& romfs_file, u64& offset,
284 u64& size) {
285 if (nca->GetRomFsSize() == 0) {
286 NGLOG_DEBUG(Loader, "No RomFS available");
287 return ResultStatus::ErrorNotUsed;
288 }
289
290 romfs_file = std::make_shared<FileUtil::IOFile>(filepath, "rb");
291
292 offset = nca->GetRomFsOffset();
293 size = nca->GetRomFsSize();
294
295 NGLOG_DEBUG(Loader, "RomFS offset: 0x{:016X}", offset);
296 NGLOG_DEBUG(Loader, "RomFS size: 0x{:016X}", size);
297
298 return ResultStatus::Success;
299}
300
301AppLoader_NCA::~AppLoader_NCA() = default;
302
303} // namespace Loader
diff --git a/src/core/loader/nca.h b/src/core/loader/nca.h
new file mode 100644
index 000000000..3b6c451d0
--- /dev/null
+++ b/src/core/loader/nca.h
@@ -0,0 +1,49 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include "common/common_types.h"
9#include "core/file_sys/partition_filesystem.h"
10#include "core/file_sys/program_metadata.h"
11#include "core/hle/kernel/kernel.h"
12#include "core/loader/loader.h"
13
14namespace Loader {
15
16class Nca;
17
18/// Loads an NCA file
19class AppLoader_NCA final : public AppLoader {
20public:
21 AppLoader_NCA(FileUtil::IOFile&& file, std::string filepath);
22
23 /**
24 * Returns the type of the file
25 * @param file FileUtil::IOFile open file
26 * @param filepath Path of the file that we are opening.
27 * @return FileType found, or FileType::Error if this loader doesn't know it
28 */
29 static FileType IdentifyType(FileUtil::IOFile& file, const std::string& filepath);
30
31 FileType GetFileType() override {
32 return IdentifyType(file, filepath);
33 }
34
35 ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) override;
36
37 ResultStatus ReadRomFS(std::shared_ptr<FileUtil::IOFile>& romfs_file, u64& offset,
38 u64& size) override;
39
40 ~AppLoader_NCA();
41
42private:
43 std::string filepath;
44 FileSys::ProgramMetadata metadata;
45
46 std::unique_ptr<Nca> nca;
47};
48
49} // namespace Loader
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 01be9e217..845ed7e90 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -66,8 +66,22 @@ FileType AppLoader_NSO::IdentifyType(FileUtil::IOFile& file, const std::string&)
66 return FileType::Error; 66 return FileType::Error;
67} 67}
68 68
69static std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
70 const NsoSegmentHeader& header) {
71 std::vector<u8> uncompressed_data;
72 uncompressed_data.resize(header.size);
73 const int bytes_uncompressed = LZ4_decompress_safe(
74 reinterpret_cast<const char*>(compressed_data.data()),
75 reinterpret_cast<char*>(uncompressed_data.data()), compressed_data.size(), header.size);
76
77 ASSERT_MSG(bytes_uncompressed == header.size && bytes_uncompressed == uncompressed_data.size(),
78 "{} != {} != {}", bytes_uncompressed, header.size, uncompressed_data.size());
79
80 return uncompressed_data;
81}
82
69static std::vector<u8> ReadSegment(FileUtil::IOFile& file, const NsoSegmentHeader& header, 83static std::vector<u8> ReadSegment(FileUtil::IOFile& file, const NsoSegmentHeader& header,
70 int compressed_size) { 84 size_t compressed_size) {
71 std::vector<u8> compressed_data; 85 std::vector<u8> compressed_data;
72 compressed_data.resize(compressed_size); 86 compressed_data.resize(compressed_size);
73 87
@@ -77,22 +91,65 @@ static std::vector<u8> ReadSegment(FileUtil::IOFile& file, const NsoSegmentHeade
77 return {}; 91 return {};
78 } 92 }
79 93
80 std::vector<u8> uncompressed_data; 94 return DecompressSegment(compressed_data, header);
81 uncompressed_data.resize(header.size);
82 const int bytes_uncompressed = LZ4_decompress_safe(
83 reinterpret_cast<const char*>(compressed_data.data()),
84 reinterpret_cast<char*>(uncompressed_data.data()), compressed_size, header.size);
85
86 ASSERT_MSG(bytes_uncompressed == header.size && bytes_uncompressed == uncompressed_data.size(),
87 "{} != {} != {}", bytes_uncompressed, header.size, uncompressed_data.size());
88
89 return uncompressed_data;
90} 95}
91 96
92static constexpr u32 PageAlignSize(u32 size) { 97static constexpr u32 PageAlignSize(u32 size) {
93 return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK; 98 return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
94} 99}
95 100
101VAddr AppLoader_NSO::LoadModule(const std::string& name, const std::vector<u8>& file_data,
102 VAddr load_base) {
103 if (file_data.size() < sizeof(NsoHeader))
104 return {};
105
106 NsoHeader nso_header;
107 std::memcpy(&nso_header, file_data.data(), sizeof(NsoHeader));
108
109 if (nso_header.magic != Common::MakeMagic('N', 'S', 'O', '0'))
110 return {};
111
112 // Build program image
113 Kernel::SharedPtr<Kernel::CodeSet> codeset = Kernel::CodeSet::Create("");
114 std::vector<u8> program_image;
115 for (int i = 0; i < nso_header.segments.size(); ++i) {
116 std::vector<u8> compressed_data(nso_header.segments_compressed_size[i]);
117 for (int j = 0; j < nso_header.segments_compressed_size[i]; ++j)
118 compressed_data[j] = file_data[nso_header.segments[i].offset + j];
119 std::vector<u8> data = DecompressSegment(compressed_data, nso_header.segments[i]);
120 program_image.resize(nso_header.segments[i].location);
121 program_image.insert(program_image.end(), data.begin(), data.end());
122 codeset->segments[i].addr = nso_header.segments[i].location;
123 codeset->segments[i].offset = nso_header.segments[i].location;
124 codeset->segments[i].size = PageAlignSize(static_cast<u32>(data.size()));
125 }
126
127 // MOD header pointer is at .text offset + 4
128 u32 module_offset;
129 std::memcpy(&module_offset, program_image.data() + 4, sizeof(u32));
130
131 // Read MOD header
132 ModHeader mod_header{};
133 // Default .bss to size in segment header if MOD0 section doesn't exist
134 u32 bss_size{PageAlignSize(nso_header.segments[2].bss_size)};
135 std::memcpy(&mod_header, program_image.data() + module_offset, sizeof(ModHeader));
136 const bool has_mod_header{mod_header.magic == Common::MakeMagic('M', 'O', 'D', '0')};
137 if (has_mod_header) {
138 // Resize program image to include .bss section and page align each section
139 bss_size = PageAlignSize(mod_header.bss_end_offset - mod_header.bss_start_offset);
140 }
141 codeset->data.size += bss_size;
142 const u32 image_size{PageAlignSize(static_cast<u32>(program_image.size()) + bss_size)};
143 program_image.resize(image_size);
144
145 // Load codeset for current process
146 codeset->name = name;
147 codeset->memory = std::make_shared<std::vector<u8>>(std::move(program_image));
148 Core::CurrentProcess()->LoadModule(codeset, load_base);
149
150 return load_base + image_size;
151}
152
96VAddr AppLoader_NSO::LoadModule(const std::string& path, VAddr load_base) { 153VAddr AppLoader_NSO::LoadModule(const std::string& path, VAddr load_base) {
97 FileUtil::IOFile file(path, "rb"); 154 FileUtil::IOFile file(path, "rb");
98 if (!file.IsOpen()) { 155 if (!file.IsOpen()) {
diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h
index 1ae30a824..386f4d39a 100644
--- a/src/core/loader/nso.h
+++ b/src/core/loader/nso.h
@@ -29,6 +29,9 @@ public:
29 return IdentifyType(file, filepath); 29 return IdentifyType(file, filepath);
30 } 30 }
31 31
32 static VAddr LoadModule(const std::string& name, const std::vector<u8>& file_data,
33 VAddr load_base);
34
32 static VAddr LoadModule(const std::string& path, VAddr load_base); 35 static VAddr LoadModule(const std::string& path, VAddr load_base);
33 36
34 ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) override; 37 ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) override;
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 3b81acd63..f070dee7d 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -241,6 +241,10 @@ bool IsValidVirtualAddress(const VAddr vaddr) {
241 return IsValidVirtualAddress(*Core::CurrentProcess(), vaddr); 241 return IsValidVirtualAddress(*Core::CurrentProcess(), vaddr);
242} 242}
243 243
244bool IsKernelVirtualAddress(const VAddr vaddr) {
245 return KERNEL_REGION_VADDR <= vaddr && vaddr < KERNEL_REGION_END;
246}
247
244bool IsValidPhysicalAddress(const PAddr paddr) { 248bool IsValidPhysicalAddress(const PAddr paddr) {
245 return GetPhysicalPointer(paddr) != nullptr; 249 return GetPhysicalPointer(paddr) != nullptr;
246} 250}
diff --git a/src/core/memory.h b/src/core/memory.h
index 3f56a2c6a..8d5d017a4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -188,6 +188,11 @@ enum : VAddr {
188 MAP_REGION_VADDR = NEW_MAP_REGION_VADDR_END, 188 MAP_REGION_VADDR = NEW_MAP_REGION_VADDR_END,
189 MAP_REGION_SIZE = 0x1000000000, 189 MAP_REGION_SIZE = 0x1000000000,
190 MAP_REGION_VADDR_END = MAP_REGION_VADDR + MAP_REGION_SIZE, 190 MAP_REGION_VADDR_END = MAP_REGION_VADDR + MAP_REGION_SIZE,
191
192 /// Kernel Virtual Address Range
193 KERNEL_REGION_VADDR = 0xFFFFFF8000000000,
194 KERNEL_REGION_SIZE = 0x7FFFE00000,
195 KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE,
191}; 196};
192 197
193/// Currently active page table 198/// Currently active page table
@@ -197,6 +202,8 @@ PageTable* GetCurrentPageTable();
197/// Determines if the given VAddr is valid for the specified process. 202/// Determines if the given VAddr is valid for the specified process.
198bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr); 203bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr);
199bool IsValidVirtualAddress(const VAddr addr); 204bool IsValidVirtualAddress(const VAddr addr);
205/// Determines if the given VAddr is a kernel address
206bool IsKernelVirtualAddress(const VAddr addr);
200 207
201bool IsValidPhysicalAddress(const PAddr addr); 208bool IsValidPhysicalAddress(const PAddr addr);
202 209
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 281810357..c6431e722 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -9,6 +9,8 @@ add_library(video_core STATIC
9 engines/maxwell_3d.h 9 engines/maxwell_3d.h
10 engines/maxwell_compute.cpp 10 engines/maxwell_compute.cpp
11 engines/maxwell_compute.h 11 engines/maxwell_compute.h
12 engines/maxwell_dma.cpp
13 engines/maxwell_dma.h
12 engines/shader_bytecode.h 14 engines/shader_bytecode.h
13 gpu.cpp 15 gpu.cpp
14 gpu.h 16 gpu.h
@@ -39,6 +41,8 @@ add_library(video_core STATIC
39 renderer_opengl/maxwell_to_gl.h 41 renderer_opengl/maxwell_to_gl.h
40 renderer_opengl/renderer_opengl.cpp 42 renderer_opengl/renderer_opengl.cpp
41 renderer_opengl/renderer_opengl.h 43 renderer_opengl/renderer_opengl.h
44 textures/astc.cpp
45 textures/astc.h
42 textures/decoders.cpp 46 textures/decoders.cpp
43 textures/decoders.h 47 textures/decoders.h
44 textures/texture.h 48 textures/texture.h
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index d72d6f760..cec9cb9f3 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -16,6 +16,7 @@
16#include "video_core/engines/fermi_2d.h" 16#include "video_core/engines/fermi_2d.h"
17#include "video_core/engines/maxwell_3d.h" 17#include "video_core/engines/maxwell_3d.h"
18#include "video_core/engines/maxwell_compute.h" 18#include "video_core/engines/maxwell_compute.h"
19#include "video_core/engines/maxwell_dma.h"
19#include "video_core/gpu.h" 20#include "video_core/gpu.h"
20#include "video_core/renderer_base.h" 21#include "video_core/renderer_base.h"
21#include "video_core/video_core.h" 22#include "video_core/video_core.h"
@@ -60,8 +61,11 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params)
60 case EngineID::MAXWELL_COMPUTE_B: 61 case EngineID::MAXWELL_COMPUTE_B:
61 maxwell_compute->WriteReg(method, value); 62 maxwell_compute->WriteReg(method, value);
62 break; 63 break;
64 case EngineID::MAXWELL_DMA_COPY_A:
65 maxwell_dma->WriteReg(method, value);
66 break;
63 default: 67 default:
64 UNIMPLEMENTED(); 68 UNIMPLEMENTED_MSG("Unimplemented engine");
65 } 69 }
66} 70}
67 71
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 6b9382f06..998b7c843 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -47,6 +47,7 @@ void Fermi2D::HandleSurfaceCopy() {
47 47
48 if (regs.src.linear == regs.dst.linear) { 48 if (regs.src.linear == regs.dst.linear) {
49 // If the input layout and the output layout are the same, just perform a raw copy. 49 // If the input layout and the output layout are the same, just perform a raw copy.
50 ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
50 Memory::CopyBlock(dest_cpu, source_cpu, 51 Memory::CopyBlock(dest_cpu, source_cpu,
51 src_bytes_per_pixel * regs.dst.width * regs.dst.height); 52 src_bytes_per_pixel * regs.dst.width * regs.dst.height);
52 return; 53 return;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index ef12d9300..93c43c8cb 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -328,8 +328,9 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
328 328
329 Texture::FullTextureInfo tex_info{}; 329 Texture::FullTextureInfo tex_info{};
330 // TODO(Subv): Use the shader to determine which textures are actually accessed. 330 // TODO(Subv): Use the shader to determine which textures are actually accessed.
331 tex_info.index = (current_texture - tex_info_buffer.address - TextureInfoOffset) / 331 tex_info.index =
332 sizeof(Texture::TextureHandle); 332 static_cast<u32>(current_texture - tex_info_buffer.address - TextureInfoOffset) /
333 sizeof(Texture::TextureHandle);
333 334
334 // Load the TIC data. 335 // Load the TIC data.
335 if (tex_handle.tic_id != 0) { 336 if (tex_handle.tic_id != 0) {
@@ -354,6 +355,40 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
354 return textures; 355 return textures;
355} 356}
356 357
358Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, size_t offset) const {
359 auto& shader = state.shader_stages[static_cast<size_t>(stage)];
360 auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
361 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
362
363 GPUVAddr tex_info_address = tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
364
365 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
366
367 boost::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
368 Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
369
370 Texture::FullTextureInfo tex_info{};
371 tex_info.index = static_cast<u32>(offset);
372
373 // Load the TIC data.
374 if (tex_handle.tic_id != 0) {
375 tex_info.enabled = true;
376
377 auto tic_entry = GetTICEntry(tex_handle.tic_id);
378 // TODO(Subv): Workaround for BitField's move constructor being deleted.
379 std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
380 }
381
382 // Load the TSC data
383 if (tex_handle.tsc_id != 0) {
384 auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
385 // TODO(Subv): Workaround for BitField's move constructor being deleted.
386 std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
387 }
388
389 return tex_info;
390}
391
357u32 Maxwell3D::GetRegisterValue(u32 method) const { 392u32 Maxwell3D::GetRegisterValue(u32 method) const {
358 ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); 393 ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
359 return regs.reg_array[method]; 394 return regs.reg_array[method];
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 245410c95..2dc251205 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -318,6 +318,7 @@ public:
318 Equation equation_a; 318 Equation equation_a;
319 Factor factor_source_a; 319 Factor factor_source_a;
320 Factor factor_dest_a; 320 Factor factor_dest_a;
321 INSERT_PADDING_WORDS(1);
321 }; 322 };
322 323
323 union { 324 union {
@@ -432,7 +433,27 @@ public:
432 }; 433 };
433 } rt_control; 434 } rt_control;
434 435
435 INSERT_PADDING_WORDS(0xCF); 436 INSERT_PADDING_WORDS(0x31);
437
438 u32 independent_blend_enable;
439
440 INSERT_PADDING_WORDS(0x15);
441
442 struct {
443 u32 separate_alpha;
444 Blend::Equation equation_rgb;
445 Blend::Factor factor_source_rgb;
446 Blend::Factor factor_dest_rgb;
447 Blend::Equation equation_a;
448 Blend::Factor factor_source_a;
449 INSERT_PADDING_WORDS(1);
450 Blend::Factor factor_dest_a;
451
452 u32 enable_common;
453 u32 enable[NumRenderTargets];
454 } blend;
455
456 INSERT_PADDING_WORDS(0x77);
436 457
437 struct { 458 struct {
438 u32 tsc_address_high; 459 u32 tsc_address_high;
@@ -557,9 +578,7 @@ public:
557 578
558 } vertex_array[NumVertexArrays]; 579 } vertex_array[NumVertexArrays];
559 580
560 Blend blend; 581 Blend independent_blend[NumRenderTargets];
561
562 INSERT_PADDING_WORDS(0x39);
563 582
564 struct { 583 struct {
565 u32 limit_high; 584 u32 limit_high;
@@ -664,6 +683,9 @@ public:
664 /// Returns a list of enabled textures for the specified shader stage. 683 /// Returns a list of enabled textures for the specified shader stage.
665 std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; 684 std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
666 685
686 /// Returns the texture information for a specific texture in a specific shader stage.
687 Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const;
688
667 /// Returns whether the specified shader stage is enabled or not. 689 /// Returns whether the specified shader stage is enabled or not.
668 bool IsShaderStageEnabled(Regs::ShaderStage stage) const; 690 bool IsShaderStageEnabled(Regs::ShaderStage stage) const;
669 691
@@ -719,6 +741,8 @@ ASSERT_REG_POSITION(vertex_buffer, 0x35D);
719ASSERT_REG_POSITION(zeta, 0x3F8); 741ASSERT_REG_POSITION(zeta, 0x3F8);
720ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458); 742ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
721ASSERT_REG_POSITION(rt_control, 0x487); 743ASSERT_REG_POSITION(rt_control, 0x487);
744ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
745ASSERT_REG_POSITION(blend, 0x4CF);
722ASSERT_REG_POSITION(tsc, 0x557); 746ASSERT_REG_POSITION(tsc, 0x557);
723ASSERT_REG_POSITION(tic, 0x55D); 747ASSERT_REG_POSITION(tic, 0x55D);
724ASSERT_REG_POSITION(code_address, 0x582); 748ASSERT_REG_POSITION(code_address, 0x582);
@@ -726,7 +750,7 @@ ASSERT_REG_POSITION(draw, 0x585);
726ASSERT_REG_POSITION(index_array, 0x5F2); 750ASSERT_REG_POSITION(index_array, 0x5F2);
727ASSERT_REG_POSITION(query, 0x6C0); 751ASSERT_REG_POSITION(query, 0x6C0);
728ASSERT_REG_POSITION(vertex_array[0], 0x700); 752ASSERT_REG_POSITION(vertex_array[0], 0x700);
729ASSERT_REG_POSITION(blend, 0x780); 753ASSERT_REG_POSITION(independent_blend, 0x780);
730ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0); 754ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
731ASSERT_REG_POSITION(shader_config[0], 0x800); 755ASSERT_REG_POSITION(shader_config[0], 0x800);
732ASSERT_REG_POSITION(const_buffer, 0x8E0); 756ASSERT_REG_POSITION(const_buffer, 0x8E0);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
new file mode 100644
index 000000000..442138988
--- /dev/null
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -0,0 +1,69 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/memory.h"
6#include "video_core/engines/maxwell_dma.h"
7#include "video_core/textures/decoders.h"
8
9namespace Tegra {
10namespace Engines {
11
// Stores the memory manager that HandleCopy uses to translate GPU addresses to CPU addresses.
12MaxwellDMA::MaxwellDMA(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
13
14void MaxwellDMA::WriteReg(u32 method, u32 value) {
15 ASSERT_MSG(method < Regs::NUM_REGS,
16 "Invalid MaxwellDMA register, increase the size of the Regs structure");
17
18 regs.reg_array[method] = value;
19
20#define MAXWELLDMA_REG_INDEX(field_name) \
21 (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32))
22
23 switch (method) {
24 case MAXWELLDMA_REG_INDEX(exec): {
25 HandleCopy();
26 break;
27 }
28 }
29
30#undef MAXWELLDMA_REG_INDEX
31}
32
33void MaxwellDMA::HandleCopy() {
34 NGLOG_WARNING(HW_GPU, "Requested a DMA copy");
35
36 const GPUVAddr source = regs.src_address.Address();
37 const GPUVAddr dest = regs.dst_address.Address();
38
39 const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
40 const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
41
42 // TODO(Subv): Perform more research and implement all features of this engine.
43 ASSERT(regs.exec.enable_swizzle == 0);
44 ASSERT(regs.exec.enable_2d == 1);
45 ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
46 ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
47 ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
48 ASSERT(regs.src_params.pos_x == 0);
49 ASSERT(regs.src_params.pos_y == 0);
50 ASSERT(regs.dst_params.pos_x == 0);
51 ASSERT(regs.dst_params.pos_y == 0);
52 ASSERT(regs.exec.is_dst_linear != regs.exec.is_src_linear);
53
54 u8* src_buffer = Memory::GetPointer(source_cpu);
55 u8* dst_buffer = Memory::GetPointer(dest_cpu);
56
57 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
58 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
59 Texture::CopySwizzledData(regs.src_params.size_x, regs.src_params.size_y, 1, 1, src_buffer,
60 dst_buffer, true, regs.src_params.BlockHeight());
61 } else {
62 // If the input is linear and the output is tiled, swizzle the input and copy it over.
63 Texture::CopySwizzledData(regs.dst_params.size_x, regs.dst_params.size_y, 1, 1, dst_buffer,
64 src_buffer, false, regs.dst_params.BlockHeight());
65 }
66}
67
68} // namespace Engines
69} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
new file mode 100644
index 000000000..905749bde
--- /dev/null
+++ b/src/video_core/engines/maxwell_dma.h
@@ -0,0 +1,155 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include "common/assert.h"
9#include "common/bit_field.h"
10#include "common/common_funcs.h"
11#include "common/common_types.h"
12#include "video_core/gpu.h"
13#include "video_core/memory_manager.h"
14
15namespace Tegra {
16namespace Engines {
17
/// Emulated Maxwell DMA engine: owns the engine's register file and runs a copy (HandleCopy) when
/// the exec register is written through WriteReg.
18class MaxwellDMA final {
19public:
20 explicit MaxwellDMA(MemoryManager& memory_manager);
21 ~MaxwellDMA() = default;
22
23 /// Write the value to the register identified by method.
24 void WriteReg(u32 method, u32 value);
25
/// Register file of the engine: NUM_REGS 32-bit words, reachable either through the named
/// fields or through the flat reg_array (both share the same union storage).
26 struct Regs {
27 static constexpr size_t NUM_REGS = 0x1D6;
28
/// Surface description block used for both dst_params and src_params (6 words / 24 bytes,
/// enforced by the static_assert that follows the struct).
29 struct Parameters {
// First word: three 4-bit block dimension fields.
30 union {
31 BitField<0, 4, u32> block_depth;
32 BitField<4, 4, u32> block_height;
33 BitField<8, 4, u32> block_width;
34 };
35 u32 size_x;
36 u32 size_y;
37 u32 size_z;
38 u32 pos_z;
// Last word: x position in the low 16 bits, y position in the high 16 bits.
39 union {
40 BitField<0, 16, u32> pos_x;
41 BitField<16, 16, u32> pos_y;
42 };
43
/// Returns 2^block_height, i.e. the raw field is treated as a power-of-two exponent.
44 u32 BlockHeight() const {
45 return 1 << block_height;
46 }
47 };
48
49 static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
50
// Value of exec.copy_mode. The meaning of Unk1/Unk2 is not established here; HandleCopy only
// accepts Unk2.
51 enum class CopyMode : u32 {
52 None = 0,
53 Unk1 = 1,
54 Unk2 = 2,
55 };
56
// Value of exec.query_mode. HandleCopy only accepts None.
57 enum class QueryMode : u32 {
58 None = 0,
59 Short = 1,
60 Long = 2,
61 };
62
// Value of exec.query_intr. HandleCopy only accepts None.
63 enum class QueryIntr : u32 {
64 None = 0,
65 Block = 1,
66 NonBlock = 2,
67 };
68
// Named view of the registers. The padding entries place each field at the word index that
// ASSERT_REG_POSITION verifies after the class; do not reorder or resize anything here.
69 union {
70 struct {
71 INSERT_PADDING_WORDS(0xC0);
72
// Control word at register index 0xC0; writing it triggers the copy (see WriteReg).
73 struct {
74 union {
75 BitField<0, 2, CopyMode> copy_mode;
76 BitField<2, 1, u32> flush;
77
78 BitField<3, 2, QueryMode> query_mode;
79 BitField<5, 2, QueryIntr> query_intr;
80
81 BitField<7, 1, u32> is_src_linear;
82 BitField<8, 1, u32> is_dst_linear;
83
84 BitField<9, 1, u32> enable_2d;
85 BitField<10, 1, u32> enable_swizzle;
86 };
87 } exec;
88
89 INSERT_PADDING_WORDS(0x3F);
90
// GPU virtual address of the copy source, stored high word first.
91 struct {
92 u32 address_high;
93 u32 address_low;
94
/// Combines both halves, with address_high as the upper 32 bits.
95 GPUVAddr Address() const {
96 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
97 address_low);
98 }
99 } src_address;
100
// GPU virtual address of the copy destination, same layout as src_address.
101 struct {
102 u32 address_high;
103 u32 address_low;
104
/// Combines both halves, with address_high as the upper 32 bits.
105 GPUVAddr Address() const {
106 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
107 address_low);
108 }
109 } dst_address;
110
// NOTE(review): the pitch/count registers are declared but not read by HandleCopy.
111 u32 src_pitch;
112 u32 dst_pitch;
113 u32 x_count;
114 u32 y_count;
115
116 INSERT_PADDING_WORDS(0xBB);
117
118 Parameters dst_params;
119
120 INSERT_PADDING_WORDS(1);
121
122 Parameters src_params;
123
124 INSERT_PADDING_WORDS(0x13);
125 };
// Flat view of the same storage, one entry per register.
126 std::array<u32, NUM_REGS> reg_array;
127 };
128 } regs{};
129
// Used by HandleCopy to translate the source/destination GPU addresses to CPU addresses.
130 MemoryManager& memory_manager;
131
132private:
133 /// Performs the copy from the source buffer to the destination buffer as configured in the
134 /// registers.
135 void HandleCopy();
136};
137
// Compile-time layout checks: `position` is a register (32-bit word) index, so it is multiplied
// by 4 to compare against the byte offset of the named field inside MaxwellDMA::Regs.
138#define ASSERT_REG_POSITION(field_name, position) \
139 static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \
140 "Field " #field_name " has invalid position")
141
142ASSERT_REG_POSITION(exec, 0xC0);
143ASSERT_REG_POSITION(src_address, 0x100);
144ASSERT_REG_POSITION(dst_address, 0x102);
145ASSERT_REG_POSITION(src_pitch, 0x104);
146ASSERT_REG_POSITION(dst_pitch, 0x105);
147ASSERT_REG_POSITION(x_count, 0x106);
148ASSERT_REG_POSITION(y_count, 0x107);
149ASSERT_REG_POSITION(dst_params, 0x1C3);
150ASSERT_REG_POSITION(src_params, 0x1CA);
151
// The helper is only needed for the checks above, so it is not leaked to includers.
152#undef ASSERT_REG_POSITION
153
154} // namespace Engines
155} // namespace Tegra
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 38757c038..cb4db0679 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -109,11 +109,6 @@ union Sampler {
109 u64 value{}; 109 u64 value{};
110}; 110};
111 111
112union Uniform {
113 BitField<20, 14, u64> offset;
114 BitField<34, 5, u64> index;
115};
116
117} // namespace Shader 112} // namespace Shader
118} // namespace Tegra 113} // namespace Tegra
119 114
@@ -173,6 +168,31 @@ enum class SubOp : u64 {
173 Min = 0x8, 168 Min = 0x8,
174}; 169};
175 170
/// Rounding selector of the conversion.f2i.rounding instruction field (2 bits starting at bit 39).
171enum class F2iRoundingOp : u64 {
172 None = 0,
173 Floor = 1,
174 Ceil = 2,
175 Trunc = 3,
176};
177
/// Rounding selector of the conversion.f2f.rounding instruction field (4 bits starting at bit 39).
/// The encoding is sparse: only the values listed here are named.
178enum class F2fRoundingOp : u64 {
179 None = 0,
180 Pass = 3,
181 Round = 8,
182 Floor = 9,
183 Ceil = 10,
184 Trunc = 11,
185};
186
/// Data type selector of the ld_c.type instruction field (3 bits starting at bit 48).
187enum class UniformType : u64 {
188 UnsignedByte = 0,
189 SignedByte = 1,
190 UnsignedShort = 2,
191 SignedShort = 3,
192 Single = 4,
193 Double = 5,
194};
195
176union Instruction { 196union Instruction {
177 Instruction& operator=(const Instruction& instr) { 197 Instruction& operator=(const Instruction& instr) {
178 value = instr.value; 198 value = instr.value;
@@ -196,12 +216,12 @@ union Instruction {
196 216
197 union { 217 union {
198 BitField<20, 19, u64> imm20_19; 218 BitField<20, 19, u64> imm20_19;
199 BitField<20, 32, u64> imm20_32; 219 BitField<20, 32, s64> imm20_32;
200 BitField<45, 1, u64> negate_b; 220 BitField<45, 1, u64> negate_b;
201 BitField<46, 1, u64> abs_a; 221 BitField<46, 1, u64> abs_a;
202 BitField<48, 1, u64> negate_a; 222 BitField<48, 1, u64> negate_a;
203 BitField<49, 1, u64> abs_b; 223 BitField<49, 1, u64> abs_b;
204 BitField<50, 1, u64> abs_d; 224 BitField<50, 1, u64> saturate_d;
205 BitField<56, 1, u64> negate_imm; 225 BitField<56, 1, u64> negate_imm;
206 226
207 union { 227 union {
@@ -210,10 +230,18 @@ union Instruction {
210 } fmnmx; 230 } fmnmx;
211 231
212 union { 232 union {
233 BitField<39, 1, u64> invert_a;
234 BitField<40, 1, u64> invert_b;
235 BitField<41, 2, LogicOperation> operation;
236 BitField<44, 2, u64> unk44;
237 BitField<48, 3, Pred> pred48;
238 } lop;
239
240 union {
213 BitField<53, 2, LogicOperation> operation; 241 BitField<53, 2, LogicOperation> operation;
214 BitField<55, 1, u64> invert_a; 242 BitField<55, 1, u64> invert_a;
215 BitField<56, 1, u64> invert_b; 243 BitField<56, 1, u64> invert_b;
216 } lop; 244 } lop32i;
217 245
218 float GetImm20_19() const { 246 float GetImm20_19() const {
219 float result{}; 247 float result{};
@@ -226,7 +254,7 @@ union Instruction {
226 254
227 float GetImm20_32() const { 255 float GetImm20_32() const {
228 float result{}; 256 float result{};
229 u32 imm{static_cast<u32>(imm20_32)}; 257 s32 imm{static_cast<s32>(imm20_32)};
230 std::memcpy(&result, &imm, sizeof(imm)); 258 std::memcpy(&result, &imm, sizeof(imm));
231 return result; 259 return result;
232 } 260 }
@@ -240,10 +268,30 @@ union Instruction {
240 } alu; 268 } alu;
241 269
242 union { 270 union {
271 BitField<48, 1, u64> is_signed;
272 } shift;
273
274 union {
243 BitField<39, 5, u64> shift_amount; 275 BitField<39, 5, u64> shift_amount;
244 BitField<48, 1, u64> negate_b; 276 BitField<48, 1, u64> negate_b;
245 BitField<49, 1, u64> negate_a; 277 BitField<49, 1, u64> negate_a;
246 } iscadd; 278 } alu_integer;
279
280 union {
281 BitField<54, 1, u64> saturate;
282 BitField<56, 1, u64> negate_a;
283 } iadd32i;
284
285 union {
286 BitField<20, 8, u64> shift_position;
287 BitField<28, 8, u64> shift_length;
288 BitField<48, 1, u64> negate_b;
289 BitField<49, 1, u64> negate_a;
290
291 u64 GetLeftShiftValue() const {
292 return 32 - (shift_position + shift_length);
293 }
294 } bfe;
247 295
248 union { 296 union {
249 BitField<48, 1, u64> negate_b; 297 BitField<48, 1, u64> negate_b;
@@ -251,6 +299,11 @@ union Instruction {
251 } ffma; 299 } ffma;
252 300
253 union { 301 union {
302 BitField<48, 3, UniformType> type;
303 BitField<44, 2, u64> unknown;
304 } ld_c;
305
306 union {
254 BitField<0, 3, u64> pred0; 307 BitField<0, 3, u64> pred0;
255 BitField<3, 3, u64> pred3; 308 BitField<3, 3, u64> pred3;
256 BitField<7, 1, u64> abs_a; 309 BitField<7, 1, u64> abs_a;
@@ -289,19 +342,37 @@ union Instruction {
289 } fset; 342 } fset;
290 343
291 union { 344 union {
292 BitField<10, 2, Register::Size> size; 345 BitField<39, 3, u64> pred39;
293 BitField<13, 1, u64> is_signed; 346 BitField<42, 1, u64> neg_pred;
347 BitField<44, 1, u64> bf;
348 BitField<45, 2, PredOperation> op;
349 BitField<48, 1, u64> is_signed;
350 BitField<49, 3, PredCondition> cond;
351 } iset;
352
353 union {
354 BitField<8, 2, Register::Size> dest_size;
355 BitField<10, 2, Register::Size> src_size;
356 BitField<12, 1, u64> is_output_signed;
357 BitField<13, 1, u64> is_input_signed;
294 BitField<41, 2, u64> selector; 358 BitField<41, 2, u64> selector;
295 BitField<45, 1, u64> negate_a; 359 BitField<45, 1, u64> negate_a;
296 BitField<49, 1, u64> abs_a; 360 BitField<49, 1, u64> abs_a;
297 BitField<50, 1, u64> saturate_a; 361
362 union {
363 BitField<39, 2, F2iRoundingOp> rounding;
364 } f2i;
365
366 union {
367 BitField<39, 4, F2fRoundingOp> rounding;
368 } f2f;
298 } conversion; 369 } conversion;
299 370
300 union { 371 union {
301 BitField<31, 4, u64> component_mask; 372 BitField<31, 4, u64> component_mask;
302 373
303 bool IsComponentEnabled(size_t component) const { 374 bool IsComponentEnabled(size_t component) const {
304 return ((1 << component) & component_mask) != 0; 375 return ((1ull << component) & component_mask) != 0;
305 } 376 }
306 } tex; 377 } tex;
307 378
@@ -320,7 +391,7 @@ union Instruction {
320 391
321 ASSERT(component_mask_selector < mask.size()); 392 ASSERT(component_mask_selector < mask.size());
322 393
323 return ((1 << component) & mask[component_mask_selector]) != 0; 394 return ((1ull << component) & mask[component_mask_selector]) != 0;
324 } 395 }
325 } texs; 396 } texs;
326 397
@@ -338,12 +409,21 @@ union Instruction {
338 } 409 }
339 } bra; 410 } bra;
340 411
412 union {
413 BitField<20, 14, u64> offset;
414 BitField<34, 5, u64> index;
415 } cbuf34;
416
417 union {
418 BitField<20, 16, s64> offset;
419 BitField<36, 5, u64> index;
420 } cbuf36;
421
341 BitField<61, 1, u64> is_b_imm; 422 BitField<61, 1, u64> is_b_imm;
342 BitField<60, 1, u64> is_b_gpr; 423 BitField<60, 1, u64> is_b_gpr;
343 BitField<59, 1, u64> is_c_gpr; 424 BitField<59, 1, u64> is_c_gpr;
344 425
345 Attribute attribute; 426 Attribute attribute;
346 Uniform uniform;
347 Sampler sampler; 427 Sampler sampler;
348 428
349 u64 value; 429 u64 value;
@@ -356,8 +436,13 @@ class OpCode {
356public: 436public:
357 enum class Id { 437 enum class Id {
358 KIL, 438 KIL,
439 SSY,
440 BFE_C,
441 BFE_R,
442 BFE_IMM,
359 BRA, 443 BRA,
360 LD_A, 444 LD_A,
445 LD_C,
361 ST_A, 446 ST_A,
362 TEX, 447 TEX,
363 TEXQ, // Texture Query 448 TEXQ, // Texture Query
@@ -376,6 +461,10 @@ public:
376 FMUL_R, 461 FMUL_R,
377 FMUL_IMM, 462 FMUL_IMM,
378 FMUL32_IMM, 463 FMUL32_IMM,
464 IADD_C,
465 IADD_R,
466 IADD_IMM,
467 IADD32I,
379 ISCADD_C, // Scale and Add 468 ISCADD_C, // Scale and Add
380 ISCADD_R, 469 ISCADD_R,
381 ISCADD_IMM, 470 ISCADD_IMM,
@@ -395,6 +484,9 @@ public:
395 I2I_C, 484 I2I_C,
396 I2I_R, 485 I2I_R,
397 I2I_IMM, 486 I2I_IMM,
487 LOP_C,
488 LOP_R,
489 LOP_IMM,
398 LOP32I, 490 LOP32I,
399 MOV_C, 491 MOV_C,
400 MOV_R, 492 MOV_R,
@@ -409,6 +501,9 @@ public:
409 FMNMX_C, 501 FMNMX_C,
410 FMNMX_R, 502 FMNMX_R,
411 FMNMX_IMM, 503 FMNMX_IMM,
504 IMNMX_C,
505 IMNMX_R,
506 IMNMX_IMM,
412 FSETP_C, // Set Predicate 507 FSETP_C, // Set Predicate
413 FSETP_R, 508 FSETP_R,
414 FSETP_IMM, 509 FSETP_IMM,
@@ -418,20 +513,30 @@ public:
418 ISETP_C, 513 ISETP_C,
419 ISETP_IMM, 514 ISETP_IMM,
420 ISETP_R, 515 ISETP_R,
516 ISET_R,
517 ISET_C,
518 ISET_IMM,
421 PSETP, 519 PSETP,
520 XMAD_IMM,
521 XMAD_CR,
522 XMAD_RC,
523 XMAD_RR,
422 }; 524 };
423 525
424 enum class Type { 526 enum class Type {
425 Trivial, 527 Trivial,
426 Arithmetic, 528 Arithmetic,
427 Logic, 529 ArithmeticImmediate,
530 ArithmeticInteger,
531 ArithmeticIntegerImmediate,
532 Bfe,
428 Shift, 533 Shift,
429 ScaledAdd,
430 Ffma, 534 Ffma,
431 Flow, 535 Flow,
432 Memory, 536 Memory,
433 FloatSet, 537 FloatSet,
434 FloatSetPredicate, 538 FloatSetPredicate,
539 IntegerSet,
435 IntegerSetPredicate, 540 IntegerSetPredicate,
436 PredicateSetPredicate, 541 PredicateSetPredicate,
437 Conversion, 542 Conversion,
@@ -530,8 +635,10 @@ private:
530 std::vector<Matcher> table = { 635 std::vector<Matcher> table = {
531#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name) 636#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
532 INST("111000110011----", Id::KIL, Type::Flow, "KIL"), 637 INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
638 INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
533 INST("111000100100----", Id::BRA, Type::Flow, "BRA"), 639 INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
534 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), 640 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
641 INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
535 INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), 642 INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
536 INST("1100000000111---", Id::TEX, Type::Memory, "TEX"), 643 INST("1100000000111---", Id::TEX, Type::Memory, "TEX"),
537 INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"), 644 INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
@@ -549,10 +656,14 @@ private:
549 INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"), 656 INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"),
550 INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"), 657 INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
551 INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"), 658 INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
552 INST("00011110--------", Id::FMUL32_IMM, Type::Arithmetic, "FMUL32_IMM"), 659 INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"),
553 INST("0100110000011---", Id::ISCADD_C, Type::ScaledAdd, "ISCADD_C"), 660 INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"),
554 INST("0101110000011---", Id::ISCADD_R, Type::ScaledAdd, "ISCADD_R"), 661 INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"),
555 INST("0011100-00011---", Id::ISCADD_IMM, Type::ScaledAdd, "ISCADD_IMM"), 662 INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"),
663 INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"),
664 INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
665 INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
666 INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"),
556 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), 667 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
557 INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), 668 INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
558 INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), 669 INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
@@ -560,17 +671,26 @@ private:
560 INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"), 671 INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"),
561 INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"), 672 INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"),
562 INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"), 673 INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"),
563 INST("0100110010110---", Id::F2I_C, Type::Arithmetic, "F2I_C"), 674 INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"),
564 INST("0101110010110---", Id::F2I_R, Type::Arithmetic, "F2I_R"), 675 INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"),
565 INST("0011100-10110---", Id::F2I_IMM, Type::Arithmetic, "F2I_IMM"), 676 INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"),
566 INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), 677 INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
567 INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), 678 INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
568 INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), 679 INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
569 INST("000000010000----", Id::MOV32_IMM, Type::Arithmetic, "MOV32_IMM"), 680 INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
570 INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), 681 INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
571 INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), 682 INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
572 INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"), 683 INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
// IMNMX_* matchers: the mnemonic string of each entry mirrors the Id it is registered with.
573 INST("000001----------", Id::LOP32I, Type::Logic, "LOP32I"), 684 INST("0100110000100---", Id::IMNMX_C, Type::Arithmetic, "IMNMX_C"),
685 INST("0101110000100---", Id::IMNMX_R, Type::Arithmetic, "IMNMX_R"),
686 INST("0011100-00100---", Id::IMNMX_IMM, Type::Arithmetic, "IMNMX_IMM"),
687 INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
688 INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
689 INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
690 INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
691 INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
692 INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
693 INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
574 INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"), 694 INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
575 INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"), 695 INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
576 INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"), 696 INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),
@@ -592,7 +712,14 @@ private:
592 INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"), 712 INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"),
593 INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"), 713 INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"),
594 INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"), 714 INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"),
715 INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"),
716 INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
717 INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
595 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), 718 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
719 INST("0011011-00------", Id::XMAD_IMM, Type::Arithmetic, "XMAD_IMM"),
720 INST("0100111---------", Id::XMAD_CR, Type::Arithmetic, "XMAD_CR"),
721 INST("010100010-------", Id::XMAD_RC, Type::Arithmetic, "XMAD_RC"),
722 INST("0101101100------", Id::XMAD_RR, Type::Arithmetic, "XMAD_RR"),
596 }; 723 };
597#undef INST 724#undef INST
598 std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { 725 std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 756518ee7..e36483145 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -5,6 +5,7 @@
5#include "video_core/engines/fermi_2d.h" 5#include "video_core/engines/fermi_2d.h"
6#include "video_core/engines/maxwell_3d.h" 6#include "video_core/engines/maxwell_3d.h"
7#include "video_core/engines/maxwell_compute.h" 7#include "video_core/engines/maxwell_compute.h"
8#include "video_core/engines/maxwell_dma.h"
8#include "video_core/gpu.h" 9#include "video_core/gpu.h"
9 10
10namespace Tegra { 11namespace Tegra {
@@ -14,6 +15,7 @@ GPU::GPU() {
14 maxwell_3d = std::make_unique<Engines::Maxwell3D>(*memory_manager); 15 maxwell_3d = std::make_unique<Engines::Maxwell3D>(*memory_manager);
15 fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager); 16 fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
16 maxwell_compute = std::make_unique<Engines::MaxwellCompute>(); 17 maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
18 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(*memory_manager);
17} 19}
18 20
19GPU::~GPU() = default; 21GPU::~GPU() = default;
@@ -26,6 +28,10 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
26 ASSERT(format != RenderTargetFormat::NONE); 28 ASSERT(format != RenderTargetFormat::NONE);
27 29
28 switch (format) { 30 switch (format) {
31 case RenderTargetFormat::RGBA32_FLOAT:
32 return 16;
33 case RenderTargetFormat::RGBA16_FLOAT:
34 return 8;
29 case RenderTargetFormat::RGBA8_UNORM: 35 case RenderTargetFormat::RGBA8_UNORM:
30 case RenderTargetFormat::RGB10_A2_UNORM: 36 case RenderTargetFormat::RGB10_A2_UNORM:
31 return 4; 37 return 4;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index f168a5171..7b4e9b842 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -15,10 +15,12 @@ namespace Tegra {
15 15
16enum class RenderTargetFormat : u32 { 16enum class RenderTargetFormat : u32 {
17 NONE = 0x0, 17 NONE = 0x0,
18 RGBA32_FLOAT = 0xC0,
18 RGBA16_FLOAT = 0xCA, 19 RGBA16_FLOAT = 0xCA,
19 RGB10_A2_UNORM = 0xD1, 20 RGB10_A2_UNORM = 0xD1,
20 RGBA8_UNORM = 0xD5, 21 RGBA8_UNORM = 0xD5,
21 RGBA8_SRGB = 0xD6, 22 RGBA8_SRGB = 0xD6,
23 R11G11B10_FLOAT = 0xE0,
22}; 24};
23 25
24/// Returns the number of bytes per pixel of each rendertarget format. 26/// Returns the number of bytes per pixel of each rendertarget format.
@@ -61,6 +63,7 @@ namespace Engines {
61class Fermi2D; 63class Fermi2D;
62class Maxwell3D; 64class Maxwell3D;
63class MaxwellCompute; 65class MaxwellCompute;
66class MaxwellDMA;
64} // namespace Engines 67} // namespace Engines
65 68
66enum class EngineID { 69enum class EngineID {
@@ -101,6 +104,8 @@ private:
101 std::unique_ptr<Engines::Fermi2D> fermi_2d; 104 std::unique_ptr<Engines::Fermi2D> fermi_2d;
102 /// Compute engine 105 /// Compute engine
103 std::unique_ptr<Engines::MaxwellCompute> maxwell_compute; 106 std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
107 /// DMA engine
108 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
104}; 109};
105 110
106} // namespace Tegra 111} // namespace Tegra
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 0a33868b7..3ba20f978 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -196,8 +196,10 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
196 auto& gpu = Core::System().GetInstance().GPU().Maxwell3D(); 196 auto& gpu = Core::System().GetInstance().GPU().Maxwell3D();
197 ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); 197 ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!");
198 198
199 // Next available bindpoint to use when uploading the const buffers to the GLSL shaders. 199 // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
200 // shaders.
200 u32 current_constbuffer_bindpoint = 0; 201 u32 current_constbuffer_bindpoint = 0;
202 u32 current_texture_bindpoint = 0;
201 203
202 for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { 204 for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
203 auto& shader_config = gpu.regs.shader_config[index]; 205 auto& shader_config = gpu.regs.shader_config[index];
@@ -212,13 +214,17 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
212 continue; 214 continue;
213 } 215 }
214 216
217 GLShader::MaxwellUniformData ubo{};
218 ubo.SetFromRegs(gpu.state.shader_stages[stage]);
219 std::memcpy(buffer_ptr, &ubo, sizeof(ubo));
220
221 // Flush the buffer so that the GPU can see the data we just wrote.
222 glFlushMappedBufferRange(GL_ARRAY_BUFFER, buffer_offset, sizeof(ubo));
223
215 // Upload uniform data as one UBO per stage 224 // Upload uniform data as one UBO per stage
216 const GLintptr ubo_offset = buffer_offset; 225 const GLintptr ubo_offset = buffer_offset;
217 copy_buffer(uniform_buffers[stage].handle, ubo_offset, 226 copy_buffer(uniform_buffers[stage].handle, ubo_offset,
218 sizeof(GLShader::MaxwellUniformData)); 227 sizeof(GLShader::MaxwellUniformData));
219 GLShader::MaxwellUniformData* ub_ptr =
220 reinterpret_cast<GLShader::MaxwellUniformData*>(buffer_ptr);
221 ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]);
222 228
223 buffer_ptr += sizeof(GLShader::MaxwellUniformData); 229 buffer_ptr += sizeof(GLShader::MaxwellUniformData);
224 buffer_offset += sizeof(GLShader::MaxwellUniformData); 230 buffer_offset += sizeof(GLShader::MaxwellUniformData);
@@ -258,6 +264,11 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
258 current_constbuffer_bindpoint = 264 current_constbuffer_bindpoint =
259 SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, 265 SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
260 current_constbuffer_bindpoint, shader_resources.const_buffer_entries); 266 current_constbuffer_bindpoint, shader_resources.const_buffer_entries);
267
268 // Configure the textures for this shader stage.
269 current_texture_bindpoint =
270 SetupTextures(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
271 current_texture_bindpoint, shader_resources.texture_samplers);
261 } 272 }
262 273
263 shader_program_manager->UseTrivialGeometryShader(); 274 shader_program_manager->UseTrivialGeometryShader();
@@ -338,12 +349,12 @@ void RasterizerOpenGL::DrawArrays() {
338 // Sync the viewport 349 // Sync the viewport
339 SyncViewport(surfaces_rect, res_scale); 350 SyncViewport(surfaces_rect, res_scale);
340 351
352 // Sync the blend state registers
353 SyncBlendState();
354
341 // TODO(bunnei): Sync framebuffer_scale uniform here 355 // TODO(bunnei): Sync framebuffer_scale uniform here
342 // TODO(bunnei): Sync scissorbox uniform(s) here 356 // TODO(bunnei): Sync scissorbox uniform(s) here
343 357
344 // Sync and bind the texture surfaces
345 BindTextures();
346
347 // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable 358 // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
348 // scissor test to prevent drawing outside of the framebuffer region 359 // scissor test to prevent drawing outside of the framebuffer region
349 state.scissor.enabled = true; 360 state.scissor.enabled = true;
@@ -447,65 +458,7 @@ void RasterizerOpenGL::DrawArrays() {
447 } 458 }
448} 459}
449 460
450void RasterizerOpenGL::BindTextures() { 461void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
451 using Regs = Tegra::Engines::Maxwell3D::Regs;
452 auto& maxwell3d = Core::System::GetInstance().GPU().Get3DEngine();
453
454 // Each Maxwell shader stage can have an arbitrary number of textures, but we're limited to a
455 // certain number in OpenGL. We try to only use the minimum amount of host textures by not
456 // keeping a 1:1 relation between guest texture ids and host texture ids, ie, guest texture id 8
457 // can be host texture id 0 if it's the only texture used in the guest shader program.
458 u32 host_texture_index = 0;
459 for (u32 stage = 0; stage < Regs::MaxShaderStage; ++stage) {
460 ASSERT(host_texture_index < texture_samplers.size());
461 const auto textures = maxwell3d.GetStageTextures(static_cast<Regs::ShaderStage>(stage));
462 for (unsigned texture_index = 0; texture_index < textures.size(); ++texture_index) {
463 const auto& texture = textures[texture_index];
464
465 if (texture.enabled) {
466 texture_samplers[host_texture_index].SyncWithConfig(texture.tsc);
467 Surface surface = res_cache.GetTextureSurface(texture);
468 if (surface != nullptr) {
469 state.texture_units[host_texture_index].texture_2d = surface->texture.handle;
470 } else {
471 // Can occur when texture addr is null or its memory is unmapped/invalid
472 state.texture_units[texture_index].texture_2d = 0;
473 }
474
475 ++host_texture_index;
476 } else {
477 state.texture_units[texture_index].texture_2d = 0;
478 }
479 }
480 }
481}
482
483void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {
484 const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
485 switch (method) {
486 case MAXWELL3D_REG_INDEX(blend.separate_alpha):
487 ASSERT_MSG(false, "unimplemented");
488 break;
489 case MAXWELL3D_REG_INDEX(blend.equation_rgb):
490 state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.blend.equation_rgb);
491 break;
492 case MAXWELL3D_REG_INDEX(blend.factor_source_rgb):
493 state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb);
494 break;
495 case MAXWELL3D_REG_INDEX(blend.factor_dest_rgb):
496 state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb);
497 break;
498 case MAXWELL3D_REG_INDEX(blend.equation_a):
499 state.blend.a_equation = MaxwellToGL::BlendEquation(regs.blend.equation_a);
500 break;
501 case MAXWELL3D_REG_INDEX(blend.factor_source_a):
502 state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_a);
503 break;
504 case MAXWELL3D_REG_INDEX(blend.factor_dest_a):
505 state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_a);
506 break;
507 }
508}
509 462
510void RasterizerOpenGL::FlushAll() { 463void RasterizerOpenGL::FlushAll() {
511 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 464 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
@@ -654,7 +607,16 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
654 buffer_draw_state.bindpoint = current_bindpoint + bindpoint; 607 buffer_draw_state.bindpoint = current_bindpoint + bindpoint;
655 608
656 boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); 609 boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address);
657 std::vector<u8> data(used_buffer.GetSize() * sizeof(float)); 610
611 std::vector<u8> data;
612 if (used_buffer.IsIndirect()) {
613 // Buffer is accessed indirectly, so upload the entire thing
614 data.resize(buffer.size * sizeof(float));
615 } else {
616 // Buffer is accessed directly, upload just what we use
617 data.resize(used_buffer.GetSize() * sizeof(float));
618 }
619
658 Memory::ReadBlock(*addr, data.data(), data.size()); 620 Memory::ReadBlock(*addr, data.data(), data.size());
659 621
660 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo); 622 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo);
@@ -671,7 +633,53 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
671 633
672 state.Apply(); 634 state.Apply();
673 635
674 return current_bindpoint + entries.size(); 636 return current_bindpoint + static_cast<u32>(entries.size());
637}
638
/// Binds the textures referenced by one shader stage to consecutive host texture units.
///
/// @param stage        Shader stage whose samplers are being configured; must be enabled.
/// @param program      GL program object whose sampler uniforms are assigned.
/// @param current_unit First host texture unit available to this stage.
/// @param entries      Sampler entries of the stage; entry i is bound to unit current_unit + i.
/// @return The next free texture unit, i.e. current_unit + entries.size().
639u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit,
640 const std::vector<GLShader::SamplerEntry>& entries) {
641 auto& gpu = Core::System::GetInstance().GPU();
642 auto& maxwell3d = gpu.Get3DEngine();
643
644 ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage),
645 "Attempted to upload textures of disabled shader stage");
646
// All units this stage will touch must exist in the cached GL state.
647 ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units),
648 "Exceeded the number of active textures.");
649
650 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
651 const auto& entry = entries[bindpoint];
// Host texture unit assigned to this entry.
652 u32 current_bindpoint = current_unit + bindpoint;
653
654 // Bind the uniform to the sampler.
655 GLint uniform = glGetUniformLocation(program, entry.GetName().c_str());
656 ASSERT(uniform != -1);
657 glProgramUniform1i(program, uniform, current_bindpoint);
658
// Look up the guest texture description the entry refers to; it is expected to be enabled.
659 const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
660 ASSERT(texture.enabled);
661
662 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
663 Surface surface = res_cache.GetTextureSurface(texture);
664 if (surface != nullptr) {
// Record the GL texture handle and the per-channel swizzle taken from the texture's tic.
665 state.texture_units[current_bindpoint].texture_2d = surface->texture.handle;
666 state.texture_units[current_bindpoint].swizzle.r =
667 MaxwellToGL::SwizzleSource(texture.tic.x_source);
668 state.texture_units[current_bindpoint].swizzle.g =
669 MaxwellToGL::SwizzleSource(texture.tic.y_source);
670 state.texture_units[current_bindpoint].swizzle.b =
671 MaxwellToGL::SwizzleSource(texture.tic.z_source);
672 state.texture_units[current_bindpoint].swizzle.a =
673 MaxwellToGL::SwizzleSource(texture.tic.w_source);
674 } else {
675 // Can occur when texture addr is null or its memory is unmapped/invalid
// NOTE(review): the swizzle of this unit is left untouched in this branch.
676 state.texture_units[current_bindpoint].texture_2d = 0;
677 }
678 }
679
// Push the updated texture-unit state to OpenGL.
680 state.Apply();
681
682 return current_unit + static_cast<u32>(entries.size());
675} 683}
676 684
677void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, 685void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
@@ -730,14 +738,21 @@ void RasterizerOpenGL::SyncDepthOffset() {
730 UNREACHABLE(); 738 UNREACHABLE();
731} 739}
732 740
733void RasterizerOpenGL::SyncBlendEnabled() { 741void RasterizerOpenGL::SyncBlendState() {
734 UNREACHABLE(); 742 const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
735}
736 743
737void RasterizerOpenGL::SyncBlendFuncs() { 744 // TODO(Subv): Support more than just render target 0.
738 UNREACHABLE(); 745 state.blend.enabled = regs.blend.enable[0] != 0;
739}
740 746
741void RasterizerOpenGL::SyncBlendColor() { 747 if (!state.blend.enabled)
742 UNREACHABLE(); 748 return;
749
750 ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented");
751 ASSERT_MSG(!regs.independent_blend[0].separate_alpha, "Unimplemented");
752 state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_rgb);
753 state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_rgb);
754 state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_rgb);
755 state.blend.a_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_a);
756 state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_a);
757 state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_a);
743} 758}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 4b915c76a..b7c8cf843 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -80,9 +80,6 @@ private:
80 void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface, 80 void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface,
81 bool has_stencil); 81 bool has_stencil);
82 82
83 /// Binds the required textures to OpenGL before drawing a batch.
84 void BindTextures();
85
86 /* 83 /*
87 * Configures the current constbuffers to use for the draw command. 84 * Configures the current constbuffers to use for the draw command.
88 * @param stage The shader stage to configure buffers for. 85 * @param stage The shader stage to configure buffers for.
@@ -95,6 +92,17 @@ private:
95 u32 current_bindpoint, 92 u32 current_bindpoint,
96 const std::vector<GLShader::ConstBufferEntry>& entries); 93 const std::vector<GLShader::ConstBufferEntry>& entries);
97 94
95 /*
96 * Configures the current textures to use for the draw command.
97 * @param stage The shader stage to configure textures for.
98 * @param program The OpenGL program object that contains the specified stage.
99 * @param current_unit The offset at which to start counting unused texture units.
100 * @param entries Vector describing the textures that are actually used in the guest shader.
101 * @returns The next available bindpoint for use in the next shader stage.
102 */
103 u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program,
104 u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries);
105
98 /// Syncs the viewport to match the guest state 106 /// Syncs the viewport to match the guest state
99 void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale); 107 void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale);
100 108
@@ -113,14 +121,8 @@ private:
113 /// Syncs the depth offset to match the guest state 121 /// Syncs the depth offset to match the guest state
114 void SyncDepthOffset(); 122 void SyncDepthOffset();
115 123
116 /// Syncs the blend enabled status to match the guest state 124 /// Syncs the blend state to match the guest state
117 void SyncBlendEnabled(); 125 void SyncBlendState();
118
119 /// Syncs the blend functions to match the guest state
120 void SyncBlendFuncs();
121
122 /// Syncs the blend color to match the guest state
123 void SyncBlendColor();
124 126
125 bool has_ARB_buffer_storage; 127 bool has_ARB_buffer_storage;
126 bool has_ARB_direct_state_access; 128 bool has_ARB_direct_state_access;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index d6048f639..61d670dcb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -28,6 +28,7 @@
28#include "video_core/engines/maxwell_3d.h" 28#include "video_core/engines/maxwell_3d.h"
29#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 29#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
30#include "video_core/renderer_opengl/gl_state.h" 30#include "video_core/renderer_opengl/gl_state.h"
31#include "video_core/textures/astc.h"
31#include "video_core/textures/decoders.h" 32#include "video_core/textures/decoders.h"
32#include "video_core/utils.h" 33#include "video_core/utils.h"
33#include "video_core/video_core.h" 34#include "video_core/video_core.h"
@@ -50,18 +51,22 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
50 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5 51 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5
51 {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8 52 {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8
52 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F 53 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
54 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
53 {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1 55 {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1
54 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 56 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
55 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 57 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
56 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1 58 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1
59 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4
57}}; 60}};
58 61
59static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { 62static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
60 const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); 63 const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
61 if (type == SurfaceType::ColorTexture) { 64 if (type == SurfaceType::ColorTexture) {
62 ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); 65 ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
63 // For now only UNORM components are supported, or RGBA16F which is type FLOAT 66 // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which are
64 ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F); 67 // type FLOAT
68 ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F ||
69 pixel_format == PixelFormat::R11FG11FB10F);
65 return tex_format_tuples[static_cast<unsigned int>(pixel_format)]; 70 return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
66 } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { 71 } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
67 // TODO(Subv): Implement depth formats 72 // TODO(Subv): Implement depth formats
@@ -83,6 +88,23 @@ static u16 GetResolutionScaleFactor() {
83 : Settings::values.resolution_factor); 88 : Settings::values.resolution_factor);
84} 89}
85 90
91static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 width, u32 height) {
92 u32 block_width{};
93 u32 block_height{};
94
95 switch (format) {
96 case PixelFormat::ASTC_2D_4X4:
97 block_width = 4;
98 block_height = 4;
99 break;
100 default:
101 NGLOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
102 UNREACHABLE();
103 }
104
105 data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height);
106}
107
86template <bool morton_to_gl, PixelFormat format> 108template <bool morton_to_gl, PixelFormat format>
87void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base, 109void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base,
88 Tegra::GPUVAddr start, Tegra::GPUVAddr end) { 110 Tegra::GPUVAddr start, Tegra::GPUVAddr end) {
@@ -94,6 +116,12 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::
94 auto data = Tegra::Texture::UnswizzleTexture( 116 auto data = Tegra::Texture::UnswizzleTexture(
95 *gpu.memory_manager->GpuToCpuAddress(base), 117 *gpu.memory_manager->GpuToCpuAddress(base),
96 SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); 118 SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height);
119
120 if (SurfaceParams::IsFormatASTC(format)) {
121 // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this
122 ConvertASTCToRGBA8(data, format, stride, height);
123 }
124
97 std::memcpy(gl_buffer, data.data(), data.size()); 125 std::memcpy(gl_buffer, data.data(), data.size());
98 } else { 126 } else {
99 // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check 127 // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
@@ -110,11 +138,12 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
110 Tegra::GPUVAddr), 138 Tegra::GPUVAddr),
111 SurfaceParams::MaxPixelFormat> 139 SurfaceParams::MaxPixelFormat>
112 morton_to_gl_fns = { 140 morton_to_gl_fns = {
113 MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, 141 MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
114 MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>, 142 MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>,
115 MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>, 143 MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>,
116 MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, 144 MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::DXT1>,
117 MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, 145 MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>,
146 MortonCopy<true, PixelFormat::DXN1>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
118}; 147};
119 148
120static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, 149static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr,
@@ -127,11 +156,13 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
127 MortonCopy<false, PixelFormat::A1B5G5R5>, 156 MortonCopy<false, PixelFormat::A1B5G5R5>,
128 MortonCopy<false, PixelFormat::R8>, 157 MortonCopy<false, PixelFormat::R8>,
129 MortonCopy<false, PixelFormat::RGBA16F>, 158 MortonCopy<false, PixelFormat::RGBA16F>,
159 MortonCopy<false, PixelFormat::R11FG11FB10F>,
130 // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported 160 // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported
131 nullptr, 161 nullptr,
132 nullptr, 162 nullptr,
133 nullptr, 163 nullptr,
134 nullptr, 164 nullptr,
165 MortonCopy<false, PixelFormat::ABGR8>,
135}; 166};
136 167
137// Allocate an uninitialized texture of appropriate size and format for the surface 168// Allocate an uninitialized texture of appropriate size and format for the surface
@@ -164,60 +195,10 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
164static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex, 195static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex,
165 const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type, 196 const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type,
166 GLuint read_fb_handle, GLuint draw_fb_handle) { 197 GLuint read_fb_handle, GLuint draw_fb_handle) {
167 OpenGLState state = OpenGLState::GetCurState();
168
169 OpenGLState prev_state = state;
170 SCOPE_EXIT({ prev_state.Apply(); });
171
172 // Make sure textures aren't bound to texture units, since going to bind them to framebuffer
173 // components
174 state.ResetTexture(src_tex);
175 state.ResetTexture(dst_tex);
176
177 state.draw.read_framebuffer = read_fb_handle;
178 state.draw.draw_framebuffer = draw_fb_handle;
179 state.Apply();
180
181 u32 buffers = 0;
182
183 if (type == SurfaceType::ColorTexture) {
184 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
185 0);
186 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
187 0);
188
189 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
190 0);
191 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
192 0);
193
194 buffers = GL_COLOR_BUFFER_BIT;
195 } else if (type == SurfaceType::Depth) {
196 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
197 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
198 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
199
200 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
201 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
202 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
203
204 buffers = GL_DEPTH_BUFFER_BIT;
205 } else if (type == SurfaceType::DepthStencil) {
206 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
207 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
208 src_tex, 0);
209
210 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
211 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
212 dst_tex, 0);
213
214 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
215 }
216
217 glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left,
218 dst_rect.bottom, dst_rect.right, dst_rect.top, buffers,
219 buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
220 198
199 glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex,
200 GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(),
201 src_rect.GetHeight(), 0);
221 return true; 202 return true;
222} 203}
223 204
@@ -594,7 +575,7 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint
594 glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, 575 glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format,
595 static_cast<GLsizei>(rect.GetWidth() * GetCompresssionFactor()), 576 static_cast<GLsizei>(rect.GetWidth() * GetCompresssionFactor()),
596 static_cast<GLsizei>(rect.GetHeight() * GetCompresssionFactor()), 0, 577 static_cast<GLsizei>(rect.GetHeight() * GetCompresssionFactor()), 0,
597 size, &gl_buffer[buffer_offset]); 578 static_cast<GLsizei>(size), &gl_buffer[buffer_offset]);
598 } else { 579 } else {
599 glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()), 580 glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
600 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, 581 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
@@ -933,9 +914,6 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc
933 // Use GetSurfaceSubRect instead 914 // Use GetSurfaceSubRect instead
934 ASSERT(params.width == params.stride); 915 ASSERT(params.width == params.stride);
935 916
936 ASSERT(!params.is_tiled ||
937 (params.GetActualWidth() % 8 == 0 && params.GetActualHeight() % 8 == 0));
938
939 // Check for an exact match in existing surfaces 917 // Check for an exact match in existing surfaces
940 Surface surface = 918 Surface surface =
941 FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale); 919 FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale);
@@ -1078,8 +1056,11 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
1078 params.addr = config.tic.Address(); 1056 params.addr = config.tic.Address();
1079 params.is_tiled = config.tic.IsTiled(); 1057 params.is_tiled = config.tic.IsTiled();
1080 params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); 1058 params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format);
1081 params.width = config.tic.Width() / params.GetCompresssionFactor(); 1059
1082 params.height = config.tic.Height() / params.GetCompresssionFactor(); 1060 params.width = Common::AlignUp(config.tic.Width(), params.GetCompresssionFactor()) /
1061 params.GetCompresssionFactor();
1062 params.height = Common::AlignUp(config.tic.Height(), params.GetCompresssionFactor()) /
1063 params.GetCompresssionFactor();
1083 1064
1084 // TODO(Subv): Different types per component are not supported. 1065 // TODO(Subv): Different types per component are not supported.
1085 ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && 1066 ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() &&
@@ -1090,6 +1071,13 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
1090 1071
1091 if (config.tic.IsTiled()) { 1072 if (config.tic.IsTiled()) {
1092 params.block_height = config.tic.BlockHeight(); 1073 params.block_height = config.tic.BlockHeight();
1074
1075 // TODO(bunnei): The below align up is a hack. This is here because some compressed textures
1076 // are not a multiple of their own compression factor, and so this accounts for that. This
1077 // could potentially result in an extra row of 4px being decoded if a texture is not a
1078 // multiple of 4.
1079 params.width = Common::AlignUp(params.width, 4);
1080 params.height = Common::AlignUp(params.height, 4);
1093 } else { 1081 } else {
1094 // Use the texture-provided stride value if the texture isn't tiled. 1082 // Use the texture-provided stride value if the texture isn't tiled.
1095 params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch())); 1083 params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch()));
@@ -1097,23 +1085,6 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
1097 1085
1098 params.UpdateParams(); 1086 params.UpdateParams();
1099 1087
1100 if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0 ||
1101 params.stride != params.width) {
1102 Surface src_surface;
1103 MathUtil::Rectangle<u32> rect;
1104 std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true);
1105
1106 params.res_scale = src_surface->res_scale;
1107 Surface tmp_surface = CreateSurface(params);
1108 BlitTextures(src_surface->texture.handle, rect, tmp_surface->texture.handle,
1109 tmp_surface->GetScaledRect(),
1110 SurfaceParams::GetFormatType(params.pixel_format), read_framebuffer.handle,
1111 draw_framebuffer.handle);
1112
1113 remove_surfaces.emplace(tmp_surface);
1114 return tmp_surface;
1115 }
1116
1117 return GetSurface(params, ScaleMatch::Ignore, true); 1088 return GetSurface(params, ScaleMatch::Ignore, true);
1118} 1089}
1119 1090
@@ -1288,7 +1259,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, Tegra::GPUVA
1288 1259
1289 const auto interval = *it & validate_interval; 1260 const auto interval = *it & validate_interval;
1290 // Look for a valid surface to copy from 1261 // Look for a valid surface to copy from
1291 SurfaceParams params = surface->FromInterval(interval); 1262 SurfaceParams params = *surface;
1292 1263
1293 Surface copy_surface = 1264 Surface copy_surface =
1294 FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval); 1265 FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 6f08678ab..9da945e19 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -60,10 +60,12 @@ struct SurfaceParams {
60 A1B5G5R5 = 3, 60 A1B5G5R5 = 3,
61 R8 = 4, 61 R8 = 4,
62 RGBA16F = 5, 62 RGBA16F = 5,
63 DXT1 = 6, 63 R11FG11FB10F = 6,
64 DXT23 = 7, 64 DXT1 = 7,
65 DXT45 = 8, 65 DXT23 = 8,
66 DXN1 = 9, // This is also known as BC4 66 DXT45 = 9,
67 DXN1 = 10, // This is also known as BC4
68 ASTC_2D_4X4 = 11,
67 69
68 Max, 70 Max,
69 Invalid = 255, 71 Invalid = 255,
@@ -104,11 +106,13 @@ struct SurfaceParams {
104 1, // A2B10G10R10 106 1, // A2B10G10R10
105 1, // A1B5G5R5 107 1, // A1B5G5R5
106 1, // R8 108 1, // R8
107 2, // RGBA16F 109 1, // RGBA16F
110 1, // R11FG11FB10F
108 4, // DXT1 111 4, // DXT1
109 4, // DXT23 112 4, // DXT23
110 4, // DXT45 113 4, // DXT45
111 4, // DXN1 114 4, // DXN1
115 1, // ASTC_2D_4X4
112 }}; 116 }};
113 117
114 ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); 118 ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -129,10 +133,12 @@ struct SurfaceParams {
129 16, // A1B5G5R5 133 16, // A1B5G5R5
130 8, // R8 134 8, // R8
131 64, // RGBA16F 135 64, // RGBA16F
136 32, // R11FG11FB10F
132 64, // DXT1 137 64, // DXT1
133 128, // DXT23 138 128, // DXT23
134 128, // DXT45 139 128, // DXT45
135 64, // DXN1 140 64, // DXN1
141 32, // ASTC_2D_4X4
136 }}; 142 }};
137 143
138 ASSERT(static_cast<size_t>(format) < bpp_table.size()); 144 ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -151,12 +157,23 @@ struct SurfaceParams {
151 return PixelFormat::A2B10G10R10; 157 return PixelFormat::A2B10G10R10;
152 case Tegra::RenderTargetFormat::RGBA16_FLOAT: 158 case Tegra::RenderTargetFormat::RGBA16_FLOAT:
153 return PixelFormat::RGBA16F; 159 return PixelFormat::RGBA16F;
160 case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
161 return PixelFormat::R11FG11FB10F;
154 default: 162 default:
155 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); 163 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
156 UNREACHABLE(); 164 UNREACHABLE();
157 } 165 }
158 } 166 }
159 167
168 static bool IsFormatASTC(PixelFormat format) {
169 switch (format) {
170 case PixelFormat::ASTC_2D_4X4:
171 return true;
172 default:
173 return false;
174 }
175 }
176
160 static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { 177 static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
161 switch (format) { 178 switch (format) {
162 case Tegra::FramebufferConfig::PixelFormat::ABGR8: 179 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
@@ -182,6 +199,8 @@ struct SurfaceParams {
182 return PixelFormat::R8; 199 return PixelFormat::R8;
183 case Tegra::Texture::TextureFormat::R16_G16_B16_A16: 200 case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
184 return PixelFormat::RGBA16F; 201 return PixelFormat::RGBA16F;
202 case Tegra::Texture::TextureFormat::BF10GF11RF11:
203 return PixelFormat::R11FG11FB10F;
185 case Tegra::Texture::TextureFormat::DXT1: 204 case Tegra::Texture::TextureFormat::DXT1:
186 return PixelFormat::DXT1; 205 return PixelFormat::DXT1;
187 case Tegra::Texture::TextureFormat::DXT23: 206 case Tegra::Texture::TextureFormat::DXT23:
@@ -190,6 +209,8 @@ struct SurfaceParams {
190 return PixelFormat::DXT45; 209 return PixelFormat::DXT45;
191 case Tegra::Texture::TextureFormat::DXN1: 210 case Tegra::Texture::TextureFormat::DXN1:
192 return PixelFormat::DXN1; 211 return PixelFormat::DXN1;
212 case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
213 return PixelFormat::ASTC_2D_4X4;
193 default: 214 default:
194 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); 215 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
195 UNREACHABLE(); 216 UNREACHABLE();
@@ -211,6 +232,8 @@ struct SurfaceParams {
211 return Tegra::Texture::TextureFormat::R8; 232 return Tegra::Texture::TextureFormat::R8;
212 case PixelFormat::RGBA16F: 233 case PixelFormat::RGBA16F:
213 return Tegra::Texture::TextureFormat::R16_G16_B16_A16; 234 return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
235 case PixelFormat::R11FG11FB10F:
236 return Tegra::Texture::TextureFormat::BF10GF11RF11;
214 case PixelFormat::DXT1: 237 case PixelFormat::DXT1:
215 return Tegra::Texture::TextureFormat::DXT1; 238 return Tegra::Texture::TextureFormat::DXT1;
216 case PixelFormat::DXT23: 239 case PixelFormat::DXT23:
@@ -219,6 +242,8 @@ struct SurfaceParams {
219 return Tegra::Texture::TextureFormat::DXT45; 242 return Tegra::Texture::TextureFormat::DXT45;
220 case PixelFormat::DXN1: 243 case PixelFormat::DXN1:
221 return Tegra::Texture::TextureFormat::DXN1; 244 return Tegra::Texture::TextureFormat::DXN1;
245 case PixelFormat::ASTC_2D_4X4:
246 return Tegra::Texture::TextureFormat::ASTC_2D_4X4;
222 default: 247 default:
223 UNREACHABLE(); 248 UNREACHABLE();
224 } 249 }
@@ -243,6 +268,7 @@ struct SurfaceParams {
243 case Tegra::RenderTargetFormat::RGB10_A2_UNORM: 268 case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
244 return ComponentType::UNorm; 269 return ComponentType::UNorm;
245 case Tegra::RenderTargetFormat::RGBA16_FLOAT: 270 case Tegra::RenderTargetFormat::RGBA16_FLOAT:
271 case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
246 return ComponentType::Float; 272 return ComponentType::Float;
247 default: 273 default:
248 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); 274 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index f886e49ca..65fed77ef 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -16,11 +16,11 @@ namespace Decompiler {
16 16
17using Tegra::Shader::Attribute; 17using Tegra::Shader::Attribute;
18using Tegra::Shader::Instruction; 18using Tegra::Shader::Instruction;
19using Tegra::Shader::LogicOperation;
19using Tegra::Shader::OpCode; 20using Tegra::Shader::OpCode;
20using Tegra::Shader::Register; 21using Tegra::Shader::Register;
21using Tegra::Shader::Sampler; 22using Tegra::Shader::Sampler;
22using Tegra::Shader::SubOp; 23using Tegra::Shader::SubOp;
23using Tegra::Shader::Uniform;
24 24
25constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; 25constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
26 26
@@ -267,6 +267,27 @@ public:
267 } 267 }
268 268
269 /** 269 /**
270 * Returns code that does an integer size conversion for the specified size.
271 * @param value Value to perform integer size conversion on.
272 * @param size Register size to use for conversion instructions.
273 * @returns GLSL string corresponding to the value converted to the specified size.
274 */
275 static std::string ConvertIntegerSize(const std::string& value, Register::Size size) {
276 switch (size) {
277 case Register::Size::Byte:
278 return "((" + value + " << 24) >> 24)";
279 case Register::Size::Short:
280 return "((" + value + " << 16) >> 16)";
281 case Register::Size::Word:
282 // Default - do nothing
283 return value;
284 default:
285 NGLOG_CRITICAL(HW_GPU, "Unimplemented conversion size {}", static_cast<u32>(size));
286 UNREACHABLE();
287 }
288 }
289
290 /**
270 * Gets a register as an float. 291 * Gets a register as an float.
271 * @param reg The register to get. 292 * @param reg The register to get.
272 * @param elem The element to use for the operation. 293 * @param elem The element to use for the operation.
@@ -282,15 +303,18 @@ public:
282 * @param reg The register to get. 303 * @param reg The register to get.
283 * @param elem The element to use for the operation. 304 * @param elem The element to use for the operation.
284 * @param is_signed Whether to get the register as a signed (or unsigned) integer. 305 * @param is_signed Whether to get the register as a signed (or unsigned) integer.
306 * @param size Register size to use for conversion instructions.
285 * @returns GLSL string corresponding to the register as an integer. 307 * @returns GLSL string corresponding to the register as an integer.
286 */ 308 */
287 std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0, 309 std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0, bool is_signed = true,
288 bool is_signed = true) { 310 Register::Size size = Register::Size::Word) {
289 const std::string func = GetGLSLConversionFunc( 311 const std::string func = GetGLSLConversionFunc(
290 GLSLRegister::Type::Float, 312 GLSLRegister::Type::Float,
291 is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger); 313 is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger);
292 314
293 return func + '(' + GetRegister(reg, elem) + ')'; 315 std::string value = func + '(' + GetRegister(reg, elem) + ')';
316
317 return ConvertIntegerSize(value, size);
294 } 318 }
295 319
296 /** 320 /**
@@ -300,13 +324,15 @@ public:
300 * @param value The code representing the value to assign. 324 * @param value The code representing the value to assign.
301 * @param dest_num_components Number of components in the destination. 325 * @param dest_num_components Number of components in the destination.
302 * @param value_num_components Number of components in the value. 326 * @param value_num_components Number of components in the value.
303 * @param is_abs Optional, when True, applies absolute value to output. 327 * @param is_saturated Optional, when True, saturates the provided value.
304 * @param dest_elem Optional, the destination element to use for the operation. 328 * @param dest_elem Optional, the destination element to use for the operation.
305 */ 329 */
306 void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, 330 void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,
307 u64 dest_num_components, u64 value_num_components, bool is_abs = false, 331 u64 dest_num_components, u64 value_num_components,
308 u64 dest_elem = 0) { 332 bool is_saturated = false, u64 dest_elem = 0) {
309 SetRegister(reg, elem, value, dest_num_components, value_num_components, is_abs, dest_elem); 333
334 SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value,
335 dest_num_components, value_num_components, dest_elem);
310 } 336 }
311 337
312 /** 338 /**
@@ -316,18 +342,22 @@ public:
316 * @param value The code representing the value to assign. 342 * @param value The code representing the value to assign.
317 * @param dest_num_components Number of components in the destination. 343 * @param dest_num_components Number of components in the destination.
318 * @param value_num_components Number of components in the value. 344 * @param value_num_components Number of components in the value.
319 * @param is_abs Optional, when True, applies absolute value to output. 345 * @param is_saturated Optional, when True, saturates the provided value.
320 * @param dest_elem Optional, the destination element to use for the operation. 346 * @param dest_elem Optional, the destination element to use for the operation.
347 * @param size Register size to use for conversion instructions.
321 */ 348 */
322 void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem, 349 void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem,
323 const std::string& value, u64 dest_num_components, 350 const std::string& value, u64 dest_num_components,
324 u64 value_num_components, bool is_abs = false, u64 dest_elem = 0) { 351 u64 value_num_components, bool is_saturated = false,
352 u64 dest_elem = 0, Register::Size size = Register::Size::Word) {
353 ASSERT_MSG(!is_saturated, "Unimplemented");
354
325 const std::string func = GetGLSLConversionFunc( 355 const std::string func = GetGLSLConversionFunc(
326 is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger, 356 is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger,
327 GLSLRegister::Type::Float); 357 GLSLRegister::Type::Float);
328 358
329 SetRegister(reg, elem, func + '(' + value + ')', dest_num_components, value_num_components, 359 SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
330 is_abs, dest_elem); 360 dest_num_components, value_num_components, dest_elem);
331 } 361 }
332 362
333 /** 363 /**
@@ -365,11 +395,9 @@ public:
365 } 395 }
366 396
367 /// Generates code representing a uniform (C buffer) register, interpreted as the input type. 397 /// Generates code representing a uniform (C buffer) register, interpreted as the input type.
368 std::string GetUniform(const Uniform& uniform, GLSLRegister::Type type) { 398 std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) {
369 declr_const_buffers[uniform.index].MarkAsUsed(static_cast<unsigned>(uniform.index), 399 declr_const_buffers[index].MarkAsUsed(index, offset, stage);
370 static_cast<unsigned>(uniform.offset), stage); 400 std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset) + ']';
371 std::string value =
372 'c' + std::to_string(uniform.index) + '[' + std::to_string(uniform.offset) + ']';
373 401
374 if (type == GLSLRegister::Type::Float) { 402 if (type == GLSLRegister::Type::Float) {
375 return value; 403 return value;
@@ -380,10 +408,19 @@ public:
380 } 408 }
381 } 409 }
382 410
383 /// Generates code representing a uniform (C buffer) register, interpreted as the type of the 411 std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg,
384 /// destination register. 412 GLSLRegister::Type type) {
385 std::string GetUniform(const Uniform& uniform, const Register& dest_reg) { 413 declr_const_buffers[index].MarkAsUsedIndirect(index, stage);
386 return GetUniform(uniform, regs[dest_reg].GetActiveType()); 414 std::string value = 'c' + std::to_string(index) + "[(floatBitsToInt(" +
415 GetRegister(index_reg, 0) + ") + " + std::to_string(offset) + ") / 4]";
416
417 if (type == GLSLRegister::Type::Float) {
418 return value;
419 } else if (type == GLSLRegister::Type::Integer) {
420 return "floatBitsToInt(" + value + ')';
421 } else {
422 UNREACHABLE();
423 }
387 } 424 }
388 425
389 /// Add declarations for registers 426 /// Add declarations for registers
@@ -425,6 +462,14 @@ public:
425 ++const_buffer_layout; 462 ++const_buffer_layout;
426 } 463 }
427 declarations.AddNewLine(); 464 declarations.AddNewLine();
465
466 // Append the sampler2D array for the used textures.
467 size_t num_samplers = GetSamplers().size();
468 if (num_samplers > 0) {
469 declarations.AddLine("uniform sampler2D " + SamplerEntry::GetArrayName(stage) + '[' +
470 std::to_string(num_samplers) + "];");
471 declarations.AddNewLine();
472 }
428 } 473 }
429 474
430 /// Returns a list of constant buffer declarations 475 /// Returns a list of constant buffer declarations
@@ -435,6 +480,32 @@ public:
435 return result; 480 return result;
436 } 481 }
437 482
483 /// Returns a list of samplers used in the shader
484 std::vector<SamplerEntry> GetSamplers() const {
485 return used_samplers;
486 }
487
488 /// Returns the GLSL sampler used for the input shader sampler, and creates a new one if
489 /// necessary.
490 std::string AccessSampler(const Sampler& sampler) {
491 size_t offset = static_cast<size_t>(sampler.index.Value());
492
493 // If this sampler has already been used, return the existing mapping.
494 auto itr =
495 std::find_if(used_samplers.begin(), used_samplers.end(),
496 [&](const SamplerEntry& entry) { return entry.GetOffset() == offset; });
497
498 if (itr != used_samplers.end()) {
499 return itr->GetName();
500 }
501
502 // Otherwise create a new mapping for this sampler
503 size_t next_index = used_samplers.size();
504 SamplerEntry entry{stage, offset, next_index};
505 used_samplers.emplace_back(entry);
506 return entry.GetName();
507 }
508
438private: 509private:
439 /// Build GLSL conversion function, e.g. floatBitsToInt, intBitsToFloat, etc. 510 /// Build GLSL conversion function, e.g. floatBitsToInt, intBitsToFloat, etc.
440 const std::string GetGLSLConversionFunc(GLSLRegister::Type src, GLSLRegister::Type dest) const { 511 const std::string GetGLSLConversionFunc(GLSLRegister::Type src, GLSLRegister::Type dest) const {
@@ -460,13 +531,11 @@ private:
460 * @param value The code representing the value to assign. 531 * @param value The code representing the value to assign.
461 * @param dest_num_components Number of components in the destination. 532 * @param dest_num_components Number of components in the destination.
462 * @param value_num_components Number of components in the value. 533 * @param value_num_components Number of components in the value.
463 * @param is_abs Optional, when True, applies absolute value to output.
464 * @param dest_elem Optional, the destination element to use for the operation. 534 * @param dest_elem Optional, the destination element to use for the operation.
465 */ 535 */
466 void SetRegister(const Register& reg, u64 elem, const std::string& value, 536 void SetRegister(const Register& reg, u64 elem, const std::string& value,
467 u64 dest_num_components, u64 value_num_components, bool is_abs, 537 u64 dest_num_components, u64 value_num_components, u64 dest_elem) {
468 u64 dest_elem) { 538 std::string dest = GetRegister(reg, static_cast<u32>(dest_elem));
469 std::string dest = GetRegister(reg, dest_elem);
470 if (dest_num_components > 1) { 539 if (dest_num_components > 1) {
471 dest += GetSwizzle(elem); 540 dest += GetSwizzle(elem);
472 } 541 }
@@ -476,8 +545,6 @@ private:
476 src += GetSwizzle(elem); 545 src += GetSwizzle(elem);
477 } 546 }
478 547
479 src = is_abs ? "abs(" + src + ')' : src;
480
481 shader.AddLine(dest + " = " + src + ';'); 548 shader.AddLine(dest + " = " + src + ';');
482 } 549 }
483 550
@@ -498,7 +565,7 @@ private:
498 // vertex shader, and what's the value of the fourth element when inside a Tess Eval 565 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
499 // shader. 566 // shader.
500 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); 567 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
501 return "vec4(0, 0, gl_InstanceID, gl_VertexID)"; 568 return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))";
502 default: 569 default:
503 const u32 index{static_cast<u32>(attribute) - 570 const u32 index{static_cast<u32>(attribute) -
504 static_cast<u32>(Attribute::Index::Attribute_0)}; 571 static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -544,6 +611,7 @@ private:
544 std::set<Attribute::Index> declr_input_attribute; 611 std::set<Attribute::Index> declr_input_attribute;
545 std::set<Attribute::Index> declr_output_attribute; 612 std::set<Attribute::Index> declr_output_attribute;
546 std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers; 613 std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
614 std::vector<SamplerEntry> used_samplers;
547 const Maxwell3D::Regs::ShaderStage& stage; 615 const Maxwell3D::Regs::ShaderStage& stage;
548}; 616};
549 617
@@ -563,7 +631,7 @@ public:
563 631
564 /// Returns entries in the shader that are useful for external functions 632 /// Returns entries in the shader that are useful for external functions
565 ShaderEntries GetEntries() const { 633 ShaderEntries GetEntries() const {
566 return {regs.GetConstBuffersDeclarations()}; 634 return {regs.GetConstBuffersDeclarations(), regs.GetSamplers()};
567 } 635 }
568 636
569private: 637private:
@@ -585,12 +653,8 @@ private:
585 } 653 }
586 654
587 /// Generates code representing a texture sampler. 655 /// Generates code representing a texture sampler.
588 std::string GetSampler(const Sampler& sampler) const { 656 std::string GetSampler(const Sampler& sampler) {
589 // TODO(Subv): Support more than just texture sampler 0 657 return regs.AccessSampler(sampler);
590 ASSERT_MSG(sampler.index == Sampler::Index::Sampler_0, "unsupported");
591 const unsigned index{static_cast<unsigned>(sampler.index.Value()) -
592 static_cast<unsigned>(Sampler::Index::Sampler_0)};
593 return "tex[" + std::to_string(index) + ']';
594 } 658 }
595 659
596 /** 660 /**
@@ -696,6 +760,31 @@ private:
696 return (absolute_offset % SchedPeriod) == 0; 760 return (absolute_offset % SchedPeriod) == 0;
697 } 761 }
698 762
763 void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a,
764 const std::string& op_b) {
765 switch (logic_op) {
766 case LogicOperation::And: {
767 regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " & " + op_b + ')', 1, 1);
768 break;
769 }
770 case LogicOperation::Or: {
771 regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " | " + op_b + ')', 1, 1);
772 break;
773 }
774 case LogicOperation::Xor: {
775 regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " ^ " + op_b + ')', 1, 1);
776 break;
777 }
778 case LogicOperation::PassB: {
779 regs.SetRegisterToInteger(dest, true, 0, op_b, 1, 1);
780 break;
781 }
782 default:
783 NGLOG_CRITICAL(HW_GPU, "Unimplemented logic operation: {}", static_cast<u32>(logic_op));
784 UNREACHABLE();
785 }
786 }
787
699 /** 788 /**
700 * Compiles a single instruction from Tegra to GLSL. 789 * Compiles a single instruction from Tegra to GLSL.
701 * @param offset the offset of the Tegra shader instruction. 790 * @param offset the offset of the Tegra shader instruction.
@@ -733,21 +822,25 @@ private:
733 822
734 switch (opcode->GetType()) { 823 switch (opcode->GetType()) {
735 case OpCode::Type::Arithmetic: { 824 case OpCode::Type::Arithmetic: {
736 std::string op_a = instr.alu.negate_a ? "-" : ""; 825 std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
737 op_a += regs.GetRegisterAsFloat(instr.gpr8);
738 if (instr.alu.abs_a) { 826 if (instr.alu.abs_a) {
739 op_a = "abs(" + op_a + ')'; 827 op_a = "abs(" + op_a + ')';
740 } 828 }
741 829
742 std::string op_b = instr.alu.negate_b ? "-" : ""; 830 if (instr.alu.negate_a) {
831 op_a = "-(" + op_a + ')';
832 }
833
834 std::string op_b;
743 835
744 if (instr.is_b_imm) { 836 if (instr.is_b_imm) {
745 op_b += GetImmediate19(instr); 837 op_b = GetImmediate19(instr);
746 } else { 838 } else {
747 if (instr.is_b_gpr) { 839 if (instr.is_b_gpr) {
748 op_b += regs.GetRegisterAsFloat(instr.gpr20); 840 op_b = regs.GetRegisterAsFloat(instr.gpr20);
749 } else { 841 } else {
750 op_b += regs.GetUniform(instr.uniform, instr.gpr0); 842 op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
843 GLSLRegister::Type::Float);
751 } 844 }
752 } 845 }
753 846
@@ -755,6 +848,10 @@ private:
755 op_b = "abs(" + op_b + ')'; 848 op_b = "abs(" + op_b + ')';
756 } 849 }
757 850
851 if (instr.alu.negate_b) {
852 op_b = "-(" + op_b + ')';
853 }
854
758 switch (opcode->GetId()) { 855 switch (opcode->GetId()) {
759 case OpCode::Id::MOV_C: 856 case OpCode::Id::MOV_C:
760 case OpCode::Id::MOV_R: { 857 case OpCode::Id::MOV_R: {
@@ -762,58 +859,49 @@ private:
762 break; 859 break;
763 } 860 }
764 861
765 case OpCode::Id::MOV32_IMM: {
766 // mov32i doesn't have abs or neg bits.
767 regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1);
768 break;
769 }
770 case OpCode::Id::FMUL_C: 862 case OpCode::Id::FMUL_C:
771 case OpCode::Id::FMUL_R: 863 case OpCode::Id::FMUL_R:
772 case OpCode::Id::FMUL_IMM: { 864 case OpCode::Id::FMUL_IMM: {
773 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, instr.alu.abs_d); 865 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
774 break; 866 instr.alu.saturate_d);
775 }
776 case OpCode::Id::FMUL32_IMM: {
777 // fmul32i doesn't have abs or neg bits.
778 regs.SetRegisterToFloat(
779 instr.gpr0, 0,
780 regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1);
781 break; 867 break;
782 } 868 }
783 case OpCode::Id::FADD_C: 869 case OpCode::Id::FADD_C:
784 case OpCode::Id::FADD_R: 870 case OpCode::Id::FADD_R:
785 case OpCode::Id::FADD_IMM: { 871 case OpCode::Id::FADD_IMM: {
786 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, instr.alu.abs_d); 872 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
873 instr.alu.saturate_d);
787 break; 874 break;
788 } 875 }
789 case OpCode::Id::MUFU: { 876 case OpCode::Id::MUFU: {
790 switch (instr.sub_op) { 877 switch (instr.sub_op) {
791 case SubOp::Cos: 878 case SubOp::Cos:
792 regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, 879 regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
793 instr.alu.abs_d); 880 instr.alu.saturate_d);
794 break; 881 break;
795 case SubOp::Sin: 882 case SubOp::Sin:
796 regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, 883 regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1,
797 instr.alu.abs_d); 884 instr.alu.saturate_d);
798 break; 885 break;
799 case SubOp::Ex2: 886 case SubOp::Ex2:
800 regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, 887 regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1,
801 instr.alu.abs_d); 888 instr.alu.saturate_d);
802 break; 889 break;
803 case SubOp::Lg2: 890 case SubOp::Lg2:
804 regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, 891 regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1,
805 instr.alu.abs_d); 892 instr.alu.saturate_d);
806 break; 893 break;
807 case SubOp::Rcp: 894 case SubOp::Rcp:
808 regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, instr.alu.abs_d); 895 regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1,
896 instr.alu.saturate_d);
809 break; 897 break;
810 case SubOp::Rsq: 898 case SubOp::Rsq:
811 regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, 899 regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
812 instr.alu.abs_d); 900 instr.alu.saturate_d);
813 break; 901 break;
814 case SubOp::Min: 902 case SubOp::Min:
815 regs.SetRegisterToFloat(instr.gpr0, 0, "min(" + op_a + "," + op_b + ')', 1, 1, 903 regs.SetRegisterToFloat(instr.gpr0, 0, "min(" + op_a + "," + op_b + ')', 1, 1,
816 instr.alu.abs_d); 904 instr.alu.saturate_d);
817 break; 905 break;
818 default: 906 default:
819 NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", 907 NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}",
@@ -850,52 +938,49 @@ private:
850 } 938 }
851 break; 939 break;
852 } 940 }
853 case OpCode::Type::Logic: { 941 case OpCode::Type::ArithmeticImmediate: {
854 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
855
856 if (instr.alu.lop.invert_a)
857 op_a = "~(" + op_a + ')';
858
859 switch (opcode->GetId()) { 942 switch (opcode->GetId()) {
860 case OpCode::Id::LOP32I: { 943 case OpCode::Id::MOV32_IMM: {
861 u32 imm = static_cast<u32>(instr.alu.imm20_32.Value()); 944 regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1);
945 break;
946 }
947 case OpCode::Id::FMUL32_IMM: {
948 regs.SetRegisterToFloat(
949 instr.gpr0, 0,
950 regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1);
951 break;
952 }
953 }
954 break;
955 }
956 case OpCode::Type::Bfe: {
957 ASSERT_MSG(!instr.bfe.negate_b, "Unimplemented");
862 958
863 if (instr.alu.lop.invert_b) 959 std::string op_a = instr.bfe.negate_a ? "-" : "";
864 imm = ~imm; 960 op_a += regs.GetRegisterAsInteger(instr.gpr8);
865 961
866 switch (instr.alu.lop.operation) { 962 switch (opcode->GetId()) {
867 case Tegra::Shader::LogicOperation::And: { 963 case OpCode::Id::BFE_IMM: {
868 regs.SetRegisterToInteger(instr.gpr0, false, 0, 964 std::string inner_shift =
869 '(' + op_a + " & " + std::to_string(imm) + ')', 1, 1); 965 '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')';
870 break; 966 std::string outer_shift =
871 } 967 '(' + inner_shift + " >> " +
872 case Tegra::Shader::LogicOperation::Or: { 968 std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')';
873 regs.SetRegisterToInteger(instr.gpr0, false, 0, 969
874 '(' + op_a + " | " + std::to_string(imm) + ')', 1, 1); 970 regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1);
875 break;
876 }
877 case Tegra::Shader::LogicOperation::Xor: {
878 regs.SetRegisterToInteger(instr.gpr0, false, 0,
879 '(' + op_a + " ^ " + std::to_string(imm) + ')', 1, 1);
880 break;
881 }
882 default:
883 NGLOG_CRITICAL(HW_GPU, "Unimplemented lop32i operation: {}",
884 static_cast<u32>(instr.alu.lop.operation.Value()));
885 UNREACHABLE();
886 }
887 break; 971 break;
888 } 972 }
889 default: { 973 default: {
890 NGLOG_CRITICAL(HW_GPU, "Unhandled logic instruction: {}", opcode->GetName()); 974 NGLOG_CRITICAL(HW_GPU, "Unhandled BFE instruction: {}", opcode->GetName());
891 UNREACHABLE(); 975 UNREACHABLE();
892 } 976 }
893 } 977 }
978
894 break; 979 break;
895 } 980 }
896 981
897 case OpCode::Type::Shift: { 982 case OpCode::Type::Shift: {
898 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, false); 983 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
899 std::string op_b; 984 std::string op_b;
900 985
901 if (instr.is_b_imm) { 986 if (instr.is_b_imm) {
@@ -904,11 +989,25 @@ private:
904 if (instr.is_b_gpr) { 989 if (instr.is_b_gpr) {
905 op_b += regs.GetRegisterAsInteger(instr.gpr20); 990 op_b += regs.GetRegisterAsInteger(instr.gpr20);
906 } else { 991 } else {
907 op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer); 992 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
993 GLSLRegister::Type::Integer);
908 } 994 }
909 } 995 }
910 996
911 switch (opcode->GetId()) { 997 switch (opcode->GetId()) {
998 case OpCode::Id::SHR_C:
999 case OpCode::Id::SHR_R:
1000 case OpCode::Id::SHR_IMM: {
1001 if (!instr.shift.is_signed) {
1002 // Logical shift right
1003 op_a = "uint(" + op_a + ')';
1004 }
1005
1006 // Cast to int is superfluous for arithmetic shift, it's only for a logical shift
1007 regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')',
1008 1, 1);
1009 break;
1010 }
912 case OpCode::Id::SHL_C: 1011 case OpCode::Id::SHL_C:
913 case OpCode::Id::SHL_R: 1012 case OpCode::Id::SHL_R:
914 case OpCode::Id::SHL_IMM: 1013 case OpCode::Id::SHL_IMM:
@@ -922,28 +1021,101 @@ private:
922 break; 1021 break;
923 } 1022 }
924 1023
925 case OpCode::Type::ScaledAdd: { 1024 case OpCode::Type::ArithmeticIntegerImmediate: {
926 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); 1025 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
1026 std::string op_b = std::to_string(instr.alu.imm20_32.Value());
927 1027
928 if (instr.iscadd.negate_a) 1028 switch (opcode->GetId()) {
929 op_a = '-' + op_a; 1029 case OpCode::Id::IADD32I:
1030 if (instr.iadd32i.negate_a)
1031 op_a = "-(" + op_a + ')';
1032
1033 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
1034 instr.iadd32i.saturate != 0);
1035 break;
1036 case OpCode::Id::LOP32I: {
1037 if (instr.alu.lop32i.invert_a)
1038 op_a = "~(" + op_a + ')';
930 1039
931 std::string op_b = instr.iscadd.negate_b ? "-" : ""; 1040 if (instr.alu.lop32i.invert_b)
1041 op_b = "~(" + op_b + ')';
932 1042
1043 WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b);
1044 break;
1045 }
1046 default: {
1047 NGLOG_CRITICAL(HW_GPU, "Unhandled ArithmeticIntegerImmediate instruction: {}",
1048 opcode->GetName());
1049 UNREACHABLE();
1050 }
1051 }
1052 break;
1053 }
1054 case OpCode::Type::ArithmeticInteger: {
1055 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
1056 std::string op_b;
933 if (instr.is_b_imm) { 1057 if (instr.is_b_imm) {
934 op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')'; 1058 op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
935 } else { 1059 } else {
936 if (instr.is_b_gpr) { 1060 if (instr.is_b_gpr) {
937 op_b += regs.GetRegisterAsInteger(instr.gpr20); 1061 op_b += regs.GetRegisterAsInteger(instr.gpr20);
938 } else { 1062 } else {
939 op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer); 1063 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1064 GLSLRegister::Type::Integer);
940 } 1065 }
941 } 1066 }
942 1067
943 std::string shift = std::to_string(instr.iscadd.shift_amount.Value()); 1068 switch (opcode->GetId()) {
1069 case OpCode::Id::IADD_C:
1070 case OpCode::Id::IADD_R:
1071 case OpCode::Id::IADD_IMM: {
1072 if (instr.alu_integer.negate_a)
1073 op_a = "-(" + op_a + ')';
1074
1075 if (instr.alu_integer.negate_b)
1076 op_b = "-(" + op_b + ')';
1077
1078 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
1079 instr.alu.saturate_d);
1080 break;
1081 }
1082 case OpCode::Id::ISCADD_C:
1083 case OpCode::Id::ISCADD_R:
1084 case OpCode::Id::ISCADD_IMM: {
1085 if (instr.alu_integer.negate_a)
1086 op_a = "-(" + op_a + ')';
1087
1088 if (instr.alu_integer.negate_b)
1089 op_b = "-(" + op_b + ')';
1090
1091 std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
1092
1093 regs.SetRegisterToInteger(instr.gpr0, true, 0,
1094 "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
1095 break;
1096 }
1097 case OpCode::Id::LOP_C:
1098 case OpCode::Id::LOP_R:
1099 case OpCode::Id::LOP_IMM: {
1100 ASSERT_MSG(!instr.alu.lop.unk44, "Unimplemented");
1101 ASSERT_MSG(instr.alu.lop.pred48 == Pred::UnusedIndex, "Unimplemented");
1102
1103 if (instr.alu.lop.invert_a)
1104 op_a = "~(" + op_a + ')';
1105
1106 if (instr.alu.lop.invert_b)
1107 op_b = "~(" + op_b + ')';
1108
1109 WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b);
1110 break;
1111 }
1112 default: {
1113 NGLOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}",
1114 opcode->GetName());
1115 UNREACHABLE();
1116 }
1117 }
944 1118
945 regs.SetRegisterToInteger(instr.gpr0, true, 0,
946 "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
947 break; 1119 break;
948 } 1120 }
949 case OpCode::Type::Ffma: { 1121 case OpCode::Type::Ffma: {
@@ -953,7 +1125,8 @@ private:
953 1125
954 switch (opcode->GetId()) { 1126 switch (opcode->GetId()) {
955 case OpCode::Id::FFMA_CR: { 1127 case OpCode::Id::FFMA_CR: {
956 op_b += regs.GetUniform(instr.uniform, instr.gpr0); 1128 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1129 GLSLRegister::Type::Float);
957 op_c += regs.GetRegisterAsFloat(instr.gpr39); 1130 op_c += regs.GetRegisterAsFloat(instr.gpr39);
958 break; 1131 break;
959 } 1132 }
@@ -964,7 +1137,8 @@ private:
964 } 1137 }
965 case OpCode::Id::FFMA_RC: { 1138 case OpCode::Id::FFMA_RC: {
966 op_b += regs.GetRegisterAsFloat(instr.gpr39); 1139 op_b += regs.GetRegisterAsFloat(instr.gpr39);
967 op_c += regs.GetUniform(instr.uniform, instr.gpr0); 1140 op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1141 GLSLRegister::Type::Float);
968 break; 1142 break;
969 } 1143 }
970 case OpCode::Id::FFMA_IMM: { 1144 case OpCode::Id::FFMA_IMM: {
@@ -978,31 +1152,33 @@ private:
978 } 1152 }
979 } 1153 }
980 1154
981 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1); 1155 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1,
1156 instr.alu.saturate_d);
982 break; 1157 break;
983 } 1158 }
984 case OpCode::Type::Conversion: { 1159 case OpCode::Type::Conversion: {
985 ASSERT_MSG(instr.conversion.size == Register::Size::Word, "Unimplemented");
986 ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented"); 1160 ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented");
987 ASSERT_MSG(!instr.conversion.saturate_a, "Unimplemented");
988 1161
989 switch (opcode->GetId()) { 1162 switch (opcode->GetId()) {
990 case OpCode::Id::I2I_R: { 1163 case OpCode::Id::I2I_R: {
991 ASSERT_MSG(!instr.conversion.selector, "Unimplemented"); 1164 ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
992 1165
993 std::string op_a = 1166 std::string op_a = regs.GetRegisterAsInteger(
994 regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_signed); 1167 instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size);
995 1168
996 if (instr.conversion.abs_a) { 1169 if (instr.conversion.abs_a) {
997 op_a = "abs(" + op_a + ')'; 1170 op_a = "abs(" + op_a + ')';
998 } 1171 }
999 1172
1000 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_signed, 0, op_a, 1, 1); 1173 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
1174 1, instr.alu.saturate_d, 0, instr.conversion.dest_size);
1001 break; 1175 break;
1002 } 1176 }
1003 case OpCode::Id::I2F_R: { 1177 case OpCode::Id::I2F_R: {
1004 std::string op_a = 1178 ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented");
1005 regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_signed); 1179 ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
1180 std::string op_a = regs.GetRegisterAsInteger(
1181 instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size);
1006 1182
1007 if (instr.conversion.abs_a) { 1183 if (instr.conversion.abs_a) {
1008 op_a = "abs(" + op_a + ')'; 1184 op_a = "abs(" + op_a + ')';
@@ -1012,13 +1188,71 @@ private:
1012 break; 1188 break;
1013 } 1189 }
1014 case OpCode::Id::F2F_R: { 1190 case OpCode::Id::F2F_R: {
1191 ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented");
1192 ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented");
1015 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); 1193 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
1016 1194
1195 switch (instr.conversion.f2f.rounding) {
1196 case Tegra::Shader::F2fRoundingOp::None:
1197 break;
1198 case Tegra::Shader::F2fRoundingOp::Floor:
1199 op_a = "floor(" + op_a + ')';
1200 break;
1201 case Tegra::Shader::F2fRoundingOp::Ceil:
1202 op_a = "ceil(" + op_a + ')';
1203 break;
1204 case Tegra::Shader::F2fRoundingOp::Trunc:
1205 op_a = "trunc(" + op_a + ')';
1206 break;
1207 default:
1208 NGLOG_CRITICAL(HW_GPU, "Unimplemented f2f rounding mode {}",
1209 static_cast<u32>(instr.conversion.f2f.rounding.Value()));
1210 UNREACHABLE();
1211 break;
1212 }
1213
1017 if (instr.conversion.abs_a) { 1214 if (instr.conversion.abs_a) {
1018 op_a = "abs(" + op_a + ')'; 1215 op_a = "abs(" + op_a + ')';
1019 } 1216 }
1020 1217
1021 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); 1218 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d);
1219 break;
1220 }
1221 case OpCode::Id::F2I_R: {
1222 ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented");
1223 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
1224
1225 if (instr.conversion.abs_a) {
1226 op_a = "abs(" + op_a + ')';
1227 }
1228
1229 switch (instr.conversion.f2i.rounding) {
1230 case Tegra::Shader::F2iRoundingOp::None:
1231 break;
1232 case Tegra::Shader::F2iRoundingOp::Floor:
1233 op_a = "floor(" + op_a + ')';
1234 break;
1235 case Tegra::Shader::F2iRoundingOp::Ceil:
1236 op_a = "ceil(" + op_a + ')';
1237 break;
1238 case Tegra::Shader::F2iRoundingOp::Trunc:
1239 op_a = "trunc(" + op_a + ')';
1240 break;
1241 default:
1242 NGLOG_CRITICAL(HW_GPU, "Unimplemented f2i rounding mode {}",
1243 static_cast<u32>(instr.conversion.f2i.rounding.Value()));
1244 UNREACHABLE();
1245 break;
1246 }
1247
1248 if (instr.conversion.is_output_signed) {
1249 op_a = "int(" + op_a + ')';
1250 } else {
1251 op_a = "uint(" + op_a + ')';
1252 }
1253
1254 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
1255 1, false, 0, instr.conversion.dest_size);
1022 break; 1256 break;
1023 } 1257 }
1024 default: { 1258 default: {
@@ -1029,36 +1263,60 @@ private:
1029 break; 1263 break;
1030 } 1264 }
1031 case OpCode::Type::Memory: { 1265 case OpCode::Type::Memory: {
1032 const Attribute::Index attribute = instr.attribute.fmt20.index;
1033
1034 switch (opcode->GetId()) { 1266 switch (opcode->GetId()) {
1035 case OpCode::Id::LD_A: { 1267 case OpCode::Id::LD_A: {
1036 ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); 1268 ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
1037 regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element, 1269 regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element,
1038 attribute); 1270 instr.attribute.fmt20.index);
1271 break;
1272 }
1273 case OpCode::Id::LD_C: {
1274 ASSERT_MSG(instr.ld_c.unknown == 0, "Unimplemented");
1275
1276 std::string op_a =
1277 regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, instr.gpr8,
1278 GLSLRegister::Type::Float);
1279 std::string op_b =
1280 regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, instr.gpr8,
1281 GLSLRegister::Type::Float);
1282
1283 switch (instr.ld_c.type.Value()) {
1284 case Tegra::Shader::UniformType::Single:
1285 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
1286 break;
1287
1288 case Tegra::Shader::UniformType::Double:
1289 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
1290 regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1);
1291 break;
1292
1293 default:
1294 NGLOG_CRITICAL(HW_GPU, "Unhandled type: {}",
1295 static_cast<unsigned>(instr.ld_c.type.Value()));
1296 UNREACHABLE();
1297 }
1039 break; 1298 break;
1040 } 1299 }
1041 case OpCode::Id::ST_A: { 1300 case OpCode::Id::ST_A: {
1042 ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); 1301 ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
1043 regs.SetOutputAttributeToRegister(attribute, instr.attribute.fmt20.element, 1302 regs.SetOutputAttributeToRegister(instr.attribute.fmt20.index,
1044 instr.gpr0); 1303 instr.attribute.fmt20.element, instr.gpr0);
1045 break; 1304 break;
1046 } 1305 }
1047 case OpCode::Id::TEX: { 1306 case OpCode::Id::TEX: {
1048 ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
1049 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); 1307 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
1050 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 1308 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
1051 const std::string sampler = GetSampler(instr.sampler); 1309 const std::string sampler = GetSampler(instr.sampler);
1052 const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; 1310 const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
1053 // Add an extra scope and declare the texture coords inside to prevent overwriting 1311 // Add an extra scope and declare the texture coords inside to prevent
1054 // them in case they are used as outputs of the texs instruction. 1312 // overwriting them in case they are used as outputs of the texs instruction.
1055 shader.AddLine("{"); 1313 shader.AddLine("{");
1056 ++shader.scope; 1314 ++shader.scope;
1057 shader.AddLine(coord); 1315 shader.AddLine(coord);
1058 const std::string texture = "texture(" + sampler + ", coords)"; 1316 const std::string texture = "texture(" + sampler + ", coords)";
1059 1317
1060 size_t dest_elem{}; 1318 size_t dest_elem{};
1061 for (size_t elem = 0; elem < instr.attribute.fmt20.size; ++elem) { 1319 for (size_t elem = 0; elem < 4; ++elem) {
1062 if (!instr.tex.IsComponentEnabled(elem)) { 1320 if (!instr.tex.IsComponentEnabled(elem)) {
1063 // Skip disabled components 1321 // Skip disabled components
1064 continue; 1322 continue;
@@ -1071,7 +1329,6 @@ private:
1071 break; 1329 break;
1072 } 1330 }
1073 case OpCode::Id::TEXS: { 1331 case OpCode::Id::TEXS: {
1074 ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
1075 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); 1332 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
1076 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); 1333 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
1077 const std::string sampler = GetSampler(instr.sampler); 1334 const std::string sampler = GetSampler(instr.sampler);
@@ -1083,8 +1340,8 @@ private:
1083 shader.AddLine(coord); 1340 shader.AddLine(coord);
1084 const std::string texture = "texture(" + sampler + ", coords)"; 1341 const std::string texture = "texture(" + sampler + ", coords)";
1085 1342
1086 // TEXS has two destination registers. RG goes into gpr0+0 and gpr0+1, and BA goes 1343 // TEXS has two destination registers. RG goes into gpr0+0 and gpr0+1, and BA
1087 // into gpr28+0 and gpr28+1 1344 // goes into gpr28+0 and gpr28+1
1088 size_t offset{}; 1345 size_t offset{};
1089 1346
1090 for (const auto& dest : {instr.gpr0.Value(), instr.gpr28.Value()}) { 1347 for (const auto& dest : {instr.gpr0.Value(), instr.gpr28.Value()}) {
@@ -1134,7 +1391,8 @@ private:
1134 if (instr.is_b_gpr) { 1391 if (instr.is_b_gpr) {
1135 op_b += regs.GetRegisterAsFloat(instr.gpr20); 1392 op_b += regs.GetRegisterAsFloat(instr.gpr20);
1136 } else { 1393 } else {
1137 op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Float); 1394 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1395 GLSLRegister::Type::Float);
1138 } 1396 }
1139 } 1397 }
1140 1398
@@ -1167,15 +1425,17 @@ private:
1167 } 1425 }
1168 case OpCode::Type::IntegerSetPredicate: { 1426 case OpCode::Type::IntegerSetPredicate: {
1169 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed); 1427 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
1428 std::string op_b;
1170 1429
1171 std::string op_b{}; 1430 if (instr.is_b_imm) {
1172 1431 op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
1173 ASSERT_MSG(!instr.is_b_imm, "ISETP_IMM not implemented");
1174
1175 if (instr.is_b_gpr) {
1176 op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed);
1177 } else { 1432 } else {
1178 op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer); 1433 if (instr.is_b_gpr) {
1434 op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed);
1435 } else {
1436 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1437 GLSLRegister::Type::Integer);
1438 }
1179 } 1439 }
1180 1440
1181 using Tegra::Shader::Pred; 1441 using Tegra::Shader::Pred;
@@ -1221,7 +1481,8 @@ private:
1221 if (instr.is_b_gpr) { 1481 if (instr.is_b_gpr) {
1222 op_b += regs.GetRegisterAsFloat(instr.gpr20); 1482 op_b += regs.GetRegisterAsFloat(instr.gpr20);
1223 } else { 1483 } else {
1224 op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Float); 1484 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1485 GLSLRegister::Type::Float);
1225 } 1486 }
1226 } 1487 }
1227 1488
@@ -1229,8 +1490,8 @@ private:
1229 op_b = "abs(" + op_b + ')'; 1490 op_b = "abs(" + op_b + ')';
1230 } 1491 }
1231 1492
1232 // The fset instruction sets a register to 1.0 if the condition is true, and to 0 1493 // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
1233 // otherwise. 1494 // condition is true, and to 0 otherwise.
1234 std::string second_pred = 1495 std::string second_pred =
1235 GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0); 1496 GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);
1236 1497
@@ -1248,6 +1509,41 @@ private:
1248 } 1509 }
1249 break; 1510 break;
1250 } 1511 }
1512 case OpCode::Type::IntegerSet: {
1513 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed);
1514
1515 std::string op_b;
1516
1517 if (instr.is_b_imm) {
1518 op_b = std::to_string(instr.alu.GetSignedImm20_20());
1519 } else {
1520 if (instr.is_b_gpr) {
1521 op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, instr.iset.is_signed);
1522 } else {
1523 op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1524 GLSLRegister::Type::Integer);
1525 }
1526 }
1527
1528 // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
1529 // condition is true, and to 0 otherwise.
1530 std::string second_pred =
1531 GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0);
1532
1533 std::string comparator = GetPredicateComparison(instr.iset.cond);
1534 std::string combiner = GetPredicateCombiner(instr.iset.op);
1535
1536 std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " +
1537 combiner + " (" + second_pred + "))";
1538
1539 if (instr.iset.bf) {
1540 regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
1541 } else {
1542 regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1,
1543 1);
1544 }
1545 break;
1546 }
1251 default: { 1547 default: {
1252 switch (opcode->GetId()) { 1548 switch (opcode->GetId()) {
1253 case OpCode::Id::EXIT: { 1549 case OpCode::Id::EXIT: {
@@ -1261,8 +1557,8 @@ private:
1261 1557
1262 shader.AddLine("return true;"); 1558 shader.AddLine("return true;");
1263 if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { 1559 if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
1264 // If this is an unconditional exit then just end processing here, otherwise we 1560 // If this is an unconditional exit then just end processing here, otherwise
1265 // have to account for the possibility of the condition not being met, so 1561 // we have to account for the possibility of the condition not being met, so
1266 // continue processing the next instruction. 1562 // continue processing the next instruction.
1267 offset = PROGRAM_END - 1; 1563 offset = PROGRAM_END - 1;
1268 } 1564 }
@@ -1284,6 +1580,11 @@ private:
1284 regs.SetRegisterToInputAttibute(instr.gpr0, attribute.element, attribute.index); 1580 regs.SetRegisterToInputAttibute(instr.gpr0, attribute.element, attribute.index);
1285 break; 1581 break;
1286 } 1582 }
1583 case OpCode::Id::SSY: {
1584 // The SSY opcode tells the GPU where to re-converge divergent execution paths, we
1585 // can ignore this when generating GLSL code.
1586 break;
1587 }
1287 default: { 1588 default: {
1288 NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName()); 1589 NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
1289 UNREACHABLE(); 1590 UNREACHABLE();
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 254f6e2c3..c1e6fac9f 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -39,6 +39,10 @@ void main() {
39 // Viewport can be flipped, which is unsupported by glViewport 39 // Viewport can be flipped, which is unsupported by glViewport
40 position.xy *= viewport_flip.xy; 40 position.xy *= viewport_flip.xy;
41 gl_Position = position; 41 gl_Position = position;
42
43 // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0
44 // For now, this is here to bring order in lieu of proper emulation
45 position.w = 1.0;
42} 46}
43)"; 47)";
44 out += program.first; 48 out += program.first;
@@ -62,8 +66,6 @@ layout (std140) uniform fs_config {
62 vec4 viewport_flip; 66 vec4 viewport_flip;
63}; 67};
64 68
65uniform sampler2D tex[32];
66
67void main() { 69void main() {
68 exec_shader(); 70 exec_shader();
69} 71}
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 458032b5c..ed890e0f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -22,17 +22,28 @@ class ConstBufferEntry {
22 using Maxwell = Tegra::Engines::Maxwell3D::Regs; 22 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
23 23
24public: 24public:
25 void MarkAsUsed(unsigned index, unsigned offset, Maxwell::ShaderStage stage) { 25 void MarkAsUsed(u64 index, u64 offset, Maxwell::ShaderStage stage) {
26 is_used = true; 26 is_used = true;
27 this->index = index; 27 this->index = static_cast<unsigned>(index);
28 this->stage = stage;
29 max_offset = std::max(max_offset, static_cast<unsigned>(offset));
30 }
31
32 void MarkAsUsedIndirect(u64 index, Maxwell::ShaderStage stage) {
33 is_used = true;
34 is_indirect = true;
35 this->index = static_cast<unsigned>(index);
28 this->stage = stage; 36 this->stage = stage;
29 max_offset = std::max(max_offset, offset);
30 } 37 }
31 38
32 bool IsUsed() const { 39 bool IsUsed() const {
33 return is_used; 40 return is_used;
34 } 41 }
35 42
43 bool IsIndirect() const {
44 return is_indirect;
45 }
46
36 unsigned GetIndex() const { 47 unsigned GetIndex() const {
37 return index; 48 return index;
38 } 49 }
@@ -51,13 +62,54 @@ private:
51 }; 62 };
52 63
53 bool is_used{}; 64 bool is_used{};
65 bool is_indirect{};
54 unsigned index{}; 66 unsigned index{};
55 unsigned max_offset{}; 67 unsigned max_offset{};
56 Maxwell::ShaderStage stage; 68 Maxwell::ShaderStage stage;
57}; 69};
58 70
71class SamplerEntry {
72 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
73
74public:
75 SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index)
76 : offset(offset), stage(stage), sampler_index(index) {}
77
78 size_t GetOffset() const {
79 return offset;
80 }
81
82 size_t GetIndex() const {
83 return sampler_index;
84 }
85
86 Maxwell::ShaderStage GetStage() const {
87 return stage;
88 }
89
90 std::string GetName() const {
91 return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '[' +
92 std::to_string(sampler_index) + ']';
93 }
94
95 static std::string GetArrayName(Maxwell::ShaderStage stage) {
96 return TextureSamplerNames[static_cast<size_t>(stage)];
97 }
98
99private:
100 static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = {
101 "tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs",
102 };
103 /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
104 /// instruction.
105 size_t offset;
106 Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
107 size_t sampler_index; ///< Value used to index into the generated GLSL sampler array.
108};
109
59struct ShaderEntries { 110struct ShaderEntries {
60 std::vector<ConstBufferEntry> const_buffer_entries; 111 std::vector<ConstBufferEntry> const_buffer_entries;
112 std::vector<SamplerEntry> texture_samplers;
61}; 113};
62 114
63using ProgramResult = std::pair<std::string, ShaderEntries>; 115using ProgramResult = std::pair<std::string, ShaderEntries>;
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index ccdfc2718..d7167b298 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -32,33 +32,14 @@ void SetShaderUniformBlockBindings(GLuint shader) {
32 sizeof(MaxwellUniformData)); 32 sizeof(MaxwellUniformData));
33} 33}
34 34
35void SetShaderSamplerBindings(GLuint shader) {
36 OpenGLState cur_state = OpenGLState::GetCurState();
37 GLuint old_program = std::exchange(cur_state.draw.shader_program, shader);
38 cur_state.Apply();
39
40 // Set the texture samplers to correspond to different texture units
41 for (u32 texture = 0; texture < NumTextureSamplers; ++texture) {
42 // Set the texture samplers to correspond to different texture units
43 std::string uniform_name = "tex[" + std::to_string(texture) + "]";
44 GLint uniform_tex = glGetUniformLocation(shader, uniform_name.c_str());
45 if (uniform_tex != -1) {
46 glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id);
47 }
48 }
49
50 cur_state.draw.shader_program = old_program;
51 cur_state.Apply();
52}
53
54} // namespace Impl 35} // namespace Impl
55 36
56void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { 37void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
57 const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; 38 const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
58 39
59 // TODO(bunnei): Support more than one viewport 40 // TODO(bunnei): Support more than one viewport
60 viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0 : 1.0; 41 viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
61 viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0 : 1.0; 42 viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
62} 43}
63 44
64} // namespace GLShader 45} // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index e963b4b7e..4295c20a6 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -45,7 +45,6 @@ public:
45 shader.Create(program_result.first.c_str(), type); 45 shader.Create(program_result.first.c_str(), type);
46 program.Create(true, shader.handle); 46 program.Create(true, shader.handle);
47 Impl::SetShaderUniformBlockBindings(program.handle); 47 Impl::SetShaderUniformBlockBindings(program.handle);
48 Impl::SetShaderSamplerBindings(program.handle);
49 entries = program_result.second; 48 entries = program_result.second;
50 } 49 }
51 GLuint GetHandle() const { 50 GLuint GetHandle() const {
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index f91dfe36a..1f1e48425 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -50,6 +50,10 @@ OpenGLState::OpenGLState() {
50 for (auto& texture_unit : texture_units) { 50 for (auto& texture_unit : texture_units) {
51 texture_unit.texture_2d = 0; 51 texture_unit.texture_2d = 0;
52 texture_unit.sampler = 0; 52 texture_unit.sampler = 0;
53 texture_unit.swizzle.r = GL_RED;
54 texture_unit.swizzle.g = GL_GREEN;
55 texture_unit.swizzle.b = GL_BLUE;
56 texture_unit.swizzle.a = GL_ALPHA;
53 } 57 }
54 58
55 lighting_lut.texture_buffer = 0; 59 lighting_lut.texture_buffer = 0;
@@ -192,13 +196,22 @@ void OpenGLState::Apply() const {
192 } 196 }
193 197
194 // Textures 198 // Textures
195 for (size_t i = 0; i < std::size(texture_units); ++i) { 199 for (int i = 0; i < std::size(texture_units); ++i) {
196 if (texture_units[i].texture_2d != cur_state.texture_units[i].texture_2d) { 200 if (texture_units[i].texture_2d != cur_state.texture_units[i].texture_2d) {
197 glActiveTexture(TextureUnits::MaxwellTexture(i).Enum()); 201 glActiveTexture(TextureUnits::MaxwellTexture(i).Enum());
198 glBindTexture(GL_TEXTURE_2D, texture_units[i].texture_2d); 202 glBindTexture(GL_TEXTURE_2D, texture_units[i].texture_2d);
199 } 203 }
200 if (texture_units[i].sampler != cur_state.texture_units[i].sampler) { 204 if (texture_units[i].sampler != cur_state.texture_units[i].sampler) {
201 glBindSampler(i, texture_units[i].sampler); 205 glBindSampler(static_cast<GLuint>(i), texture_units[i].sampler);
206 }
207 // Update the texture swizzle
208 if (texture_units[i].swizzle.r != cur_state.texture_units[i].swizzle.r ||
209 texture_units[i].swizzle.g != cur_state.texture_units[i].swizzle.g ||
210 texture_units[i].swizzle.b != cur_state.texture_units[i].swizzle.b ||
211 texture_units[i].swizzle.a != cur_state.texture_units[i].swizzle.a) {
212 std::array<GLint, 4> mask = {texture_units[i].swizzle.r, texture_units[i].swizzle.g,
213 texture_units[i].swizzle.b, texture_units[i].swizzle.a};
214 glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
202 } 215 }
203 } 216 }
204 217
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 75c08e645..839e50e93 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -85,6 +85,12 @@ public:
85 struct { 85 struct {
86 GLuint texture_2d; // GL_TEXTURE_BINDING_2D 86 GLuint texture_2d; // GL_TEXTURE_BINDING_2D
87 GLuint sampler; // GL_SAMPLER_BINDING 87 GLuint sampler; // GL_SAMPLER_BINDING
88 struct {
89 GLint r; // GL_TEXTURE_SWIZZLE_R
90 GLint g; // GL_TEXTURE_SWIZZLE_G
91 GLint b; // GL_TEXTURE_SWIZZLE_B
92 GLint a; // GL_TEXTURE_SWIZZLE_A
93 } swizzle;
88 } texture_units[32]; 94 } texture_units[32];
89 95
90 struct { 96 struct {
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index a630610d8..2155fb019 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -100,6 +100,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
100 switch (wrap_mode) { 100 switch (wrap_mode) {
101 case Tegra::Texture::WrapMode::Wrap: 101 case Tegra::Texture::WrapMode::Wrap:
102 return GL_REPEAT; 102 return GL_REPEAT;
103 case Tegra::Texture::WrapMode::Mirror:
104 return GL_MIRRORED_REPEAT;
103 case Tegra::Texture::WrapMode::ClampToEdge: 105 case Tegra::Texture::WrapMode::ClampToEdge:
104 return GL_CLAMP_TO_EDGE; 106 return GL_CLAMP_TO_EDGE;
105 case Tegra::Texture::WrapMode::ClampOGL: 107 case Tegra::Texture::WrapMode::ClampOGL:
@@ -178,4 +180,25 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
178 return {}; 180 return {};
179} 181}
180 182
183inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
184 switch (source) {
185 case Tegra::Texture::SwizzleSource::Zero:
186 return GL_ZERO;
187 case Tegra::Texture::SwizzleSource::R:
188 return GL_RED;
189 case Tegra::Texture::SwizzleSource::G:
190 return GL_GREEN;
191 case Tegra::Texture::SwizzleSource::B:
192 return GL_BLUE;
193 case Tegra::Texture::SwizzleSource::A:
194 return GL_ALPHA;
195 case Tegra::Texture::SwizzleSource::OneInt:
196 case Tegra::Texture::SwizzleSource::OneFloat:
197 return GL_ONE;
198 }
199 NGLOG_CRITICAL(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source));
200 UNREACHABLE();
201 return {};
202}
203
181} // namespace MaxwellToGL 204} // namespace MaxwellToGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 3440d2190..f33766bfd 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -316,6 +316,7 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
316 }}; 316 }};
317 317
318 state.texture_units[0].texture_2d = screen_info.display_texture; 318 state.texture_units[0].texture_2d = screen_info.display_texture;
319 state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
319 state.Apply(); 320 state.Apply();
320 321
321 glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data()); 322 glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
new file mode 100644
index 000000000..3c4ad1c9d
--- /dev/null
+++ b/src/video_core/textures/astc.cpp
@@ -0,0 +1,1646 @@
1// Copyright 2016 The University of North Carolina at Chapel Hill
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Please send all BUG REPORTS to <pavel@cs.unc.edu>.
16// <http://gamma.cs.unc.edu/FasTC/>
17
18#include <algorithm>
19#include <cassert>
20#include <cstdint>
21#include <cstring>
22#include <vector>
23
24#include "video_core/textures/astc.h"
25
/// Sequential, least-significant-bit-first reader/writer over a caller-owned byte buffer.
///
/// The stream does not own the buffer. Reads are not bounds checked, so the caller must make
/// sure the buffer holds every bit that will be read. Writes are limited to the capacity given
/// at construction; once it is used up, further writes are silently ignored.
class BitStream {
public:
    /// @param ptr          First byte of the buffer to read from / write to.
    /// @param nBits        Maximum number of bits that may be written. With the default of 0
    ///                     the stream is effectively read-only.
    /// @param start_offset Bit position inside the first byte at which to start. Only the value
    ///                     modulo 8 is used; the byte pointer itself is not advanced. Expected
    ///                     to be non-negative.
    BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
        : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8), done(nBits <= 0) {}

    /// Number of bits written so far.
    int GetBitsWritten() const {
        return m_BitsWritten;
    }

    /// Writes the low nBits of val, most significant of those bits first.
    void WriteBitsR(unsigned int val, unsigned int nBits) {
        for (unsigned int i = 0; i < nBits; i++) {
            WriteBit((val >> (nBits - i - 1)) & 1);
        }
    }

    /// Writes the low nBits of val, least significant bit first.
    void WriteBits(unsigned int val, unsigned int nBits) {
        for (unsigned int i = 0; i < nBits; i++) {
            WriteBit((val >> i) & 1);
        }
    }

    /// Number of bits read so far.
    int GetBitsRead() const {
        return m_BitsRead;
    }

    /// Reads a single bit and advances the cursor. Returns 0 or 1.
    int ReadBit() {
        const int bit = (*m_CurByte >> m_NextBit) & 1;

        // Step to the next byte once all eight bits of the current one are consumed.
        if (++m_NextBit >= 8) {
            m_NextBit = 0;
            m_CurByte++;
        }

        m_BitsRead++;
        return bit;
    }

    /// Reads nBits bits; the first bit read becomes bit 0 of the result.
    unsigned int ReadBits(unsigned int nBits) {
        unsigned int ret = 0;
        for (unsigned int i = 0; i < nBits; i++) {
            // Shift as unsigned so that filling bit 31 never touches a signed sign bit.
            ret |= static_cast<unsigned int>(ReadBit()) << i;
        }
        return ret;
    }

private:
    /// Writes one bit at the cursor unless the write capacity is already exhausted.
    void WriteBit(int b) {
        if (done)
            return;

        const unsigned int mask = 1u << m_NextBit++;

        // Clear the target bit, then set it again if requested.
        *m_CurByte &= ~mask;
        if (b)
            *m_CurByte |= mask;

        // Next byte?
        if (m_NextBit >= 8) {
            m_CurByte += 1;
            m_NextBit = 0;
        }

        done = ++m_BitsWritten >= m_NumBits;
    }

    int m_BitsWritten = 0;    ///< Bits written so far.
    const int m_NumBits;      ///< Write capacity in bits.
    unsigned char* m_CurByte; ///< Byte currently being read / written.
    int m_NextBit;            ///< Bit position inside *m_CurByte, in [0, 7].
    int m_BitsRead = 0;       ///< Bits read so far.

    bool done; ///< True once no further bits may be written.
};
106
/// Read-only bit-field view over an integer that lives elsewhere.
///
/// The view stores a reference, so the viewed integer must outlive it. Changes made to the
/// integer after construction are visible through the view.
template <typename IntType>
class Bits {
private:
    const IntType& m_Bits;

public:
    explicit Bits(IntType& v) : m_Bits(v) {}

    // The view wraps a reference and is not meant to be default-constructed or copied.
    Bits() = delete;
    Bits(const Bits&) = delete;
    Bits& operator=(const Bits&) = delete;

    /// Returns the bit at position bitPos (0 = least significant) as 0 or 1.
    uint8_t operator[](uint32_t bitPos) const {
        return static_cast<uint8_t>((m_Bits >> bitPos) & 1);
    }

    /// Returns the bits in the inclusive range [start, end], shifted down so that the lower
    /// position of the range becomes bit 0. The two positions may be given in either order.
    IntType operator()(uint32_t start, uint32_t end) const {
        if (start > end) {
            const uint32_t t = start;
            start = end;
            end = t;
        }

        // Build the mask in 64 bits so that fields of 31 bits and wider do not overflow an int.
        const uint32_t width = end - start + 1;
        const uint64_t mask = width >= 64 ? ~uint64_t{0} : (uint64_t{1} << width) - 1;
        return static_cast<IntType>((m_Bits >> start) & mask);
    }
};
137
138enum EIntegerEncoding { eIntegerEncoding_JustBits, eIntegerEncoding_Quint, eIntegerEncoding_Trit };
139
140class IntegerEncodedValue {
141private:
142 const EIntegerEncoding m_Encoding;
143 const uint32_t m_NumBits;
144 uint32_t m_BitValue;
145 union {
146 uint32_t m_QuintValue;
147 uint32_t m_TritValue;
148 };
149
150public:
151 // Jank, but we're not doing any heavy lifting in this class, so it's
152 // probably OK. It allows us to use these in std::vectors...
153 IntegerEncodedValue& operator=(const IntegerEncodedValue& other) {
154 new (this) IntegerEncodedValue(other);
155 return *this;
156 }
157
158 IntegerEncodedValue(EIntegerEncoding encoding, uint32_t numBits)
159 : m_Encoding(encoding), m_NumBits(numBits) {}
160
161 EIntegerEncoding GetEncoding() const {
162 return m_Encoding;
163 }
164 uint32_t BaseBitLength() const {
165 return m_NumBits;
166 }
167
168 uint32_t GetBitValue() const {
169 return m_BitValue;
170 }
171 void SetBitValue(uint32_t val) {
172 m_BitValue = val;
173 }
174
175 uint32_t GetTritValue() const {
176 return m_TritValue;
177 }
178 void SetTritValue(uint32_t val) {
179 m_TritValue = val;
180 }
181
182 uint32_t GetQuintValue() const {
183 return m_QuintValue;
184 }
185 void SetQuintValue(uint32_t val) {
186 m_QuintValue = val;
187 }
188
189 bool MatchesEncoding(const IntegerEncodedValue& other) {
190 return m_Encoding == other.m_Encoding && m_NumBits == other.m_NumBits;
191 }
192
193 // Returns the number of bits required to encode nVals values.
194 uint32_t GetBitLength(uint32_t nVals) {
195 uint32_t totalBits = m_NumBits * nVals;
196 if (m_Encoding == eIntegerEncoding_Trit) {
197 totalBits += (nVals * 8 + 4) / 5;
198 } else if (m_Encoding == eIntegerEncoding_Quint) {
199 totalBits += (nVals * 7 + 2) / 3;
200 }
201 return totalBits;
202 }
203
204 // Count the number of bits set in a number.
205 static inline uint32_t Popcnt(uint32_t n) {
206 uint32_t c;
207 for (c = 0; n; c++) {
208 n &= n - 1;
209 }
210 return c;
211 }
212
213 // Returns a new instance of this struct that corresponds to the
214 // can take no more than maxval values
215 static IntegerEncodedValue CreateEncoding(uint32_t maxVal) {
216 while (maxVal > 0) {
217 uint32_t check = maxVal + 1;
218
219 // Is maxVal a power of two?
220 if (!(check & (check - 1))) {
221 return IntegerEncodedValue(eIntegerEncoding_JustBits, Popcnt(maxVal));
222 }
223
224 // Is maxVal of the type 3*2^n - 1?
225 if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
226 return IntegerEncodedValue(eIntegerEncoding_Trit, Popcnt(check / 3 - 1));
227 }
228
229 // Is maxVal of the type 5*2^n - 1?
230 if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
231 return IntegerEncodedValue(eIntegerEncoding_Quint, Popcnt(check / 5 - 1));
232 }
233
234 // Apparently it can't be represented with a bounded integer sequence...
235 // just iterate.
236 maxVal--;
237 }
238 return IntegerEncodedValue(eIntegerEncoding_JustBits, 0);
239 }
240
241 // Fills result with the values that are encoded in the given
242 // bitstream. We must know beforehand what the maximum possible
243 // value is, and how many values we're decoding.
244 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits,
245 uint32_t maxRange, uint32_t nValues) {
246 // Determine encoding parameters
247 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
248
249 // Start decoding
250 uint32_t nValsDecoded = 0;
251 while (nValsDecoded < nValues) {
252 switch (val.GetEncoding()) {
253 case eIntegerEncoding_Quint:
254 DecodeQuintBlock(bits, result, val.BaseBitLength());
255 nValsDecoded += 3;
256 break;
257
258 case eIntegerEncoding_Trit:
259 DecodeTritBlock(bits, result, val.BaseBitLength());
260 nValsDecoded += 5;
261 break;
262
263 case eIntegerEncoding_JustBits:
264 val.SetBitValue(bits.ReadBits(val.BaseBitLength()));
265 result.push_back(val);
266 nValsDecoded++;
267 break;
268 }
269 }
270 }
271
272private:
273 static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
274 uint32_t nBitsPerValue) {
275 // Implement the algorithm in section C.2.12
276 uint32_t m[5];
277 uint32_t t[5];
278 uint32_t T;
279
280 // Read the trit encoded block according to
281 // table C.2.14
282 m[0] = bits.ReadBits(nBitsPerValue);
283 T = bits.ReadBits(2);
284 m[1] = bits.ReadBits(nBitsPerValue);
285 T |= bits.ReadBits(2) << 2;
286 m[2] = bits.ReadBits(nBitsPerValue);
287 T |= bits.ReadBit() << 4;
288 m[3] = bits.ReadBits(nBitsPerValue);
289 T |= bits.ReadBits(2) << 5;
290 m[4] = bits.ReadBits(nBitsPerValue);
291 T |= bits.ReadBit() << 7;
292
293 uint32_t C = 0;
294
295 Bits<uint32_t> Tb(T);
296 if (Tb(2, 4) == 7) {
297 C = (Tb(5, 7) << 2) | Tb(0, 1);
298 t[4] = t[3] = 2;
299 } else {
300 C = Tb(0, 4);
301 if (Tb(5, 6) == 3) {
302 t[4] = 2;
303 t[3] = Tb[7];
304 } else {
305 t[4] = Tb[7];
306 t[3] = Tb(5, 6);
307 }
308 }
309
310 Bits<uint32_t> Cb(C);
311 if (Cb(0, 1) == 3) {
312 t[2] = 2;
313 t[1] = Cb[4];
314 t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]);
315 } else if (Cb(2, 3) == 3) {
316 t[2] = 2;
317 t[1] = 2;
318 t[0] = Cb(0, 1);
319 } else {
320 t[2] = Cb[4];
321 t[1] = Cb(2, 3);
322 t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]);
323 }
324
325 for (uint32_t i = 0; i < 5; i++) {
326 IntegerEncodedValue val(eIntegerEncoding_Trit, nBitsPerValue);
327 val.SetBitValue(m[i]);
328 val.SetTritValue(t[i]);
329 result.push_back(val);
330 }
331 }
332
333 static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
334 uint32_t nBitsPerValue) {
335 // Implement the algorithm in section C.2.12
336 uint32_t m[3];
337 uint32_t q[3];
338 uint32_t Q;
339
340 // Read the trit encoded block according to
341 // table C.2.15
342 m[0] = bits.ReadBits(nBitsPerValue);
343 Q = bits.ReadBits(3);
344 m[1] = bits.ReadBits(nBitsPerValue);
345 Q |= bits.ReadBits(2) << 3;
346 m[2] = bits.ReadBits(nBitsPerValue);
347 Q |= bits.ReadBits(2) << 5;
348
349 Bits<uint32_t> Qb(Q);
350 if (Qb(1, 2) == 3 && Qb(5, 6) == 0) {
351 q[0] = q[1] = 4;
352 q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]);
353 } else {
354 uint32_t C = 0;
355 if (Qb(1, 2) == 3) {
356 q[2] = 4;
357 C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0];
358 } else {
359 q[2] = Qb(5, 6);
360 C = Qb(0, 4);
361 }
362
363 Bits<uint32_t> Cb(C);
364 if (Cb(0, 2) == 5) {
365 q[1] = 4;
366 q[0] = Cb(3, 4);
367 } else {
368 q[1] = Cb(3, 4);
369 q[0] = Cb(0, 2);
370 }
371 }
372
373 for (uint32_t i = 0; i < 3; i++) {
374 IntegerEncodedValue val(eIntegerEncoding_Quint, nBitsPerValue);
375 val.m_BitValue = m[i];
376 val.m_QuintValue = q[i];
377 result.push_back(val);
378 }
379 }
380};
381
382namespace ASTCC {
383
384struct TexelWeightParams {
385 uint32_t m_Width;
386 uint32_t m_Height;
387 bool m_bDualPlane;
388 uint32_t m_MaxWeight;
389 bool m_bError;
390 bool m_bVoidExtentLDR;
391 bool m_bVoidExtentHDR;
392
393 TexelWeightParams() {
394 memset(this, 0, sizeof(*this));
395 }
396
397 uint32_t GetPackedBitSize() {
398 // How many indices do we have?
399 uint32_t nIdxs = m_Height * m_Width;
400 if (m_bDualPlane) {
401 nIdxs *= 2;
402 }
403
404 return IntegerEncodedValue::CreateEncoding(m_MaxWeight).GetBitLength(nIdxs);
405 }
406
407 uint32_t GetNumWeightValues() const {
408 uint32_t ret = m_Width * m_Height;
409 if (m_bDualPlane) {
410 ret *= 2;
411 }
412 return ret;
413 }
414};
415
416TexelWeightParams DecodeBlockInfo(BitStream& strm) {
417 TexelWeightParams params;
418
419 // Read the entire block mode all at once
420 uint16_t modeBits = strm.ReadBits(11);
421
422 // Does this match the void extent block mode?
423 if ((modeBits & 0x01FF) == 0x1FC) {
424 if (modeBits & 0x200) {
425 params.m_bVoidExtentHDR = true;
426 } else {
427 params.m_bVoidExtentLDR = true;
428 }
429
430 // Next two bits must be one.
431 if (!(modeBits & 0x400) || !strm.ReadBit()) {
432 params.m_bError = true;
433 }
434
435 return params;
436 }
437
438 // First check if the last four bits are zero
439 if ((modeBits & 0xF) == 0) {
440 params.m_bError = true;
441 return params;
442 }
443
444 // If the last two bits are zero, then if bits
445 // [6-8] are all ones, this is also reserved.
446 if ((modeBits & 0x3) == 0 && (modeBits & 0x1C0) == 0x1C0) {
447 params.m_bError = true;
448 return params;
449 }
450
451 // Otherwise, there is no error... Figure out the layout
452 // of the block mode. Layout is determined by a number
453 // between 0 and 9 corresponding to table C.2.8 of the
454 // ASTC spec.
455 uint32_t layout = 0;
456
457 if ((modeBits & 0x1) || (modeBits & 0x2)) {
458 // layout is in [0-4]
459 if (modeBits & 0x8) {
460 // layout is in [2-4]
461 if (modeBits & 0x4) {
462 // layout is in [3-4]
463 if (modeBits & 0x100) {
464 layout = 4;
465 } else {
466 layout = 3;
467 }
468 } else {
469 layout = 2;
470 }
471 } else {
472 // layout is in [0-1]
473 if (modeBits & 0x4) {
474 layout = 1;
475 } else {
476 layout = 0;
477 }
478 }
479 } else {
480 // layout is in [5-9]
481 if (modeBits & 0x100) {
482 // layout is in [7-9]
483 if (modeBits & 0x80) {
484 // layout is in [7-8]
485 assert((modeBits & 0x40) == 0U);
486 if (modeBits & 0x20) {
487 layout = 8;
488 } else {
489 layout = 7;
490 }
491 } else {
492 layout = 9;
493 }
494 } else {
495 // layout is in [5-6]
496 if (modeBits & 0x80) {
497 layout = 6;
498 } else {
499 layout = 5;
500 }
501 }
502 }
503
504 assert(layout < 10);
505
506 // Determine R
507 uint32_t R = !!(modeBits & 0x10);
508 if (layout < 5) {
509 R |= (modeBits & 0x3) << 1;
510 } else {
511 R |= (modeBits & 0xC) >> 1;
512 }
513 assert(2 <= R && R <= 7);
514
515 // Determine width & height
516 switch (layout) {
517 case 0: {
518 uint32_t A = (modeBits >> 5) & 0x3;
519 uint32_t B = (modeBits >> 7) & 0x3;
520 params.m_Width = B + 4;
521 params.m_Height = A + 2;
522 break;
523 }
524
525 case 1: {
526 uint32_t A = (modeBits >> 5) & 0x3;
527 uint32_t B = (modeBits >> 7) & 0x3;
528 params.m_Width = B + 8;
529 params.m_Height = A + 2;
530 break;
531 }
532
533 case 2: {
534 uint32_t A = (modeBits >> 5) & 0x3;
535 uint32_t B = (modeBits >> 7) & 0x3;
536 params.m_Width = A + 2;
537 params.m_Height = B + 8;
538 break;
539 }
540
541 case 3: {
542 uint32_t A = (modeBits >> 5) & 0x3;
543 uint32_t B = (modeBits >> 7) & 0x1;
544 params.m_Width = A + 2;
545 params.m_Height = B + 6;
546 break;
547 }
548
549 case 4: {
550 uint32_t A = (modeBits >> 5) & 0x3;
551 uint32_t B = (modeBits >> 7) & 0x1;
552 params.m_Width = B + 2;
553 params.m_Height = A + 2;
554 break;
555 }
556
557 case 5: {
558 uint32_t A = (modeBits >> 5) & 0x3;
559 params.m_Width = 12;
560 params.m_Height = A + 2;
561 break;
562 }
563
564 case 6: {
565 uint32_t A = (modeBits >> 5) & 0x3;
566 params.m_Width = A + 2;
567 params.m_Height = 12;
568 break;
569 }
570
571 case 7: {
572 params.m_Width = 6;
573 params.m_Height = 10;
574 break;
575 }
576
577 case 8: {
578 params.m_Width = 10;
579 params.m_Height = 6;
580 break;
581 }
582
583 case 9: {
584 uint32_t A = (modeBits >> 5) & 0x3;
585 uint32_t B = (modeBits >> 9) & 0x3;
586 params.m_Width = A + 6;
587 params.m_Height = B + 6;
588 break;
589 }
590
591 default:
592 assert(!"Don't know this layout...");
593 params.m_bError = true;
594 break;
595 }
596
597 // Determine whether or not we're using dual planes
598 // and/or high precision layouts.
599 bool D = (layout != 9) && (modeBits & 0x400);
600 bool H = (layout != 9) && (modeBits & 0x200);
601
602 if (H) {
603 const uint32_t maxWeights[6] = {9, 11, 15, 19, 23, 31};
604 params.m_MaxWeight = maxWeights[R - 2];
605 } else {
606 const uint32_t maxWeights[6] = {1, 2, 3, 4, 5, 7};
607 params.m_MaxWeight = maxWeights[R - 2];
608 }
609
610 params.m_bDualPlane = D;
611
612 return params;
613}
614
615void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
616 uint32_t blockHeight) {
617 // Don't actually care about the void extent, just read the bits...
618 for (int i = 0; i < 4; ++i) {
619 strm.ReadBits(13);
620 }
621
622 // Decode the RGBA components and renormalize them to the range [0, 255]
623 uint16_t r = strm.ReadBits(16);
624 uint16_t g = strm.ReadBits(16);
625 uint16_t b = strm.ReadBits(16);
626 uint16_t a = strm.ReadBits(16);
627
628 uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 |
629 (static_cast<uint32_t>(a) & 0xFF00) << 16;
630
631 for (uint32_t j = 0; j < blockHeight; j++)
632 for (uint32_t i = 0; i < blockWidth; i++) {
633 outBuf[j * blockWidth + i] = rgba;
634 }
635}
636
// Paints the first blockWidth * blockHeight texels of outBuf with the fixed
// error colour 0xFFFF00FF.
void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeight) {
    const uint32_t kErrorColor = 0xFFFF00FF;
    const uint32_t texelCount = blockWidth * blockHeight;
    for (uint32_t idx = 0; idx < texelCount; ++idx) {
        outBuf[idx] = kErrorColor;
    }
}
643
// Expands the low numBits bits of val into a toBit-wide value by repeating
// that bit pattern from the most significant end downwards; the final copy is
// truncated (keeping its high bits) when the pattern does not divide evenly.
// Returns 0 when numBits or toBit is 0. When toBit <= numBits the masked
// input is returned unchanged.
template <typename IntType>
IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) {
    if (numBits == 0 || toBit == 0) {
        return 0;
    }

    const IntType pattern = val & ((1 << numBits) - 1);
    IntType result = pattern;

    for (uint32_t filled = numBits; filled < toBit;) {
        // Append a full copy of the pattern, or only its top bits if fewer
        // than numBits positions remain.
        const uint32_t room = toBit - filled;
        const uint32_t take = (numBits > room) ? room : numBits;
        result <<= take;
        result |= pattern >> (numBits - take);
        filled += take;
    }

    return result;
}
668
669class Pixel {
670protected:
671 typedef int16_t ChannelType;
672 uint8_t m_BitDepth[4];
673 int16_t color[4];
674
675public:
676 Pixel() {
677 for (int i = 0; i < 4; i++) {
678 m_BitDepth[i] = 8;
679 color[i] = 0;
680 }
681 }
682
683 Pixel(ChannelType a, ChannelType r, ChannelType g, ChannelType b, unsigned bitDepth = 8) {
684 for (int i = 0; i < 4; i++)
685 m_BitDepth[i] = bitDepth;
686
687 color[0] = a;
688 color[1] = r;
689 color[2] = g;
690 color[3] = b;
691 }
692
693 // Changes the depth of each pixel. This scales the values to
694 // the appropriate bit depth by either truncating the least
695 // significant bits when going from larger to smaller bit depth
696 // or by repeating the most significant bits when going from
697 // smaller to larger bit depths.
698 void ChangeBitDepth(const uint8_t (&depth)[4]) {
699 for (uint32_t i = 0; i < 4; i++) {
700 Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]);
701 m_BitDepth[i] = depth[i];
702 }
703 }
704
705 template <typename IntType>
706 static float ConvertChannelToFloat(IntType channel, uint8_t bitDepth) {
707 float denominator = static_cast<float>((1 << bitDepth) - 1);
708 return static_cast<float>(channel) / denominator;
709 }
710
711 // Changes the bit depth of a single component. See the comment
712 // above for how we do this.
713 static ChannelType ChangeBitDepth(Pixel::ChannelType val, uint8_t oldDepth, uint8_t newDepth) {
714 assert(newDepth <= 8);
715 assert(oldDepth <= 8);
716
717 if (oldDepth == newDepth) {
718 // Do nothing
719 return val;
720 } else if (oldDepth == 0 && newDepth != 0) {
721 return (1 << newDepth) - 1;
722 } else if (newDepth > oldDepth) {
723 return Replicate(val, oldDepth, newDepth);
724 } else {
725 // oldDepth > newDepth
726 if (newDepth == 0) {
727 return 0xFF;
728 } else {
729 uint8_t bitsWasted = oldDepth - newDepth;
730 uint16_t v = static_cast<uint16_t>(val);
731 v = (v + (1 << (bitsWasted - 1))) >> bitsWasted;
732 v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), (1 << newDepth) - 1);
733 return static_cast<uint8_t>(v);
734 }
735 }
736
737 assert(!"We shouldn't get here.");
738 return 0;
739 }
740
741 const ChannelType& A() const {
742 return color[0];
743 }
744 ChannelType& A() {
745 return color[0];
746 }
747 const ChannelType& R() const {
748 return color[1];
749 }
750 ChannelType& R() {
751 return color[1];
752 }
753 const ChannelType& G() const {
754 return color[2];
755 }
756 ChannelType& G() {
757 return color[2];
758 }
759 const ChannelType& B() const {
760 return color[3];
761 }
762 ChannelType& B() {
763 return color[3];
764 }
765 const ChannelType& Component(uint32_t idx) const {
766 return color[idx];
767 }
768 ChannelType& Component(uint32_t idx) {
769 return color[idx];
770 }
771
772 void GetBitDepth(uint8_t (&outDepth)[4]) const {
773 for (int i = 0; i < 4; i++) {
774 outDepth[i] = m_BitDepth[i];
775 }
776 }
777
778 // Take all of the components, transform them to their 8-bit variants,
779 // and then pack each channel into an R8G8B8A8 32-bit integer. We assume
780 // that the architecture is little-endian, so the alpha channel will end
781 // up in the most-significant byte.
782 uint32_t Pack() const {
783 Pixel eightBit(*this);
784 const uint8_t eightBitDepth[4] = {8, 8, 8, 8};
785 eightBit.ChangeBitDepth(eightBitDepth);
786
787 uint32_t r = 0;
788 r |= eightBit.A();
789 r <<= 8;
790 r |= eightBit.B();
791 r <<= 8;
792 r |= eightBit.G();
793 r <<= 8;
794 r |= eightBit.R();
795 return r;
796 }
797
798 // Clamps the pixel to the range [0,255]
799 void ClampByte() {
800 for (uint32_t i = 0; i < 4; i++) {
801 color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]);
802 }
803 }
804
805 void MakeOpaque() {
806 A() = 255;
807 }
808};
809
810void DecodeColorValues(uint32_t* out, uint8_t* data, uint32_t* modes, const uint32_t nPartitions,
811 const uint32_t nBitsForColorData) {
812 // First figure out how many color values we have
813 uint32_t nValues = 0;
814 for (uint32_t i = 0; i < nPartitions; i++) {
815 nValues += ((modes[i] >> 2) + 1) << 1;
816 }
817
818 // Then based on the number of values and the remaining number of bits,
819 // figure out the max value for each of them...
820 uint32_t range = 256;
821 while (--range > 0) {
822 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(range);
823 uint32_t bitLength = val.GetBitLength(nValues);
824 if (bitLength <= nBitsForColorData) {
825 // Find the smallest possible range that matches the given encoding
826 while (--range > 0) {
827 IntegerEncodedValue newval = IntegerEncodedValue::CreateEncoding(range);
828 if (!newval.MatchesEncoding(val)) {
829 break;
830 }
831 }
832
833 // Return to last matching range.
834 range++;
835 break;
836 }
837 }
838
839 // We now have enough to decode our integer sequence.
840 std::vector<IntegerEncodedValue> decodedColorValues;
841 BitStream colorStream(data);
842 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
843
844 // Once we have the decoded values, we need to dequantize them to the 0-255 range
845 // This procedure is outlined in ASTC spec C.2.13
846 uint32_t outIdx = 0;
847 std::vector<IntegerEncodedValue>::const_iterator itr;
848 for (itr = decodedColorValues.begin(); itr != decodedColorValues.end(); itr++) {
849 // Have we already decoded all that we need?
850 if (outIdx >= nValues) {
851 break;
852 }
853
854 const IntegerEncodedValue& val = *itr;
855 uint32_t bitlen = val.BaseBitLength();
856 uint32_t bitval = val.GetBitValue();
857
858 assert(bitlen >= 1);
859
860 uint32_t A = 0, B = 0, C = 0, D = 0;
861 // A is just the lsb replicated 9 times.
862 A = Replicate(bitval & 1, 1, 9);
863
864 switch (val.GetEncoding()) {
865 // Replicate bits
866 case eIntegerEncoding_JustBits:
867 out[outIdx++] = Replicate(bitval, bitlen, 8);
868 break;
869
870 // Use algorithm in C.2.13
871 case eIntegerEncoding_Trit: {
872
873 D = val.GetTritValue();
874
875 switch (bitlen) {
876 case 1: {
877 C = 204;
878 } break;
879
880 case 2: {
881 C = 93;
882 // B = b000b0bb0
883 uint32_t b = (bitval >> 1) & 1;
884 B = (b << 8) | (b << 4) | (b << 2) | (b << 1);
885 } break;
886
887 case 3: {
888 C = 44;
889 // B = cb000cbcb
890 uint32_t cb = (bitval >> 1) & 3;
891 B = (cb << 7) | (cb << 2) | cb;
892 } break;
893
894 case 4: {
895 C = 22;
896 // B = dcb000dcb
897 uint32_t dcb = (bitval >> 1) & 7;
898 B = (dcb << 6) | dcb;
899 } break;
900
901 case 5: {
902 C = 11;
903 // B = edcb000ed
904 uint32_t edcb = (bitval >> 1) & 0xF;
905 B = (edcb << 5) | (edcb >> 2);
906 } break;
907
908 case 6: {
909 C = 5;
910 // B = fedcb000f
911 uint32_t fedcb = (bitval >> 1) & 0x1F;
912 B = (fedcb << 4) | (fedcb >> 4);
913 } break;
914
915 default:
916 assert(!"Unsupported trit encoding for color values!");
917 break;
918 } // switch(bitlen)
919 } // case eIntegerEncoding_Trit
920 break;
921
922 case eIntegerEncoding_Quint: {
923
924 D = val.GetQuintValue();
925
926 switch (bitlen) {
927 case 1: {
928 C = 113;
929 } break;
930
931 case 2: {
932 C = 54;
933 // B = b0000bb00
934 uint32_t b = (bitval >> 1) & 1;
935 B = (b << 8) | (b << 3) | (b << 2);
936 } break;
937
938 case 3: {
939 C = 26;
940 // B = cb0000cbc
941 uint32_t cb = (bitval >> 1) & 3;
942 B = (cb << 7) | (cb << 1) | (cb >> 1);
943 } break;
944
945 case 4: {
946 C = 13;
947 // B = dcb0000dc
948 uint32_t dcb = (bitval >> 1) & 7;
949 B = (dcb << 6) | (dcb >> 1);
950 } break;
951
952 case 5: {
953 C = 6;
954 // B = edcb0000e
955 uint32_t edcb = (bitval >> 1) & 0xF;
956 B = (edcb << 5) | (edcb >> 3);
957 } break;
958
959 default:
960 assert(!"Unsupported quint encoding for color values!");
961 break;
962 } // switch(bitlen)
963 } // case eIntegerEncoding_Quint
964 break;
965 } // switch(val.GetEncoding())
966
967 if (val.GetEncoding() != eIntegerEncoding_JustBits) {
968 uint32_t T = D * C + B;
969 T ^= A;
970 T = (A & 0x80) | (T >> 2);
971 out[outIdx++] = T;
972 }
973 }
974
975 // Make sure that each of our values is in the proper range...
976 for (uint32_t i = 0; i < nValues; i++) {
977 assert(out[i] <= 255);
978 }
979}
980
981uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) {
982 uint32_t bitval = val.GetBitValue();
983 uint32_t bitlen = val.BaseBitLength();
984
985 uint32_t A = Replicate(bitval & 1, 1, 7);
986 uint32_t B = 0, C = 0, D = 0;
987
988 uint32_t result = 0;
989 switch (val.GetEncoding()) {
990 case eIntegerEncoding_JustBits:
991 result = Replicate(bitval, bitlen, 6);
992 break;
993
994 case eIntegerEncoding_Trit: {
995 D = val.GetTritValue();
996 assert(D < 3);
997
998 switch (bitlen) {
999 case 0: {
1000 uint32_t results[3] = {0, 32, 63};
1001 result = results[D];
1002 } break;
1003
1004 case 1: {
1005 C = 50;
1006 } break;
1007
1008 case 2: {
1009 C = 23;
1010 uint32_t b = (bitval >> 1) & 1;
1011 B = (b << 6) | (b << 2) | b;
1012 } break;
1013
1014 case 3: {
1015 C = 11;
1016 uint32_t cb = (bitval >> 1) & 3;
1017 B = (cb << 5) | cb;
1018 } break;
1019
1020 default:
1021 assert(!"Invalid trit encoding for texel weight");
1022 break;
1023 }
1024 } break;
1025
1026 case eIntegerEncoding_Quint: {
1027 D = val.GetQuintValue();
1028 assert(D < 5);
1029
1030 switch (bitlen) {
1031 case 0: {
1032 uint32_t results[5] = {0, 16, 32, 47, 63};
1033 result = results[D];
1034 } break;
1035
1036 case 1: {
1037 C = 28;
1038 } break;
1039
1040 case 2: {
1041 C = 13;
1042 uint32_t b = (bitval >> 1) & 1;
1043 B = (b << 6) | (b << 1);
1044 } break;
1045
1046 default:
1047 assert(!"Invalid quint encoding for texel weight");
1048 break;
1049 }
1050 } break;
1051 }
1052
1053 if (val.GetEncoding() != eIntegerEncoding_JustBits && bitlen > 0) {
1054 // Decode the value...
1055 result = D * C + B;
1056 result ^= A;
1057 result = (A & 0x20) | (result >> 2);
1058 }
1059
1060 assert(result < 64);
1061
1062 // Change from [0,63] to [0,64]
1063 if (result > 32) {
1064 result += 1;
1065 }
1066
1067 return result;
1068}
1069
1070void UnquantizeTexelWeights(uint32_t out[2][144], std::vector<IntegerEncodedValue>& weights,
1071 const TexelWeightParams& params, const uint32_t blockWidth,
1072 const uint32_t blockHeight) {
1073 uint32_t weightIdx = 0;
1074 uint32_t unquantized[2][144];
1075 std::vector<IntegerEncodedValue>::const_iterator itr;
1076 for (itr = weights.begin(); itr != weights.end(); itr++) {
1077 unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr);
1078
1079 if (params.m_bDualPlane) {
1080 itr++;
1081 unquantized[1][weightIdx] = UnquantizeTexelWeight(*itr);
1082 if (itr == weights.end()) {
1083 break;
1084 }
1085 }
1086
1087 if (++weightIdx >= (params.m_Width * params.m_Height))
1088 break;
1089 }
1090
1091 // Do infill if necessary (Section C.2.18) ...
1092 uint32_t Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1);
1093 uint32_t Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1);
1094
1095 const uint32_t kPlaneScale = params.m_bDualPlane ? 2U : 1U;
1096 for (uint32_t plane = 0; plane < kPlaneScale; plane++)
1097 for (uint32_t t = 0; t < blockHeight; t++)
1098 for (uint32_t s = 0; s < blockWidth; s++) {
1099 uint32_t cs = Ds * s;
1100 uint32_t ct = Dt * t;
1101
1102 uint32_t gs = (cs * (params.m_Width - 1) + 32) >> 6;
1103 uint32_t gt = (ct * (params.m_Height - 1) + 32) >> 6;
1104
1105 uint32_t js = gs >> 4;
1106 uint32_t fs = gs & 0xF;
1107
1108 uint32_t jt = gt >> 4;
1109 uint32_t ft = gt & 0x0F;
1110
1111 uint32_t w11 = (fs * ft + 8) >> 4;
1112 uint32_t w10 = ft - w11;
1113 uint32_t w01 = fs - w11;
1114 uint32_t w00 = 16 - fs - ft + w11;
1115
1116 uint32_t v0 = js + jt * params.m_Width;
1117
1118#define FIND_TEXEL(tidx, bidx) \
1119 uint32_t p##bidx = 0; \
1120 do { \
1121 if ((tidx) < (params.m_Width * params.m_Height)) { \
1122 p##bidx = unquantized[plane][(tidx)]; \
1123 } \
1124 } while (0)
1125
1126 FIND_TEXEL(v0, 00);
1127 FIND_TEXEL(v0 + 1, 01);
1128 FIND_TEXEL(v0 + params.m_Width, 10);
1129 FIND_TEXEL(v0 + params.m_Width + 1, 11);
1130
1131#undef FIND_TEXEL
1132
1133 out[plane][t * blockWidth + s] =
1134 (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4;
1135 }
1136}
1137
// Bit transfer as described in ASTC spec section C.2.14: bit 7 of a is moved
// into bit 7 of b (whose remaining bits are shifted right by one), and a is
// reduced to a signed 6-bit value in the range [-32, 31].
static inline void BitTransferSigned(int32_t& a, int32_t& b) {
    const int32_t transferredBit = a & 0x80;
    b = (b >> 1) | transferredBit;

    a = (a >> 1) & 0x3F;
    if ((a & 0x20) != 0) {
        // Bit 5 is the sign bit of the 6-bit value.
        a -= 0x40;
    }
}
1147
1148// Adds more precision to the blue channel as described
1149// in C.2.14
1150static inline Pixel BlueContract(int32_t a, int32_t r, int32_t g, int32_t b) {
1151 return Pixel(static_cast<int16_t>(a), static_cast<int16_t>((r + b) >> 1),
1152 static_cast<int16_t>((g + b) >> 1), static_cast<int16_t>(b));
1153}
1154
// Partition selection functions as specified in ASTC spec section C.2.21.

// 32-bit integer mixing function used to derive the partition seeds.
static inline uint32_t hash52(uint32_t p) {
    p = p ^ (p >> 15);
    p = p - (p << 17);
    p = p + (p << 7);
    p = p + (p << 4);
    p = p ^ (p >> 5);
    p = p + (p << 16);
    p = p ^ (p >> 7);
    p = p ^ (p >> 3);
    p = p ^ (p << 6);
    p = p ^ (p >> 17);
    return p;
}
1170
1171static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
1172 int32_t partitionCount, int32_t smallBlock) {
1173 if (1 == partitionCount)
1174 return 0;
1175
1176 if (smallBlock) {
1177 x <<= 1;
1178 y <<= 1;
1179 z <<= 1;
1180 }
1181
1182 seed += (partitionCount - 1) * 1024;
1183
1184 uint32_t rnum = hash52(static_cast<uint32_t>(seed));
1185 uint8_t seed1 = static_cast<uint8_t>(rnum & 0xF);
1186 uint8_t seed2 = static_cast<uint8_t>((rnum >> 4) & 0xF);
1187 uint8_t seed3 = static_cast<uint8_t>((rnum >> 8) & 0xF);
1188 uint8_t seed4 = static_cast<uint8_t>((rnum >> 12) & 0xF);
1189 uint8_t seed5 = static_cast<uint8_t>((rnum >> 16) & 0xF);
1190 uint8_t seed6 = static_cast<uint8_t>((rnum >> 20) & 0xF);
1191 uint8_t seed7 = static_cast<uint8_t>((rnum >> 24) & 0xF);
1192 uint8_t seed8 = static_cast<uint8_t>((rnum >> 28) & 0xF);
1193 uint8_t seed9 = static_cast<uint8_t>((rnum >> 18) & 0xF);
1194 uint8_t seed10 = static_cast<uint8_t>((rnum >> 22) & 0xF);
1195 uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF);
1196 uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF);
1197
1198 seed1 *= seed1;
1199 seed2 *= seed2;
1200 seed3 *= seed3;
1201 seed4 *= seed4;
1202 seed5 *= seed5;
1203 seed6 *= seed6;
1204 seed7 *= seed7;
1205 seed8 *= seed8;
1206 seed9 *= seed9;
1207 seed10 *= seed10;
1208 seed11 *= seed11;
1209 seed12 *= seed12;
1210
1211 int32_t sh1, sh2, sh3;
1212 if (seed & 1) {
1213 sh1 = (seed & 2) ? 4 : 5;
1214 sh2 = (partitionCount == 3) ? 6 : 5;
1215 } else {
1216 sh1 = (partitionCount == 3) ? 6 : 5;
1217 sh2 = (seed & 2) ? 4 : 5;
1218 }
1219 sh3 = (seed & 0x10) ? sh1 : sh2;
1220
1221 seed1 >>= sh1;
1222 seed2 >>= sh2;
1223 seed3 >>= sh1;
1224 seed4 >>= sh2;
1225 seed5 >>= sh1;
1226 seed6 >>= sh2;
1227 seed7 >>= sh1;
1228 seed8 >>= sh2;
1229 seed9 >>= sh3;
1230 seed10 >>= sh3;
1231 seed11 >>= sh3;
1232 seed12 >>= sh3;
1233
1234 int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
1235 int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
1236 int32_t c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
1237 int32_t d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
1238
1239 a &= 0x3F;
1240 b &= 0x3F;
1241 c &= 0x3F;
1242 d &= 0x3F;
1243
1244 if (partitionCount < 4)
1245 d = 0;
1246 if (partitionCount < 3)
1247 c = 0;
1248
1249 if (a >= b && a >= c && a >= d)
1250 return 0;
1251 else if (b >= c && b >= d)
1252 return 1;
1253 else if (c >= d)
1254 return 2;
1255 return 3;
1256}
1257
1258static inline uint32_t Select2DPartition(int32_t seed, int32_t x, int32_t y, int32_t partitionCount,
1259 int32_t smallBlock) {
1260 return SelectPartition(seed, x, y, 0, partitionCount, smallBlock);
1261}
1262
1263// Section C.2.14
1264void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValues,
1265 uint32_t colorEndpointMode) {
1266#define READ_UINT_VALUES(N) \
1267 uint32_t v[N]; \
1268 for (uint32_t i = 0; i < N; i++) { \
1269 v[i] = *(colorValues++); \
1270 }
1271
1272#define READ_INT_VALUES(N) \
1273 int32_t v[N]; \
1274 for (uint32_t i = 0; i < N; i++) { \
1275 v[i] = static_cast<int32_t>(*(colorValues++)); \
1276 }
1277
1278 switch (colorEndpointMode) {
1279 case 0: {
1280 READ_UINT_VALUES(2)
1281 ep1 = Pixel(0xFF, v[0], v[0], v[0]);
1282 ep2 = Pixel(0xFF, v[1], v[1], v[1]);
1283 } break;
1284
1285 case 1: {
1286 READ_UINT_VALUES(2)
1287 uint32_t L0 = (v[0] >> 2) | (v[1] & 0xC0);
1288 uint32_t L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU);
1289 ep1 = Pixel(0xFF, L0, L0, L0);
1290 ep2 = Pixel(0xFF, L1, L1, L1);
1291 } break;
1292
1293 case 4: {
1294 READ_UINT_VALUES(4)
1295 ep1 = Pixel(v[2], v[0], v[0], v[0]);
1296 ep2 = Pixel(v[3], v[1], v[1], v[1]);
1297 } break;
1298
1299 case 5: {
1300 READ_INT_VALUES(4)
1301 BitTransferSigned(v[1], v[0]);
1302 BitTransferSigned(v[3], v[2]);
1303 ep1 = Pixel(v[2], v[0], v[0], v[0]);
1304 ep2 = Pixel(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1]);
1305 ep1.ClampByte();
1306 ep2.ClampByte();
1307 } break;
1308
1309 case 6: {
1310 READ_UINT_VALUES(4)
1311 ep1 = Pixel(0xFF, v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8);
1312 ep2 = Pixel(0xFF, v[0], v[1], v[2]);
1313 } break;
1314
1315 case 8: {
1316 READ_UINT_VALUES(6)
1317 if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) {
1318 ep1 = Pixel(0xFF, v[0], v[2], v[4]);
1319 ep2 = Pixel(0xFF, v[1], v[3], v[5]);
1320 } else {
1321 ep1 = BlueContract(0xFF, v[1], v[3], v[5]);
1322 ep2 = BlueContract(0xFF, v[0], v[2], v[4]);
1323 }
1324 } break;
1325
1326 case 9: {
1327 READ_INT_VALUES(6)
1328 BitTransferSigned(v[1], v[0]);
1329 BitTransferSigned(v[3], v[2]);
1330 BitTransferSigned(v[5], v[4]);
1331 if (v[1] + v[3] + v[5] >= 0) {
1332 ep1 = Pixel(0xFF, v[0], v[2], v[4]);
1333 ep2 = Pixel(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]);
1334 } else {
1335 ep1 = BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]);
1336 ep2 = BlueContract(0xFF, v[0], v[2], v[4]);
1337 }
1338 ep1.ClampByte();
1339 ep2.ClampByte();
1340 } break;
1341
1342 case 10: {
1343 READ_UINT_VALUES(6)
1344 ep1 = Pixel(v[4], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8);
1345 ep2 = Pixel(v[5], v[0], v[1], v[2]);
1346 } break;
1347
1348 case 12: {
1349 READ_UINT_VALUES(8)
1350 if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) {
1351 ep1 = Pixel(v[6], v[0], v[2], v[4]);
1352 ep2 = Pixel(v[7], v[1], v[3], v[5]);
1353 } else {
1354 ep1 = BlueContract(v[7], v[1], v[3], v[5]);
1355 ep2 = BlueContract(v[6], v[0], v[2], v[4]);
1356 }
1357 } break;
1358
1359 case 13: {
1360 READ_INT_VALUES(8)
1361 BitTransferSigned(v[1], v[0]);
1362 BitTransferSigned(v[3], v[2]);
1363 BitTransferSigned(v[5], v[4]);
1364 BitTransferSigned(v[7], v[6]);
1365 if (v[1] + v[3] + v[5] >= 0) {
1366 ep1 = Pixel(v[6], v[0], v[2], v[4]);
1367 ep2 = Pixel(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5]);
1368 } else {
1369 ep1 = BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5]);
1370 ep2 = BlueContract(v[6], v[0], v[2], v[4]);
1371 }
1372 ep1.ClampByte();
1373 ep2.ClampByte();
1374 } break;
1375
1376 default:
1377 assert(!"Unsupported color endpoint mode (is it HDR?)");
1378 break;
1379 }
1380
1381#undef READ_UINT_VALUES
1382#undef READ_INT_VALUES
1383}
1384
1385void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, const uint32_t blockHeight,
1386 uint32_t* outBuf) {
1387 BitStream strm(inBuf);
1388 TexelWeightParams weightParams = DecodeBlockInfo(strm);
1389
1390 // Was there an error?
1391 if (weightParams.m_bError) {
1392 assert(!"Invalid block mode");
1393 FillError(outBuf, blockWidth, blockHeight);
1394 return;
1395 }
1396
1397 if (weightParams.m_bVoidExtentLDR) {
1398 FillVoidExtentLDR(strm, outBuf, blockWidth, blockHeight);
1399 return;
1400 }
1401
1402 if (weightParams.m_bVoidExtentHDR) {
1403 assert(!"HDR void extent blocks are unsupported!");
1404 FillError(outBuf, blockWidth, blockHeight);
1405 return;
1406 }
1407
1408 if (weightParams.m_Width > blockWidth) {
1409 assert(!"Texel weight grid width should be smaller than block width");
1410 FillError(outBuf, blockWidth, blockHeight);
1411 return;
1412 }
1413
1414 if (weightParams.m_Height > blockHeight) {
1415 assert(!"Texel weight grid height should be smaller than block height");
1416 FillError(outBuf, blockWidth, blockHeight);
1417 return;
1418 }
1419
1420 // Read num partitions
1421 uint32_t nPartitions = strm.ReadBits(2) + 1;
1422 assert(nPartitions <= 4);
1423
1424 if (nPartitions == 4 && weightParams.m_bDualPlane) {
1425 assert(!"Dual plane mode is incompatible with four partition blocks");
1426 FillError(outBuf, blockWidth, blockHeight);
1427 return;
1428 }
1429
1430 // Based on the number of partitions, read the color endpoint mode for
1431 // each partition.
1432
1433 // Determine partitions, partition index, and color endpoint modes
1434 int32_t planeIdx = -1;
1435 uint32_t partitionIndex;
1436 uint32_t colorEndpointMode[4] = {0, 0, 0, 0};
1437
1438 // Define color data.
1439 uint8_t colorEndpointData[16];
1440 memset(colorEndpointData, 0, sizeof(colorEndpointData));
1441 BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
1442
1443 // Read extra config data...
1444 uint32_t baseCEM = 0;
1445 if (nPartitions == 1) {
1446 colorEndpointMode[0] = strm.ReadBits(4);
1447 partitionIndex = 0;
1448 } else {
1449 partitionIndex = strm.ReadBits(10);
1450 baseCEM = strm.ReadBits(6);
1451 }
1452 uint32_t baseMode = (baseCEM & 3);
1453
1454 // Remaining bits are color endpoint data...
1455 uint32_t nWeightBits = weightParams.GetPackedBitSize();
1456 int32_t remainingBits = 128 - nWeightBits - strm.GetBitsRead();
1457
1458 // Consider extra bits prior to texel data...
1459 uint32_t extraCEMbits = 0;
1460 if (baseMode) {
1461 switch (nPartitions) {
1462 case 2:
1463 extraCEMbits += 2;
1464 break;
1465 case 3:
1466 extraCEMbits += 5;
1467 break;
1468 case 4:
1469 extraCEMbits += 8;
1470 break;
1471 default:
1472 assert(false);
1473 break;
1474 }
1475 }
1476 remainingBits -= extraCEMbits;
1477
1478 // Do we have a dual plane situation?
1479 uint32_t planeSelectorBits = 0;
1480 if (weightParams.m_bDualPlane) {
1481 planeSelectorBits = 2;
1482 }
1483 remainingBits -= planeSelectorBits;
1484
1485 // Read color data...
1486 uint32_t colorDataBits = remainingBits;
1487 while (remainingBits > 0) {
1488 uint32_t nb = std::min(remainingBits, 8);
1489 uint32_t b = strm.ReadBits(nb);
1490 colorEndpointStream.WriteBits(b, nb);
1491 remainingBits -= 8;
1492 }
1493
1494 // Read the plane selection bits
1495 planeIdx = strm.ReadBits(planeSelectorBits);
1496
1497 // Read the rest of the CEM
1498 if (baseMode) {
1499 uint32_t extraCEM = strm.ReadBits(extraCEMbits);
1500 uint32_t CEM = (extraCEM << 6) | baseCEM;
1501 CEM >>= 2;
1502
1503 bool C[4] = {0};
1504 for (uint32_t i = 0; i < nPartitions; i++) {
1505 C[i] = CEM & 1;
1506 CEM >>= 1;
1507 }
1508
1509 uint8_t M[4] = {0};
1510 for (uint32_t i = 0; i < nPartitions; i++) {
1511 M[i] = CEM & 3;
1512 CEM >>= 2;
1513 assert(M[i] <= 3);
1514 }
1515
1516 for (uint32_t i = 0; i < nPartitions; i++) {
1517 colorEndpointMode[i] = baseMode;
1518 if (!(C[i]))
1519 colorEndpointMode[i] -= 1;
1520 colorEndpointMode[i] <<= 2;
1521 colorEndpointMode[i] |= M[i];
1522 }
1523 } else if (nPartitions > 1) {
1524 uint32_t CEM = baseCEM >> 2;
1525 for (uint32_t i = 0; i < nPartitions; i++) {
1526 colorEndpointMode[i] = CEM;
1527 }
1528 }
1529
1530 // Make sure everything up till here is sane.
1531 for (uint32_t i = 0; i < nPartitions; i++) {
1532 assert(colorEndpointMode[i] < 16);
1533 }
1534 assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128);
1535
1536 // Decode both color data and texel weight data
1537 uint32_t colorValues[32]; // Four values, two endpoints, four maximum paritions
1538 DecodeColorValues(colorValues, colorEndpointData, colorEndpointMode, nPartitions,
1539 colorDataBits);
1540
1541 Pixel endpoints[4][2];
1542 const uint32_t* colorValuesPtr = colorValues;
1543 for (uint32_t i = 0; i < nPartitions; i++) {
1544 ComputeEndpoints(endpoints[i][0], endpoints[i][1], colorValuesPtr, colorEndpointMode[i]);
1545 }
1546
1547 // Read the texel weight data..
1548 uint8_t texelWeightData[16];
1549 memcpy(texelWeightData, inBuf, sizeof(texelWeightData));
1550
1551 // Reverse everything
1552 for (uint32_t i = 0; i < 8; i++) {
1553// Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits
1554#define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32
1555 unsigned char a = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[i]));
1556 unsigned char b = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[15 - i]));
1557#undef REVERSE_BYTE
1558
1559 texelWeightData[i] = b;
1560 texelWeightData[15 - i] = a;
1561 }
1562
1563 // Make sure that higher non-texel bits are set to zero
1564 const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
1565 texelWeightData[clearByteStart - 1] &= (1 << (weightParams.GetPackedBitSize() % 8)) - 1;
1566 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
1567
1568 std::vector<IntegerEncodedValue> texelWeightValues;
1569 BitStream weightStream(texelWeightData);
1570
1571 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
1572 weightParams.m_MaxWeight,
1573 weightParams.GetNumWeightValues());
1574
1575 // Blocks can be at most 12x12, so we can have as many as 144 weights
1576 uint32_t weights[2][144];
1577 UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight);
1578
1579 // Now that we have endpoints and weights, we can interpolate and generate
1580 // the proper decoding...
1581 for (uint32_t j = 0; j < blockHeight; j++)
1582 for (uint32_t i = 0; i < blockWidth; i++) {
1583 uint32_t partition = Select2DPartition(partitionIndex, i, j, nPartitions,
1584 (blockHeight * blockWidth) < 32);
1585 assert(partition < nPartitions);
1586
1587 Pixel p;
1588 for (uint32_t c = 0; c < 4; c++) {
1589 uint32_t C0 = endpoints[partition][0].Component(c);
1590 C0 = Replicate(C0, 8, 16);
1591 uint32_t C1 = endpoints[partition][1].Component(c);
1592 C1 = Replicate(C1, 8, 16);
1593
1594 uint32_t plane = 0;
1595 if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) {
1596 plane = 1;
1597 }
1598
1599 uint32_t weight = weights[plane][j * blockWidth + i];
1600 uint32_t C = (C0 * (64 - weight) + C1 * weight + 32) / 64;
1601 if (C == 65535) {
1602 p.Component(c) = 255;
1603 } else {
1604 double Cf = static_cast<double>(C);
1605 p.Component(c) = static_cast<uint16_t>(255.0 * (Cf / 65536.0) + 0.5);
1606 }
1607 }
1608
1609 outBuf[j * blockWidth + i] = p.Pack();
1610 }
1611}
1612
1613} // namespace ASTCC
1614
1615namespace Tegra::Texture::ASTC {
1616
1617std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
1618 uint32_t block_width, uint32_t block_height) {
1619 uint32_t blockIdx = 0;
1620 std::vector<uint8_t> outData;
1621 outData.resize(height * width * 4);
1622 for (uint32_t j = 0; j < height; j += block_height) {
1623 for (uint32_t i = 0; i < width; i += block_width) {
1624
1625 uint8_t* blockPtr = data.data() + blockIdx * 16;
1626
1627 // Blocks can be at most 12x12
1628 uint32_t uncompData[144];
1629 ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
1630
1631 uint32_t decompWidth = std::min(block_width, width - i);
1632 uint32_t decompHeight = std::min(block_height, height - j);
1633
1634 uint8_t* outRow = outData.data() + (j * width + i) * 4;
1635 for (uint32_t jj = 0; jj < decompHeight; jj++) {
1636 memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
1637 }
1638
1639 blockIdx++;
1640 }
1641 }
1642
1643 return outData;
1644}
1645
1646} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
new file mode 100644
index 000000000..f0d7c0e56
--- /dev/null
+++ b/src/video_core/textures/astc.h
@@ -0,0 +1,15 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstdint>
8#include <vector>
9
10namespace Tegra::Texture::ASTC {
11
// Decompresses ASTC-compressed texture data.
//  data         - the compressed blocks (the implementation consumes 16 bytes
//                 per block, row by row)
//  width/height - size of the image in texels
//  block_width/block_height - footprint of one compressed block in texels
// Returns a buffer of width * height texels, 4 bytes each.
12std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
13 uint32_t block_width, uint32_t block_height);
14
15} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 2d2af5554..0db4367f1 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -53,8 +53,10 @@ u32 BytesPerPixel(TextureFormat format) {
53 case TextureFormat::DXT45: 53 case TextureFormat::DXT45:
54 // In this case a 'pixel' actually refers to a 4x4 tile. 54 // In this case a 'pixel' actually refers to a 4x4 tile.
55 return 16; 55 return 16;
56 case TextureFormat::ASTC_2D_4X4:
56 case TextureFormat::A8R8G8B8: 57 case TextureFormat::A8R8G8B8:
57 case TextureFormat::A2B10G10R10: 58 case TextureFormat::A2B10G10R10:
59 case TextureFormat::BF10GF11RF11:
58 return 4; 60 return 4;
59 case TextureFormat::A1B5G5R5: 61 case TextureFormat::A1B5G5R5:
60 case TextureFormat::B5G6R5: 62 case TextureFormat::B5G6R5:
@@ -92,6 +94,8 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
92 case TextureFormat::B5G6R5: 94 case TextureFormat::B5G6R5:
93 case TextureFormat::R8: 95 case TextureFormat::R8:
94 case TextureFormat::R16_G16_B16_A16: 96 case TextureFormat::R16_G16_B16_A16:
97 case TextureFormat::BF10GF11RF11:
98 case TextureFormat::ASTC_2D_4X4:
95 CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, 99 CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
96 unswizzled_data.data(), true, block_height); 100 unswizzled_data.data(), true, block_height);
97 break; 101 break;
@@ -113,11 +117,13 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
113 case TextureFormat::DXT23: 117 case TextureFormat::DXT23:
114 case TextureFormat::DXT45: 118 case TextureFormat::DXT45:
115 case TextureFormat::DXN1: 119 case TextureFormat::DXN1:
120 case TextureFormat::ASTC_2D_4X4:
116 case TextureFormat::A8R8G8B8: 121 case TextureFormat::A8R8G8B8:
117 case TextureFormat::A2B10G10R10: 122 case TextureFormat::A2B10G10R10:
118 case TextureFormat::A1B5G5R5: 123 case TextureFormat::A1B5G5R5:
119 case TextureFormat::B5G6R5: 124 case TextureFormat::B5G6R5:
120 case TextureFormat::R8: 125 case TextureFormat::R8:
126 case TextureFormat::BF10GF11RF11:
121 // TODO(Subv): For the time being just forward the same data without any decoding. 127 // TODO(Subv): For the time being just forward the same data without any decoding.
122 rgba_data = texture_data; 128 rgba_data = texture_data;
123 break; 129 break;
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index f48ca30b8..a17eaf19d 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -122,6 +122,17 @@ enum class ComponentType : u32 {
122 FLOAT = 7 122 FLOAT = 7
123}; 123};
124 124
/// Values held by the 3-bit x_source/y_source/z_source/w_source bit fields of TICEntry.
/// NOTE(review): presumably selects which texel component (or constant) feeds each output
/// channel of a texture fetch - confirm against the TIC documentation.
125enum class SwizzleSource : u32 {
126 Zero = 0,
127
// Value 1 is not assigned a name here.
128 R = 2,
129 G = 3,
130 B = 4,
131 A = 5,
// NOTE(review): presumably the constant one in integer and floating-point form - confirm.
132 OneInt = 6,
133 OneFloat = 7,
134};
135
125union TextureHandle { 136union TextureHandle {
126 u32 raw; 137 u32 raw;
127 BitField<0, 20, u32> tic_id; 138 BitField<0, 20, u32> tic_id;
@@ -139,6 +150,11 @@ struct TICEntry {
139 BitField<10, 3, ComponentType> g_type; 150 BitField<10, 3, ComponentType> g_type;
140 BitField<13, 3, ComponentType> b_type; 151 BitField<13, 3, ComponentType> b_type;
141 BitField<16, 3, ComponentType> a_type; 152 BitField<16, 3, ComponentType> a_type;
153
154 BitField<19, 3, SwizzleSource> x_source;
155 BitField<22, 3, SwizzleSource> y_source;
156 BitField<25, 3, SwizzleSource> z_source;
157 BitField<28, 3, SwizzleSource> w_source;
142 }; 158 };
143 u32 address_low; 159 u32 address_low;
144 union { 160 union {
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 5af3154d7..c662570d2 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -32,8 +32,6 @@ add_executable(yuzu
32 debugger/graphics/graphics_surface.h 32 debugger/graphics/graphics_surface.h
33 debugger/profiler.cpp 33 debugger/profiler.cpp
34 debugger/profiler.h 34 debugger/profiler.h
35 debugger/registers.cpp
36 debugger/registers.h
37 debugger/wait_tree.cpp 35 debugger/wait_tree.cpp
38 debugger/wait_tree.h 36 debugger/wait_tree.h
39 game_list.cpp 37 game_list.cpp
@@ -60,7 +58,6 @@ set(UIS
60 configuration/configure_graphics.ui 58 configuration/configure_graphics.ui
61 configuration/configure_input.ui 59 configuration/configure_input.ui
62 configuration/configure_system.ui 60 configuration/configure_system.ui
63 debugger/registers.ui
64 hotkeys.ui 61 hotkeys.ui
65 main.ui 62 main.ui
66) 63)
diff --git a/src/yuzu/debugger/registers.cpp b/src/yuzu/debugger/registers.cpp
deleted file mode 100644
index 178cc65a7..000000000
--- a/src/yuzu/debugger/registers.cpp
+++ /dev/null
@@ -1,190 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <QTreeWidgetItem>
6#include "core/arm/arm_interface.h"
7#include "core/core.h"
8#include "yuzu/debugger/registers.h"
9#include "yuzu/util/util.h"
10
11RegistersWidget::RegistersWidget(QWidget* parent) : QDockWidget(parent) {
12 cpu_regs_ui.setupUi(this);
13
14 tree = cpu_regs_ui.treeWidget;
15 tree->addTopLevelItem(core_registers = new QTreeWidgetItem(QStringList(tr("Registers"))));
16 tree->addTopLevelItem(vfp_registers = new QTreeWidgetItem(QStringList(tr("VFP Registers"))));
17 tree->addTopLevelItem(vfp_system_registers =
18 new QTreeWidgetItem(QStringList(tr("VFP System Registers"))));
19 tree->addTopLevelItem(cpsr = new QTreeWidgetItem(QStringList("CPSR")));
20
21 for (int i = 0; i < 16; ++i) {
22 QTreeWidgetItem* child = new QTreeWidgetItem(QStringList(QString("R[%1]").arg(i)));
23 core_registers->addChild(child);
24 }
25
26 for (int i = 0; i < 32; ++i) {
27 QTreeWidgetItem* child = new QTreeWidgetItem(QStringList(QString("S[%1]").arg(i)));
28 vfp_registers->addChild(child);
29 }
30
31 QFont font = GetMonospaceFont();
32
33 CreateCPSRChildren();
34 CreateVFPSystemRegisterChildren();
35
36 // Set Registers to display in monospace font
37 for (int i = 0; i < core_registers->childCount(); ++i)
38 core_registers->child(i)->setFont(1, font);
39
40 for (int i = 0; i < vfp_registers->childCount(); ++i)
41 vfp_registers->child(i)->setFont(1, font);
42
43 for (int i = 0; i < vfp_system_registers->childCount(); ++i) {
44 vfp_system_registers->child(i)->setFont(1, font);
45 for (int x = 0; x < vfp_system_registers->child(i)->childCount(); ++x) {
46 vfp_system_registers->child(i)->child(x)->setFont(1, font);
47 }
48 }
49 // Set CSPR to display in monospace font
50 cpsr->setFont(1, font);
51 for (int i = 0; i < cpsr->childCount(); ++i) {
52 cpsr->child(i)->setFont(1, font);
53 for (int x = 0; x < cpsr->child(i)->childCount(); ++x) {
54 cpsr->child(i)->child(x)->setFont(1, font);
55 }
56 }
57 setEnabled(false);
58}
59
60void RegistersWidget::OnDebugModeEntered() {
61 if (!Core::System::GetInstance().IsPoweredOn())
62 return;
63
64 for (int i = 0; i < core_registers->childCount(); ++i)
65 core_registers->child(i)->setText(
66 1, QString("0x%1").arg(Core::CurrentArmInterface().GetReg(i), 8, 16, QLatin1Char('0')));
67
68 UpdateCPSRValues();
69}
70
71void RegistersWidget::OnDebugModeLeft() {}
72
73void RegistersWidget::OnEmulationStarting(EmuThread* emu_thread) {
74 setEnabled(true);
75}
76
77void RegistersWidget::OnEmulationStopping() {
78 // Reset widget text
79 for (int i = 0; i < core_registers->childCount(); ++i)
80 core_registers->child(i)->setText(1, QString(""));
81
82 for (int i = 0; i < vfp_registers->childCount(); ++i)
83 vfp_registers->child(i)->setText(1, QString(""));
84
85 for (int i = 0; i < cpsr->childCount(); ++i)
86 cpsr->child(i)->setText(1, QString(""));
87
88 cpsr->setText(1, QString(""));
89
90 // FPSCR
91 for (int i = 0; i < vfp_system_registers->child(0)->childCount(); ++i)
92 vfp_system_registers->child(0)->child(i)->setText(1, QString(""));
93
94 // FPEXC
95 for (int i = 0; i < vfp_system_registers->child(1)->childCount(); ++i)
96 vfp_system_registers->child(1)->child(i)->setText(1, QString(""));
97
98 vfp_system_registers->child(0)->setText(1, QString(""));
99 vfp_system_registers->child(1)->setText(1, QString(""));
100 vfp_system_registers->child(2)->setText(1, QString(""));
101 vfp_system_registers->child(3)->setText(1, QString(""));
102
103 setEnabled(false);
104}
105
106void RegistersWidget::CreateCPSRChildren() {
107 cpsr->addChild(new QTreeWidgetItem(QStringList("M")));
108 cpsr->addChild(new QTreeWidgetItem(QStringList("T")));
109 cpsr->addChild(new QTreeWidgetItem(QStringList("F")));
110 cpsr->addChild(new QTreeWidgetItem(QStringList("I")));
111 cpsr->addChild(new QTreeWidgetItem(QStringList("A")));
112 cpsr->addChild(new QTreeWidgetItem(QStringList("E")));
113 cpsr->addChild(new QTreeWidgetItem(QStringList("IT")));
114 cpsr->addChild(new QTreeWidgetItem(QStringList("GE")));
115 cpsr->addChild(new QTreeWidgetItem(QStringList("DNM")));
116 cpsr->addChild(new QTreeWidgetItem(QStringList("J")));
117 cpsr->addChild(new QTreeWidgetItem(QStringList("Q")));
118 cpsr->addChild(new QTreeWidgetItem(QStringList("V")));
119 cpsr->addChild(new QTreeWidgetItem(QStringList("C")));
120 cpsr->addChild(new QTreeWidgetItem(QStringList("Z")));
121 cpsr->addChild(new QTreeWidgetItem(QStringList("N")));
122}
123
124void RegistersWidget::UpdateCPSRValues() {
125 const u32 cpsr_val = Core::CurrentArmInterface().GetCPSR();
126
127 cpsr->setText(1, QString("0x%1").arg(cpsr_val, 8, 16, QLatin1Char('0')));
128 cpsr->child(0)->setText(
129 1, QString("b%1").arg(cpsr_val & 0x1F, 5, 2, QLatin1Char('0'))); // M - Mode
130 cpsr->child(1)->setText(1, QString::number((cpsr_val >> 5) & 1)); // T - State
131 cpsr->child(2)->setText(1, QString::number((cpsr_val >> 6) & 1)); // F - FIQ disable
132 cpsr->child(3)->setText(1, QString::number((cpsr_val >> 7) & 1)); // I - IRQ disable
133 cpsr->child(4)->setText(1, QString::number((cpsr_val >> 8) & 1)); // A - Imprecise abort
134 cpsr->child(5)->setText(1, QString::number((cpsr_val >> 9) & 1)); // E - Data endianness
135 cpsr->child(6)->setText(1,
136 QString::number((cpsr_val >> 10) & 0x3F)); // IT - If-Then state (DNM)
137 cpsr->child(7)->setText(1,
138 QString::number((cpsr_val >> 16) & 0xF)); // GE - Greater-than-or-Equal
139 cpsr->child(8)->setText(1, QString::number((cpsr_val >> 20) & 0xF)); // DNM - Do not modify
140 cpsr->child(9)->setText(1, QString::number((cpsr_val >> 24) & 1)); // J - Jazelle
141 cpsr->child(10)->setText(1, QString::number((cpsr_val >> 27) & 1)); // Q - Saturation
142 cpsr->child(11)->setText(1, QString::number((cpsr_val >> 28) & 1)); // V - Overflow
143 cpsr->child(12)->setText(1, QString::number((cpsr_val >> 29) & 1)); // C - Carry/Borrow/Extend
144 cpsr->child(13)->setText(1, QString::number((cpsr_val >> 30) & 1)); // Z - Zero
145 cpsr->child(14)->setText(1, QString::number((cpsr_val >> 31) & 1)); // N - Negative/Less than
146}
147
148void RegistersWidget::CreateVFPSystemRegisterChildren() {
149 QTreeWidgetItem* const fpscr = new QTreeWidgetItem(QStringList("FPSCR"));
150 fpscr->addChild(new QTreeWidgetItem(QStringList("IOC")));
151 fpscr->addChild(new QTreeWidgetItem(QStringList("DZC")));
152 fpscr->addChild(new QTreeWidgetItem(QStringList("OFC")));
153 fpscr->addChild(new QTreeWidgetItem(QStringList("UFC")));
154 fpscr->addChild(new QTreeWidgetItem(QStringList("IXC")));
155 fpscr->addChild(new QTreeWidgetItem(QStringList("IDC")));
156 fpscr->addChild(new QTreeWidgetItem(QStringList("IOE")));
157 fpscr->addChild(new QTreeWidgetItem(QStringList("DZE")));
158 fpscr->addChild(new QTreeWidgetItem(QStringList("OFE")));
159 fpscr->addChild(new QTreeWidgetItem(QStringList("UFE")));
160 fpscr->addChild(new QTreeWidgetItem(QStringList("IXE")));
161 fpscr->addChild(new QTreeWidgetItem(QStringList("IDE")));
162 fpscr->addChild(new QTreeWidgetItem(QStringList(tr("Vector Length"))));
163 fpscr->addChild(new QTreeWidgetItem(QStringList(tr("Vector Stride"))));
164 fpscr->addChild(new QTreeWidgetItem(QStringList(tr("Rounding Mode"))));
165 fpscr->addChild(new QTreeWidgetItem(QStringList("FZ")));
166 fpscr->addChild(new QTreeWidgetItem(QStringList("DN")));
167 fpscr->addChild(new QTreeWidgetItem(QStringList("V")));
168 fpscr->addChild(new QTreeWidgetItem(QStringList("C")));
169 fpscr->addChild(new QTreeWidgetItem(QStringList("Z")));
170 fpscr->addChild(new QTreeWidgetItem(QStringList("N")));
171
172 QTreeWidgetItem* const fpexc = new QTreeWidgetItem(QStringList("FPEXC"));
173 fpexc->addChild(new QTreeWidgetItem(QStringList("IOC")));
174 fpexc->addChild(new QTreeWidgetItem(QStringList("OFC")));
175 fpexc->addChild(new QTreeWidgetItem(QStringList("UFC")));
176 fpexc->addChild(new QTreeWidgetItem(QStringList("INV")));
177 fpexc->addChild(new QTreeWidgetItem(QStringList(tr("Vector Iteration Count"))));
178 fpexc->addChild(new QTreeWidgetItem(QStringList("FP2V")));
179 fpexc->addChild(new QTreeWidgetItem(QStringList("EN")));
180 fpexc->addChild(new QTreeWidgetItem(QStringList("EX")));
181
182 vfp_system_registers->addChild(fpscr);
183 vfp_system_registers->addChild(fpexc);
184 vfp_system_registers->addChild(new QTreeWidgetItem(QStringList("FPINST")));
185 vfp_system_registers->addChild(new QTreeWidgetItem(QStringList("FPINST2")));
186}
187
188void RegistersWidget::UpdateVFPSystemRegisterValues() {
189 UNIMPLEMENTED();
190}
diff --git a/src/yuzu/debugger/registers.h b/src/yuzu/debugger/registers.h
deleted file mode 100644
index 55bda5b59..000000000
--- a/src/yuzu/debugger/registers.h
+++ /dev/null
@@ -1,42 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <QDockWidget>
8#include "ui_registers.h"
9
10class QTreeWidget;
11class QTreeWidgetItem;
12class EmuThread;
13
/// Dock widget that shows the emulated CPU's registers in a two-level tree
/// (core registers, VFP registers, VFP system registers and CPSR).
14class RegistersWidget : public QDockWidget {
15 Q_OBJECT
16
17public:
 /// Builds the register tree; the widget starts out disabled.
18 explicit RegistersWidget(QWidget* parent = nullptr);
19
20public slots:
 /// Refreshes the core register and CPSR values if the system is powered on.
21 void OnDebugModeEntered();
 /// Currently a no-op.
22 void OnDebugModeLeft();
23
 /// Enables the widget.
24 void OnEmulationStarting(EmuThread* emu_thread);
 /// Clears all displayed values and disables the widget.
25 void OnEmulationStopping();
26
27private:
 /// Adds one child row per CPSR field below the CPSR item.
28 void CreateCPSRChildren();
 /// Writes the current CPSR value and its decoded fields into the tree.
29 void UpdateCPSRValues();
30
 /// Adds the FPSCR, FPEXC, FPINST and FPINST2 rows (with field rows for FPSCR/FPEXC).
31 void CreateVFPSystemRegisterChildren();
 /// Not implemented.
32 void UpdateVFPSystemRegisterValues();
33
 // Generated UI wrapper; provides the tree widget.
34 Ui::ARMRegisters cpu_regs_ui;
35
 // Tree widget taken from cpu_regs_ui.
36 QTreeWidget* tree;
37
 // Top-level items of the tree.
38 QTreeWidgetItem* core_registers;
39 QTreeWidgetItem* vfp_registers;
40 QTreeWidgetItem* vfp_system_registers;
41 QTreeWidgetItem* cpsr;
42};
diff --git a/src/yuzu/debugger/registers.ui b/src/yuzu/debugger/registers.ui
deleted file mode 100644
index c81ae03f9..000000000
--- a/src/yuzu/debugger/registers.ui
+++ /dev/null
@@ -1,40 +0,0 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<ui version="4.0">
3 <class>ARMRegisters</class>
4 <widget class="QDockWidget" name="ARMRegisters">
5 <property name="geometry">
6 <rect>
7 <x>0</x>
8 <y>0</y>
9 <width>400</width>
10 <height>300</height>
11 </rect>
12 </property>
13 <property name="windowTitle">
14 <string>ARM Registers</string>
15 </property>
16 <widget class="QWidget" name="dockWidgetContents">
17 <layout class="QVBoxLayout" name="verticalLayout">
18 <item>
19 <widget class="QTreeWidget" name="treeWidget">
20 <property name="alternatingRowColors">
21 <bool>true</bool>
22 </property>
23 <column>
24 <property name="text">
25 <string>Register</string>
26 </property>
27 </column>
28 <column>
29 <property name="text">
30 <string>Value</string>
31 </property>
32 </column>
33 </widget>
34 </item>
35 </layout>
36 </widget>
37 </widget>
38 <resources/>
39 <connections/>
40</ui>
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 017bef13c..7101b381e 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -213,6 +213,9 @@ QString WaitTreeThread::GetText() const {
213 case THREADSTATUS_WAIT_MUTEX: 213 case THREADSTATUS_WAIT_MUTEX:
214 status = tr("waiting for mutex"); 214 status = tr("waiting for mutex");
215 break; 215 break;
216 case THREADSTATUS_WAIT_ARB:
217 status = tr("waiting for address arbiter");
218 break;
216 case THREADSTATUS_DORMANT: 219 case THREADSTATUS_DORMANT:
217 status = tr("dormant"); 220 status = tr("dormant");
218 break; 221 break;
@@ -240,6 +243,7 @@ QColor WaitTreeThread::GetColor() const {
240 case THREADSTATUS_WAIT_SYNCH_ALL: 243 case THREADSTATUS_WAIT_SYNCH_ALL:
241 case THREADSTATUS_WAIT_SYNCH_ANY: 244 case THREADSTATUS_WAIT_SYNCH_ANY:
242 case THREADSTATUS_WAIT_MUTEX: 245 case THREADSTATUS_WAIT_MUTEX:
246 case THREADSTATUS_WAIT_ARB:
243 return QColor(Qt::GlobalColor::red); 247 return QColor(Qt::GlobalColor::red);
244 case THREADSTATUS_DORMANT: 248 case THREADSTATUS_DORMANT:
245 return QColor(Qt::GlobalColor::darkCyan); 249 return QColor(Qt::GlobalColor::darkCyan);
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index bbd681eae..55dce6d47 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <QApplication> 5#include <QApplication>
6#include <QDir>
6#include <QFileInfo> 7#include <QFileInfo>
7#include <QHeaderView> 8#include <QHeaderView>
8#include <QKeyEvent> 9#include <QKeyEvent>
@@ -264,8 +265,17 @@ void GameList::ValidateEntry(const QModelIndex& item) {
264 if (file_path.isEmpty()) 265 if (file_path.isEmpty())
265 return; 266 return;
266 std::string std_file_path(file_path.toStdString()); 267 std::string std_file_path(file_path.toStdString());
267 if (!FileUtil::Exists(std_file_path) || FileUtil::IsDirectory(std_file_path)) 268 if (!FileUtil::Exists(std_file_path))
268 return; 269 return;
270 if (FileUtil::IsDirectory(std_file_path)) {
271 QDir dir(std_file_path.c_str());
272 QStringList matching_main = dir.entryList(QStringList("main"), QDir::Files);
273 if (matching_main.size() == 1) {
274 emit GameChosen(dir.path() + DIR_SEP + matching_main[0]);
275 }
276 return;
277 }
278
269 // Users usually want to run a different game after closing one 279 // Users usually want to run a different game after closing one
270 search_field->clear(); 280 search_field->clear();
271 emit GameChosen(file_path); 281 emit GameChosen(file_path);
@@ -356,13 +366,26 @@ void GameList::LoadInterfaceLayout() {
356 item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder()); 366 item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder());
357} 367}
358 368
359const QStringList GameList::supported_file_extensions = {"nso", "nro"}; 369const QStringList GameList::supported_file_extensions = {"nso", "nro", "nca"};
360 370
361static bool HasSupportedFileExtension(const std::string& file_name) { 371static bool HasSupportedFileExtension(const std::string& file_name) {
362 QFileInfo file = QFileInfo(file_name.c_str()); 372 QFileInfo file = QFileInfo(file_name.c_str());
363 return GameList::supported_file_extensions.contains(file.suffix(), Qt::CaseInsensitive); 373 return GameList::supported_file_extensions.contains(file.suffix(), Qt::CaseInsensitive);
364} 374}
365 375
376static bool IsExtractedNCAMain(const std::string& file_name) {
377 return QFileInfo(file_name.c_str()).fileName() == "main";
378}
379
380static QString FormatGameName(const std::string& physical_name) {
381 QFileInfo file_info(physical_name.c_str());
382 if (IsExtractedNCAMain(physical_name)) {
383 return file_info.dir().path();
384 } else {
385 return QString::fromStdString(physical_name);
386 }
387}
388
366void GameList::RefreshGameDirectory() { 389void GameList::RefreshGameDirectory() {
367 if (!UISettings::values.gamedir.isEmpty() && current_worker != nullptr) { 390 if (!UISettings::values.gamedir.isEmpty() && current_worker != nullptr) {
368 NGLOG_INFO(Frontend, "Change detected in the games directory. Reloading game list."); 391 NGLOG_INFO(Frontend, "Change detected in the games directory. Reloading game list.");
@@ -380,7 +403,8 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsign
380 return false; // Breaks the callback loop. 403 return false; // Breaks the callback loop.
381 404
382 bool is_dir = FileUtil::IsDirectory(physical_name); 405 bool is_dir = FileUtil::IsDirectory(physical_name);
383 if (!is_dir && HasSupportedFileExtension(physical_name)) { 406 if (!is_dir &&
407 (HasSupportedFileExtension(physical_name) || IsExtractedNCAMain(physical_name))) {
384 std::unique_ptr<Loader::AppLoader> loader = Loader::GetLoader(physical_name); 408 std::unique_ptr<Loader::AppLoader> loader = Loader::GetLoader(physical_name);
385 if (!loader) 409 if (!loader)
386 return true; 410 return true;
@@ -392,7 +416,7 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsign
392 loader->ReadProgramId(program_id); 416 loader->ReadProgramId(program_id);
393 417
394 emit EntryReady({ 418 emit EntryReady({
395 new GameListItemPath(QString::fromStdString(physical_name), smdh, program_id), 419 new GameListItemPath(FormatGameName(physical_name), smdh, program_id),
396 new GameListItem( 420 new GameListItem(
397 QString::fromStdString(Loader::GetFileTypeString(loader->GetFileType()))), 421 QString::fromStdString(Loader::GetFileTypeString(loader->GetFileType()))),
398 new GameListItemSize(FileUtil::GetSize(physical_name)), 422 new GameListItemSize(FileUtil::GetSize(physical_name)),
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 3038bd6da..97be548d7 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -13,6 +13,7 @@
13#include <QMessageBox> 13#include <QMessageBox>
14#include <QtGui> 14#include <QtGui>
15#include <QtWidgets> 15#include <QtWidgets>
16#include "common/common_paths.h"
16#include "common/logging/backend.h" 17#include "common/logging/backend.h"
17#include "common/logging/filter.h" 18#include "common/logging/filter.h"
18#include "common/logging/log.h" 19#include "common/logging/log.h"
@@ -33,7 +34,6 @@
33#include "yuzu/debugger/graphics/graphics_breakpoints.h" 34#include "yuzu/debugger/graphics/graphics_breakpoints.h"
34#include "yuzu/debugger/graphics/graphics_surface.h" 35#include "yuzu/debugger/graphics/graphics_surface.h"
35#include "yuzu/debugger/profiler.h" 36#include "yuzu/debugger/profiler.h"
36#include "yuzu/debugger/registers.h"
37#include "yuzu/debugger/wait_tree.h" 37#include "yuzu/debugger/wait_tree.h"
38#include "yuzu/game_list.h" 38#include "yuzu/game_list.h"
39#include "yuzu/hotkeys.h" 39#include "yuzu/hotkeys.h"
@@ -169,15 +169,6 @@ void GMainWindow::InitializeDebugWidgets() {
169 debug_menu->addAction(microProfileDialog->toggleViewAction()); 169 debug_menu->addAction(microProfileDialog->toggleViewAction());
170#endif 170#endif
171 171
172 registersWidget = new RegistersWidget(this);
173 addDockWidget(Qt::RightDockWidgetArea, registersWidget);
174 registersWidget->hide();
175 debug_menu->addAction(registersWidget->toggleViewAction());
176 connect(this, &GMainWindow::EmulationStarting, registersWidget,
177 &RegistersWidget::OnEmulationStarting);
178 connect(this, &GMainWindow::EmulationStopping, registersWidget,
179 &RegistersWidget::OnEmulationStopping);
180
181 graphicsBreakpointsWidget = new GraphicsBreakPointsWidget(debug_context, this); 172 graphicsBreakpointsWidget = new GraphicsBreakPointsWidget(debug_context, this);
182 addDockWidget(Qt::RightDockWidgetArea, graphicsBreakpointsWidget); 173 addDockWidget(Qt::RightDockWidgetArea, graphicsBreakpointsWidget);
183 graphicsBreakpointsWidget->hide(); 174 graphicsBreakpointsWidget->hide();
@@ -288,6 +279,7 @@ void GMainWindow::ConnectWidgetEvents() {
288void GMainWindow::ConnectMenuEvents() { 279void GMainWindow::ConnectMenuEvents() {
289 // File 280 // File
290 connect(ui.action_Load_File, &QAction::triggered, this, &GMainWindow::OnMenuLoadFile); 281 connect(ui.action_Load_File, &QAction::triggered, this, &GMainWindow::OnMenuLoadFile);
282 connect(ui.action_Load_Folder, &QAction::triggered, this, &GMainWindow::OnMenuLoadFolder);
291 connect(ui.action_Select_Game_List_Root, &QAction::triggered, this, 283 connect(ui.action_Select_Game_List_Root, &QAction::triggered, this,
292 &GMainWindow::OnMenuSelectGameListRoot); 284 &GMainWindow::OnMenuSelectGameListRoot);
293 connect(ui.action_Exit, &QAction::triggered, this, &QMainWindow::close); 285 connect(ui.action_Exit, &QAction::triggered, this, &QMainWindow::close);
@@ -460,17 +452,12 @@ void GMainWindow::BootGame(const QString& filename) {
460 connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame); 452 connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame);
461 // BlockingQueuedConnection is important here, it makes sure we've finished refreshing our views 453 // BlockingQueuedConnection is important here, it makes sure we've finished refreshing our views
462 // before the CPU continues 454 // before the CPU continues
463 connect(emu_thread.get(), &EmuThread::DebugModeEntered, registersWidget,
464 &RegistersWidget::OnDebugModeEntered, Qt::BlockingQueuedConnection);
465 connect(emu_thread.get(), &EmuThread::DebugModeEntered, waitTreeWidget, 455 connect(emu_thread.get(), &EmuThread::DebugModeEntered, waitTreeWidget,
466 &WaitTreeWidget::OnDebugModeEntered, Qt::BlockingQueuedConnection); 456 &WaitTreeWidget::OnDebugModeEntered, Qt::BlockingQueuedConnection);
467 connect(emu_thread.get(), &EmuThread::DebugModeLeft, registersWidget,
468 &RegistersWidget::OnDebugModeLeft, Qt::BlockingQueuedConnection);
469 connect(emu_thread.get(), &EmuThread::DebugModeLeft, waitTreeWidget, 457 connect(emu_thread.get(), &EmuThread::DebugModeLeft, waitTreeWidget,
470 &WaitTreeWidget::OnDebugModeLeft, Qt::BlockingQueuedConnection); 458 &WaitTreeWidget::OnDebugModeLeft, Qt::BlockingQueuedConnection);
471 459
472 // Update the GUI 460 // Update the GUI
473 registersWidget->OnDebugModeEntered();
474 if (ui.action_Single_Window_Mode->isChecked()) { 461 if (ui.action_Single_Window_Mode->isChecked()) {
475 game_list->hide(); 462 game_list->hide();
476 } 463 }
@@ -565,6 +552,8 @@ void GMainWindow::OnMenuLoadFile() {
565 for (const auto& piece : game_list->supported_file_extensions) 552 for (const auto& piece : game_list->supported_file_extensions)
566 extensions += "*." + piece + " "; 553 extensions += "*." + piece + " ";
567 554
555 extensions += "main ";
556
568 QString file_filter = tr("Switch Executable") + " (" + extensions + ")"; 557 QString file_filter = tr("Switch Executable") + " (" + extensions + ")";
569 file_filter += ";;" + tr("All Files (*.*)"); 558 file_filter += ";;" + tr("All Files (*.*)");
570 559
@@ -577,6 +566,18 @@ void GMainWindow::OnMenuLoadFile() {
577 } 566 }
578} 567}
579 568
569void GMainWindow::OnMenuLoadFolder() {
570 QDir dir = QFileDialog::getExistingDirectory(this, tr("Open Extracted ROM Directory"));
571
572 QStringList matching_main = dir.entryList(QStringList("main"), QDir::Files);
573 if (matching_main.size() == 1) {
574 BootGame(dir.path() + DIR_SEP + matching_main[0]);
575 } else {
576 QMessageBox::warning(this, tr("Invalid Directory Selected"),
577 tr("The directory you have selected does not contain a 'main' file."));
578 }
579}
580
580void GMainWindow::OnMenuSelectGameListRoot() { 581void GMainWindow::OnMenuSelectGameListRoot() {
581 QString dir_path = QFileDialog::getExistingDirectory(this, tr("Select Directory")); 582 QString dir_path = QFileDialog::getExistingDirectory(this, tr("Select Directory"));
582 if (!dir_path.isEmpty()) { 583 if (!dir_path.isEmpty()) {
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index ac3024d8a..074bba3f9 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -19,7 +19,6 @@ class GraphicsSurfaceWidget;
19class GRenderWindow; 19class GRenderWindow;
20class MicroProfileDialog; 20class MicroProfileDialog;
21class ProfilerWidget; 21class ProfilerWidget;
22class RegistersWidget;
23class WaitTreeWidget; 22class WaitTreeWidget;
24 23
25namespace Tegra { 24namespace Tegra {
@@ -124,6 +123,7 @@ private slots:
124 void OnGameListLoadFile(QString game_path); 123 void OnGameListLoadFile(QString game_path);
125 void OnGameListOpenSaveFolder(u64 program_id); 124 void OnGameListOpenSaveFolder(u64 program_id);
126 void OnMenuLoadFile(); 125 void OnMenuLoadFile();
126 void OnMenuLoadFolder();
127 /// Called whenever a user selects the "File->Select Game List Root" menu item 127 /// Called whenever a user selects the "File->Select Game List Root" menu item
128 void OnMenuSelectGameListRoot(); 128 void OnMenuSelectGameListRoot();
129 void OnMenuRecentFile(); 129 void OnMenuRecentFile();
@@ -163,7 +163,6 @@ private:
163 // Debugger panes 163 // Debugger panes
164 ProfilerWidget* profilerWidget; 164 ProfilerWidget* profilerWidget;
165 MicroProfileDialog* microProfileDialog; 165 MicroProfileDialog* microProfileDialog;
166 RegistersWidget* registersWidget;
167 GraphicsBreakPointsWidget* graphicsBreakpointsWidget; 166 GraphicsBreakPointsWidget* graphicsBreakpointsWidget;
168 GraphicsSurfaceWidget* graphicsSurfaceWidget; 167 GraphicsSurfaceWidget* graphicsSurfaceWidget;
169 WaitTreeWidget* waitTreeWidget; 168 WaitTreeWidget* waitTreeWidget;
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index 0fcd93cc2..22c4cad08 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -58,6 +58,7 @@
58 </property> 58 </property>
59 </widget> 59 </widget>
60 <addaction name="action_Load_File"/> 60 <addaction name="action_Load_File"/>
61 <addaction name="action_Load_Folder"/>
61 <addaction name="separator"/> 62 <addaction name="separator"/>
62 <addaction name="action_Select_Game_List_Root"/> 63 <addaction name="action_Select_Game_List_Root"/>
63 <addaction name="menu_recent_files"/> 64 <addaction name="menu_recent_files"/>
@@ -106,6 +107,11 @@
106 <string>Load File...</string> 107 <string>Load File...</string>
107 </property> 108 </property>
108 </action> 109 </action>
110 <action name="action_Load_Folder">
111 <property name="text">
112 <string>Load Folder...</string>
113 </property>
114 </action>
109 <action name="action_Load_Symbol_Map"> 115 <action name="action_Load_Symbol_Map">
110 <property name="text"> 116 <property name="text">
111 <string>Load Symbol Map...</string> 117 <string>Load Symbol Map...</string>