summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--appveyor.yml2
-rw-r--r--src/core/arm/unicorn/arm_unicorn.cpp38
-rw-r--r--src/core/arm/unicorn/arm_unicorn.h4
-rw-r--r--src/core/gdbstub/gdbstub.cpp170
-rw-r--r--src/core/gdbstub/gdbstub.h9
-rw-r--r--src/core/hle/service/am/am.cpp13
-rw-r--r--src/core/hle/service/am/am.h1
-rw-r--r--src/core/hle/service/nfp/nfp.cpp108
-rw-r--r--src/core/hle/service/nifm/nifm.cpp11
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp22
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h31
-rw-r--r--src/video_core/engines/maxwell_3d.cpp34
-rw-r--r--src/video_core/engines/maxwell_3d.h3
-rw-r--r--src/video_core/engines/shader_bytecode.h134
-rw-r--r--src/video_core/gpu.cpp4
-rw-r--r--src/video_core/gpu.h2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp102
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h14
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp84
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h20
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp272
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h58
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp19
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h1
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp13
-rw-r--r--src/video_core/renderer_opengl/gl_state.h6
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h23
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp1
-rw-r--r--src/video_core/textures/decoders.cpp3
-rw-r--r--src/video_core/textures/texture.h16
31 files changed, 977 insertions, 243 deletions
diff --git a/appveyor.yml b/appveyor.yml
index 4f928adb5..72cda26a7 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -53,7 +53,7 @@ build_script:
53 # https://www.appveyor.com/docs/build-phase 53 # https://www.appveyor.com/docs/build-phase
54 msbuild msvc_build/yuzu.sln /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" 54 msbuild msvc_build/yuzu.sln /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
55 } else { 55 } else {
56 C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -j4 -C mingw_build/ 2>&1' 56 C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -C mingw_build/ 2>&1'
57 } 57 }
58 58
59after_build: 59after_build:
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index c0cc62f03..ce6c5616d 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -35,6 +35,17 @@ LoadDll LoadDll::g_load_dll;
35 } \ 35 } \
36 } while (0) 36 } while (0)
37 37
38static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_data) {
39 GDBStub::BreakpointAddress bkpt =
40 GDBStub::GetNextBreakpointFromAddress(address, GDBStub::BreakpointType::Execute);
41 if (GDBStub::IsMemoryBreak() ||
42 (bkpt.type != GDBStub::BreakpointType::None && address == bkpt.address)) {
43 auto core = static_cast<ARM_Unicorn*>(user_data);
44 core->RecordBreak(bkpt);
45 uc_emu_stop(uc);
46 }
47}
48
38static void InterruptHook(uc_engine* uc, u32 intNo, void* user_data) { 49static void InterruptHook(uc_engine* uc, u32 intNo, void* user_data) {
39 u32 esr{}; 50 u32 esr{};
40 CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr)); 51 CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
@@ -67,6 +78,10 @@ ARM_Unicorn::ARM_Unicorn() {
67 uc_hook hook{}; 78 uc_hook hook{};
68 CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1)); 79 CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1));
69 CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1)); 80 CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1));
81 if (GDBStub::IsServerEnabled()) {
82 CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1));
83 last_bkpt_hit = false;
84 }
70} 85}
71 86
72ARM_Unicorn::~ARM_Unicorn() { 87ARM_Unicorn::~ARM_Unicorn() {
@@ -155,7 +170,11 @@ void ARM_Unicorn::SetTlsAddress(VAddr base) {
155} 170}
156 171
157void ARM_Unicorn::Run() { 172void ARM_Unicorn::Run() {
158 ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0)); 173 if (GDBStub::IsServerEnabled()) {
174 ExecuteInstructions(std::max(4000000, 0));
175 } else {
176 ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0));
177 }
159} 178}
160 179
161void ARM_Unicorn::Step() { 180void ARM_Unicorn::Step() {
@@ -168,6 +187,18 @@ void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
168 MICROPROFILE_SCOPE(ARM_Jit); 187 MICROPROFILE_SCOPE(ARM_Jit);
169 CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions)); 188 CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
170 CoreTiming::AddTicks(num_instructions); 189 CoreTiming::AddTicks(num_instructions);
190 if (GDBStub::IsServerEnabled()) {
191 if (last_bkpt_hit) {
192 uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
193 }
194 Kernel::Thread* thread = Kernel::GetCurrentThread();
195 SaveContext(thread->context);
196 if (last_bkpt_hit) {
197 last_bkpt_hit = false;
198 GDBStub::Break();
199 }
200 GDBStub::SendTrap(thread, 5);
201 }
171} 202}
172 203
173void ARM_Unicorn::SaveContext(ARM_Interface::ThreadContext& ctx) { 204void ARM_Unicorn::SaveContext(ARM_Interface::ThreadContext& ctx) {
@@ -233,3 +264,8 @@ void ARM_Unicorn::PrepareReschedule() {
233} 264}
234 265
235void ARM_Unicorn::ClearInstructionCache() {} 266void ARM_Unicorn::ClearInstructionCache() {}
267
268void ARM_Unicorn::RecordBreak(GDBStub::BreakpointAddress bkpt) {
269 last_bkpt = bkpt;
270 last_bkpt_hit = true;
271}
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index b99b58e4c..a482a2aa3 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -7,6 +7,7 @@
7#include <unicorn/unicorn.h> 7#include <unicorn/unicorn.h>
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "core/arm/arm_interface.h" 9#include "core/arm/arm_interface.h"
10#include "core/gdbstub/gdbstub.h"
10 11
11class ARM_Unicorn final : public ARM_Interface { 12class ARM_Unicorn final : public ARM_Interface {
12public: 13public:
@@ -35,7 +36,10 @@ public:
35 void Step() override; 36 void Step() override;
36 void ClearInstructionCache() override; 37 void ClearInstructionCache() override;
37 void PageTableChanged() override{}; 38 void PageTableChanged() override{};
39 void RecordBreak(GDBStub::BreakpointAddress bkpt);
38 40
39private: 41private:
40 uc_engine* uc{}; 42 uc_engine* uc{};
43 GDBStub::BreakpointAddress last_bkpt{};
44 bool last_bkpt_hit;
41}; 45};
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 6c5a40ba8..2603192fe 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -32,9 +32,13 @@
32 32
33#include "common/logging/log.h" 33#include "common/logging/log.h"
34#include "common/string_util.h" 34#include "common/string_util.h"
35#include "common/swap.h"
35#include "core/arm/arm_interface.h" 36#include "core/arm/arm_interface.h"
36#include "core/core.h" 37#include "core/core.h"
38#include "core/core_cpu.h"
37#include "core/gdbstub/gdbstub.h" 39#include "core/gdbstub/gdbstub.h"
40#include "core/hle/kernel/kernel.h"
41#include "core/hle/kernel/scheduler.h"
38#include "core/loader/loader.h" 42#include "core/loader/loader.h"
39#include "core/memory.h" 43#include "core/memory.h"
40 44
@@ -137,15 +141,17 @@ static u8 command_buffer[GDB_BUFFER_SIZE];
137static u32 command_length; 141static u32 command_length;
138 142
139static u32 latest_signal = 0; 143static u32 latest_signal = 0;
140static bool step_break = false;
141static bool memory_break = false; 144static bool memory_break = false;
142 145
146static Kernel::Thread* current_thread = nullptr;
147
143// Binding to a port within the reserved ports range (0-1023) requires root permissions, 148// Binding to a port within the reserved ports range (0-1023) requires root permissions,
144// so default to a port outside of that range. 149// so default to a port outside of that range.
145static u16 gdbstub_port = 24689; 150static u16 gdbstub_port = 24689;
146 151
147static bool halt_loop = true; 152static bool halt_loop = true;
148static bool step_loop = false; 153static bool step_loop = false;
154static bool send_trap = false;
149 155
150// If set to false, the server will never be started and no 156// If set to false, the server will never be started and no
151// gdbstub-related functions will be executed. 157// gdbstub-related functions will be executed.
@@ -165,6 +171,53 @@ static std::map<u64, Breakpoint> breakpoints_execute;
165static std::map<u64, Breakpoint> breakpoints_read; 171static std::map<u64, Breakpoint> breakpoints_read;
166static std::map<u64, Breakpoint> breakpoints_write; 172static std::map<u64, Breakpoint> breakpoints_write;
167 173
174static Kernel::Thread* FindThreadById(int id) {
175 for (int core = 0; core < Core::NUM_CPU_CORES; core++) {
176 auto threads = Core::System::GetInstance().Scheduler(core)->GetThreadList();
177 for (auto thread : threads) {
178 if (thread->GetThreadId() == id) {
179 current_thread = thread.get();
180 return current_thread;
181 }
182 }
183 }
184 return nullptr;
185}
186
187static u64 RegRead(int id, Kernel::Thread* thread = nullptr) {
188 if (!thread) {
189 return 0;
190 }
191
192 if (id < SP_REGISTER) {
193 return thread->context.cpu_registers[id];
194 } else if (id == SP_REGISTER) {
195 return thread->context.sp;
196 } else if (id == PC_REGISTER) {
197 return thread->context.pc;
198 } else if (id == CPSR_REGISTER) {
199 return thread->context.cpsr;
200 } else {
201 return 0;
202 }
203}
204
205static void RegWrite(int id, u64 val, Kernel::Thread* thread = nullptr) {
206 if (!thread) {
207 return;
208 }
209
210 if (id < SP_REGISTER) {
211 thread->context.cpu_registers[id] = val;
212 } else if (id == SP_REGISTER) {
213 thread->context.sp = val;
214 } else if (id == PC_REGISTER) {
215 thread->context.pc = val;
216 } else if (id == CPSR_REGISTER) {
217 thread->context.cpsr = val;
218 }
219}
220
168/** 221/**
169 * Turns hex string character into the equivalent byte. 222 * Turns hex string character into the equivalent byte.
170 * 223 *
@@ -193,7 +246,7 @@ static u8 NibbleToHex(u8 n) {
193 if (n < 0xA) { 246 if (n < 0xA) {
194 return '0' + n; 247 return '0' + n;
195 } else { 248 } else {
196 return 'A' + n - 0xA; 249 return 'a' + n - 0xA;
197 } 250 }
198} 251}
199 252
@@ -439,6 +492,8 @@ static void SendReply(const char* reply) {
439 return; 492 return;
440 } 493 }
441 494
495 NGLOG_DEBUG(Debug_GDBStub, "Reply: {}", reply);
496
442 memset(command_buffer, 0, sizeof(command_buffer)); 497 memset(command_buffer, 0, sizeof(command_buffer));
443 498
444 command_length = static_cast<u32>(strlen(reply)); 499 command_length = static_cast<u32>(strlen(reply));
@@ -483,6 +538,22 @@ static void HandleQuery() {
483 } else if (strncmp(query, "Xfer:features:read:target.xml:", 538 } else if (strncmp(query, "Xfer:features:read:target.xml:",
484 strlen("Xfer:features:read:target.xml:")) == 0) { 539 strlen("Xfer:features:read:target.xml:")) == 0) {
485 SendReply(target_xml); 540 SendReply(target_xml);
541 } else if (strncmp(query, "Offsets", strlen("Offsets")) == 0) {
542 std::string buffer = fmt::format("TextSeg={:0x}", Memory::PROCESS_IMAGE_VADDR);
543 SendReply(buffer.c_str());
544 } else if (strncmp(query, "fThreadInfo", strlen("fThreadInfo")) == 0) {
545 std::string val = "m";
546 for (int core = 0; core < Core::NUM_CPU_CORES; core++) {
547 auto threads = Core::System::GetInstance().Scheduler(core)->GetThreadList();
548 for (auto thread : threads) {
549 val += fmt::format("{:x}", thread->GetThreadId());
550 val += ",";
551 }
552 }
553 val.pop_back();
554 SendReply(val.c_str());
555 } else if (strncmp(query, "sThreadInfo", strlen("sThreadInfo")) == 0) {
556 SendReply("l");
486 } else { 557 } else {
487 SendReply(""); 558 SendReply("");
488 } 559 }
@@ -490,11 +561,40 @@ static void HandleQuery() {
490 561
491/// Handle set thread command from gdb client. 562/// Handle set thread command from gdb client.
492static void HandleSetThread() { 563static void HandleSetThread() {
493 if (memcmp(command_buffer, "Hg0", 3) == 0 || memcmp(command_buffer, "Hc-1", 4) == 0 || 564 if (memcmp(command_buffer, "Hc", 2) == 0 || memcmp(command_buffer, "Hg", 2) == 0) {
494 memcmp(command_buffer, "Hc0", 4) == 0 || memcmp(command_buffer, "Hc1", 4) == 0) { 565 int thread_id = -1;
495 return SendReply("OK"); 566 if (command_buffer[2] != '-') {
567 thread_id = static_cast<int>(HexToInt(
568 command_buffer + 2,
569 command_length - 2 /*strlen(reinterpret_cast<char*>(command_buffer) + 2)*/));
570 }
571 if (thread_id >= 1) {
572 current_thread = FindThreadById(thread_id);
573 }
574 if (!current_thread) {
575 thread_id = 1;
576 current_thread = FindThreadById(thread_id);
577 }
578 if (current_thread) {
579 SendReply("OK");
580 return;
581 }
496 } 582 }
583 SendReply("E01");
584}
497 585
586/// Handle thread alive command from gdb client.
587static void HandleThreadAlive() {
588 int thread_id = static_cast<int>(
589 HexToInt(command_buffer + 1,
590 command_length - 1 /*strlen(reinterpret_cast<char*>(command_buffer) + 1)*/));
591 if (thread_id == 0) {
592 thread_id = 1;
593 }
594 if (FindThreadById(thread_id)) {
595 SendReply("OK");
596 return;
597 }
498 SendReply("E01"); 598 SendReply("E01");
499} 599}
500 600
@@ -503,15 +603,24 @@ static void HandleSetThread() {
503 * 603 *
504 * @param signal Signal to be sent to client. 604 * @param signal Signal to be sent to client.
505 */ 605 */
506static void SendSignal(u32 signal) { 606static void SendSignal(Kernel::Thread* thread, u32 signal, bool full = true) {
507 if (gdbserver_socket == -1) { 607 if (gdbserver_socket == -1) {
508 return; 608 return;
509 } 609 }
510 610
511 latest_signal = signal; 611 latest_signal = signal;
512 612
513 std::string buffer = fmt::format("T{:02x}", latest_signal); 613 std::string buffer;
514 NGLOG_DEBUG(Debug_GDBStub, "Response: {}", buffer); 614 if (full) {
615 buffer = fmt::format("T{:02x}{:02x}:{:016x};{:02x}:{:016x};", latest_signal, PC_REGISTER,
616 Common::swap64(RegRead(PC_REGISTER, thread)), SP_REGISTER,
617 Common::swap64(RegRead(SP_REGISTER, thread)));
618 } else {
619 buffer = fmt::format("T{:02x};", latest_signal);
620 }
621
622 buffer += fmt::format("thread:{:x};", thread->GetThreadId());
623
515 SendReply(buffer.c_str()); 624 SendReply(buffer.c_str());
516} 625}
517 626
@@ -527,7 +636,7 @@ static void ReadCommand() {
527 } else if (c == 0x03) { 636 } else if (c == 0x03) {
528 NGLOG_INFO(Debug_GDBStub, "gdb: found break command"); 637 NGLOG_INFO(Debug_GDBStub, "gdb: found break command");
529 halt_loop = true; 638 halt_loop = true;
530 SendSignal(SIGTRAP); 639 SendSignal(current_thread, SIGTRAP);
531 return; 640 return;
532 } else if (c != GDB_STUB_START) { 641 } else if (c != GDB_STUB_START) {
533 NGLOG_DEBUG(Debug_GDBStub, "gdb: read invalid byte {:02X}", c); 642 NGLOG_DEBUG(Debug_GDBStub, "gdb: read invalid byte {:02X}", c);
@@ -598,11 +707,11 @@ static void ReadRegister() {
598 } 707 }
599 708
600 if (id <= SP_REGISTER) { 709 if (id <= SP_REGISTER) {
601 LongToGdbHex(reply, Core::CurrentArmInterface().GetReg(static_cast<int>(id))); 710 LongToGdbHex(reply, RegRead(id, current_thread));
602 } else if (id == PC_REGISTER) { 711 } else if (id == PC_REGISTER) {
603 LongToGdbHex(reply, Core::CurrentArmInterface().GetPC()); 712 LongToGdbHex(reply, RegRead(id, current_thread));
604 } else if (id == CPSR_REGISTER) { 713 } else if (id == CPSR_REGISTER) {
605 IntToGdbHex(reply, Core::CurrentArmInterface().GetCPSR()); 714 IntToGdbHex(reply, (u32)RegRead(id, current_thread));
606 } else { 715 } else {
607 return SendReply("E01"); 716 return SendReply("E01");
608 } 717 }
@@ -618,16 +727,16 @@ static void ReadRegisters() {
618 u8* bufptr = buffer; 727 u8* bufptr = buffer;
619 728
620 for (int reg = 0; reg <= SP_REGISTER; reg++) { 729 for (int reg = 0; reg <= SP_REGISTER; reg++) {
621 LongToGdbHex(bufptr + reg * 16, Core::CurrentArmInterface().GetReg(reg)); 730 LongToGdbHex(bufptr + reg * 16, RegRead(reg, current_thread));
622 } 731 }
623 732
624 bufptr += (32 * 16); 733 bufptr += (32 * 16);
625 734
626 LongToGdbHex(bufptr, Core::CurrentArmInterface().GetPC()); 735 LongToGdbHex(bufptr, RegRead(PC_REGISTER, current_thread));
627 736
628 bufptr += 16; 737 bufptr += 16;
629 738
630 IntToGdbHex(bufptr, Core::CurrentArmInterface().GetCPSR()); 739 IntToGdbHex(bufptr, (u32)RegRead(CPSR_REGISTER, current_thread));
631 740
632 bufptr += 8; 741 bufptr += 8;
633 742
@@ -646,11 +755,11 @@ static void WriteRegister() {
646 } 755 }
647 756
648 if (id <= SP_REGISTER) { 757 if (id <= SP_REGISTER) {
649 Core::CurrentArmInterface().SetReg(id, GdbHexToLong(buffer_ptr)); 758 RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
650 } else if (id == PC_REGISTER) { 759 } else if (id == PC_REGISTER) {
651 Core::CurrentArmInterface().SetPC(GdbHexToLong(buffer_ptr)); 760 RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
652 } else if (id == CPSR_REGISTER) { 761 } else if (id == CPSR_REGISTER) {
653 Core::CurrentArmInterface().SetCPSR(GdbHexToInt(buffer_ptr)); 762 RegWrite(id, GdbHexToInt(buffer_ptr), current_thread);
654 } else { 763 } else {
655 return SendReply("E01"); 764 return SendReply("E01");
656 } 765 }
@@ -667,11 +776,11 @@ static void WriteRegisters() {
667 776
668 for (int i = 0, reg = 0; reg <= CPSR_REGISTER; i++, reg++) { 777 for (int i = 0, reg = 0; reg <= CPSR_REGISTER; i++, reg++) {
669 if (reg <= SP_REGISTER) { 778 if (reg <= SP_REGISTER) {
670 Core::CurrentArmInterface().SetReg(reg, GdbHexToLong(buffer_ptr + i * 16)); 779 RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread);
671 } else if (reg == PC_REGISTER) { 780 } else if (reg == PC_REGISTER) {
672 Core::CurrentArmInterface().SetPC(GdbHexToLong(buffer_ptr + i * 16)); 781 RegWrite(PC_REGISTER, GdbHexToLong(buffer_ptr + i * 16), current_thread);
673 } else if (reg == CPSR_REGISTER) { 782 } else if (reg == CPSR_REGISTER) {
674 Core::CurrentArmInterface().SetCPSR(GdbHexToInt(buffer_ptr + i * 16)); 783 RegWrite(CPSR_REGISTER, GdbHexToInt(buffer_ptr + i * 16), current_thread);
675 } else { 784 } else {
676 UNIMPLEMENTED(); 785 UNIMPLEMENTED();
677 } 786 }
@@ -734,7 +843,7 @@ static void WriteMemory() {
734void Break(bool is_memory_break) { 843void Break(bool is_memory_break) {
735 if (!halt_loop) { 844 if (!halt_loop) {
736 halt_loop = true; 845 halt_loop = true;
737 SendSignal(SIGTRAP); 846 send_trap = true;
738 } 847 }
739 848
740 memory_break = is_memory_break; 849 memory_break = is_memory_break;
@@ -744,10 +853,10 @@ void Break(bool is_memory_break) {
744static void Step() { 853static void Step() {
745 step_loop = true; 854 step_loop = true;
746 halt_loop = true; 855 halt_loop = true;
747 step_break = true; 856 send_trap = true;
748 SendSignal(SIGTRAP);
749} 857}
750 858
859/// Tell the CPU if we hit a memory breakpoint.
751bool IsMemoryBreak() { 860bool IsMemoryBreak() {
752 if (IsConnected()) { 861 if (IsConnected()) {
753 return false; 862 return false;
@@ -759,7 +868,6 @@ bool IsMemoryBreak() {
759/// Tell the CPU to continue executing. 868/// Tell the CPU to continue executing.
760static void Continue() { 869static void Continue() {
761 memory_break = false; 870 memory_break = false;
762 step_break = false;
763 step_loop = false; 871 step_loop = false;
764 halt_loop = false; 872 halt_loop = false;
765} 873}
@@ -898,7 +1006,7 @@ void HandlePacket() {
898 HandleSetThread(); 1006 HandleSetThread();
899 break; 1007 break;
900 case '?': 1008 case '?':
901 SendSignal(latest_signal); 1009 SendSignal(current_thread, latest_signal);
902 break; 1010 break;
903 case 'k': 1011 case 'k':
904 Shutdown(); 1012 Shutdown();
@@ -935,6 +1043,9 @@ void HandlePacket() {
935 case 'Z': 1043 case 'Z':
936 AddBreakpoint(); 1044 AddBreakpoint();
937 break; 1045 break;
1046 case 'T':
1047 HandleThreadAlive();
1048 break;
938 default: 1049 default:
939 SendReply(""); 1050 SendReply("");
940 break; 1051 break;
@@ -1079,4 +1190,11 @@ bool GetCpuStepFlag() {
1079void SetCpuStepFlag(bool is_step) { 1190void SetCpuStepFlag(bool is_step) {
1080 step_loop = is_step; 1191 step_loop = is_step;
1081} 1192}
1193
1194void SendTrap(Kernel::Thread* thread, int trap) {
1195 if (send_trap) {
1196 send_trap = false;
1197 SendSignal(thread, trap);
1198 }
1199}
1082}; // namespace GDBStub 1200}; // namespace GDBStub
diff --git a/src/core/gdbstub/gdbstub.h b/src/core/gdbstub/gdbstub.h
index 201fca095..f2418c9e4 100644
--- a/src/core/gdbstub/gdbstub.h
+++ b/src/core/gdbstub/gdbstub.h
@@ -7,6 +7,7 @@
7#pragma once 7#pragma once
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/hle/kernel/thread.h"
10 11
11namespace GDBStub { 12namespace GDBStub {
12 13
@@ -91,4 +92,12 @@ bool GetCpuStepFlag();
91 * @param is_step 92 * @param is_step
92 */ 93 */
93void SetCpuStepFlag(bool is_step); 94void SetCpuStepFlag(bool is_step);
95
96/**
97 * Send trap signal from thread back to the gdbstub server.
98 *
99 * @param thread Sending thread.
100 * @param trap Trap no.
101 */
102void SendTrap(Kernel::Thread* thread, int trap);
94} // namespace GDBStub 103} // namespace GDBStub
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 12954556d..b8d6b8d4d 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -561,7 +561,7 @@ IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationF
561 {32, nullptr, "BeginBlockingHomeButton"}, 561 {32, nullptr, "BeginBlockingHomeButton"},
562 {33, nullptr, "EndBlockingHomeButton"}, 562 {33, nullptr, "EndBlockingHomeButton"},
563 {40, &IApplicationFunctions::NotifyRunning, "NotifyRunning"}, 563 {40, &IApplicationFunctions::NotifyRunning, "NotifyRunning"},
564 {50, nullptr, "GetPseudoDeviceId"}, 564 {50, &IApplicationFunctions::GetPseudoDeviceId, "GetPseudoDeviceId"},
565 {60, nullptr, "SetMediaPlaybackStateForApplication"}, 565 {60, nullptr, "SetMediaPlaybackStateForApplication"},
566 {65, nullptr, "IsGamePlayRecordingSupported"}, 566 {65, nullptr, "IsGamePlayRecordingSupported"},
567 {66, &IApplicationFunctions::InitializeGamePlayRecording, "InitializeGamePlayRecording"}, 567 {66, &IApplicationFunctions::InitializeGamePlayRecording, "InitializeGamePlayRecording"},
@@ -684,6 +684,17 @@ void IApplicationFunctions::NotifyRunning(Kernel::HLERequestContext& ctx) {
684 NGLOG_WARNING(Service_AM, "(STUBBED) called"); 684 NGLOG_WARNING(Service_AM, "(STUBBED) called");
685} 685}
686 686
687void IApplicationFunctions::GetPseudoDeviceId(Kernel::HLERequestContext& ctx) {
688 IPC::ResponseBuilder rb{ctx, 6};
689 rb.Push(RESULT_SUCCESS);
690
691 // Returns a 128-bit UUID
692 rb.Push<u64>(0);
693 rb.Push<u64>(0);
694
695 NGLOG_WARNING(Service_AM, "(STUBBED) called");
696}
697
687void InstallInterfaces(SM::ServiceManager& service_manager, 698void InstallInterfaces(SM::ServiceManager& service_manager,
688 std::shared_ptr<NVFlinger::NVFlinger> nvflinger) { 699 std::shared_ptr<NVFlinger::NVFlinger> nvflinger) {
689 std::make_shared<AppletAE>(nvflinger)->InstallAsService(service_manager); 700 std::make_shared<AppletAE>(nvflinger)->InstallAsService(service_manager);
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index 301a6c798..1da79fd01 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -138,6 +138,7 @@ private:
138 void InitializeGamePlayRecording(Kernel::HLERequestContext& ctx); 138 void InitializeGamePlayRecording(Kernel::HLERequestContext& ctx);
139 void SetGamePlayRecordingState(Kernel::HLERequestContext& ctx); 139 void SetGamePlayRecordingState(Kernel::HLERequestContext& ctx);
140 void NotifyRunning(Kernel::HLERequestContext& ctx); 140 void NotifyRunning(Kernel::HLERequestContext& ctx);
141 void GetPseudoDeviceId(Kernel::HLERequestContext& ctx);
141}; 142};
142 143
143class IHomeMenuFunctions final : public ServiceFramework<IHomeMenuFunctions> { 144class IHomeMenuFunctions final : public ServiceFramework<IHomeMenuFunctions> {
diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp
index 2af4465de..2a9f84037 100644
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -4,6 +4,8 @@
4 4
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "core/hle/ipc_helpers.h" 6#include "core/hle/ipc_helpers.h"
7#include "core/hle/kernel/event.h"
8#include "core/hle/service/hid/hid.h"
7#include "core/hle/service/nfp/nfp.h" 9#include "core/hle/service/nfp/nfp.h"
8#include "core/hle/service/nfp/nfp_user.h" 10#include "core/hle/service/nfp/nfp_user.h"
9 11
@@ -18,7 +20,7 @@ public:
18 static const FunctionInfo functions[] = { 20 static const FunctionInfo functions[] = {
19 {0, &IUser::Initialize, "Initialize"}, 21 {0, &IUser::Initialize, "Initialize"},
20 {1, nullptr, "Finalize"}, 22 {1, nullptr, "Finalize"},
21 {2, nullptr, "ListDevices"}, 23 {2, &IUser::ListDevices, "ListDevices"},
22 {3, nullptr, "StartDetection"}, 24 {3, nullptr, "StartDetection"},
23 {4, nullptr, "StopDetection"}, 25 {4, nullptr, "StopDetection"},
24 {5, nullptr, "Mount"}, 26 {5, nullptr, "Mount"},
@@ -33,24 +35,116 @@ public:
33 {14, nullptr, "GetRegisterInfo"}, 35 {14, nullptr, "GetRegisterInfo"},
34 {15, nullptr, "GetCommonInfo"}, 36 {15, nullptr, "GetCommonInfo"},
35 {16, nullptr, "GetModelInfo"}, 37 {16, nullptr, "GetModelInfo"},
36 {17, nullptr, "AttachActivateEvent"}, 38 {17, &IUser::AttachActivateEvent, "AttachActivateEvent"},
37 {18, nullptr, "AttachDeactivateEvent"}, 39 {18, &IUser::AttachDeactivateEvent, "AttachDeactivateEvent"},
38 {19, nullptr, "GetState"}, 40 {19, &IUser::GetState, "GetState"},
39 {20, nullptr, "GetDeviceState"}, 41 {20, &IUser::GetDeviceState, "GetDeviceState"},
40 {21, nullptr, "GetNpadId"}, 42 {21, &IUser::GetNpadId, "GetNpadId"},
41 {22, nullptr, "GetApplicationArea2"}, 43 {22, nullptr, "GetApplicationArea2"},
42 {23, nullptr, "AttachAvailabilityChangeEvent"}, 44 {23, &IUser::AttachAvailabilityChangeEvent, "AttachAvailabilityChangeEvent"},
43 {24, nullptr, "RecreateApplicationArea"}, 45 {24, nullptr, "RecreateApplicationArea"},
44 }; 46 };
45 RegisterHandlers(functions); 47 RegisterHandlers(functions);
48
49 activate_event = Kernel::Event::Create(Kernel::ResetType::OneShot, "IUser:ActivateEvent");
50 deactivate_event =
51 Kernel::Event::Create(Kernel::ResetType::OneShot, "IUser:DeactivateEvent");
52 availability_change_event =
53 Kernel::Event::Create(Kernel::ResetType::OneShot, "IUser:AvailabilityChangeEvent");
46 } 54 }
47 55
48private: 56private:
57 enum class State : u32 {
58 NonInitialized = 0,
59 Initialized = 1,
60 };
61
62 enum class DeviceState : u32 {
63 Initialized = 0,
64 };
65
49 void Initialize(Kernel::HLERequestContext& ctx) { 66 void Initialize(Kernel::HLERequestContext& ctx) {
50 NGLOG_WARNING(Service_NFP, "(STUBBED) called"); 67 NGLOG_WARNING(Service_NFP, "(STUBBED) called");
68
69 state = State::Initialized;
70
51 IPC::ResponseBuilder rb{ctx, 2}; 71 IPC::ResponseBuilder rb{ctx, 2};
52 rb.Push(RESULT_SUCCESS); 72 rb.Push(RESULT_SUCCESS);
53 } 73 }
74
75 void ListDevices(Kernel::HLERequestContext& ctx) {
76 IPC::RequestParser rp{ctx};
77 const u32 array_size = rp.Pop<u32>();
78
79 ctx.WriteBuffer(&device_handle, sizeof(device_handle));
80
81 NGLOG_WARNING(Service_NFP, "(STUBBED) called, array_size={}", array_size);
82
83 IPC::ResponseBuilder rb{ctx, 3};
84 rb.Push(RESULT_SUCCESS);
85 rb.Push<u32>(0);
86 }
87
88 void AttachActivateEvent(Kernel::HLERequestContext& ctx) {
89 IPC::RequestParser rp{ctx};
90 const u64 dev_handle = rp.Pop<u64>();
91 NGLOG_WARNING(Service_NFP, "(STUBBED) called, dev_handle=0x{:X}", dev_handle);
92
93 IPC::ResponseBuilder rb{ctx, 2, 1};
94 rb.Push(RESULT_SUCCESS);
95 rb.PushCopyObjects(activate_event);
96 }
97
98 void AttachDeactivateEvent(Kernel::HLERequestContext& ctx) {
99 IPC::RequestParser rp{ctx};
100 const u64 dev_handle = rp.Pop<u64>();
101 NGLOG_WARNING(Service_NFP, "(STUBBED) called, dev_handle=0x{:X}", dev_handle);
102
103 IPC::ResponseBuilder rb{ctx, 2, 1};
104 rb.Push(RESULT_SUCCESS);
105 rb.PushCopyObjects(deactivate_event);
106 }
107
108 void GetState(Kernel::HLERequestContext& ctx) {
109 NGLOG_WARNING(Service_NFP, "(STUBBED) called");
110 IPC::ResponseBuilder rb{ctx, 3};
111 rb.Push(RESULT_SUCCESS);
112 rb.Push<u32>(static_cast<u32>(state));
113 }
114
115 void GetDeviceState(Kernel::HLERequestContext& ctx) {
116 NGLOG_WARNING(Service_NFP, "(STUBBED) called");
117 IPC::ResponseBuilder rb{ctx, 3};
118 rb.Push(RESULT_SUCCESS);
119 rb.Push<u32>(static_cast<u32>(device_state));
120 }
121
122 void GetNpadId(Kernel::HLERequestContext& ctx) {
123 IPC::RequestParser rp{ctx};
124 const u64 dev_handle = rp.Pop<u64>();
125 NGLOG_WARNING(Service_NFP, "(STUBBED) called, dev_handle=0x{:X}", dev_handle);
126 IPC::ResponseBuilder rb{ctx, 3};
127 rb.Push(RESULT_SUCCESS);
128 rb.Push<u32>(npad_id);
129 }
130
131 void AttachAvailabilityChangeEvent(Kernel::HLERequestContext& ctx) {
132 IPC::RequestParser rp{ctx};
133 const u64 dev_handle = rp.Pop<u64>();
134 NGLOG_WARNING(Service_NFP, "(STUBBED) called, dev_handle=0x{:X}", dev_handle);
135
136 IPC::ResponseBuilder rb{ctx, 2, 1};
137 rb.Push(RESULT_SUCCESS);
138 rb.PushCopyObjects(availability_change_event);
139 }
140
141 const u64 device_handle{0xDEAD};
142 const HID::ControllerID npad_id{HID::Controller_Player1};
143 State state{State::NonInitialized};
144 DeviceState device_state{DeviceState::Initialized};
145 Kernel::SharedPtr<Kernel::Event> activate_event;
146 Kernel::SharedPtr<Kernel::Event> deactivate_event;
147 Kernel::SharedPtr<Kernel::Event> availability_change_event;
54}; 148};
55 149
56void Module::Interface::CreateUserInterface(Kernel::HLERequestContext& ctx) { 150void Module::Interface::CreateUserInterface(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp
index eee92cfcd..62489c7fe 100644
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -38,7 +38,7 @@ public:
38 {8, nullptr, "SetPriority"}, 38 {8, nullptr, "SetPriority"},
39 {9, nullptr, "SetNetworkProfileId"}, 39 {9, nullptr, "SetNetworkProfileId"},
40 {10, nullptr, "SetRejectable"}, 40 {10, nullptr, "SetRejectable"},
41 {11, nullptr, "SetConnectionConfirmationOption"}, 41 {11, &IRequest::SetConnectionConfirmationOption, "SetConnectionConfirmationOption"},
42 {12, nullptr, "SetPersistent"}, 42 {12, nullptr, "SetPersistent"},
43 {13, nullptr, "SetInstant"}, 43 {13, nullptr, "SetInstant"},
44 {14, nullptr, "SetSustainable"}, 44 {14, nullptr, "SetSustainable"},
@@ -67,23 +67,32 @@ private:
67 rb.Push(RESULT_SUCCESS); 67 rb.Push(RESULT_SUCCESS);
68 rb.Push<u32>(0); 68 rb.Push<u32>(0);
69 } 69 }
70
70 void GetResult(Kernel::HLERequestContext& ctx) { 71 void GetResult(Kernel::HLERequestContext& ctx) {
71 NGLOG_WARNING(Service_NIFM, "(STUBBED) called"); 72 NGLOG_WARNING(Service_NIFM, "(STUBBED) called");
72 IPC::ResponseBuilder rb{ctx, 2}; 73 IPC::ResponseBuilder rb{ctx, 2};
73 rb.Push(RESULT_SUCCESS); 74 rb.Push(RESULT_SUCCESS);
74 } 75 }
76
75 void GetSystemEventReadableHandles(Kernel::HLERequestContext& ctx) { 77 void GetSystemEventReadableHandles(Kernel::HLERequestContext& ctx) {
76 NGLOG_WARNING(Service_NIFM, "(STUBBED) called"); 78 NGLOG_WARNING(Service_NIFM, "(STUBBED) called");
77 IPC::ResponseBuilder rb{ctx, 2, 2}; 79 IPC::ResponseBuilder rb{ctx, 2, 2};
78 rb.Push(RESULT_SUCCESS); 80 rb.Push(RESULT_SUCCESS);
79 rb.PushCopyObjects(event1, event2); 81 rb.PushCopyObjects(event1, event2);
80 } 82 }
83
81 void Cancel(Kernel::HLERequestContext& ctx) { 84 void Cancel(Kernel::HLERequestContext& ctx) {
82 NGLOG_WARNING(Service_NIFM, "(STUBBED) called"); 85 NGLOG_WARNING(Service_NIFM, "(STUBBED) called");
83 IPC::ResponseBuilder rb{ctx, 2}; 86 IPC::ResponseBuilder rb{ctx, 2};
84 rb.Push(RESULT_SUCCESS); 87 rb.Push(RESULT_SUCCESS);
85 } 88 }
86 89
90 void SetConnectionConfirmationOption(Kernel::HLERequestContext& ctx) {
91 NGLOG_WARNING(Service_NIFM, "(STUBBED) called");
92 IPC::ResponseBuilder rb{ctx, 2};
93 rb.Push(RESULT_SUCCESS);
94 }
95
87 Kernel::SharedPtr<Kernel::Event> event1, event2; 96 Kernel::SharedPtr<Kernel::Event> event1, event2;
88}; 97};
89 98
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index a9538ff43..0abc0de83 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -26,6 +26,10 @@ u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vec
26 return ZCullGetInfo(input, output); 26 return ZCullGetInfo(input, output);
27 case IoctlCommand::IocZbcSetTable: 27 case IoctlCommand::IocZbcSetTable:
28 return ZBCSetTable(input, output); 28 return ZBCSetTable(input, output);
29 case IoctlCommand::IocZbcQueryTable:
30 return ZBCQueryTable(input, output);
31 case IoctlCommand::IocFlushL2:
32 return FlushL2(input, output);
29 } 33 }
30 UNIMPLEMENTED_MSG("Unimplemented ioctl"); 34 UNIMPLEMENTED_MSG("Unimplemented ioctl");
31 return 0; 35 return 0;
@@ -136,4 +140,22 @@ u32 nvhost_ctrl_gpu::ZBCSetTable(const std::vector<u8>& input, std::vector<u8>&
136 return 0; 140 return 0;
137} 141}
138 142
143u32 nvhost_ctrl_gpu::ZBCQueryTable(const std::vector<u8>& input, std::vector<u8>& output) {
144 NGLOG_WARNING(Service_NVDRV, "(STUBBED) called");
145 IoctlZbcQueryTable params{};
146 std::memcpy(&params, input.data(), input.size());
147 // TODO : To implement properly
148 std::memcpy(output.data(), &params, output.size());
149 return 0;
150}
151
152u32 nvhost_ctrl_gpu::FlushL2(const std::vector<u8>& input, std::vector<u8>& output) {
153 NGLOG_WARNING(Service_NVDRV, "(STUBBED) called");
154 IoctlFlushL2 params{};
155 std::memcpy(&params, input.data(), input.size());
156 // TODO : To implement properly
157 std::memcpy(output.data(), &params, output.size());
158 return 0;
159}
160
139} // namespace Service::Nvidia::Devices 161} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index 1d5ba2e67..f09113e67 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -26,6 +26,18 @@ private:
26 IocZcullGetCtxSizeCommand = 0x80044701, 26 IocZcullGetCtxSizeCommand = 0x80044701,
27 IocZcullGetInfo = 0x80284702, 27 IocZcullGetInfo = 0x80284702,
28 IocZbcSetTable = 0x402C4703, 28 IocZbcSetTable = 0x402C4703,
29 IocZbcQueryTable = 0xC0344704,
30 IocFlushL2 = 0x40084707,
31 IocInvalICache = 0x4008470D,
32 IocSetMmudebugMode = 0x4008470E,
33 IocSetSmDebugMode = 0x4010470F,
34 IocWaitForPause = 0xC0084710,
35 IocGetTcpExceptionEnStatus = 0x80084711,
36 IocNumVsms = 0x80084712,
37 IocVsmsMapping = 0xC0044713,
38 IocGetErrorChannelUserData = 0xC008471B,
39 IocGetGpuTime = 0xC010471C,
40 IocGetCpuTimeCorrelationInfo = 0xC108471D,
29 }; 41 };
30 42
31 struct IoctlGpuCharacteristics { 43 struct IoctlGpuCharacteristics {
@@ -127,12 +139,31 @@ private:
127 }; 139 };
128 static_assert(sizeof(IoctlZbcSetTable) == 44, "IoctlZbcSetTable is incorrect size"); 140 static_assert(sizeof(IoctlZbcSetTable) == 44, "IoctlZbcSetTable is incorrect size");
129 141
142 struct IoctlZbcQueryTable {
143 u32_le color_ds[4];
144 u32_le color_l2[4];
145 u32_le depth;
146 u32_le ref_cnt;
147 u32_le format;
148 u32_le type;
149 u32_le index_size;
150 };
151 static_assert(sizeof(IoctlZbcQueryTable) == 52, "IoctlZbcQueryTable is incorrect size");
152
153 struct IoctlFlushL2 {
154 u32_le flush; // l2_flush | l2_invalidate << 1 | fb_flush << 2
155 u32_le reserved;
156 };
157 static_assert(sizeof(IoctlFlushL2) == 8, "IoctlFlushL2 is incorrect size");
158
130 u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output); 159 u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output);
131 u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output); 160 u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output);
132 u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output); 161 u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
133 u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output); 162 u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
134 u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output); 163 u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
135 u32 ZBCSetTable(const std::vector<u8>& input, std::vector<u8>& output); 164 u32 ZBCSetTable(const std::vector<u8>& input, std::vector<u8>& output);
165 u32 ZBCQueryTable(const std::vector<u8>& input, std::vector<u8>& output);
166 u32 FlushL2(const std::vector<u8>& input, std::vector<u8>& output);
136}; 167};
137 168
138} // namespace Service::Nvidia::Devices 169} // namespace Service::Nvidia::Devices
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index ef12d9300..86e9dc998 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -354,6 +354,40 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
354 return textures; 354 return textures;
355} 355}
356 356
357Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, size_t offset) const {
358 auto& shader = state.shader_stages[static_cast<size_t>(stage)];
359 auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
360 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
361
362 GPUVAddr tex_info_address = tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
363
364 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
365
366 boost::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
367 Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
368
369 Texture::FullTextureInfo tex_info{};
370 tex_info.index = static_cast<u32>(offset);
371
372 // Load the TIC data.
373 if (tex_handle.tic_id != 0) {
374 tex_info.enabled = true;
375
376 auto tic_entry = GetTICEntry(tex_handle.tic_id);
377 // TODO(Subv): Workaround for BitField's move constructor being deleted.
378 std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
379 }
380
381 // Load the TSC data
382 if (tex_handle.tsc_id != 0) {
383 auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
384 // TODO(Subv): Workaround for BitField's move constructor being deleted.
385 std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
386 }
387
388 return tex_info;
389}
390
357u32 Maxwell3D::GetRegisterValue(u32 method) const { 391u32 Maxwell3D::GetRegisterValue(u32 method) const {
358 ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); 392 ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
359 return regs.reg_array[method]; 393 return regs.reg_array[method];
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 245410c95..56b837372 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -664,6 +664,9 @@ public:
664 /// Returns a list of enabled textures for the specified shader stage. 664 /// Returns a list of enabled textures for the specified shader stage.
665 std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; 665 std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
666 666
667 /// Returns the texture information for a specific texture in a specific shader stage.
668 Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const;
669
667 /// Returns whether the specified shader stage is enabled or not. 670 /// Returns whether the specified shader stage is enabled or not.
668 bool IsShaderStageEnabled(Regs::ShaderStage stage) const; 671 bool IsShaderStageEnabled(Regs::ShaderStage stage) const;
669 672
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 8d4ea3401..32800392b 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -109,11 +109,6 @@ union Sampler {
109 u64 value{}; 109 u64 value{};
110}; 110};
111 111
112union Uniform {
113 BitField<20, 14, u64> offset;
114 BitField<34, 5, u64> index;
115};
116
117} // namespace Shader 112} // namespace Shader
118} // namespace Tegra 113} // namespace Tegra
119 114
@@ -173,6 +168,31 @@ enum class SubOp : u64 {
173 Min = 0x8, 168 Min = 0x8,
174}; 169};
175 170
171enum class F2iRoundingOp : u64 {
172 None = 0,
173 Floor = 1,
174 Ceil = 2,
175 Trunc = 3,
176};
177
178enum class F2fRoundingOp : u64 {
179 None = 0,
180 Pass = 3,
181 Round = 8,
182 Floor = 9,
183 Ceil = 10,
184 Trunc = 11,
185};
186
187enum class UniformType : u64 {
188 UnsignedByte = 0,
189 SignedByte = 1,
190 UnsignedShort = 2,
191 SignedShort = 3,
192 Single = 4,
193 Double = 5,
194};
195
176union Instruction { 196union Instruction {
177 Instruction& operator=(const Instruction& instr) { 197 Instruction& operator=(const Instruction& instr) {
178 value = instr.value; 198 value = instr.value;
@@ -230,23 +250,31 @@ union Instruction {
230 std::memcpy(&result, &imm, sizeof(imm)); 250 std::memcpy(&result, &imm, sizeof(imm));
231 return result; 251 return result;
232 } 252 }
253
254 s32 GetSignedImm20_20() const {
255 u32 immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
256 // Sign extend the 20-bit value.
257 u32 mask = 1U << (20 - 1);
258 return static_cast<s32>((immediate ^ mask) - mask);
259 }
233 } alu; 260 } alu;
234 261
235 union { 262 union {
236 BitField<39, 5, u64> shift_amount; 263 BitField<39, 5, u64> shift_amount;
237 BitField<20, 19, u64> immediate_low;
238 BitField<56, 1, u64> immediate_high;
239 BitField<48, 1, u64> negate_b; 264 BitField<48, 1, u64> negate_b;
240 BitField<49, 1, u64> negate_a; 265 BitField<49, 1, u64> negate_a;
266 } iscadd;
241 267
242 s32 GetImmediate() const { 268 union {
243 u32 immediate = static_cast<u32>(immediate_low | (immediate_high << 19)); 269 BitField<20, 8, u64> shift_position;
244 // Sign extend the 20-bit value. 270 BitField<28, 8, u64> shift_length;
245 u32 mask = 1U << (20 - 1); 271 BitField<48, 1, u64> negate_b;
246 return static_cast<s32>((immediate ^ mask) - mask); 272 BitField<49, 1, u64> negate_a;
247 }
248 273
249 } iscadd; 274 u64 GetLeftShiftValue() const {
275 return 32 - (shift_position + shift_length);
276 }
277 } bfe;
250 278
251 union { 279 union {
252 BitField<48, 1, u64> negate_b; 280 BitField<48, 1, u64> negate_b;
@@ -254,6 +282,11 @@ union Instruction {
254 } ffma; 282 } ffma;
255 283
256 union { 284 union {
285 BitField<48, 3, UniformType> type;
286 BitField<44, 2, u64> unknown;
287 } ld_c;
288
289 union {
257 BitField<0, 3, u64> pred0; 290 BitField<0, 3, u64> pred0;
258 BitField<3, 3, u64> pred3; 291 BitField<3, 3, u64> pred3;
259 BitField<7, 1, u64> abs_a; 292 BitField<7, 1, u64> abs_a;
@@ -293,11 +326,20 @@ union Instruction {
293 326
294 union { 327 union {
295 BitField<10, 2, Register::Size> size; 328 BitField<10, 2, Register::Size> size;
296 BitField<13, 1, u64> is_signed; 329 BitField<12, 1, u64> is_output_signed;
330 BitField<13, 1, u64> is_input_signed;
297 BitField<41, 2, u64> selector; 331 BitField<41, 2, u64> selector;
298 BitField<45, 1, u64> negate_a; 332 BitField<45, 1, u64> negate_a;
299 BitField<49, 1, u64> abs_a; 333 BitField<49, 1, u64> abs_a;
300 BitField<50, 1, u64> saturate_a; 334 BitField<50, 1, u64> saturate_a;
335
336 union {
337 BitField<39, 2, F2iRoundingOp> rounding;
338 } f2i;
339
340 union {
341 BitField<39, 4, F2fRoundingOp> rounding;
342 } f2f;
301 } conversion; 343 } conversion;
302 344
303 union { 345 union {
@@ -328,24 +370,34 @@ union Instruction {
328 } texs; 370 } texs;
329 371
330 union { 372 union {
331 BitField<20, 5, u64> target; 373 BitField<20, 24, u64> target;
332 BitField<5, 1, u64> constant_buffer; 374 BitField<5, 1, u64> constant_buffer;
333 375
334 s32 GetBranchTarget() const { 376 s32 GetBranchTarget() const {
335 // Sign extend the branch target offset 377 // Sign extend the branch target offset
336 u32 mask = 1U << (5 - 1); 378 u32 mask = 1U << (24 - 1);
337 u32 value = static_cast<u32>(target); 379 u32 value = static_cast<u32>(target);
338 // The branch offset is relative to the next instruction, so add 1 to it. 380 // The branch offset is relative to the next instruction and is stored in bytes, so
339 return static_cast<s32>((value ^ mask) - mask) + 1; 381 // divide it by the size of an instruction and add 1 to it.
382 return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1;
340 } 383 }
341 } bra; 384 } bra;
342 385
386 union {
387 BitField<20, 14, u64> offset;
388 BitField<34, 5, u64> index;
389 } cbuf34;
390
391 union {
392 BitField<20, 16, s64> offset;
393 BitField<36, 5, u64> index;
394 } cbuf36;
395
343 BitField<61, 1, u64> is_b_imm; 396 BitField<61, 1, u64> is_b_imm;
344 BitField<60, 1, u64> is_b_gpr; 397 BitField<60, 1, u64> is_b_gpr;
345 BitField<59, 1, u64> is_c_gpr; 398 BitField<59, 1, u64> is_c_gpr;
346 399
347 Attribute attribute; 400 Attribute attribute;
348 Uniform uniform;
349 Sampler sampler; 401 Sampler sampler;
350 402
351 u64 value; 403 u64 value;
@@ -358,8 +410,12 @@ class OpCode {
358public: 410public:
359 enum class Id { 411 enum class Id {
360 KIL, 412 KIL,
413 BFE_C,
414 BFE_R,
415 BFE_IMM,
361 BRA, 416 BRA,
362 LD_A, 417 LD_A,
418 LD_C,
363 ST_A, 419 ST_A,
364 TEX, 420 TEX,
365 TEXQ, // Texture Query 421 TEXQ, // Texture Query
@@ -402,12 +458,18 @@ public:
402 MOV_R, 458 MOV_R,
403 MOV_IMM, 459 MOV_IMM,
404 MOV32_IMM, 460 MOV32_IMM,
461 SHL_C,
462 SHL_R,
463 SHL_IMM,
405 SHR_C, 464 SHR_C,
406 SHR_R, 465 SHR_R,
407 SHR_IMM, 466 SHR_IMM,
408 FMNMX_C, 467 FMNMX_C,
409 FMNMX_R, 468 FMNMX_R,
410 FMNMX_IMM, 469 FMNMX_IMM,
470 IMNMX_C,
471 IMNMX_R,
472 IMNMX_IMM,
411 FSETP_C, // Set Predicate 473 FSETP_C, // Set Predicate
412 FSETP_R, 474 FSETP_R,
413 FSETP_IMM, 475 FSETP_IMM,
@@ -418,12 +480,18 @@ public:
418 ISETP_IMM, 480 ISETP_IMM,
419 ISETP_R, 481 ISETP_R,
420 PSETP, 482 PSETP,
483 XMAD_IMM,
484 XMAD_CR,
485 XMAD_RC,
486 XMAD_RR,
421 }; 487 };
422 488
423 enum class Type { 489 enum class Type {
424 Trivial, 490 Trivial,
425 Arithmetic, 491 Arithmetic,
492 Bfe,
426 Logic, 493 Logic,
494 Shift,
427 ScaledAdd, 495 ScaledAdd,
428 Ffma, 496 Ffma,
429 Flow, 497 Flow,
@@ -530,6 +598,7 @@ private:
530 INST("111000110011----", Id::KIL, Type::Flow, "KIL"), 598 INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
531 INST("111000100100----", Id::BRA, Type::Flow, "BRA"), 599 INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
532 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), 600 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
601 INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
533 INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), 602 INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
534 INST("1100000000111---", Id::TEX, Type::Memory, "TEX"), 603 INST("1100000000111---", Id::TEX, Type::Memory, "TEX"),
535 INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"), 604 INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
@@ -558,20 +627,29 @@ private:
558 INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"), 627 INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"),
559 INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"), 628 INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"),
560 INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"), 629 INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"),
561 INST("0100110010110---", Id::F2I_C, Type::Arithmetic, "F2I_C"), 630 INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"),
562 INST("0101110010110---", Id::F2I_R, Type::Arithmetic, "F2I_R"), 631 INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"),
563 INST("0011100-10110---", Id::F2I_IMM, Type::Arithmetic, "F2I_IMM"), 632 INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"),
564 INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), 633 INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
565 INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), 634 INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
566 INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), 635 INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
567 INST("000000010000----", Id::MOV32_IMM, Type::Arithmetic, "MOV32_IMM"), 636 INST("000000010000----", Id::MOV32_IMM, Type::Arithmetic, "MOV32_IMM"),
568 INST("0100110000101---", Id::SHR_C, Type::Arithmetic, "SHR_C"),
569 INST("0101110000101---", Id::SHR_R, Type::Arithmetic, "SHR_R"),
570 INST("0011100-00101---", Id::SHR_IMM, Type::Arithmetic, "SHR_IMM"),
571 INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), 637 INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
572 INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), 638 INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
573 INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"), 639 INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
640 INST("0100110000100---", Id::IMNMX_C, Type::Arithmetic, "FMNMX_IMM"),
641 INST("0101110000100---", Id::IMNMX_R, Type::Arithmetic, "FMNMX_IMM"),
642 INST("0011100-00100---", Id::IMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
643 INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
644 INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
645 INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
574 INST("000001----------", Id::LOP32I, Type::Logic, "LOP32I"), 646 INST("000001----------", Id::LOP32I, Type::Logic, "LOP32I"),
647 INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
648 INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
649 INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),
650 INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"),
651 INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"),
652 INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"),
575 INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), 653 INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
576 INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), 654 INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
577 INST("01110001-1000---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), 655 INST("01110001-1000---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
@@ -588,6 +666,10 @@ private:
588 INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"), 666 INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"),
589 INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"), 667 INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"),
590 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), 668 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
669 INST("0011011-00------", Id::XMAD_IMM, Type::Arithmetic, "XMAD_IMM"),
670 INST("0100111---------", Id::XMAD_CR, Type::Arithmetic, "XMAD_CR"),
671 INST("010100010-------", Id::XMAD_RC, Type::Arithmetic, "XMAD_RC"),
672 INST("0101101100------", Id::XMAD_RR, Type::Arithmetic, "XMAD_RR"),
591 }; 673 };
592#undef INST 674#undef INST
593 std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { 675 std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 756518ee7..66351fe6e 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -26,6 +26,10 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
26 ASSERT(format != RenderTargetFormat::NONE); 26 ASSERT(format != RenderTargetFormat::NONE);
27 27
28 switch (format) { 28 switch (format) {
29 case RenderTargetFormat::RGBA32_FLOAT:
30 return 16;
31 case RenderTargetFormat::RGBA16_FLOAT:
32 return 8;
29 case RenderTargetFormat::RGBA8_UNORM: 33 case RenderTargetFormat::RGBA8_UNORM:
30 case RenderTargetFormat::RGB10_A2_UNORM: 34 case RenderTargetFormat::RGB10_A2_UNORM:
31 return 4; 35 return 4;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index f168a5171..5852b9619 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -15,10 +15,12 @@ namespace Tegra {
15 15
16enum class RenderTargetFormat : u32 { 16enum class RenderTargetFormat : u32 {
17 NONE = 0x0, 17 NONE = 0x0,
18 RGBA32_FLOAT = 0xC0,
18 RGBA16_FLOAT = 0xCA, 19 RGBA16_FLOAT = 0xCA,
19 RGB10_A2_UNORM = 0xD1, 20 RGB10_A2_UNORM = 0xD1,
20 RGBA8_UNORM = 0xD5, 21 RGBA8_UNORM = 0xD5,
21 RGBA8_SRGB = 0xD6, 22 RGBA8_SRGB = 0xD6,
23 R11G11B10_FLOAT = 0xE0,
22}; 24};
23 25
24/// Returns the number of bytes per pixel of each rendertarget format. 26/// Returns the number of bytes per pixel of each rendertarget format.
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 0a33868b7..2e90ebcf4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -196,8 +196,10 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
196 auto& gpu = Core::System().GetInstance().GPU().Maxwell3D(); 196 auto& gpu = Core::System().GetInstance().GPU().Maxwell3D();
197 ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); 197 ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!");
198 198
199 // Next available bindpoint to use when uploading the const buffers to the GLSL shaders. 199 // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
200 // shaders.
200 u32 current_constbuffer_bindpoint = 0; 201 u32 current_constbuffer_bindpoint = 0;
202 u32 current_texture_bindpoint = 0;
201 203
202 for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { 204 for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
203 auto& shader_config = gpu.regs.shader_config[index]; 205 auto& shader_config = gpu.regs.shader_config[index];
@@ -258,6 +260,11 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
258 current_constbuffer_bindpoint = 260 current_constbuffer_bindpoint =
259 SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, 261 SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
260 current_constbuffer_bindpoint, shader_resources.const_buffer_entries); 262 current_constbuffer_bindpoint, shader_resources.const_buffer_entries);
263
264 // Configure the textures for this shader stage.
265 current_texture_bindpoint =
266 SetupTextures(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
267 current_texture_bindpoint, shader_resources.texture_samplers);
261 } 268 }
262 269
263 shader_program_manager->UseTrivialGeometryShader(); 270 shader_program_manager->UseTrivialGeometryShader();
@@ -341,9 +348,6 @@ void RasterizerOpenGL::DrawArrays() {
341 // TODO(bunnei): Sync framebuffer_scale uniform here 348 // TODO(bunnei): Sync framebuffer_scale uniform here
342 // TODO(bunnei): Sync scissorbox uniform(s) here 349 // TODO(bunnei): Sync scissorbox uniform(s) here
343 350
344 // Sync and bind the texture surfaces
345 BindTextures();
346
347 // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable 351 // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
348 // scissor test to prevent drawing outside of the framebuffer region 352 // scissor test to prevent drawing outside of the framebuffer region
349 state.scissor.enabled = true; 353 state.scissor.enabled = true;
@@ -447,39 +451,6 @@ void RasterizerOpenGL::DrawArrays() {
447 } 451 }
448} 452}
449 453
450void RasterizerOpenGL::BindTextures() {
451 using Regs = Tegra::Engines::Maxwell3D::Regs;
452 auto& maxwell3d = Core::System::GetInstance().GPU().Get3DEngine();
453
454 // Each Maxwell shader stage can have an arbitrary number of textures, but we're limited to a
455 // certain number in OpenGL. We try to only use the minimum amount of host textures by not
456 // keeping a 1:1 relation between guest texture ids and host texture ids, ie, guest texture id 8
457 // can be host texture id 0 if it's the only texture used in the guest shader program.
458 u32 host_texture_index = 0;
459 for (u32 stage = 0; stage < Regs::MaxShaderStage; ++stage) {
460 ASSERT(host_texture_index < texture_samplers.size());
461 const auto textures = maxwell3d.GetStageTextures(static_cast<Regs::ShaderStage>(stage));
462 for (unsigned texture_index = 0; texture_index < textures.size(); ++texture_index) {
463 const auto& texture = textures[texture_index];
464
465 if (texture.enabled) {
466 texture_samplers[host_texture_index].SyncWithConfig(texture.tsc);
467 Surface surface = res_cache.GetTextureSurface(texture);
468 if (surface != nullptr) {
469 state.texture_units[host_texture_index].texture_2d = surface->texture.handle;
470 } else {
471 // Can occur when texture addr is null or its memory is unmapped/invalid
472 state.texture_units[texture_index].texture_2d = 0;
473 }
474
475 ++host_texture_index;
476 } else {
477 state.texture_units[texture_index].texture_2d = 0;
478 }
479 }
480 }
481}
482
483void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) { 454void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {
484 const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; 455 const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
485 switch (method) { 456 switch (method) {
@@ -654,7 +625,16 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
654 buffer_draw_state.bindpoint = current_bindpoint + bindpoint; 625 buffer_draw_state.bindpoint = current_bindpoint + bindpoint;
655 626
656 boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); 627 boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address);
657 std::vector<u8> data(used_buffer.GetSize() * sizeof(float)); 628
629 std::vector<u8> data;
630 if (used_buffer.IsIndirect()) {
631 // Buffer is accessed indirectly, so upload the entire thing
632 data.resize(buffer.size * sizeof(float));
633 } else {
634 // Buffer is accessed directly, upload just what we use
635 data.resize(used_buffer.GetSize() * sizeof(float));
636 }
637
658 Memory::ReadBlock(*addr, data.data(), data.size()); 638 Memory::ReadBlock(*addr, data.data(), data.size());
659 639
660 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo); 640 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo);
@@ -674,6 +654,52 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
674 return current_bindpoint + entries.size(); 654 return current_bindpoint + entries.size();
675} 655}
676 656
657u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit,
658 const std::vector<GLShader::SamplerEntry>& entries) {
659 auto& gpu = Core::System::GetInstance().GPU();
660 auto& maxwell3d = gpu.Get3DEngine();
661
662 ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage),
663 "Attempted to upload textures of disabled shader stage");
664
665 ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units),
666 "Exceeded the number of active textures.");
667
668 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
669 const auto& entry = entries[bindpoint];
670 u32 current_bindpoint = current_unit + bindpoint;
671
672 // Bind the uniform to the sampler.
673 GLint uniform = glGetUniformLocation(program, entry.GetName().c_str());
674 ASSERT(uniform != -1);
675 glProgramUniform1i(program, uniform, current_bindpoint);
676
677 const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
678 ASSERT(texture.enabled);
679
680 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
681 Surface surface = res_cache.GetTextureSurface(texture);
682 if (surface != nullptr) {
683 state.texture_units[current_bindpoint].texture_2d = surface->texture.handle;
684 state.texture_units[current_bindpoint].swizzle.r =
685 MaxwellToGL::SwizzleSource(texture.tic.x_source);
686 state.texture_units[current_bindpoint].swizzle.g =
687 MaxwellToGL::SwizzleSource(texture.tic.y_source);
688 state.texture_units[current_bindpoint].swizzle.b =
689 MaxwellToGL::SwizzleSource(texture.tic.z_source);
690 state.texture_units[current_bindpoint].swizzle.a =
691 MaxwellToGL::SwizzleSource(texture.tic.w_source);
692 } else {
693 // Can occur when texture addr is null or its memory is unmapped/invalid
694 state.texture_units[current_bindpoint].texture_2d = 0;
695 }
696 }
697
698 state.Apply();
699
700 return current_unit + entries.size();
701}
702
677void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, 703void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
678 const Surface& depth_surface, bool has_stencil) { 704 const Surface& depth_surface, bool has_stencil) {
679 state.draw.draw_framebuffer = framebuffer.handle; 705 state.draw.draw_framebuffer = framebuffer.handle;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 4b915c76a..d3f0558ed 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -80,9 +80,6 @@ private:
80 void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface, 80 void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface,
81 bool has_stencil); 81 bool has_stencil);
82 82
83 /// Binds the required textures to OpenGL before drawing a batch.
84 void BindTextures();
85
86 /* 83 /*
87 * Configures the current constbuffers to use for the draw command. 84 * Configures the current constbuffers to use for the draw command.
88 * @param stage The shader stage to configure buffers for. 85 * @param stage The shader stage to configure buffers for.
@@ -95,6 +92,17 @@ private:
95 u32 current_bindpoint, 92 u32 current_bindpoint,
96 const std::vector<GLShader::ConstBufferEntry>& entries); 93 const std::vector<GLShader::ConstBufferEntry>& entries);
97 94
95 /*
96 * Configures the current textures to use for the draw command.
97 * @param stage The shader stage to configure textures for.
98 * @param program The OpenGL program object that contains the specified stage.
99 * @param current_unit The offset at which to start counting unused texture units.
100 * @param entries Vector describing the textures that are actually used in the guest shader.
101 * @returns The next available bindpoint for use in the next shader stage.
102 */
103 u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program,
104 u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries);
105
98 /// Syncs the viewport to match the guest state 106 /// Syncs the viewport to match the guest state
99 void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale); 107 void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale);
100 108
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index d6048f639..df2474ea2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -50,6 +50,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
50 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5 50 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5
51 {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8 51 {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8
52 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F 52 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
53 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
53 {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1 54 {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1
54 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 55 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
55 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 56 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
@@ -60,8 +61,10 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
60 const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); 61 const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
61 if (type == SurfaceType::ColorTexture) { 62 if (type == SurfaceType::ColorTexture) {
62 ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); 63 ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
63 // For now only UNORM components are supported, or RGBA16F which is type FLOAT 64 // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which are
64 ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F); 65 // type FLOAT
66 ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F ||
67 pixel_format == PixelFormat::R11FG11FB10F);
65 return tex_format_tuples[static_cast<unsigned int>(pixel_format)]; 68 return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
66 } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { 69 } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
67 // TODO(Subv): Implement depth formats 70 // TODO(Subv): Implement depth formats
@@ -110,11 +113,12 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
110 Tegra::GPUVAddr), 113 Tegra::GPUVAddr),
111 SurfaceParams::MaxPixelFormat> 114 SurfaceParams::MaxPixelFormat>
112 morton_to_gl_fns = { 115 morton_to_gl_fns = {
113 MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, 116 MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
114 MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>, 117 MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>,
115 MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>, 118 MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>,
116 MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, 119 MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::DXT1>,
117 MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, 120 MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>,
121 MortonCopy<true, PixelFormat::DXN1>,
118}; 122};
119 123
120static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, 124static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr,
@@ -127,6 +131,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
127 MortonCopy<false, PixelFormat::A1B5G5R5>, 131 MortonCopy<false, PixelFormat::A1B5G5R5>,
128 MortonCopy<false, PixelFormat::R8>, 132 MortonCopy<false, PixelFormat::R8>,
129 MortonCopy<false, PixelFormat::RGBA16F>, 133 MortonCopy<false, PixelFormat::RGBA16F>,
134 MortonCopy<false, PixelFormat::R11FG11FB10F>,
130 // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported 135 // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported
131 nullptr, 136 nullptr,
132 nullptr, 137 nullptr,
@@ -164,60 +169,10 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
164static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex, 169static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex,
165 const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type, 170 const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type,
166 GLuint read_fb_handle, GLuint draw_fb_handle) { 171 GLuint read_fb_handle, GLuint draw_fb_handle) {
167 OpenGLState state = OpenGLState::GetCurState();
168
169 OpenGLState prev_state = state;
170 SCOPE_EXIT({ prev_state.Apply(); });
171
172 // Make sure textures aren't bound to texture units, since going to bind them to framebuffer
173 // components
174 state.ResetTexture(src_tex);
175 state.ResetTexture(dst_tex);
176
177 state.draw.read_framebuffer = read_fb_handle;
178 state.draw.draw_framebuffer = draw_fb_handle;
179 state.Apply();
180
181 u32 buffers = 0;
182
183 if (type == SurfaceType::ColorTexture) {
184 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
185 0);
186 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
187 0);
188
189 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
190 0);
191 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
192 0);
193
194 buffers = GL_COLOR_BUFFER_BIT;
195 } else if (type == SurfaceType::Depth) {
196 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
197 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
198 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
199
200 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
201 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
202 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
203
204 buffers = GL_DEPTH_BUFFER_BIT;
205 } else if (type == SurfaceType::DepthStencil) {
206 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
207 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
208 src_tex, 0);
209
210 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
211 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
212 dst_tex, 0);
213
214 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
215 }
216
217 glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left,
218 dst_rect.bottom, dst_rect.right, dst_rect.top, buffers,
219 buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
220 172
173 glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex,
174 GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(),
175 src_rect.GetHeight(), 0);
221 return true; 176 return true;
222} 177}
223 178
@@ -1097,16 +1052,19 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
1097 1052
1098 params.UpdateParams(); 1053 params.UpdateParams();
1099 1054
1100 if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0 || 1055 if (params.GetActualWidth() % 8 != 0 || params.GetActualHeight() % 8 != 0 ||
1101 params.stride != params.width) { 1056 params.stride != params.width) {
1102 Surface src_surface; 1057 Surface src_surface;
1103 MathUtil::Rectangle<u32> rect; 1058 MathUtil::Rectangle<u32> rect;
1104 std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true); 1059 std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true);
1105 1060
1061 rect = rect.Scale(params.GetCompresssionFactor());
1062
1106 params.res_scale = src_surface->res_scale; 1063 params.res_scale = src_surface->res_scale;
1107 Surface tmp_surface = CreateSurface(params); 1064 Surface tmp_surface = CreateSurface(params);
1108 BlitTextures(src_surface->texture.handle, rect, tmp_surface->texture.handle, 1065
1109 tmp_surface->GetScaledRect(), 1066 auto dst_rect = tmp_surface->GetScaledRect().Scale(params.GetCompresssionFactor());
1067 BlitTextures(src_surface->texture.handle, rect, tmp_surface->texture.handle, dst_rect,
1110 SurfaceParams::GetFormatType(params.pixel_format), read_framebuffer.handle, 1068 SurfaceParams::GetFormatType(params.pixel_format), read_framebuffer.handle,
1111 draw_framebuffer.handle); 1069 draw_framebuffer.handle);
1112 1070
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 6f08678ab..0f43e863d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -60,10 +60,11 @@ struct SurfaceParams {
60 A1B5G5R5 = 3, 60 A1B5G5R5 = 3,
61 R8 = 4, 61 R8 = 4,
62 RGBA16F = 5, 62 RGBA16F = 5,
63 DXT1 = 6, 63 R11FG11FB10F = 6,
64 DXT23 = 7, 64 DXT1 = 7,
65 DXT45 = 8, 65 DXT23 = 8,
66 DXN1 = 9, // This is also known as BC4 66 DXT45 = 9,
67 DXN1 = 10, // This is also known as BC4
67 68
68 Max, 69 Max,
69 Invalid = 255, 70 Invalid = 255,
@@ -104,7 +105,8 @@ struct SurfaceParams {
104 1, // A2B10G10R10 105 1, // A2B10G10R10
105 1, // A1B5G5R5 106 1, // A1B5G5R5
106 1, // R8 107 1, // R8
107 2, // RGBA16F 108 1, // RGBA16F
109 1, // R11FG11FB10F
108 4, // DXT1 110 4, // DXT1
109 4, // DXT23 111 4, // DXT23
110 4, // DXT45 112 4, // DXT45
@@ -129,6 +131,7 @@ struct SurfaceParams {
129 16, // A1B5G5R5 131 16, // A1B5G5R5
130 8, // R8 132 8, // R8
131 64, // RGBA16F 133 64, // RGBA16F
134 32, // R11FG11FB10F
132 64, // DXT1 135 64, // DXT1
133 128, // DXT23 136 128, // DXT23
134 128, // DXT45 137 128, // DXT45
@@ -151,6 +154,8 @@ struct SurfaceParams {
151 return PixelFormat::A2B10G10R10; 154 return PixelFormat::A2B10G10R10;
152 case Tegra::RenderTargetFormat::RGBA16_FLOAT: 155 case Tegra::RenderTargetFormat::RGBA16_FLOAT:
153 return PixelFormat::RGBA16F; 156 return PixelFormat::RGBA16F;
157 case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
158 return PixelFormat::R11FG11FB10F;
154 default: 159 default:
155 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); 160 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
156 UNREACHABLE(); 161 UNREACHABLE();
@@ -182,6 +187,8 @@ struct SurfaceParams {
182 return PixelFormat::R8; 187 return PixelFormat::R8;
183 case Tegra::Texture::TextureFormat::R16_G16_B16_A16: 188 case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
184 return PixelFormat::RGBA16F; 189 return PixelFormat::RGBA16F;
190 case Tegra::Texture::TextureFormat::BF10GF11RF11:
191 return PixelFormat::R11FG11FB10F;
185 case Tegra::Texture::TextureFormat::DXT1: 192 case Tegra::Texture::TextureFormat::DXT1:
186 return PixelFormat::DXT1; 193 return PixelFormat::DXT1;
187 case Tegra::Texture::TextureFormat::DXT23: 194 case Tegra::Texture::TextureFormat::DXT23:
@@ -211,6 +218,8 @@ struct SurfaceParams {
211 return Tegra::Texture::TextureFormat::R8; 218 return Tegra::Texture::TextureFormat::R8;
212 case PixelFormat::RGBA16F: 219 case PixelFormat::RGBA16F:
213 return Tegra::Texture::TextureFormat::R16_G16_B16_A16; 220 return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
221 case PixelFormat::R11FG11FB10F:
222 return Tegra::Texture::TextureFormat::BF10GF11RF11;
214 case PixelFormat::DXT1: 223 case PixelFormat::DXT1:
215 return Tegra::Texture::TextureFormat::DXT1; 224 return Tegra::Texture::TextureFormat::DXT1;
216 case PixelFormat::DXT23: 225 case PixelFormat::DXT23:
@@ -243,6 +252,7 @@ struct SurfaceParams {
243 case Tegra::RenderTargetFormat::RGB10_A2_UNORM: 252 case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
244 return ComponentType::UNorm; 253 return ComponentType::UNorm;
245 case Tegra::RenderTargetFormat::RGBA16_FLOAT: 254 case Tegra::RenderTargetFormat::RGBA16_FLOAT:
255 case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
246 return ComponentType::Float; 256 return ComponentType::Float;
247 default: 257 default:
248 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); 258 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index eb8d37c9b..94c6bc4b2 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -20,7 +20,6 @@ using Tegra::Shader::OpCode;
20using Tegra::Shader::Register; 20using Tegra::Shader::Register;
21using Tegra::Shader::Sampler; 21using Tegra::Shader::Sampler;
22using Tegra::Shader::SubOp; 22using Tegra::Shader::SubOp;
23using Tegra::Shader::Uniform;
24 23
25constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; 24constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
26 25
@@ -365,11 +364,9 @@ public:
365 } 364 }
366 365
367 /// Generates code representing a uniform (C buffer) register, interpreted as the input type. 366 /// Generates code representing a uniform (C buffer) register, interpreted as the input type.
368 std::string GetUniform(const Uniform& uniform, GLSLRegister::Type type) { 367 std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) {
369 declr_const_buffers[uniform.index].MarkAsUsed(static_cast<unsigned>(uniform.index), 368 declr_const_buffers[index].MarkAsUsed(index, offset, stage);
370 static_cast<unsigned>(uniform.offset), stage); 369 std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset) + ']';
371 std::string value =
372 'c' + std::to_string(uniform.index) + '[' + std::to_string(uniform.offset) + ']';
373 370
374 if (type == GLSLRegister::Type::Float) { 371 if (type == GLSLRegister::Type::Float) {
375 return value; 372 return value;
@@ -380,10 +377,19 @@ public:
380 } 377 }
381 } 378 }
382 379
383 /// Generates code representing a uniform (C buffer) register, interpreted as the type of the 380 std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg,
384 /// destination register. 381 GLSLRegister::Type type) {
385 std::string GetUniform(const Uniform& uniform, const Register& dest_reg) { 382 declr_const_buffers[index].MarkAsUsedIndirect(index, stage);
386 return GetUniform(uniform, regs[dest_reg].GetActiveType()); 383 std::string value = 'c' + std::to_string(index) + "[(floatBitsToInt(" +
384 GetRegister(index_reg, 0) + ") + " + std::to_string(offset) + ") / 4]";
385
386 if (type == GLSLRegister::Type::Float) {
387 return value;
388 } else if (type == GLSLRegister::Type::Integer) {
389 return "floatBitsToInt(" + value + ')';
390 } else {
391 UNREACHABLE();
392 }
387 } 393 }
388 394
389 /// Add declarations for registers 395 /// Add declarations for registers
@@ -425,6 +431,14 @@ public:
425 ++const_buffer_layout; 431 ++const_buffer_layout;
426 } 432 }
427 declarations.AddNewLine(); 433 declarations.AddNewLine();
434
435 // Append the sampler2D array for the used textures.
436 size_t num_samplers = GetSamplers().size();
437 if (num_samplers > 0) {
438 declarations.AddLine("uniform sampler2D " + SamplerEntry::GetArrayName(stage) + '[' +
439 std::to_string(num_samplers) + "];");
440 declarations.AddNewLine();
441 }
428 } 442 }
429 443
430 /// Returns a list of constant buffer declarations 444 /// Returns a list of constant buffer declarations
@@ -435,6 +449,32 @@ public:
435 return result; 449 return result;
436 } 450 }
437 451
452 /// Returns a list of samplers used in the shader
453 std::vector<SamplerEntry> GetSamplers() const {
454 return used_samplers;
455 }
456
457 /// Returns the GLSL sampler used for the input shader sampler, and creates a new one if
458 /// necessary.
459 std::string AccessSampler(const Sampler& sampler) {
460 size_t offset = static_cast<size_t>(sampler.index.Value());
461
462 // If this sampler has already been used, return the existing mapping.
463 auto itr =
464 std::find_if(used_samplers.begin(), used_samplers.end(),
465 [&](const SamplerEntry& entry) { return entry.GetOffset() == offset; });
466
467 if (itr != used_samplers.end()) {
468 return itr->GetName();
469 }
470
471 // Otherwise create a new mapping for this sampler
472 size_t next_index = used_samplers.size();
473 SamplerEntry entry{stage, offset, next_index};
474 used_samplers.emplace_back(entry);
475 return entry.GetName();
476 }
477
438private: 478private:
439 /// Build GLSL conversion function, e.g. floatBitsToInt, intBitsToFloat, etc. 479 /// Build GLSL conversion function, e.g. floatBitsToInt, intBitsToFloat, etc.
440 const std::string GetGLSLConversionFunc(GLSLRegister::Type src, GLSLRegister::Type dest) const { 480 const std::string GetGLSLConversionFunc(GLSLRegister::Type src, GLSLRegister::Type dest) const {
@@ -544,6 +584,7 @@ private:
544 std::set<Attribute::Index> declr_input_attribute; 584 std::set<Attribute::Index> declr_input_attribute;
545 std::set<Attribute::Index> declr_output_attribute; 585 std::set<Attribute::Index> declr_output_attribute;
546 std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers; 586 std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
587 std::vector<SamplerEntry> used_samplers;
547 const Maxwell3D::Regs::ShaderStage& stage; 588 const Maxwell3D::Regs::ShaderStage& stage;
548}; 589};
549 590
@@ -563,7 +604,7 @@ public:
563 604
564 /// Returns entries in the shader that are useful for external functions 605 /// Returns entries in the shader that are useful for external functions
565 ShaderEntries GetEntries() const { 606 ShaderEntries GetEntries() const {
566 return {regs.GetConstBuffersDeclarations()}; 607 return {regs.GetConstBuffersDeclarations(), regs.GetSamplers()};
567 } 608 }
568 609
569private: 610private:
@@ -585,12 +626,8 @@ private:
585 } 626 }
586 627
587 /// Generates code representing a texture sampler. 628 /// Generates code representing a texture sampler.
588 std::string GetSampler(const Sampler& sampler) const { 629 std::string GetSampler(const Sampler& sampler) {
589 // TODO(Subv): Support more than just texture sampler 0 630 return regs.AccessSampler(sampler);
590 ASSERT_MSG(sampler.index == Sampler::Index::Sampler_0, "unsupported");
591 const unsigned index{static_cast<unsigned>(sampler.index.Value()) -
592 static_cast<unsigned>(Sampler::Index::Sampler_0)};
593 return "tex[" + std::to_string(index) + ']';
594 } 631 }
595 632
596 /** 633 /**
@@ -747,7 +784,8 @@ private:
747 if (instr.is_b_gpr) { 784 if (instr.is_b_gpr) {
748 op_b += regs.GetRegisterAsFloat(instr.gpr20); 785 op_b += regs.GetRegisterAsFloat(instr.gpr20);
749 } else { 786 } else {
750 op_b += regs.GetUniform(instr.uniform, instr.gpr0); 787 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
788 GLSLRegister::Type::Float);
751 } 789 }
752 } 790 }
753 791
@@ -850,8 +888,33 @@ private:
850 } 888 }
851 break; 889 break;
852 } 890 }
891 case OpCode::Type::Bfe: {
892 ASSERT_MSG(!instr.bfe.negate_b, "Unimplemented");
893
894 std::string op_a = instr.bfe.negate_a ? "-" : "";
895 op_a += regs.GetRegisterAsInteger(instr.gpr8);
896
897 switch (opcode->GetId()) {
898 case OpCode::Id::BFE_IMM: {
899 std::string inner_shift =
900 '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')';
901 std::string outer_shift =
902 '(' + inner_shift + " >> " +
903 std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')';
904
905 regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1);
906 break;
907 }
908 default: {
909 NGLOG_CRITICAL(HW_GPU, "Unhandled BFE instruction: {}", opcode->GetName());
910 UNREACHABLE();
911 }
912 }
913
914 break;
915 }
853 case OpCode::Type::Logic: { 916 case OpCode::Type::Logic: {
854 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, false); 917 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
855 918
856 if (instr.alu.lop.invert_a) 919 if (instr.alu.lop.invert_a)
857 op_a = "~(" + op_a + ')'; 920 op_a = "~(" + op_a + ')';
@@ -865,17 +928,17 @@ private:
865 928
866 switch (instr.alu.lop.operation) { 929 switch (instr.alu.lop.operation) {
867 case Tegra::Shader::LogicOperation::And: { 930 case Tegra::Shader::LogicOperation::And: {
868 regs.SetRegisterToInteger(instr.gpr0, false, 0, 931 regs.SetRegisterToInteger(instr.gpr0, true, 0,
869 '(' + op_a + " & " + std::to_string(imm) + ')', 1, 1); 932 '(' + op_a + " & " + std::to_string(imm) + ')', 1, 1);
870 break; 933 break;
871 } 934 }
872 case Tegra::Shader::LogicOperation::Or: { 935 case Tegra::Shader::LogicOperation::Or: {
873 regs.SetRegisterToInteger(instr.gpr0, false, 0, 936 regs.SetRegisterToInteger(instr.gpr0, true, 0,
874 '(' + op_a + " | " + std::to_string(imm) + ')', 1, 1); 937 '(' + op_a + " | " + std::to_string(imm) + ')', 1, 1);
875 break; 938 break;
876 } 939 }
877 case Tegra::Shader::LogicOperation::Xor: { 940 case Tegra::Shader::LogicOperation::Xor: {
878 regs.SetRegisterToInteger(instr.gpr0, false, 0, 941 regs.SetRegisterToInteger(instr.gpr0, true, 0,
879 '(' + op_a + " ^ " + std::to_string(imm) + ')', 1, 1); 942 '(' + op_a + " ^ " + std::to_string(imm) + ')', 1, 1);
880 break; 943 break;
881 } 944 }
@@ -893,6 +956,36 @@ private:
893 } 956 }
894 break; 957 break;
895 } 958 }
959
960 case OpCode::Type::Shift: {
961 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
962 std::string op_b;
963
964 if (instr.is_b_imm) {
965 op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
966 } else {
967 if (instr.is_b_gpr) {
968 op_b += regs.GetRegisterAsInteger(instr.gpr20);
969 } else {
970 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
971 GLSLRegister::Type::Integer);
972 }
973 }
974
975 switch (opcode->GetId()) {
976 case OpCode::Id::SHL_C:
977 case OpCode::Id::SHL_R:
978 case OpCode::Id::SHL_IMM:
979 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1);
980 break;
981 default: {
982 NGLOG_CRITICAL(HW_GPU, "Unhandled shift instruction: {}", opcode->GetName());
983 UNREACHABLE();
984 }
985 }
986 break;
987 }
988
896 case OpCode::Type::ScaledAdd: { 989 case OpCode::Type::ScaledAdd: {
897 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); 990 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
898 991
@@ -902,12 +995,13 @@ private:
902 std::string op_b = instr.iscadd.negate_b ? "-" : ""; 995 std::string op_b = instr.iscadd.negate_b ? "-" : "";
903 996
904 if (instr.is_b_imm) { 997 if (instr.is_b_imm) {
905 op_b += '(' + std::to_string(instr.iscadd.GetImmediate()) + ')'; 998 op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
906 } else { 999 } else {
907 if (instr.is_b_gpr) { 1000 if (instr.is_b_gpr) {
908 op_b += regs.GetRegisterAsInteger(instr.gpr20); 1001 op_b += regs.GetRegisterAsInteger(instr.gpr20);
909 } else { 1002 } else {
910 op_b += regs.GetUniform(instr.uniform, instr.gpr0); 1003 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1004 GLSLRegister::Type::Integer);
911 } 1005 }
912 } 1006 }
913 1007
@@ -924,7 +1018,8 @@ private:
924 1018
925 switch (opcode->GetId()) { 1019 switch (opcode->GetId()) {
926 case OpCode::Id::FFMA_CR: { 1020 case OpCode::Id::FFMA_CR: {
927 op_b += regs.GetUniform(instr.uniform, instr.gpr0); 1021 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1022 GLSLRegister::Type::Float);
928 op_c += regs.GetRegisterAsFloat(instr.gpr39); 1023 op_c += regs.GetRegisterAsFloat(instr.gpr39);
929 break; 1024 break;
930 } 1025 }
@@ -935,7 +1030,8 @@ private:
935 } 1030 }
936 case OpCode::Id::FFMA_RC: { 1031 case OpCode::Id::FFMA_RC: {
937 op_b += regs.GetRegisterAsFloat(instr.gpr39); 1032 op_b += regs.GetRegisterAsFloat(instr.gpr39);
938 op_c += regs.GetUniform(instr.uniform, instr.gpr0); 1033 op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1034 GLSLRegister::Type::Float);
939 break; 1035 break;
940 } 1036 }
941 case OpCode::Id::FFMA_IMM: { 1037 case OpCode::Id::FFMA_IMM: {
@@ -962,18 +1058,20 @@ private:
962 ASSERT_MSG(!instr.conversion.selector, "Unimplemented"); 1058 ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
963 1059
964 std::string op_a = 1060 std::string op_a =
965 regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_signed); 1061 regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed);
966 1062
967 if (instr.conversion.abs_a) { 1063 if (instr.conversion.abs_a) {
968 op_a = "abs(" + op_a + ')'; 1064 op_a = "abs(" + op_a + ')';
969 } 1065 }
970 1066
971 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_signed, 0, op_a, 1, 1); 1067 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
1068 1);
972 break; 1069 break;
973 } 1070 }
974 case OpCode::Id::I2F_R: { 1071 case OpCode::Id::I2F_R: {
1072 ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
975 std::string op_a = 1073 std::string op_a =
976 regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_signed); 1074 regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed);
977 1075
978 if (instr.conversion.abs_a) { 1076 if (instr.conversion.abs_a) {
979 op_a = "abs(" + op_a + ')'; 1077 op_a = "abs(" + op_a + ')';
@@ -985,6 +1083,25 @@ private:
985 case OpCode::Id::F2F_R: { 1083 case OpCode::Id::F2F_R: {
986 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); 1084 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
987 1085
1086 switch (instr.conversion.f2f.rounding) {
1087 case Tegra::Shader::F2fRoundingOp::None:
1088 break;
1089 case Tegra::Shader::F2fRoundingOp::Floor:
1090 op_a = "floor(" + op_a + ')';
1091 break;
1092 case Tegra::Shader::F2fRoundingOp::Ceil:
1093 op_a = "ceil(" + op_a + ')';
1094 break;
1095 case Tegra::Shader::F2fRoundingOp::Trunc:
1096 op_a = "trunc(" + op_a + ')';
1097 break;
1098 default:
1099 NGLOG_CRITICAL(HW_GPU, "Unimplemented f2f rounding mode {}",
1100 static_cast<u32>(instr.conversion.f2f.rounding.Value()));
1101 UNREACHABLE();
1102 break;
1103 }
1104
988 if (instr.conversion.abs_a) { 1105 if (instr.conversion.abs_a) {
989 op_a = "abs(" + op_a + ')'; 1106 op_a = "abs(" + op_a + ')';
990 } 1107 }
@@ -992,6 +1109,42 @@ private:
992 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); 1109 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
993 break; 1110 break;
994 } 1111 }
1112 case OpCode::Id::F2I_R: {
1113 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
1114
1115 if (instr.conversion.abs_a) {
1116 op_a = "abs(" + op_a + ')';
1117 }
1118
1119 switch (instr.conversion.f2i.rounding) {
1120 case Tegra::Shader::F2iRoundingOp::None:
1121 break;
1122 case Tegra::Shader::F2iRoundingOp::Floor:
1123 op_a = "floor(" + op_a + ')';
1124 break;
1125 case Tegra::Shader::F2iRoundingOp::Ceil:
1126 op_a = "ceil(" + op_a + ')';
1127 break;
1128 case Tegra::Shader::F2iRoundingOp::Trunc:
1129 op_a = "trunc(" + op_a + ')';
1130 break;
1131 default:
1132 NGLOG_CRITICAL(HW_GPU, "Unimplemented f2i rounding mode {}",
1133 static_cast<u32>(instr.conversion.f2i.rounding.Value()));
1134 UNREACHABLE();
1135 break;
1136 }
1137
1138 if (instr.conversion.is_output_signed) {
1139 op_a = "int(" + op_a + ')';
1140 } else {
1141 op_a = "uint(" + op_a + ')';
1142 }
1143
1144 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
1145 1);
1146 break;
1147 }
995 default: { 1148 default: {
996 NGLOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}", opcode->GetName()); 1149 NGLOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}", opcode->GetName());
997 UNREACHABLE(); 1150 UNREACHABLE();
@@ -1000,23 +1153,47 @@ private:
1000 break; 1153 break;
1001 } 1154 }
1002 case OpCode::Type::Memory: { 1155 case OpCode::Type::Memory: {
1003 const Attribute::Index attribute = instr.attribute.fmt20.index;
1004
1005 switch (opcode->GetId()) { 1156 switch (opcode->GetId()) {
1006 case OpCode::Id::LD_A: { 1157 case OpCode::Id::LD_A: {
1007 ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); 1158 ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
1008 regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element, 1159 regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element,
1009 attribute); 1160 instr.attribute.fmt20.index);
1161 break;
1162 }
1163 case OpCode::Id::LD_C: {
1164 ASSERT_MSG(instr.ld_c.unknown == 0, "Unimplemented");
1165
1166 std::string op_a =
1167 regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, instr.gpr8,
1168 GLSLRegister::Type::Float);
1169 std::string op_b =
1170 regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, instr.gpr8,
1171 GLSLRegister::Type::Float);
1172
1173 switch (instr.ld_c.type.Value()) {
1174 case Tegra::Shader::UniformType::Single:
1175 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
1176 break;
1177
1178 case Tegra::Shader::UniformType::Double:
1179 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
1180 regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1);
1181 break;
1182
1183 default:
1184 NGLOG_CRITICAL(HW_GPU, "Unhandled type: {}",
1185 static_cast<unsigned>(instr.ld_c.type.Value()));
1186 UNREACHABLE();
1187 }
1010 break; 1188 break;
1011 } 1189 }
1012 case OpCode::Id::ST_A: { 1190 case OpCode::Id::ST_A: {
1013 ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); 1191 ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
1014 regs.SetOutputAttributeToRegister(attribute, instr.attribute.fmt20.element, 1192 regs.SetOutputAttributeToRegister(instr.attribute.fmt20.index,
1015 instr.gpr0); 1193 instr.attribute.fmt20.element, instr.gpr0);
1016 break; 1194 break;
1017 } 1195 }
1018 case OpCode::Id::TEX: { 1196 case OpCode::Id::TEX: {
1019 ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
1020 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); 1197 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
1021 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 1198 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
1022 const std::string sampler = GetSampler(instr.sampler); 1199 const std::string sampler = GetSampler(instr.sampler);
@@ -1029,7 +1206,7 @@ private:
1029 const std::string texture = "texture(" + sampler + ", coords)"; 1206 const std::string texture = "texture(" + sampler + ", coords)";
1030 1207
1031 size_t dest_elem{}; 1208 size_t dest_elem{};
1032 for (size_t elem = 0; elem < instr.attribute.fmt20.size; ++elem) { 1209 for (size_t elem = 0; elem < 4; ++elem) {
1033 if (!instr.tex.IsComponentEnabled(elem)) { 1210 if (!instr.tex.IsComponentEnabled(elem)) {
1034 // Skip disabled components 1211 // Skip disabled components
1035 continue; 1212 continue;
@@ -1042,7 +1219,6 @@ private:
1042 break; 1219 break;
1043 } 1220 }
1044 case OpCode::Id::TEXS: { 1221 case OpCode::Id::TEXS: {
1045 ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
1046 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); 1222 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
1047 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); 1223 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
1048 const std::string sampler = GetSampler(instr.sampler); 1224 const std::string sampler = GetSampler(instr.sampler);
@@ -1105,7 +1281,8 @@ private:
1105 if (instr.is_b_gpr) { 1281 if (instr.is_b_gpr) {
1106 op_b += regs.GetRegisterAsFloat(instr.gpr20); 1282 op_b += regs.GetRegisterAsFloat(instr.gpr20);
1107 } else { 1283 } else {
1108 op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Float); 1284 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1285 GLSLRegister::Type::Float);
1109 } 1286 }
1110 } 1287 }
1111 1288
@@ -1138,15 +1315,17 @@ private:
1138 } 1315 }
1139 case OpCode::Type::IntegerSetPredicate: { 1316 case OpCode::Type::IntegerSetPredicate: {
1140 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed); 1317 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
1318 std::string op_b;
1141 1319
1142 std::string op_b{}; 1320 if (instr.is_b_imm) {
1143 1321 op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
1144 ASSERT_MSG(!instr.is_b_imm, "ISETP_IMM not implemented");
1145
1146 if (instr.is_b_gpr) {
1147 op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed);
1148 } else { 1322 } else {
1149 op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer); 1323 if (instr.is_b_gpr) {
1324 op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed);
1325 } else {
1326 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1327 GLSLRegister::Type::Integer);
1328 }
1150 } 1329 }
1151 1330
1152 using Tegra::Shader::Pred; 1331 using Tegra::Shader::Pred;
@@ -1192,7 +1371,8 @@ private:
1192 if (instr.is_b_gpr) { 1371 if (instr.is_b_gpr) {
1193 op_b += regs.GetRegisterAsFloat(instr.gpr20); 1372 op_b += regs.GetRegisterAsFloat(instr.gpr20);
1194 } else { 1373 } else {
1195 op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Float); 1374 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1375 GLSLRegister::Type::Float);
1196 } 1376 }
1197 } 1377 }
1198 1378
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 254f6e2c3..b88d592b7 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -62,8 +62,6 @@ layout (std140) uniform fs_config {
62 vec4 viewport_flip; 62 vec4 viewport_flip;
63}; 63};
64 64
65uniform sampler2D tex[32];
66
67void main() { 65void main() {
68 exec_shader(); 66 exec_shader();
69} 67}
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 458032b5c..ed890e0f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -22,17 +22,28 @@ class ConstBufferEntry {
22 using Maxwell = Tegra::Engines::Maxwell3D::Regs; 22 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
23 23
24public: 24public:
25 void MarkAsUsed(unsigned index, unsigned offset, Maxwell::ShaderStage stage) { 25 void MarkAsUsed(u64 index, u64 offset, Maxwell::ShaderStage stage) {
26 is_used = true; 26 is_used = true;
27 this->index = index; 27 this->index = static_cast<unsigned>(index);
28 this->stage = stage;
29 max_offset = std::max(max_offset, static_cast<unsigned>(offset));
30 }
31
32 void MarkAsUsedIndirect(u64 index, Maxwell::ShaderStage stage) {
33 is_used = true;
34 is_indirect = true;
35 this->index = static_cast<unsigned>(index);
28 this->stage = stage; 36 this->stage = stage;
29 max_offset = std::max(max_offset, offset);
30 } 37 }
31 38
32 bool IsUsed() const { 39 bool IsUsed() const {
33 return is_used; 40 return is_used;
34 } 41 }
35 42
43 bool IsIndirect() const {
44 return is_indirect;
45 }
46
36 unsigned GetIndex() const { 47 unsigned GetIndex() const {
37 return index; 48 return index;
38 } 49 }
@@ -51,13 +62,54 @@ private:
51 }; 62 };
52 63
53 bool is_used{}; 64 bool is_used{};
65 bool is_indirect{};
54 unsigned index{}; 66 unsigned index{};
55 unsigned max_offset{}; 67 unsigned max_offset{};
56 Maxwell::ShaderStage stage; 68 Maxwell::ShaderStage stage;
57}; 69};
58 70
71class SamplerEntry {
72 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
73
74public:
75 SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index)
76 : offset(offset), stage(stage), sampler_index(index) {}
77
78 size_t GetOffset() const {
79 return offset;
80 }
81
82 size_t GetIndex() const {
83 return sampler_index;
84 }
85
86 Maxwell::ShaderStage GetStage() const {
87 return stage;
88 }
89
90 std::string GetName() const {
91 return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '[' +
92 std::to_string(sampler_index) + ']';
93 }
94
95 static std::string GetArrayName(Maxwell::ShaderStage stage) {
96 return TextureSamplerNames[static_cast<size_t>(stage)];
97 }
98
99private:
100 static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = {
101 "tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs",
102 };
103 /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
104 /// instruction.
105 size_t offset;
106 Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
107 size_t sampler_index; ///< Value used to index into the generated GLSL sampler array.
108};
109
59struct ShaderEntries { 110struct ShaderEntries {
60 std::vector<ConstBufferEntry> const_buffer_entries; 111 std::vector<ConstBufferEntry> const_buffer_entries;
112 std::vector<SamplerEntry> texture_samplers;
61}; 113};
62 114
63using ProgramResult = std::pair<std::string, ShaderEntries>; 115using ProgramResult = std::pair<std::string, ShaderEntries>;
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index ccdfc2718..7c00beb33 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -32,25 +32,6 @@ void SetShaderUniformBlockBindings(GLuint shader) {
32 sizeof(MaxwellUniformData)); 32 sizeof(MaxwellUniformData));
33} 33}
34 34
35void SetShaderSamplerBindings(GLuint shader) {
36 OpenGLState cur_state = OpenGLState::GetCurState();
37 GLuint old_program = std::exchange(cur_state.draw.shader_program, shader);
38 cur_state.Apply();
39
40 // Set the texture samplers to correspond to different texture units
41 for (u32 texture = 0; texture < NumTextureSamplers; ++texture) {
42 // Set the texture samplers to correspond to different texture units
43 std::string uniform_name = "tex[" + std::to_string(texture) + "]";
44 GLint uniform_tex = glGetUniformLocation(shader, uniform_name.c_str());
45 if (uniform_tex != -1) {
46 glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id);
47 }
48 }
49
50 cur_state.draw.shader_program = old_program;
51 cur_state.Apply();
52}
53
54} // namespace Impl 35} // namespace Impl
55 36
56void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { 37void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index e963b4b7e..4295c20a6 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -45,7 +45,6 @@ public:
45 shader.Create(program_result.first.c_str(), type); 45 shader.Create(program_result.first.c_str(), type);
46 program.Create(true, shader.handle); 46 program.Create(true, shader.handle);
47 Impl::SetShaderUniformBlockBindings(program.handle); 47 Impl::SetShaderUniformBlockBindings(program.handle);
48 Impl::SetShaderSamplerBindings(program.handle);
49 entries = program_result.second; 48 entries = program_result.second;
50 } 49 }
51 GLuint GetHandle() const { 50 GLuint GetHandle() const {
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index f91dfe36a..44f0c8a01 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -50,6 +50,10 @@ OpenGLState::OpenGLState() {
50 for (auto& texture_unit : texture_units) { 50 for (auto& texture_unit : texture_units) {
51 texture_unit.texture_2d = 0; 51 texture_unit.texture_2d = 0;
52 texture_unit.sampler = 0; 52 texture_unit.sampler = 0;
53 texture_unit.swizzle.r = GL_RED;
54 texture_unit.swizzle.g = GL_GREEN;
55 texture_unit.swizzle.b = GL_BLUE;
56 texture_unit.swizzle.a = GL_ALPHA;
53 } 57 }
54 58
55 lighting_lut.texture_buffer = 0; 59 lighting_lut.texture_buffer = 0;
@@ -200,6 +204,15 @@ void OpenGLState::Apply() const {
200 if (texture_units[i].sampler != cur_state.texture_units[i].sampler) { 204 if (texture_units[i].sampler != cur_state.texture_units[i].sampler) {
201 glBindSampler(i, texture_units[i].sampler); 205 glBindSampler(i, texture_units[i].sampler);
202 } 206 }
207 // Update the texture swizzle
208 if (texture_units[i].swizzle.r != cur_state.texture_units[i].swizzle.r ||
209 texture_units[i].swizzle.g != cur_state.texture_units[i].swizzle.g ||
210 texture_units[i].swizzle.b != cur_state.texture_units[i].swizzle.b ||
211 texture_units[i].swizzle.a != cur_state.texture_units[i].swizzle.a) {
212 std::array<GLint, 4> mask = {texture_units[i].swizzle.r, texture_units[i].swizzle.g,
213 texture_units[i].swizzle.b, texture_units[i].swizzle.a};
214 glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
215 }
203 } 216 }
204 217
205 // Constbuffers 218 // Constbuffers
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 75c08e645..839e50e93 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -85,6 +85,12 @@ public:
85 struct { 85 struct {
86 GLuint texture_2d; // GL_TEXTURE_BINDING_2D 86 GLuint texture_2d; // GL_TEXTURE_BINDING_2D
87 GLuint sampler; // GL_SAMPLER_BINDING 87 GLuint sampler; // GL_SAMPLER_BINDING
88 struct {
89 GLint r; // GL_TEXTURE_SWIZZLE_R
90 GLint g; // GL_TEXTURE_SWIZZLE_G
91 GLint b; // GL_TEXTURE_SWIZZLE_B
92 GLint a; // GL_TEXTURE_SWIZZLE_A
93 } swizzle;
88 } texture_units[32]; 94 } texture_units[32];
89 95
90 struct { 96 struct {
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index a630610d8..2155fb019 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -100,6 +100,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
100 switch (wrap_mode) { 100 switch (wrap_mode) {
101 case Tegra::Texture::WrapMode::Wrap: 101 case Tegra::Texture::WrapMode::Wrap:
102 return GL_REPEAT; 102 return GL_REPEAT;
103 case Tegra::Texture::WrapMode::Mirror:
104 return GL_MIRRORED_REPEAT;
103 case Tegra::Texture::WrapMode::ClampToEdge: 105 case Tegra::Texture::WrapMode::ClampToEdge:
104 return GL_CLAMP_TO_EDGE; 106 return GL_CLAMP_TO_EDGE;
105 case Tegra::Texture::WrapMode::ClampOGL: 107 case Tegra::Texture::WrapMode::ClampOGL:
@@ -178,4 +180,25 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
178 return {}; 180 return {};
179} 181}
180 182
183inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
184 switch (source) {
185 case Tegra::Texture::SwizzleSource::Zero:
186 return GL_ZERO;
187 case Tegra::Texture::SwizzleSource::R:
188 return GL_RED;
189 case Tegra::Texture::SwizzleSource::G:
190 return GL_GREEN;
191 case Tegra::Texture::SwizzleSource::B:
192 return GL_BLUE;
193 case Tegra::Texture::SwizzleSource::A:
194 return GL_ALPHA;
195 case Tegra::Texture::SwizzleSource::OneInt:
196 case Tegra::Texture::SwizzleSource::OneFloat:
197 return GL_ONE;
198 }
199 NGLOG_CRITICAL(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source));
200 UNREACHABLE();
201 return {};
202}
203
181} // namespace MaxwellToGL 204} // namespace MaxwellToGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 3440d2190..f33766bfd 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -316,6 +316,7 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
316 }}; 316 }};
317 317
318 state.texture_units[0].texture_2d = screen_info.display_texture; 318 state.texture_units[0].texture_2d = screen_info.display_texture;
319 state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
319 state.Apply(); 320 state.Apply();
320 321
321 glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data()); 322 glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 2d2af5554..7bf9c4c4b 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -55,6 +55,7 @@ u32 BytesPerPixel(TextureFormat format) {
55 return 16; 55 return 16;
56 case TextureFormat::A8R8G8B8: 56 case TextureFormat::A8R8G8B8:
57 case TextureFormat::A2B10G10R10: 57 case TextureFormat::A2B10G10R10:
58 case TextureFormat::BF10GF11RF11:
58 return 4; 59 return 4;
59 case TextureFormat::A1B5G5R5: 60 case TextureFormat::A1B5G5R5:
60 case TextureFormat::B5G6R5: 61 case TextureFormat::B5G6R5:
@@ -92,6 +93,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
92 case TextureFormat::B5G6R5: 93 case TextureFormat::B5G6R5:
93 case TextureFormat::R8: 94 case TextureFormat::R8:
94 case TextureFormat::R16_G16_B16_A16: 95 case TextureFormat::R16_G16_B16_A16:
96 case TextureFormat::BF10GF11RF11:
95 CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, 97 CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
96 unswizzled_data.data(), true, block_height); 98 unswizzled_data.data(), true, block_height);
97 break; 99 break;
@@ -118,6 +120,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
118 case TextureFormat::A1B5G5R5: 120 case TextureFormat::A1B5G5R5:
119 case TextureFormat::B5G6R5: 121 case TextureFormat::B5G6R5:
120 case TextureFormat::R8: 122 case TextureFormat::R8:
123 case TextureFormat::BF10GF11RF11:
121 // TODO(Subv): For the time being just forward the same data without any decoding. 124 // TODO(Subv): For the time being just forward the same data without any decoding.
122 rgba_data = texture_data; 125 rgba_data = texture_data;
123 break; 126 break;
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index f48ca30b8..a17eaf19d 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -122,6 +122,17 @@ enum class ComponentType : u32 {
122 FLOAT = 7 122 FLOAT = 7
123}; 123};
124 124
125enum class SwizzleSource : u32 {
126 Zero = 0,
127
128 R = 2,
129 G = 3,
130 B = 4,
131 A = 5,
132 OneInt = 6,
133 OneFloat = 7,
134};
135
125union TextureHandle { 136union TextureHandle {
126 u32 raw; 137 u32 raw;
127 BitField<0, 20, u32> tic_id; 138 BitField<0, 20, u32> tic_id;
@@ -139,6 +150,11 @@ struct TICEntry {
139 BitField<10, 3, ComponentType> g_type; 150 BitField<10, 3, ComponentType> g_type;
140 BitField<13, 3, ComponentType> b_type; 151 BitField<13, 3, ComponentType> b_type;
141 BitField<16, 3, ComponentType> a_type; 152 BitField<16, 3, ComponentType> a_type;
153
154 BitField<19, 3, SwizzleSource> x_source;
155 BitField<22, 3, SwizzleSource> y_source;
156 BitField<25, 3, SwizzleSource> z_source;
157 BitField<28, 3, SwizzleSource> w_source;
142 }; 158 };
143 u32 address_low; 159 u32 address_low;
144 union { 160 union {