-rwxr-xr-x  .travis-build.sh                       |   4
-rwxr-xr-x  .travis-upload.sh                      |   4
-rw-r--r--  src/core/hle/kernel/shared_memory.cpp  |  27
-rw-r--r--  src/core/hle/kernel/thread.cpp         |   6
-rw-r--r--  src/core/hle/kernel/thread.h           |   6
-rw-r--r--  src/core/hle/service/apt/apt.cpp       |  73
-rw-r--r--  src/core/hle/service/apt/apt_s.cpp     |   4
-rw-r--r--  src/core/hle/service/nim/nim.cpp       |  18
-rw-r--r--  src/core/hle/service/nim/nim.h         |  11
-rw-r--r--  src/core/hle/service/nim/nim_u.cpp     |   2
-rw-r--r--  src/core/memory.cpp                    |  67
-rw-r--r--  src/core/memory.h                      |   4
-rw-r--r--  src/video_core/command_processor.cpp   | 439
13 files changed, 375 insertions(+), 290 deletions(-)
diff --git a/.travis-build.sh b/.travis-build.sh
index bb4e6fc47..fc5a5f8b2 100755
--- a/.travis-build.sh
+++ b/.travis-build.sh
@@ -52,8 +52,8 @@ elif [ "$TRAVIS_OS_NAME" = "osx" ]; then
     export Qt5_DIR=$(brew --prefix)/opt/qt5

     mkdir build && cd build
-    cmake .. -DUSE_SYSTEM_CURL=ON -GXcode
-    xcodebuild -configuration Release
+    cmake .. -DUSE_SYSTEM_CURL=ON -DCMAKE_OSX_ARCHITECTURES="x86_64;x86_64h" -DCMAKE_BUILD_TYPE=Release
+    make -j4

     ctest -VV -C Release
 fi
diff --git a/.travis-upload.sh b/.travis-upload.sh
index 8c1fa21c5..edf195f7d 100755
--- a/.travis-upload.sh
+++ b/.travis-upload.sh
@@ -16,8 +16,8 @@ elif [ "$TRAVIS_OS_NAME" = "osx" ]; then
     COMPRESSION_FLAGS="-czvf"
     mkdir "$REV_NAME"

-    cp build/src/citra/Release/citra "$REV_NAME"
-    cp -r build/src/citra_qt/Release/citra-qt.app "$REV_NAME"
+    cp build/src/citra/citra "$REV_NAME"
+    cp -r build/src/citra_qt/citra-qt.app "$REV_NAME"

     # move qt libs into app bundle for deployment
     $(brew --prefix)/opt/qt5/bin/macdeployqt "${REV_NAME}/citra-qt.app"
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 02d5a7a36..d45daca35 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -55,22 +55,19 @@ SharedPtr<SharedMemory> SharedMemory::Create(SharedPtr<Process> owner_process, u
             Kernel::g_current_process->vm_manager.RefreshMemoryBlockMappings(linheap_memory.get());
         }
     } else {
-        // TODO(Subv): What happens if an application tries to create multiple memory blocks
-        // pointing to the same address?
         auto& vm_manager = shared_memory->owner_process->vm_manager;
         // The memory is already available and mapped in the owner process.
-        auto vma = vm_manager.FindVMA(address)->second;
-        // Copy it over to our own storage
-        shared_memory->backing_block = std::make_shared<std::vector<u8>>(
-            vma.backing_block->data() + vma.offset, vma.backing_block->data() + vma.offset + size);
-        shared_memory->backing_block_offset = 0;
-        // Unmap the existing pages
-        vm_manager.UnmapRange(address, size);
-        // Map our own block into the address space
-        vm_manager.MapMemoryBlock(address, shared_memory->backing_block, 0, size,
-                                  MemoryState::Shared);
-        // Reprotect the block with the new permissions
-        vm_manager.ReprotectRange(address, size, ConvertPermissions(permissions));
+        auto vma = vm_manager.FindVMA(address);
+        ASSERT_MSG(vma != vm_manager.vma_map.end(), "Invalid memory address");
+        ASSERT_MSG(vma->second.backing_block, "Backing block doesn't exist for address");
+
+        // The returned VMA might be a bigger one encompassing the desired address.
+        auto vma_offset = address - vma->first;
+        ASSERT_MSG(vma_offset + size <= vma->second.size,
+                   "Shared memory exceeds bounds of mapped block");
+
+        shared_memory->backing_block = vma->second.backing_block;
+        shared_memory->backing_block_offset = vma->second.offset + vma_offset;
     }

     shared_memory->base_address = address;
@@ -184,4 +181,4 @@ u8* SharedMemory::GetPointer(u32 offset) {
     return backing_block->data() + backing_block_offset + offset;
 }

-} // namespace
+} // namespace Kernel
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 2614a260c..0f7970ebe 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -541,6 +541,12 @@ s32 Thread::GetWaitObjectIndex(WaitObject* object) const {
     return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1);
 }

+VAddr Thread::GetCommandBufferAddress() const {
+    // Offset from the start of TLS at which the IPC command buffer begins.
+    static constexpr int CommandHeaderOffset = 0x80;
+    return GetTLSAddress() + CommandHeaderOffset;
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////

 void ThreadingInit() {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 4679c2022..314fba81f 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -164,6 +164,12 @@ public:
         return tls_address;
     }

+    /*
+     * Returns the address of the current thread's command buffer, located in the TLS.
+     * @returns VAddr of the thread's command buffer.
+     */
+    VAddr GetCommandBufferAddress() const;
+
     /**
      * Returns whether this thread is waiting for all the objects in
      * its wait list to become ready, as a result of a WaitSynchronizationN call
diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp
index 2f7362748..59ea9823d 100644
--- a/src/core/hle/service/apt/apt.cpp
+++ b/src/core/hle/service/apt/apt.cpp
@@ -171,7 +171,11 @@ void SendParameter(const MessageParameter& parameter) {
     next_parameter = parameter;
     // Signal the event to let the receiver know that a new parameter is ready to be read
     auto* const slot_data = GetAppletSlotData(static_cast<AppletId>(parameter.destination_id));
-    ASSERT(slot_data);
+    if (slot_data == nullptr) {
+        LOG_DEBUG(Service_APT, "No applet was registered with the id %03X",
+                  parameter.destination_id);
+        return;
+    }

     slot_data->parameter_event->Signal();
 }
@@ -505,9 +509,6 @@ void SendParameter(Service::Interface* self) {
     size_t size;
     VAddr buffer = rp.PopStaticBuffer(&size);

-    std::shared_ptr<HLE::Applets::Applet> dest_applet =
-        HLE::Applets::Applet::Get(static_cast<AppletId>(dst_app_id));
-
     LOG_DEBUG(Service_APT,
               "called src_app_id=0x%08X, dst_app_id=0x%08X, signal_type=0x%08X,"
               "buffer_size=0x%08X, handle=0x%08X, size=0x%08zX, in_param_buffer_ptr=0x%08X",
@@ -522,12 +523,6 @@ void SendParameter(Service::Interface* self) {
         return;
     }

-    if (dest_applet == nullptr) {
-        LOG_ERROR(Service_APT, "Unknown applet id=0x%08X", dst_app_id);
-        rb.Push<u32>(-1); // TODO(Subv): Find the right error code
-        return;
-    }
-
     MessageParameter param;
     param.destination_id = dst_app_id;
     param.sender_id = src_app_id;
@@ -536,7 +531,14 @@ void SendParameter(Service::Interface* self) {
     param.buffer.resize(buffer_size);
     Memory::ReadBlock(buffer, param.buffer.data(), param.buffer.size());

-    rb.Push(dest_applet->ReceiveParameter(param));
+    SendParameter(param);
+
+    // If the applet is running in HLE mode, use the HLE interface to communicate with it.
+    if (auto dest_applet = HLE::Applets::Applet::Get(static_cast<AppletId>(dst_app_id))) {
+        rb.Push(dest_applet->ReceiveParameter(param));
+    } else {
+        rb.Push(RESULT_SUCCESS);
+    }
 }

 void ReceiveParameter(Service::Interface* self) {
@@ -765,7 +767,12 @@ void PrepareToStartLibraryApplet(Service::Interface* self) {
     IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x18, 1, 0); // 0x180040
     AppletId applet_id = static_cast<AppletId>(rp.Pop<u32>());

+    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
+
     IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+
+    // TODO(Subv): Launch the requested applet application.
+
     auto applet = HLE::Applets::Applet::Get(applet_id);
     if (applet) {
         LOG_WARNING(Service_APT, "applet has already been started id=%08X", applet_id);
@@ -773,7 +780,6 @@ void PrepareToStartLibraryApplet(Service::Interface* self) {
     } else {
         rb.Push(HLE::Applets::Applet::Create(applet_id));
     }
-    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
 }

 void PrepareToStartNewestHomeMenu(Service::Interface* self) {
@@ -794,7 +800,12 @@ void PreloadLibraryApplet(Service::Interface* self) {
     IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x16, 1, 0); // 0x160040
     AppletId applet_id = static_cast<AppletId>(rp.Pop<u32>());

+    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
+
     IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
+
+    // TODO(Subv): Launch the requested applet application.
+
     auto applet = HLE::Applets::Applet::Get(applet_id);
     if (applet) {
         LOG_WARNING(Service_APT, "applet has already been started id=%08X", applet_id);
@@ -802,34 +813,40 @@ void PreloadLibraryApplet(Service::Interface* self) {
     } else {
         rb.Push(HLE::Applets::Applet::Create(applet_id));
     }
-    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
 }

 void StartLibraryApplet(Service::Interface* self) {
     IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x1E, 2, 4); // 0x1E0084
     AppletId applet_id = static_cast<AppletId>(rp.Pop<u32>());
-    std::shared_ptr<HLE::Applets::Applet> applet = HLE::Applets::Applet::Get(applet_id);
-
-    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);
-
-    if (applet == nullptr) {
-        LOG_ERROR(Service_APT, "unknown applet id=%08X", applet_id);
-        IPC::RequestBuilder rb = rp.MakeBuilder(1, 0, false);
-        rb.Push<u32>(-1); // TODO(Subv): Find the right error code
-        return;
-    }

     size_t buffer_size = rp.Pop<u32>();
     Kernel::Handle handle = rp.PopHandle();
     VAddr buffer_addr = rp.PopStaticBuffer();

-    AppletStartupParameter parameter;
-    parameter.object = Kernel::g_handle_table.GetGeneric(handle);
-    parameter.buffer.resize(buffer_size);
-    Memory::ReadBlock(buffer_addr, parameter.buffer.data(), parameter.buffer.size());
+    LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id);

     IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
-    rb.Push(applet->Start(parameter));
+
+    // Send the Wakeup signal to the applet
+    MessageParameter param;
+    param.destination_id = static_cast<u32>(applet_id);
+    param.sender_id = static_cast<u32>(AppletId::Application);
+    param.object = Kernel::g_handle_table.GetGeneric(handle);
+    param.signal = static_cast<u32>(SignalType::Wakeup);
+    param.buffer.resize(buffer_size);
+    Memory::ReadBlock(buffer_addr, param.buffer.data(), param.buffer.size());
+    SendParameter(param);
+
+    // In case the applet is being HLEd, attempt to communicate with it.
+    if (auto applet = HLE::Applets::Applet::Get(applet_id)) {
+        AppletStartupParameter parameter;
+        parameter.object = Kernel::g_handle_table.GetGeneric(handle);
+        parameter.buffer.resize(buffer_size);
+        Memory::ReadBlock(buffer_addr, parameter.buffer.data(), parameter.buffer.size());
+        rb.Push(applet->Start(parameter));
+    } else {
+        rb.Push(RESULT_SUCCESS);
+    }
 }

 void CancelLibraryApplet(Service::Interface* self) {
diff --git a/src/core/hle/service/apt/apt_s.cpp b/src/core/hle/service/apt/apt_s.cpp
index fe1d21fff..bb78ee7d7 100644
--- a/src/core/hle/service/apt/apt_s.cpp
+++ b/src/core/hle/service/apt/apt_s.cpp
@@ -20,7 +20,7 @@ const Interface::FunctionInfo FunctionTable[] = {
     {0x00090040, IsRegistered, "IsRegistered"},
     {0x000A0040, nullptr, "GetAttribute"},
     {0x000B0040, InquireNotification, "InquireNotification"},
-    {0x000C0104, nullptr, "SendParameter"},
+    {0x000C0104, SendParameter, "SendParameter"},
     {0x000D0080, ReceiveParameter, "ReceiveParameter"},
     {0x000E0080, GlanceParameter, "GlanceParameter"},
     {0x000F0100, nullptr, "CancelParameter"},
@@ -38,7 +38,7 @@ const Interface::FunctionInfo FunctionTable[] = {
     {0x001B00C4, nullptr, "StartApplication"},
     {0x001C0000, nullptr, "WakeupApplication"},
     {0x001D0000, nullptr, "CancelApplication"},
-    {0x001E0084, nullptr, "StartLibraryApplet"},
+    {0x001E0084, StartLibraryApplet, "StartLibraryApplet"},
     {0x001F0084, nullptr, "StartSystemApplet"},
     {0x00200044, nullptr, "StartNewestHomeMenu"},
     {0x00210000, nullptr, "OrderToCloseApplication"},
diff --git a/src/core/hle/service/nim/nim.cpp b/src/core/hle/service/nim/nim.cpp
index d5624fe54..b10d5852b 100644
--- a/src/core/hle/service/nim/nim.cpp
+++ b/src/core/hle/service/nim/nim.cpp
@@ -5,6 +5,8 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "core/hle/ipc.h"
+#include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/event.h"
 #include "core/hle/service/nim/nim.h"
 #include "core/hle/service/nim/nim_aoc.h"
 #include "core/hle/service/nim/nim_s.h"
@@ -14,6 +16,16 @@
 namespace Service {
 namespace NIM {

+static Kernel::SharedPtr<Kernel::Event> nim_system_update_event;
+
+void CheckForSysUpdateEvent(Service::Interface* self) {
+    IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x5, 0, 0); // 0x50000
+    IPC::RequestBuilder rb = rp.MakeBuilder(1, 2);
+    rb.Push(RESULT_SUCCESS);
+    rb.PushCopyHandles(Kernel::g_handle_table.Create(nim_system_update_event).Unwrap());
+    LOG_TRACE(Service_NIM, "called");
+}
+
 void CheckSysUpdateAvailable(Service::Interface* self) {
     u32* cmd_buff = Kernel::GetCommandBuffer();

@@ -29,9 +41,13 @@ void Init() {
     AddService(new NIM_AOC_Interface);
     AddService(new NIM_S_Interface);
     AddService(new NIM_U_Interface);
+
+    nim_system_update_event = Kernel::Event::Create(ResetType::OneShot, "NIM System Update Event");
 }

-void Shutdown() {}
+void Shutdown() {
+    nim_system_update_event = nullptr;
+}

 } // namespace NIM

diff --git a/src/core/hle/service/nim/nim.h b/src/core/hle/service/nim/nim.h
index c3106f18b..dbf605e5a 100644
--- a/src/core/hle/service/nim/nim.h
+++ b/src/core/hle/service/nim/nim.h
@@ -11,6 +11,17 @@ class Interface;
 namespace NIM {

 /**
+ * NIM::CheckForSysUpdateEvent service function
+ * Inputs:
+ *      1 : None
+ * Outputs:
+ *      1 : Result of function, 0 on success, otherwise error code
+ *      2 : Copy handle descriptor
+ *      3 : System Update event handle
+ */
+void CheckForSysUpdateEvent(Service::Interface* self);
+
+/**
  * NIM::CheckSysUpdateAvailable service function
  * Inputs:
  *      1 : None
diff --git a/src/core/hle/service/nim/nim_u.cpp b/src/core/hle/service/nim/nim_u.cpp
index 7664bad60..569660278 100644
--- a/src/core/hle/service/nim/nim_u.cpp
+++ b/src/core/hle/service/nim/nim_u.cpp
@@ -12,7 +12,7 @@ const Interface::FunctionInfo FunctionTable[] = {
     {0x00010000, nullptr, "StartSysUpdate"},
     {0x00020000, nullptr, "GetUpdateDownloadProgress"},
     {0x00040000, nullptr, "FinishTitlesInstall"},
-    {0x00050000, nullptr, "CheckForSysUpdateEvent"},
+    {0x00050000, CheckForSysUpdateEvent, "CheckForSysUpdateEvent"},
     {0x00090000, CheckSysUpdateAvailable, "CheckSysUpdateAvailable"},
     {0x000A0000, nullptr, "GetState"},
     {0x000B0000, nullptr, "GetSystemTitleHash"},
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 847e69710..7f58be6de 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -82,10 +82,10 @@ void UnmapRegion(PageTable& page_table, VAddr base, u32 size) {
  * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned)
  * using a VMA from the current process
  */
-static u8* GetPointerFromVMA(VAddr vaddr) {
+static u8* GetPointerFromVMA(const Kernel::Process& process, VAddr vaddr) {
     u8* direct_pointer = nullptr;

-    auto& vm_manager = Kernel::g_current_process->vm_manager;
+    auto& vm_manager = process.vm_manager;

     auto it = vm_manager.FindVMA(vaddr);
     ASSERT(it != vm_manager.vma_map.end());
@@ -108,6 +108,14 @@ static u8* GetPointerFromVMA(VAddr vaddr) {
 }

 /**
+ * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned)
+ * using a VMA from the current process.
+ */
+static u8* GetPointerFromVMA(VAddr vaddr) {
+    return GetPointerFromVMA(*Kernel::g_current_process, vaddr);
+}
+
+/**
  * This function should only be called for virtual addreses with attribute `PageType::Special`.
  */
 static MMIORegionPointer GetMMIOHandler(const PageTable& page_table, VAddr vaddr) {
@@ -470,7 +478,10 @@ u64 Read64(const VAddr addr) {
     return Read<u64_le>(addr);
 }

-void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) {
+void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
+               const size_t size) {
+    auto& page_table = process.vm_manager.page_table;
+
     size_t remaining_size = size;
     size_t page_index = src_addr >> PAGE_BITS;
     size_t page_offset = src_addr & PAGE_MASK;
@@ -479,7 +490,7 @@ void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) {
         const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size);
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);

-        switch (current_page_table->attributes[page_index]) {
+        switch (page_table.attributes[page_index]) {
         case PageType::Unmapped: {
             LOG_ERROR(HW_Memory, "unmapped ReadBlock @ 0x%08X (start address = 0x%08X, size = %zu)",
                       current_vaddr, src_addr, size);
@@ -487,29 +498,30 @@ void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) {
             break;
         }
         case PageType::Memory: {
-            DEBUG_ASSERT(current_page_table->pointers[page_index]);
+            DEBUG_ASSERT(page_table.pointers[page_index]);

-            const u8* src_ptr = current_page_table->pointers[page_index] + page_offset;
+            const u8* src_ptr = page_table.pointers[page_index] + page_offset;
             std::memcpy(dest_buffer, src_ptr, copy_amount);
             break;
         }
         case PageType::Special: {
-            DEBUG_ASSERT(GetMMIOHandler(current_vaddr));
-
-            GetMMIOHandler(current_vaddr)->ReadBlock(current_vaddr, dest_buffer, copy_amount);
+            MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
+            DEBUG_ASSERT(handler);
+            handler->ReadBlock(current_vaddr, dest_buffer, copy_amount);
             break;
         }
         case PageType::RasterizerCachedMemory: {
             RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
                                          FlushMode::Flush);
-            std::memcpy(dest_buffer, GetPointerFromVMA(current_vaddr), copy_amount);
+            std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount);
             break;
         }
         case PageType::RasterizerCachedSpecial: {
-            DEBUG_ASSERT(GetMMIOHandler(current_vaddr));
+            MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
+            DEBUG_ASSERT(handler);
             RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
                                          FlushMode::Flush);
-            GetMMIOHandler(current_vaddr)->ReadBlock(current_vaddr, dest_buffer, copy_amount);
+            handler->ReadBlock(current_vaddr, dest_buffer, copy_amount);
             break;
         }
         default:
@@ -523,6 +535,10 @@ void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) {
     }
 }

+void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) {
+    ReadBlock(*Kernel::g_current_process, src_addr, dest_buffer, size);
+}
+
 void Write8(const VAddr addr, const u8 data) {
     Write<u8>(addr, data);
 }
@@ -539,7 +555,9 @@ void Write64(const VAddr addr, const u64 data) {
     Write<u64_le>(addr, data);
 }

-void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size) {
+void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
+                const size_t size) {
+    auto& page_table = process.vm_manager.page_table;
     size_t remaining_size = size;
     size_t page_index = dest_addr >> PAGE_BITS;
     size_t page_offset = dest_addr & PAGE_MASK;
@@ -548,7 +566,7 @@ void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size
         const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size);
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);

-        switch (current_page_table->attributes[page_index]) {
+        switch (page_table.attributes[page_index]) {
         case PageType::Unmapped: {
             LOG_ERROR(HW_Memory,
                       "unmapped WriteBlock @ 0x%08X (start address = 0x%08X, size = %zu)",
@@ -556,29 +574,30 @@ void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size
             break;
         }
         case PageType::Memory: {
-            DEBUG_ASSERT(current_page_table->pointers[page_index]);
+            DEBUG_ASSERT(page_table.pointers[page_index]);

-            u8* dest_ptr = current_page_table->pointers[page_index] + page_offset;
+            u8* dest_ptr = page_table.pointers[page_index] + page_offset;
             std::memcpy(dest_ptr, src_buffer, copy_amount);
             break;
         }
         case PageType::Special: {
-            DEBUG_ASSERT(GetMMIOHandler(current_vaddr));
-
-            GetMMIOHandler(current_vaddr)->WriteBlock(current_vaddr, src_buffer, copy_amount);
+            MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
+            DEBUG_ASSERT(handler);
+            handler->WriteBlock(current_vaddr, src_buffer, copy_amount);
             break;
         }
         case PageType::RasterizerCachedMemory: {
             RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
                                          FlushMode::FlushAndInvalidate);
-            std::memcpy(GetPointerFromVMA(current_vaddr), src_buffer, copy_amount);
+            std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount);
             break;
         }
         case PageType::RasterizerCachedSpecial: {
-            DEBUG_ASSERT(GetMMIOHandler(current_vaddr));
+            MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
+            DEBUG_ASSERT(handler);
             RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
                                          FlushMode::FlushAndInvalidate);
-            GetMMIOHandler(current_vaddr)->WriteBlock(current_vaddr, src_buffer, copy_amount);
+            handler->WriteBlock(current_vaddr, src_buffer, copy_amount);
             break;
         }
         default:
@@ -592,6 +611,10 @@ void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size
     }
 }

+void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size) {
+    WriteBlock(*Kernel::g_current_process, dest_addr, src_buffer, size);
+}
+
 void ZeroBlock(const VAddr dest_addr, const size_t size) {
     size_t remaining_size = size;
     size_t page_index = dest_addr >> PAGE_BITS;
diff --git a/src/core/memory.h b/src/core/memory.h
index 347c08c78..dd599f73e 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -205,7 +205,11 @@ void Write16(VAddr addr, u16 data);
 void Write32(VAddr addr, u32 data);
 void Write64(VAddr addr, u64 data);

+void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
+               size_t size);
 void ReadBlock(const VAddr src_addr, void* dest_buffer, size_t size);
+void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
+                size_t size);
 void WriteBlock(const VAddr dest_addr, const void* src_buffer, size_t size);
 void ZeroBlock(const VAddr dest_addr, const size_t size);
 void CopyBlock(VAddr dest_addr, VAddr src_addr, size_t size);
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 3ab4af374..caf9f7a06 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -119,6 +119,224 @@ static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup,
     }
 }

+static void LoadDefaultVertexAttributes(u32 register_value) {
+    auto& regs = g_state.regs;
+
+    // TODO: Does actual hardware indeed keep an intermediate buffer or does
+    // it directly write the values?
+    default_attr_write_buffer[default_attr_counter++] = register_value;
+
+    // Default attributes are written in a packed format such that four float24 values are encoded
+    // in three 32-bit numbers.
+    // We write to internal memory once a full such vector is written.
+    if (default_attr_counter >= 3) {
+        default_attr_counter = 0;
+
+        auto& setup = regs.pipeline.vs_default_attributes_setup;
+
+        if (setup.index >= 16) {
+            LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
+            return;
+        }
+
+        Math::Vec4<float24> attribute;
+
+        // NOTE: The destination component order indeed is "backwards"
+        attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8);
+        attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) |
+                                       ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
+        attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) |
+                                       ((default_attr_write_buffer[2] >> 24) & 0xFF));
+        attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
+
+        LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
+                  attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
+                  attribute.w.ToFloat32());
+
+        // TODO: Verify that this actually modifies the register!
+        if (setup.index < 15) {
+            g_state.input_default_attributes.attr[setup.index] = attribute;
+            setup.index++;
+        } else {
+            // Put each attribute into an immediate input buffer. When all specified immediate
+            // attributes are present, the Vertex Shader is invoked and everything is sent to
+            // the primitive assembler.
+
+            auto& immediate_input = g_state.immediate.input_vertex;
+            auto& immediate_attribute_id = g_state.immediate.current_attribute;
+
+            immediate_input.attr[immediate_attribute_id] = attribute;
+
+            if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) {
+                immediate_attribute_id += 1;
+            } else {
+                MICROPROFILE_SCOPE(GPU_Drawing);
+                immediate_attribute_id = 0;
+
+                auto* shader_engine = Shader::GetEngine();
+                shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
+
+                // Send to vertex shader
+                if (g_debug_context)
+                    g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
+                                             static_cast<void*>(&immediate_input));
+                Shader::UnitState shader_unit;
+                Shader::AttributeBuffer output{};
+
+                shader_unit.LoadInput(regs.vs, immediate_input);
+                shader_engine->Run(g_state.vs, shader_unit);
+                shader_unit.WriteOutput(regs.vs, output);
+
+                // Send to geometry pipeline
+                if (g_state.immediate.reset_geometry_pipeline) {
+                    g_state.geometry_pipeline.Reconfigure();
+                    g_state.immediate.reset_geometry_pipeline = false;
+                }
+                ASSERT(!g_state.geometry_pipeline.NeedIndexInput());
+                g_state.geometry_pipeline.Setup(shader_engine);
+                g_state.geometry_pipeline.SubmitVertex(output);
+
+                // TODO: If drawing after every immediate mode triangle kills performance,
+                // change it to flush triangles whenever a drawing config register changes
+                // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550
+                VideoCore::g_renderer->Rasterizer()->DrawTriangles();
+                if (g_debug_context) {
+                    g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
+                }
+            }
+        }
+    }
+}
+
+static void Draw(u32 command_id) {
+    MICROPROFILE_SCOPE(GPU_Drawing);
+    auto& regs = g_state.regs;
+
+#if PICA_LOG_TEV
+    DebugUtils::DumpTevStageConfig(regs.GetTevStages());
+#endif
+    if (g_debug_context)
+        g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
+
+    // Processes information about internal vertex attributes to figure out how a vertex is
+    // loaded.
+    // Later, these can be compiled and cached.
+    const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress();
+    VertexLoader loader(regs.pipeline);
+
+    // Load vertices
+    bool is_indexed = (command_id == PICA_REG_INDEX(pipeline.trigger_draw_indexed));
+
+    const auto& index_info = regs.pipeline.index_array;
+    const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
+    const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
+    bool index_u16 = index_info.format != 0;
+
+    PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler;
+
+    if (g_debug_context && g_debug_context->recorder) {
+        for (int i = 0; i < 3; ++i) {
+            const auto texture = regs.texturing.GetTextures()[i];
+            if (!texture.enabled)
+                continue;
+
+            u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
+            g_debug_context->recorder->MemoryAccessed(
+                texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) *
+                                  texture.config.width / 2 * texture.config.height,
+                texture.config.GetPhysicalAddress());
+        }
+    }
+
+    DebugUtils::MemoryAccessTracker memory_accesses;
+
+    // Simple circular-replacement vertex cache
+    // The size has been tuned for optimal balance between hit-rate and the cost of lookup
+    const size_t VERTEX_CACHE_SIZE = 32;
+    std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
+    std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache;
+    Shader::AttributeBuffer vs_output;
+
+    unsigned int vertex_cache_pos = 0;
+    vertex_cache_ids.fill(-1);
+
+    auto* shader_engine = Shader::GetEngine();
+    Shader::UnitState shader_unit;
+
+    shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
+
+    g_state.geometry_pipeline.Reconfigure();
+    g_state.geometry_pipeline.Setup(shader_engine);
+    if (g_state.geometry_pipeline.NeedIndexInput())
+        ASSERT(is_indexed);
+
+    for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) {
+        // Indexed rendering doesn't use the start offset
+        unsigned int vertex = is_indexed
+                                  ? (index_u16 ? index_address_16[index] : index_address_8[index])
+                                  : (index + regs.pipeline.vertex_offset);
+
+        // -1 is a common special value used for primitive restart. Since it's unknown if
+        // the PICA supports it, and it would mess up the caching, guard against it here.
+        ASSERT(vertex != -1);
+
+        bool vertex_cache_hit = false;
+
+        if (is_indexed) {
+            if (g_state.geometry_pipeline.NeedIndexInput()) {
+                g_state.geometry_pipeline.SubmitIndex(vertex);
+                continue;
+            }
+
+            if (g_debug_context && Pica::g_debug_context->recorder) {
+                int size = index_u16 ? 2 : 1;
+                memory_accesses.AddAccess(base_address + index_info.offset + size * index, size);
+            }
+
+            for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
+                if (vertex == vertex_cache_ids[i]) {
+                    vs_output = vertex_cache[i];
+                    vertex_cache_hit = true;
+                    break;
+                }
+            }
+        }
+
+        if (!vertex_cache_hit) {
+            // Initialize data for the current vertex
+            Shader::AttributeBuffer input;
+            loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
+
+            // Send to vertex shader
+            if (g_debug_context)
+                g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
+                                         (void*)&input);
+            shader_unit.LoadInput(regs.vs, input);
+            shader_engine->Run(g_state.vs, shader_unit);
+            shader_unit.WriteOutput(regs.vs, vs_output);
+
+            if (is_indexed) {
+                vertex_cache[vertex_cache_pos] = vs_output;
+                vertex_cache_ids[vertex_cache_pos] = vertex;
+                vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
+            }
+        }
+
+        // Send to geometry pipeline
+        g_state.geometry_pipeline.SubmitVertex(vs_output);
+    }
+
+    for (auto& range : memory_accesses.ranges) {
+        g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first),
+                                                  range.second, range.first);
+    }
+
+    VideoCore::g_renderer->Rasterizer()->DrawTriangles();
+    if (g_debug_context) {
+        g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
+    }
+}
+
 static void WritePicaReg(u32 id, u32 value, u32 mask) {
     auto& regs = g_state.regs;

@@ -168,95 +386,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
     // Load default vertex input attributes
     case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[0], 0x233):
     case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[1], 0x234):
-    case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235): {
-        // TODO: Does actual hardware indeed keep an intermediate buffer or does
-        // it directly write the values?
-        default_attr_write_buffer[default_attr_counter++] = value;
-
-        // Default attributes are written in a packed format such that four float24 values are
-        // encoded in
-        // three 32-bit numbers. We write to internal memory once a full such vector is
-        // written.
-        if (default_attr_counter >= 3) {
-            default_attr_counter = 0;
-
-            auto& setup = regs.pipeline.vs_default_attributes_setup;
-
-            if (setup.index >= 16) {
-                LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
-                break;
-            }
-
-            Math::Vec4<float24> attribute;
-
-            // NOTE: The destination component order indeed is "backwards"
-            attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8);
-            attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) |
-                                           ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
-            attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) |
-                                           ((default_attr_write_buffer[2] >> 24) & 0xFF));
-            attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
-
-            LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
-                      attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
-                      attribute.w.ToFloat32());
-
-            // TODO: Verify that this actually modifies the register!
-            if (setup.index < 15) {
-                g_state.input_default_attributes.attr[setup.index] = attribute;
-                setup.index++;
-            } else {
-                // Put each attribute into an immediate input buffer. When all specified immediate
-                // attributes are present, the Vertex Shader is invoked and everything is sent to
-                // the primitive assembler.
-
-                auto& immediate_input = g_state.immediate.input_vertex;
-                auto& immediate_attribute_id = g_state.immediate.current_attribute;
-
-                immediate_input.attr[immediate_attribute_id] = attribute;
-
-                if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) {
-                    immediate_attribute_id += 1;
-                } else {
-                    MICROPROFILE_SCOPE(GPU_Drawing);
-                    immediate_attribute_id = 0;
-
-                    auto* shader_engine = Shader::GetEngine();
-                    shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
-
-                    // Send to vertex shader
-                    if (g_debug_context)
-                        g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
-                                                 static_cast<void*>(&immediate_input));
-                    Shader::UnitState shader_unit;
-                    Shader::AttributeBuffer output{};
-
-                    shader_unit.LoadInput(regs.vs, immediate_input);
-                    shader_engine->Run(g_state.vs, shader_unit);
-                    shader_unit.WriteOutput(regs.vs, output);
-
-                    // Send to geometry pipeline
-                    if (g_state.immediate.reset_geometry_pipeline) {
-                        g_state.geometry_pipeline.Reconfigure();
-                        g_state.immediate.reset_geometry_pipeline = false;
-                    }
-                    ASSERT(!g_state.geometry_pipeline.NeedIndexInput());
-                    g_state.geometry_pipeline.Setup(shader_engine);
-                    g_state.geometry_pipeline.SubmitVertex(output);
-
-                    // TODO: If drawing after every immediate mode triangle kills performance,
-                    // change it to flush triangles whenever a drawing config register changes
-                    // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550
-                    VideoCore::g_renderer->Rasterizer()->DrawTriangles();
-                    if (g_debug_context) {
-                        g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch,
-                                                 nullptr);
-                    }
-                }
-            }
-        }
+    case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235):
+        LoadDefaultVertexAttributes(value);
         break;
-    }

     case PICA_REG_INDEX(pipeline.gpu_mode):
         // This register likely just enables vertex processing and doesn't need any special handling
@@ -275,136 +407,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {

     // It seems like these trigger vertex rendering
     case PICA_REG_INDEX(pipeline.trigger_draw):
-    case PICA_REG_INDEX(pipeline.trigger_draw_indexed): {
-        MICROPROFILE_SCOPE(GPU_Drawing);
-
-#if PICA_LOG_TEV
-        DebugUtils::DumpTevStageConfig(regs.GetTevStages());
-#endif
-        if (g_debug_context)
-            g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
-
-        // Processes information about internal vertex attributes to figure out how a vertex is
-        // loaded.
-        // Later, these can be compiled and cached.
-        const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress();
-        VertexLoader loader(regs.pipeline);
-
-        // Load vertices
-        bool is_indexed = (id == PICA_REG_INDEX(pipeline.trigger_draw_indexed));
-
-        const auto& index_info = regs.pipeline.index_array;
-        const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
-        const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
-        bool index_u16 = index_info.format != 0;
-
-        PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler;
-
-        if (g_debug_context && g_debug_context->recorder) {
-            for (int i = 0; i < 3; ++i) {
-                const auto texture = regs.texturing.GetTextures()[i];
-                if (!texture.enabled)
-                    continue;
-
-                u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
-                g_debug_context->recorder->MemoryAccessed(
-                    texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) *
-                                      texture.config.width / 2 * texture.config.height,
-                    texture.config.GetPhysicalAddress());
-            }
-        }
-
-        DebugUtils::MemoryAccessTracker memory_accesses;
-
-        // Simple circular-replacement vertex cache
-        // The size has been tuned for optimal balance between hit-rate and the cost of lookup
-        const size_t VERTEX_CACHE_SIZE = 32;
-        std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
-        std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache;
-        Shader::AttributeBuffer vs_output;
-
-        unsigned int vertex_cache_pos = 0;
-        vertex_cache_ids.fill(-1);
-
-        auto* shader_engine = Shader::GetEngine();
-        Shader::UnitState shader_unit;
-
-        shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
-
-        g_state.geometry_pipeline.Reconfigure();
-        g_state.geometry_pipeline.Setup(shader_engine);
-        if (g_state.geometry_pipeline.NeedIndexInput())
-            ASSERT(is_indexed);
-
-        for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) {
-            // Indexed rendering doesn't use the start offset
-            unsigned int vertex =
-                is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index])
-                           : (index + regs.pipeline.vertex_offset);
-
-            // -1 is a common special value used for primitive restart. Since it's unknown if
-            // the PICA supports it, and it would mess up the caching, guard against it here.
-            ASSERT(vertex != -1);
-
-            bool vertex_cache_hit = false;
-
-            if (is_indexed) {
-                if (g_state.geometry_pipeline.NeedIndexInput()) {
-                    g_state.geometry_pipeline.SubmitIndex(vertex);
-                    continue;
-                }
-
-                if (g_debug_context && Pica::g_debug_context->recorder) {
-                    int size = index_u16 ? 2 : 1;
-                    memory_accesses.AddAccess(base_address + index_info.offset + size * index,
-                                              size);
-                }
-
-                for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
-                    if (vertex == vertex_cache_ids[i]) {
-                        vs_output = vertex_cache[i];
-                        vertex_cache_hit = true;
-                        break;
-                    }
-                }
-            }
-
-            if (!vertex_cache_hit) {
-                // Initialize data for the current vertex
-                Shader::AttributeBuffer input;
-                loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
-
-                // Send to vertex shader
-                if (g_debug_context)
-                    g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
-                                             (void*)&input);
-                shader_unit.LoadInput(regs.vs, input);
-                shader_engine->Run(g_state.vs, shader_unit);
-                shader_unit.WriteOutput(regs.vs, vs_output);
-
-                if (is_indexed) {
-                    vertex_cache[vertex_cache_pos] = vs_output;
-                    vertex_cache_ids[vertex_cache_pos] = vertex;
-                    vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
-                }
-            }
-
-            // Send to geometry pipeline
-            g_state.geometry_pipeline.SubmitVertex(vs_output);
-        }
-
-        for (auto& range : memory_accesses.ranges) {
-            g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first),
-                                                      range.second, range.first);
-        }
-
-        VideoCore::g_renderer->Rasterizer()->DrawTriangles();
-        if (g_debug_context) {
-            g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
-        }
-
+    case PICA_REG_INDEX(pipeline.trigger_draw_indexed):
+        Draw(id);
         break;
-    }

     case PICA_REG_INDEX(gs.bool_uniforms):
         WriteUniformBoolReg(g_state.gs, g_state.regs.gs.bool_uniforms.Value());