summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/citra_qt/debugger/callstack.cpp 4
-rw-r--r-- src/citra_qt/debugger/graphics_breakpoints.cpp 14
-rw-r--r-- src/citra_qt/debugger/graphics_cmdlists.cpp 56
-rw-r--r-- src/citra_qt/debugger/graphics_framebuffer.cpp 23
-rw-r--r-- src/citra_qt/util/spinbox.cpp 2
-rw-r--r-- src/common/bit_field.h 6
-rw-r--r-- src/core/arm/dyncom/arm_dyncom_interpreter.cpp 48
-rw-r--r-- src/core/arm/interpreter/armemu.cpp 102
-rw-r--r-- src/core/file_sys/archive_backend.h 11
-rw-r--r-- src/core/hle/kernel/kernel.h 12
-rw-r--r-- src/core/hle/kernel/semaphore.cpp 8
-rw-r--r-- src/core/hle/kernel/semaphore.h 2
-rw-r--r-- src/core/hle/kernel/thread.cpp 3
-rw-r--r-- src/core/hle/kernel/thread.h 3
-rw-r--r-- src/core/loader/3dsx.cpp 4
-rw-r--r-- src/video_core/command_processor.cpp 31
-rw-r--r-- src/video_core/debug_utils/debug_utils.cpp 255
-rw-r--r-- src/video_core/debug_utils/debug_utils.h 15
-rw-r--r-- src/video_core/pica.h 118
-rw-r--r-- src/video_core/primitive_assembly.cpp 23
-rw-r--r-- src/video_core/primitive_assembly.h 1
-rw-r--r-- src/video_core/rasterizer.cpp 164
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp 4
-rw-r--r-- src/video_core/vertex_shader.cpp 338
-rw-r--r-- src/video_core/vertex_shader.h 223
25 files changed, 928 insertions, 542 deletions
diff --git a/src/citra_qt/debugger/callstack.cpp b/src/citra_qt/debugger/callstack.cpp
index 895851be3..a9ec2f7fe 100644
--- a/src/citra_qt/debugger/callstack.cpp
+++ b/src/citra_qt/debugger/callstack.cpp
@@ -27,10 +27,10 @@ void CallstackWidget::OnCPUStepped()
27 ARM_Interface* app_core = Core::g_app_core; 27 ARM_Interface* app_core = Core::g_app_core;
28 28
29 u32 sp = app_core->GetReg(13); //stack pointer 29 u32 sp = app_core->GetReg(13); //stack pointer
30 u32 addr, ret_addr, call_addr, func_addr; 30 u32 ret_addr, call_addr, func_addr;
31 31
32 int counter = 0; 32 int counter = 0;
33 for (int addr = 0x10000000; addr >= sp; addr -= 4) 33 for (u32 addr = 0x10000000; addr >= sp; addr -= 4)
34 { 34 {
35 ret_addr = Memory::Read32(addr); 35 ret_addr = Memory::Read32(addr);
36 call_addr = ret_addr - 4; //get call address??? 36 call_addr = ret_addr - 4; //get call address???
diff --git a/src/citra_qt/debugger/graphics_breakpoints.cpp b/src/citra_qt/debugger/graphics_breakpoints.cpp
index 53394b6e6..4cb41db22 100644
--- a/src/citra_qt/debugger/graphics_breakpoints.cpp
+++ b/src/citra_qt/debugger/graphics_breakpoints.cpp
@@ -39,15 +39,17 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const
39 switch (index.column()) { 39 switch (index.column()) {
40 case 0: 40 case 0:
41 { 41 {
42 std::map<Pica::DebugContext::Event, QString> map; 42 static const std::map<Pica::DebugContext::Event, QString> map = {
43 map.insert({Pica::DebugContext::Event::CommandLoaded, tr("Pica command loaded")}); 43 { Pica::DebugContext::Event::CommandLoaded, tr("Pica command loaded") },
44 map.insert({Pica::DebugContext::Event::CommandProcessed, tr("Pica command processed")}); 44 { Pica::DebugContext::Event::CommandProcessed, tr("Pica command processed") },
45 map.insert({Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch")}); 45 { Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") },
46 map.insert({Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch")}); 46 { Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") },
47 { Pica::DebugContext::Event::VertexLoaded, tr("Vertex Loaded") }
48 };
47 49
48 _dbg_assert_(Debug_GPU, map.size() == static_cast<size_t>(Pica::DebugContext::Event::NumEvents)); 50 _dbg_assert_(Debug_GPU, map.size() == static_cast<size_t>(Pica::DebugContext::Event::NumEvents));
49 51
50 return map[event]; 52 return (map.find(event) != map.end()) ? map.at(event) : QString();
51 } 53 }
52 54
53 case 1: 55 case 1:
diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp
index 7f97cf143..95187e54d 100644
--- a/src/citra_qt/debugger/graphics_cmdlists.cpp
+++ b/src/citra_qt/debugger/graphics_cmdlists.cpp
@@ -24,7 +24,7 @@ QImage LoadTexture(u8* src, const Pica::DebugUtils::TextureInfo& info) {
24 QImage decoded_image(info.width, info.height, QImage::Format_ARGB32); 24 QImage decoded_image(info.width, info.height, QImage::Format_ARGB32);
25 for (int y = 0; y < info.height; ++y) { 25 for (int y = 0; y < info.height; ++y) {
26 for (int x = 0; x < info.width; ++x) { 26 for (int x = 0; x < info.width; ++x) {
27 Math::Vec4<u8> color = Pica::DebugUtils::LookupTexture(src, x, y, info); 27 Math::Vec4<u8> color = Pica::DebugUtils::LookupTexture(src, x, y, info, true);
28 decoded_image.setPixel(x, y, qRgba(color.r(), color.g(), color.b(), color.a())); 28 decoded_image.setPixel(x, y, qRgba(color.r(), color.g(), color.b(), color.a()));
29 } 29 }
30 } 30 }
@@ -47,7 +47,7 @@ public:
47}; 47};
48 48
49TextureInfoDockWidget::TextureInfoDockWidget(const Pica::DebugUtils::TextureInfo& info, QWidget* parent) 49TextureInfoDockWidget::TextureInfoDockWidget(const Pica::DebugUtils::TextureInfo& info, QWidget* parent)
50 : QDockWidget(tr("Texture 0x%1").arg(info.address, 8, 16, QLatin1Char('0'))), 50 : QDockWidget(tr("Texture 0x%1").arg(info.physical_address, 8, 16, QLatin1Char('0'))),
51 info(info) { 51 info(info) {
52 52
53 QWidget* main_widget = new QWidget; 53 QWidget* main_widget = new QWidget;
@@ -60,7 +60,7 @@ TextureInfoDockWidget::TextureInfoDockWidget(const Pica::DebugUtils::TextureInfo
60 phys_address_spinbox->SetBase(16); 60 phys_address_spinbox->SetBase(16);
61 phys_address_spinbox->SetRange(0, 0xFFFFFFFF); 61 phys_address_spinbox->SetRange(0, 0xFFFFFFFF);
62 phys_address_spinbox->SetPrefix("0x"); 62 phys_address_spinbox->SetPrefix("0x");
63 phys_address_spinbox->SetValue(info.address); 63 phys_address_spinbox->SetValue(info.physical_address);
64 connect(phys_address_spinbox, SIGNAL(ValueChanged(qint64)), this, SLOT(OnAddressChanged(qint64))); 64 connect(phys_address_spinbox, SIGNAL(ValueChanged(qint64)), this, SLOT(OnAddressChanged(qint64)));
65 65
66 QComboBox* format_choice = new QComboBox; 66 QComboBox* format_choice = new QComboBox;
@@ -69,6 +69,13 @@ TextureInfoDockWidget::TextureInfoDockWidget(const Pica::DebugUtils::TextureInfo
69 format_choice->addItem(tr("RGBA5551")); 69 format_choice->addItem(tr("RGBA5551"));
70 format_choice->addItem(tr("RGB565")); 70 format_choice->addItem(tr("RGB565"));
71 format_choice->addItem(tr("RGBA4")); 71 format_choice->addItem(tr("RGBA4"));
72 format_choice->addItem(tr("IA8"));
73 format_choice->addItem(tr("UNK6"));
74 format_choice->addItem(tr("I8"));
75 format_choice->addItem(tr("A8"));
76 format_choice->addItem(tr("IA4"));
77 format_choice->addItem(tr("UNK10"));
78 format_choice->addItem(tr("A4"));
72 format_choice->setCurrentIndex(static_cast<int>(info.format)); 79 format_choice->setCurrentIndex(static_cast<int>(info.format));
73 connect(format_choice, SIGNAL(currentIndexChanged(int)), this, SLOT(OnFormatChanged(int))); 80 connect(format_choice, SIGNAL(currentIndexChanged(int)), this, SLOT(OnFormatChanged(int)));
74 81
@@ -125,7 +132,7 @@ TextureInfoDockWidget::TextureInfoDockWidget(const Pica::DebugUtils::TextureInfo
125} 132}
126 133
127void TextureInfoDockWidget::OnAddressChanged(qint64 value) { 134void TextureInfoDockWidget::OnAddressChanged(qint64 value) {
128 info.address = value; 135 info.physical_address = value;
129 emit UpdatePixmap(ReloadPixmap()); 136 emit UpdatePixmap(ReloadPixmap());
130} 137}
131 138
@@ -150,7 +157,7 @@ void TextureInfoDockWidget::OnStrideChanged(int value) {
150} 157}
151 158
152QPixmap TextureInfoDockWidget::ReloadPixmap() const { 159QPixmap TextureInfoDockWidget::ReloadPixmap() const {
153 u8* src = Memory::GetPointer(info.address); 160 u8* src = Memory::GetPointer(Pica::PAddrToVAddr(info.physical_address));
154 return QPixmap::fromImage(LoadTexture(src, info)); 161 return QPixmap::fromImage(LoadTexture(src, info));
155} 162}
156 163
@@ -223,9 +230,21 @@ void GPUCommandListModel::OnPicaTraceFinished(const Pica::DebugUtils::PicaTrace&
223 230
224void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) { 231void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) {
225 const int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toInt(); 232 const int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toInt();
226 if (COMMAND_IN_RANGE(command_id, texture0)) { 233 if (COMMAND_IN_RANGE(command_id, texture0) ||
227 auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(Pica::registers.texture0, 234 COMMAND_IN_RANGE(command_id, texture1) ||
228 Pica::registers.texture0_format); 235 COMMAND_IN_RANGE(command_id, texture2)) {
236
237 unsigned index;
238 if (COMMAND_IN_RANGE(command_id, texture0)) {
239 index = 0;
240 } else if (COMMAND_IN_RANGE(command_id, texture1)) {
241 index = 1;
242 } else {
243 index = 2;
244 }
245 auto config = Pica::registers.GetTextures()[index].config;
246 auto format = Pica::registers.GetTextures()[index].format;
247 auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format);
229 248
230 // TODO: Instead, emit a signal here to be caught by the main window widget. 249 // TODO: Instead, emit a signal here to be caught by the main window widget.
231 auto main_window = static_cast<QMainWindow*>(parent()); 250 auto main_window = static_cast<QMainWindow*>(parent());
@@ -237,10 +256,23 @@ void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) {
237 QWidget* new_info_widget; 256 QWidget* new_info_widget;
238 257
239 const int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toInt(); 258 const int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toInt();
240 if (COMMAND_IN_RANGE(command_id, texture0)) { 259 if (COMMAND_IN_RANGE(command_id, texture0) ||
241 u8* src = Memory::GetPointer(Pica::registers.texture0.GetPhysicalAddress()); 260 COMMAND_IN_RANGE(command_id, texture1) ||
242 auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(Pica::registers.texture0, 261 COMMAND_IN_RANGE(command_id, texture2)) {
243 Pica::registers.texture0_format); 262
263 unsigned index;
264 if (COMMAND_IN_RANGE(command_id, texture0)) {
265 index = 0;
266 } else if (COMMAND_IN_RANGE(command_id, texture1)) {
267 index = 1;
268 } else {
269 index = 2;
270 }
271 auto config = Pica::registers.GetTextures()[index].config;
272 auto format = Pica::registers.GetTextures()[index].format;
273
274 auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format);
275 u8* src = Memory::GetPointer(Pica::PAddrToVAddr(config.GetPhysicalAddress()));
244 new_info_widget = new TextureInfoWidget(src, info); 276 new_info_widget = new TextureInfoWidget(src, info);
245 } else { 277 } else {
246 new_info_widget = new QWidget; 278 new_info_widget = new QWidget;
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index ac47f298d..484be1db5 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -125,7 +125,8 @@ GraphicsFramebufferWidget::GraphicsFramebufferWidget(std::shared_ptr<Pica::Debug
125 setWidget(main_widget); 125 setWidget(main_widget);
126 126
127 // Load current data - TODO: Make sure this works when emulation is not running 127 // Load current data - TODO: Make sure this works when emulation is not running
128 emit Update(); 128 if (debug_context && debug_context->at_breakpoint)
129 emit Update();
129 widget()->setEnabled(false); // TODO: Only enable if currently at breakpoint 130 widget()->setEnabled(false); // TODO: Only enable if currently at breakpoint
130} 131}
131 132
@@ -198,7 +199,7 @@ void GraphicsFramebufferWidget::OnUpdate()
198 auto framebuffer = Pica::registers.framebuffer; 199 auto framebuffer = Pica::registers.framebuffer;
199 using Framebuffer = decltype(framebuffer); 200 using Framebuffer = decltype(framebuffer);
200 201
201 framebuffer_address = framebuffer.GetColorBufferAddress(); 202 framebuffer_address = framebuffer.GetColorBufferPhysicalAddress();
202 framebuffer_width = framebuffer.GetWidth(); 203 framebuffer_width = framebuffer.GetWidth();
203 framebuffer_height = framebuffer.GetHeight(); 204 framebuffer_height = framebuffer.GetHeight();
204 framebuffer_format = static_cast<Format>(framebuffer.color_format); 205 framebuffer_format = static_cast<Format>(framebuffer.color_format);
@@ -223,9 +224,9 @@ void GraphicsFramebufferWidget::OnUpdate()
223 case Format::RGBA8: 224 case Format::RGBA8:
224 { 225 {
225 QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32); 226 QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32);
226 u32* color_buffer = (u32*)Memory::GetPointer(framebuffer_address); 227 u32* color_buffer = (u32*)Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
227 for (int y = 0; y < framebuffer_height; ++y) { 228 for (unsigned y = 0; y < framebuffer_height; ++y) {
228 for (int x = 0; x < framebuffer_width; ++x) { 229 for (unsigned x = 0; x < framebuffer_width; ++x) {
229 u32 value = *(color_buffer + x + y * framebuffer_width); 230 u32 value = *(color_buffer + x + y * framebuffer_width);
230 231
231 decoded_image.setPixel(x, y, qRgba((value >> 16) & 0xFF, (value >> 8) & 0xFF, value & 0xFF, 255/*value >> 24*/)); 232 decoded_image.setPixel(x, y, qRgba((value >> 16) & 0xFF, (value >> 8) & 0xFF, value & 0xFF, 255/*value >> 24*/));
@@ -238,9 +239,9 @@ void GraphicsFramebufferWidget::OnUpdate()
238 case Format::RGB8: 239 case Format::RGB8:
239 { 240 {
240 QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32); 241 QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32);
241 u8* color_buffer = Memory::GetPointer(framebuffer_address); 242 u8* color_buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
242 for (int y = 0; y < framebuffer_height; ++y) { 243 for (unsigned y = 0; y < framebuffer_height; ++y) {
243 for (int x = 0; x < framebuffer_width; ++x) { 244 for (unsigned x = 0; x < framebuffer_width; ++x) {
244 u8* pixel_pointer = color_buffer + x * 3 + y * 3 * framebuffer_width; 245 u8* pixel_pointer = color_buffer + x * 3 + y * 3 * framebuffer_width;
245 246
246 decoded_image.setPixel(x, y, qRgba(pixel_pointer[0], pixel_pointer[1], pixel_pointer[2], 255/*value >> 24*/)); 247 decoded_image.setPixel(x, y, qRgba(pixel_pointer[0], pixel_pointer[1], pixel_pointer[2], 255/*value >> 24*/));
@@ -253,9 +254,9 @@ void GraphicsFramebufferWidget::OnUpdate()
253 case Format::RGBA5551: 254 case Format::RGBA5551:
254 { 255 {
255 QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32); 256 QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32);
256 u32* color_buffer = (u32*)Memory::GetPointer(framebuffer_address); 257 u32* color_buffer = (u32*)Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
257 for (int y = 0; y < framebuffer_height; ++y) { 258 for (unsigned y = 0; y < framebuffer_height; ++y) {
258 for (int x = 0; x < framebuffer_width; ++x) { 259 for (unsigned x = 0; x < framebuffer_width; ++x) {
259 u16 value = *(u16*)(((u8*)color_buffer) + x * 2 + y * framebuffer_width * 2); 260 u16 value = *(u16*)(((u8*)color_buffer) + x * 2 + y * framebuffer_width * 2);
260 u8 r = (value >> 11) & 0x1F; 261 u8 r = (value >> 11) & 0x1F;
261 u8 g = (value >> 6) & 0x1F; 262 u8 g = (value >> 6) & 0x1F;
diff --git a/src/citra_qt/util/spinbox.cpp b/src/citra_qt/util/spinbox.cpp
index 9672168f5..24ea3a967 100644
--- a/src/citra_qt/util/spinbox.cpp
+++ b/src/citra_qt/util/spinbox.cpp
@@ -238,7 +238,7 @@ QValidator::State CSpinBox::validate(QString& input, int& pos) const
238 if (!prefix.isEmpty() && input.left(prefix.length()) != prefix) 238 if (!prefix.isEmpty() && input.left(prefix.length()) != prefix)
239 return QValidator::Invalid; 239 return QValidator::Invalid;
240 240
241 unsigned strpos = prefix.length(); 241 int strpos = prefix.length();
242 242
243 // Empty "numbers" allowed as intermediate values 243 // Empty "numbers" allowed as intermediate values
244 if (strpos >= input.length() - HasSign() - suffix.length()) 244 if (strpos >= input.length() - HasSign() - suffix.length())
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 9e02210f9..3ec061e63 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -142,7 +142,7 @@ public:
142 142
143 __forceinline BitField& operator=(T val) 143 __forceinline BitField& operator=(T val)
144 { 144 {
145 storage = (storage & ~GetMask()) | (((StorageType)val << position) & GetMask()); 145 Assign(val);
146 return *this; 146 return *this;
147 } 147 }
148 148
@@ -151,6 +151,10 @@ public:
151 return Value(); 151 return Value();
152 } 152 }
153 153
154 __forceinline void Assign(const T& value) {
155 storage = (storage & ~GetMask()) | (((StorageType)value << position) & GetMask());
156 }
157
154 __forceinline T Value() const 158 __forceinline T Value() const
155 { 159 {
156 if (std::numeric_limits<T>::is_signed) 160 if (std::numeric_limits<T>::is_signed)
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index 68012bffd..84b4a38f0 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -1266,6 +1266,13 @@ typedef struct _smla_inst {
1266 unsigned int Rn; 1266 unsigned int Rn;
1267} smla_inst; 1267} smla_inst;
1268 1268
1269typedef struct umaal_inst {
1270 unsigned int Rn;
1271 unsigned int Rm;
1272 unsigned int RdHi;
1273 unsigned int RdLo;
1274} umaal_inst;
1275
1269typedef struct _umlal_inst { 1276typedef struct _umlal_inst {
1270 unsigned int S; 1277 unsigned int S;
1271 unsigned int Rm; 1278 unsigned int Rm;
@@ -3010,7 +3017,26 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(uhaddsubx)(unsigned int inst, int index) { UN
3010ARM_INST_PTR INTERPRETER_TRANSLATE(uhsub16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UHSUB16"); } 3017ARM_INST_PTR INTERPRETER_TRANSLATE(uhsub16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UHSUB16"); }
3011ARM_INST_PTR INTERPRETER_TRANSLATE(uhsub8)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UHSUB8"); } 3018ARM_INST_PTR INTERPRETER_TRANSLATE(uhsub8)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UHSUB8"); }
3012ARM_INST_PTR INTERPRETER_TRANSLATE(uhsubaddx)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UHSUBADDX"); } 3019ARM_INST_PTR INTERPRETER_TRANSLATE(uhsubaddx)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UHSUBADDX"); }
3013ARM_INST_PTR INTERPRETER_TRANSLATE(umaal)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UMAAL"); } 3020ARM_INST_PTR INTERPRETER_TRANSLATE(umaal)(unsigned int inst, int index)
3021{
3022 arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(umaal_inst));
3023 umaal_inst* const inst_cream = (umaal_inst*)inst_base->component;
3024
3025 inst_base->cond = BITS(inst, 28, 31);
3026 inst_base->idx = index;
3027 inst_base->br = NON_BRANCH;
3028 inst_base->load_r15 = 0;
3029
3030 inst_cream->Rm = BITS(inst, 8, 11);
3031 inst_cream->Rn = BITS(inst, 0, 3);
3032 inst_cream->RdLo = BITS(inst, 12, 15);
3033 inst_cream->RdHi = BITS(inst, 16, 19);
3034
3035 if (CHECK_RM || CHECK_RN)
3036 inst_base->load_r15 = 1;
3037
3038 return inst_base;
3039}
3014ARM_INST_PTR INTERPRETER_TRANSLATE(umlal)(unsigned int inst, int index) 3040ARM_INST_PTR INTERPRETER_TRANSLATE(umlal)(unsigned int inst, int index)
3015{ 3041{
3016 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(umlal_inst)); 3042 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(umlal_inst));
@@ -6374,6 +6400,26 @@ unsigned InterpreterMainLoop(ARMul_State* state)
6374 UHSUB8_INST: 6400 UHSUB8_INST:
6375 UHSUBADDX_INST: 6401 UHSUBADDX_INST:
6376 UMAAL_INST: 6402 UMAAL_INST:
6403 {
6404 INC_ICOUNTER;
6405 if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
6406 umaal_inst* const inst_cream = (umaal_inst*)inst_base->component;
6407
6408 const u32 rm = RM;
6409 const u32 rn = RN;
6410 const u32 rd_lo = RDLO;
6411 const u32 rd_hi = RDHI;
6412
6413 const u64 result = (rm * rn) + rd_lo + rd_hi;
6414
6415 RDLO = (result & 0xFFFFFFFF);
6416 RDHI = ((result >> 32) & 0xFFFFFFFF);
6417 }
6418 cpu->Reg[15] += GET_INST_SIZE(cpu);
6419 INC_PC(sizeof(umaal_inst));
6420 FETCH_INST;
6421 GOTO_NEXT_INST;
6422 }
6377 UMLAL_INST: 6423 UMLAL_INST:
6378 { 6424 {
6379 INC_ICOUNTER; 6425 INC_ICOUNTER;
diff --git a/src/core/arm/interpreter/armemu.cpp b/src/core/arm/interpreter/armemu.cpp
index 63cfd03c6..7a319b635 100644
--- a/src/core/arm/interpreter/armemu.cpp
+++ b/src/core/arm/interpreter/armemu.cpp
@@ -5882,8 +5882,10 @@ L_stm_s_takeabort:
5882 printf("Unhandled v6 insn: %08x", BITS(20, 27)); 5882 printf("Unhandled v6 insn: %08x", BITS(20, 27));
5883 } 5883 }
5884 break; 5884 break;
5885 case 0x62: // QSUB16 and QADD16 5885 case 0x62: // QADD16, QASX, QSAX, and QSUB16
5886 if ((instr & 0xFF0) == 0xf70 || (instr & 0xFF0) == 0xf10) { 5886 if ((instr & 0xFF0) == 0xf10 || (instr & 0xFF0) == 0xf30 ||
5887 (instr & 0xFF0) == 0xf50 || (instr & 0xFF0) == 0xf70)
5888 {
5887 const u8 rd_idx = BITS(12, 15); 5889 const u8 rd_idx = BITS(12, 15);
5888 const u8 rn_idx = BITS(16, 19); 5890 const u8 rn_idx = BITS(16, 19);
5889 const u8 rm_idx = BITS(0, 3); 5891 const u8 rm_idx = BITS(0, 3);
@@ -5895,15 +5897,26 @@ L_stm_s_takeabort:
5895 s32 lo_result; 5897 s32 lo_result;
5896 s32 hi_result; 5898 s32 hi_result;
5897 5899
5900 // QADD16
5901 if ((instr & 0xFF0) == 0xf10) {
5902 lo_result = (rn_lo + rm_lo);
5903 hi_result = (rn_hi + rm_hi);
5904 }
5905 // QASX
5906 else if ((instr & 0xFF0) == 0xf30) {
5907 lo_result = (rn_lo - rm_hi);
5908 hi_result = (rn_hi + rm_lo);
5909 }
5910 // QSAX
5911 else if ((instr & 0xFF0) == 0xf50) {
5912 lo_result = (rn_lo + rm_hi);
5913 hi_result = (rn_hi - rm_lo);
5914 }
5898 // QSUB16 5915 // QSUB16
5899 if ((instr & 0xFF0) == 0xf70) { 5916 else {
5900 lo_result = (rn_lo - rm_lo); 5917 lo_result = (rn_lo - rm_lo);
5901 hi_result = (rn_hi - rm_hi); 5918 hi_result = (rn_hi - rm_hi);
5902 } 5919 }
5903 else { // QADD16
5904 lo_result = (rn_lo + rm_lo);
5905 hi_result = (rn_hi + rm_hi);
5906 }
5907 5920
5908 if (lo_result > 0x7FFF) 5921 if (lo_result > 0x7FFF)
5909 lo_result = 0x7FFF; 5922 lo_result = 0x7FFF;
@@ -6078,22 +6091,28 @@ L_stm_s_takeabort:
6078 //ichfly 6091 //ichfly
6079 //SSAT16 6092 //SSAT16
6080 { 6093 {
6081 u8 tar = BITS(12, 15); 6094 const u8 rd_idx = BITS(12, 15);
6082 u8 src = BITS(0, 3); 6095 const u8 rn_idx = BITS(0, 3);
6083 u8 val = BITS(16, 19) + 1; 6096 const u8 num_bits = BITS(16, 19) + 1;
6084 s16 a1 = (state->Reg[src]); 6097 const s16 min = -(0x8000 >> (16 - num_bits));
6085 s16 a2 = (state->Reg[src] >> 0x10); 6098 const s16 max = (0x7FFF >> (16 - num_bits));
6086 s16 min = (s16)(0x8000 >> (16 - val)); 6099 s16 rn_lo = (state->Reg[rn_idx]);
6087 s16 max = 0x7FFF >> (16 - val); 6100 s16 rn_hi = (state->Reg[rn_idx] >> 16);
6088 if (min > a1) a1 = min; 6101
6089 if (max < a1) a1 = max; 6102 if (rn_lo > max)
6090 if (min > a2) a2 = min; 6103 rn_lo = max;
6091 if (max < a2) a2 = max; 6104 else if (rn_lo < min)
6092 u32 temp2 = ((u32)(a2)) << 0x10; 6105 rn_lo = min;
6093 state->Reg[tar] = (a1 & 0xFFFF) | (temp2); 6106
6107 if (rn_hi > max)
6108 rn_hi = max;
6109 else if (rn_hi < min)
6110 rn_hi = min;
6111
6112 state->Reg[rd_idx] = (rn_lo & 0xFFFF) | ((rn_hi & 0xFFFF) << 16);
6113 return 1;
6094 } 6114 }
6095 6115
6096 return 1;
6097 default: 6116 default:
6098 break; 6117 break;
6099 } 6118 }
@@ -6314,11 +6333,14 @@ L_stm_s_takeabort:
6314 } 6333 }
6315 case 0x70: 6334 case 0x70:
6316 // ichfly 6335 // ichfly
6317 // SMUAD, SMUSD, SMLAD 6336 // SMUAD, SMUSD, SMLAD, and SMLSD
6318 if ((instr & 0xf0d0) == 0xf010 || (instr & 0xf0d0) == 0xf050 || (instr & 0xd0) == 0x10) { 6337 if ((instr & 0xf0d0) == 0xf010 || (instr & 0xf0d0) == 0xf050 ||
6338 (instr & 0xd0) == 0x10 || (instr & 0xd0) == 0x50)
6339 {
6319 const u8 rd_idx = BITS(16, 19); 6340 const u8 rd_idx = BITS(16, 19);
6320 const u8 rn_idx = BITS(0, 3); 6341 const u8 rn_idx = BITS(0, 3);
6321 const u8 rm_idx = BITS(8, 11); 6342 const u8 rm_idx = BITS(8, 11);
6343 const u8 ra_idx = BITS(12, 15);
6322 const bool do_swap = (BIT(5) == 1); 6344 const bool do_swap = (BIT(5) == 1);
6323 6345
6324 u32 rm_val = state->Reg[rm_idx]; 6346 u32 rm_val = state->Reg[rm_idx];
@@ -6341,13 +6363,14 @@ L_stm_s_takeabort:
6341 state->Reg[rd_idx] = (rn_lo * rm_lo) - (rn_hi * rm_hi); 6363 state->Reg[rd_idx] = (rn_lo * rm_lo) - (rn_hi * rm_hi);
6342 } 6364 }
6343 // SMLAD 6365 // SMLAD
6344 else { 6366 else if ((instr & 0xd0) == 0x10) {
6345 const u8 ra_idx = BITS(12, 15);
6346 state->Reg[rd_idx] = (rn_lo * rm_lo) + (rn_hi * rm_hi) + (s32)state->Reg[ra_idx]; 6367 state->Reg[rd_idx] = (rn_lo * rm_lo) + (rn_hi * rm_hi) + (s32)state->Reg[ra_idx];
6347 } 6368 }
6369 // SMLSD
6370 else {
6371 state->Reg[rd_idx] = ((rn_lo * rm_lo) - (rn_hi * rm_hi)) + (s32)state->Reg[ra_idx];
6372 }
6348 return 1; 6373 return 1;
6349 } else {
6350 printf ("Unhandled v6 insn: smlsd\n");
6351 } 6374 }
6352 break; 6375 break;
6353 case 0x74: 6376 case 0x74:
@@ -6357,7 +6380,30 @@ L_stm_s_takeabort:
6357 printf ("Unhandled v6 insn: smmla/smmls/smmul\n"); 6380 printf ("Unhandled v6 insn: smmla/smmls/smmul\n");
6358 break; 6381 break;
6359 case 0x78: 6382 case 0x78:
6360 printf ("Unhandled v6 insn: usad/usada8\n"); 6383 if (BITS(20, 24) == 0x18)
6384 {
6385 const u8 rm_idx = BITS(8, 11);
6386 const u8 rn_idx = BITS(0, 3);
6387 const u8 rd_idx = BITS(16, 19);
6388
6389 const u32 rm_val = state->Reg[rm_idx];
6390 const u32 rn_val = state->Reg[rn_idx];
6391
6392 const u8 diff1 = (u8)std::labs((rn_val & 0xFF) - (rm_val & 0xFF));
6393 const u8 diff2 = (u8)std::labs(((rn_val >> 8) & 0xFF) - ((rm_val >> 8) & 0xFF));
6394 const u8 diff3 = (u8)std::labs(((rn_val >> 16) & 0xFF) - ((rm_val >> 16) & 0xFF));
6395 const u8 diff4 = (u8)std::labs(((rn_val >> 24) & 0xFF) - ((rm_val >> 24) & 0xFF));
6396
6397 u32 finalDif = (diff1 + diff2 + diff3 + diff4);
6398
6399 // Op is USADA8 if true.
6400 const u8 ra_idx = BITS(12, 15);
6401 if (ra_idx != 15)
6402 finalDif += state->Reg[ra_idx];
6403
6404 state->Reg[rd_idx] = finalDif;
6405 return 1;
6406 }
6361 break; 6407 break;
6362 case 0x7a: 6408 case 0x7a:
6363 printf ("Unhandled v6 insn: usbfx\n"); 6409 printf ("Unhandled v6 insn: usbfx\n");
diff --git a/src/core/file_sys/archive_backend.h b/src/core/file_sys/archive_backend.h
index 18c314884..d7959b2ca 100644
--- a/src/core/file_sys/archive_backend.h
+++ b/src/core/file_sys/archive_backend.h
@@ -143,7 +143,16 @@ public:
143 case Char: 143 case Char:
144 return std::vector<u8>(string.begin(), string.end()); 144 return std::vector<u8>(string.begin(), string.end());
145 case Wchar: 145 case Wchar:
146 return std::vector<u8>(u16str.begin(), u16str.end()); 146 {
147 // use two u8 for each character of u16str
148 std::vector<u8> to_return(u16str.size() * 2);
149 for (size_t i = 0; i < u16str.size(); ++i) {
150 u16 tmp_char = u16str.at(i);
151 to_return[i*2] = (tmp_char & 0xFF00) >> 8;
152 to_return[i*2 + 1] = (tmp_char & 0x00FF);
153 }
154 return to_return;
155 }
147 case Empty: 156 case Empty:
148 return {}; 157 return {};
149 default: 158 default:
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 7123485be..683fffeee 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -14,6 +14,10 @@ typedef s32 Result;
14 14
15namespace Kernel { 15namespace Kernel {
16 16
17// From kernel.h. Declarations duplicated here to avoid a circular header dependency.
18class Thread;
19Thread* GetCurrentThread();
20
17enum KernelHandle { 21enum KernelHandle {
18 CurrentThread = 0xFFFF8000, 22 CurrentThread = 0xFFFF8000,
19 CurrentProcess = 0xFFFF8001, 23 CurrentProcess = 0xFFFF8001,
@@ -81,6 +85,10 @@ public:
81 85
82 template <class T> 86 template <class T>
83 T* Get(Handle handle) { 87 T* Get(Handle handle) {
88 if (handle == CurrentThread) {
89 return reinterpret_cast<T*>(GetCurrentThread());
90 }
91
84 if (handle < HANDLE_OFFSET || handle >= HANDLE_OFFSET + MAX_COUNT || !occupied[handle - HANDLE_OFFSET]) { 92 if (handle < HANDLE_OFFSET || handle >= HANDLE_OFFSET + MAX_COUNT || !occupied[handle - HANDLE_OFFSET]) {
85 if (handle != 0) { 93 if (handle != 0) {
86 LOG_ERROR(Kernel, "Bad object handle %08x", handle); 94 LOG_ERROR(Kernel, "Bad object handle %08x", handle);
@@ -99,6 +107,10 @@ public:
99 // ONLY use this when you know the handle is valid. 107 // ONLY use this when you know the handle is valid.
100 template <class T> 108 template <class T>
101 T *GetFast(Handle handle) { 109 T *GetFast(Handle handle) {
110 if (handle == CurrentThread) {
111 return reinterpret_cast<T*>(GetCurrentThread());
112 }
113
102 const Handle realHandle = handle - HANDLE_OFFSET; 114 const Handle realHandle = handle - HANDLE_OFFSET;
103 _dbg_assert_(Kernel, realHandle >= 0 && realHandle < MAX_COUNT && occupied[realHandle]); 115 _dbg_assert_(Kernel, realHandle >= 0 && realHandle < MAX_COUNT && occupied[realHandle]);
104 return static_cast<T*>(pool[realHandle]); 116 return static_cast<T*>(pool[realHandle]);
diff --git a/src/core/hle/kernel/semaphore.cpp b/src/core/hle/kernel/semaphore.cpp
index 6f56da8a9..f955d1957 100644
--- a/src/core/hle/kernel/semaphore.cpp
+++ b/src/core/hle/kernel/semaphore.cpp
@@ -20,8 +20,8 @@ public:
20 static Kernel::HandleType GetStaticHandleType() { return Kernel::HandleType::Semaphore; } 20 static Kernel::HandleType GetStaticHandleType() { return Kernel::HandleType::Semaphore; }
21 Kernel::HandleType GetHandleType() const override { return Kernel::HandleType::Semaphore; } 21 Kernel::HandleType GetHandleType() const override { return Kernel::HandleType::Semaphore; }
22 22
23 u32 max_count; ///< Maximum number of simultaneous holders the semaphore can have 23 s32 max_count; ///< Maximum number of simultaneous holders the semaphore can have
24 u32 available_count; ///< Number of free slots left in the semaphore 24 s32 available_count; ///< Number of free slots left in the semaphore
25 std::queue<Handle> waiting_threads; ///< Threads that are waiting for the semaphore 25 std::queue<Handle> waiting_threads; ///< Threads that are waiting for the semaphore
26 std::string name; ///< Name of semaphore (optional) 26 std::string name; ///< Name of semaphore (optional)
27 27
@@ -49,8 +49,8 @@ public:
49 49
50//////////////////////////////////////////////////////////////////////////////////////////////////// 50////////////////////////////////////////////////////////////////////////////////////////////////////
51 51
52ResultCode CreateSemaphore(Handle* handle, u32 initial_count, 52ResultCode CreateSemaphore(Handle* handle, s32 initial_count,
53 u32 max_count, const std::string& name) { 53 s32 max_count, const std::string& name) {
54 54
55 if (initial_count > max_count) 55 if (initial_count > max_count)
56 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::Kernel, 56 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::Kernel,
diff --git a/src/core/hle/kernel/semaphore.h b/src/core/hle/kernel/semaphore.h
index f0075fdb8..ad474b875 100644
--- a/src/core/hle/kernel/semaphore.h
+++ b/src/core/hle/kernel/semaphore.h
@@ -18,7 +18,7 @@ namespace Kernel {
18 * @param name Optional name of semaphore 18 * @param name Optional name of semaphore
19 * @return ResultCode of the error 19 * @return ResultCode of the error
20 */ 20 */
21ResultCode CreateSemaphore(Handle* handle, u32 initial_count, u32 max_count, const std::string& name = "Unknown"); 21ResultCode CreateSemaphore(Handle* handle, s32 initial_count, s32 max_count, const std::string& name = "Unknown");
22 22
23/** 23/**
24 * Releases a certain number of slots from a semaphore. 24 * Releases a certain number of slots from a semaphore.
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 1c04701de..47be22653 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -83,8 +83,7 @@ static Thread* current_thread;
83static const u32 INITIAL_THREAD_ID = 1; ///< The first available thread id at startup 83static const u32 INITIAL_THREAD_ID = 1; ///< The first available thread id at startup
84static u32 next_thread_id; ///< The next available thread id 84static u32 next_thread_id; ///< The next available thread id
85 85
86/// Gets the current thread 86Thread* GetCurrentThread() {
87inline Thread* GetCurrentThread() {
88 return current_thread; 87 return current_thread;
89} 88}
90 89
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index be7adface..ec3b887d4 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -78,6 +78,9 @@ Handle ArbitrateHighestPriorityThread(u32 arbiter, u32 address);
78/// Arbitrate all threads currently waiting... 78/// Arbitrate all threads currently waiting...
79void ArbitrateAllThreads(u32 arbiter, u32 address); 79void ArbitrateAllThreads(u32 arbiter, u32 address);
80 80
81/// Gets the current thread
82Thread* GetCurrentThread();
83
81/// Gets the current thread handle 84/// Gets the current thread handle
82Handle GetCurrentThreadHandle(); 85Handle GetCurrentThreadHandle();
83 86
diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp
index 0437e5374..3d84fc5da 100644
--- a/src/core/loader/3dsx.cpp
+++ b/src/core/loader/3dsx.cpp
@@ -223,9 +223,7 @@ int THREEDSXReader::Load3DSXFile(const std::string& filename, u32 base_addr)
223 LOG_INFO(Loader, "Loading 3DSX file %s...", filename.c_str()); 223 LOG_INFO(Loader, "Loading 3DSX file %s...", filename.c_str());
224 FileUtil::IOFile file(filename, "rb"); 224 FileUtil::IOFile file(filename, "rb");
225 if (file.IsOpen()) { 225 if (file.IsOpen()) {
226 226 THREEDSXReader::Load3DSXFile(filename, 0x00100000);
227 THREEDSXReader reader;
228 reader.Load3DSXFile(filename, 0x00100000);
229 Kernel::LoadExec(0x00100000); 227 Kernel::LoadExec(0x00100000);
230 } else { 228 } else {
231 return ResultStatus::Error; 229 return ResultStatus::Error;
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index b74cd3261..9b8ecf8e3 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -56,10 +56,11 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
56 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); 56 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
57 57
58 const auto& attribute_config = registers.vertex_attributes; 58 const auto& attribute_config = registers.vertex_attributes;
59 const u8* const base_address = Memory::GetPointer(attribute_config.GetBaseAddress()); 59 const u32 base_address = attribute_config.GetPhysicalBaseAddress();
60 60
61 // Information about internal vertex attributes 61 // Information about internal vertex attributes
62 const u8* vertex_attribute_sources[16]; 62 u32 vertex_attribute_sources[16];
63 std::fill(vertex_attribute_sources, &vertex_attribute_sources[16], 0xdeadbeef);
63 u32 vertex_attribute_strides[16]; 64 u32 vertex_attribute_strides[16];
64 u32 vertex_attribute_formats[16]; 65 u32 vertex_attribute_formats[16];
65 u32 vertex_attribute_elements[16]; 66 u32 vertex_attribute_elements[16];
@@ -69,7 +70,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
69 for (int loader = 0; loader < 12; ++loader) { 70 for (int loader = 0; loader < 12; ++loader) {
70 const auto& loader_config = attribute_config.attribute_loaders[loader]; 71 const auto& loader_config = attribute_config.attribute_loaders[loader];
71 72
72 const u8* load_address = base_address + loader_config.data_offset; 73 u32 load_address = base_address + loader_config.data_offset;
73 74
74 // TODO: What happens if a loader overwrites a previous one's data? 75 // TODO: What happens if a loader overwrites a previous one's data?
75 for (unsigned component = 0; component < loader_config.component_count; ++component) { 76 for (unsigned component = 0; component < loader_config.component_count; ++component) {
@@ -87,7 +88,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
87 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); 88 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
88 89
89 const auto& index_info = registers.index_array; 90 const auto& index_info = registers.index_array;
90 const u8* index_address_8 = (u8*)base_address + index_info.offset; 91 const u8* index_address_8 = Memory::GetPointer(PAddrToVAddr(base_address + index_info.offset));
91 const u16* index_address_16 = (u16*)index_address_8; 92 const u16* index_address_16 = (u16*)index_address_8;
92 bool index_u16 = (bool)index_info.format; 93 bool index_u16 = (bool)index_info.format;
93 94
@@ -108,7 +109,14 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
108 109
109 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { 110 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
110 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { 111 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
111 const u8* srcdata = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; 112 const u8* srcdata = Memory::GetPointer(PAddrToVAddr(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]));
113
114 // TODO(neobrain): Ocarina of Time 3D has GetNumTotalAttributes return 8,
115 // yet only provides 2 valid source data addresses. Need to figure out
116 // what's wrong there, until then we just continue when address lookup fails
117 if (srcdata == nullptr)
118 continue;
119
112 const float srcval = (vertex_attribute_formats[i] == 0) ? *(s8*)srcdata : 120 const float srcval = (vertex_attribute_formats[i] == 0) ? *(s8*)srcdata :
113 (vertex_attribute_formats[i] == 1) ? *(u8*)srcdata : 121 (vertex_attribute_formats[i] == 1) ? *(u8*)srcdata :
114 (vertex_attribute_formats[i] == 2) ? *(s16*)srcdata : 122 (vertex_attribute_formats[i] == 2) ? *(s16*)srcdata :
@@ -116,13 +124,16 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
116 input.attr[i][comp] = float24::FromFloat32(srcval); 124 input.attr[i][comp] = float24::FromFloat32(srcval);
117 LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f", 125 LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f",
118 comp, i, vertex, index, 126 comp, i, vertex, index,
119 attribute_config.GetBaseAddress(), 127 attribute_config.GetPhysicalBaseAddress(),
120 vertex_attribute_sources[i] - base_address, 128 vertex_attribute_sources[i] - base_address,
121 srcdata - vertex_attribute_sources[i], 129 vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
122 input.attr[i][comp].ToFloat32()); 130 input.attr[i][comp].ToFloat32());
123 } 131 }
124 } 132 }
125 133
134 if (g_debug_context)
135 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
136
126 // NOTE: When dumping geometry, we simply assume that the first input attribute 137 // NOTE: When dumping geometry, we simply assume that the first input attribute
127 // corresponds to the position for now. 138 // corresponds to the position for now.
128 DebugUtils::GeometryDumper::Vertex dumped_vertex = { 139 DebugUtils::GeometryDumper::Vertex dumped_vertex = {
@@ -151,6 +162,12 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
151 break; 162 break;
152 } 163 }
153 164
165 case PICA_REG_INDEX(vs_bool_uniforms):
166 for (unsigned i = 0; i < 16; ++i)
167 VertexShader::GetBoolUniform(i) = (registers.vs_bool_uniforms.Value() & (1 << i));
168
169 break;
170
154 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1): 171 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1):
155 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2): 172 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2):
156 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3): 173 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3):
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 1a20f19ec..328386b7e 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -14,6 +14,8 @@
14#include <png.h> 14#include <png.h>
15#endif 15#endif
16 16
17#include <nihstro/shader_binary.h>
18
17#include "common/log.h" 19#include "common/log.h"
18#include "common/file_util.h" 20#include "common/file_util.h"
19 21
@@ -22,6 +24,10 @@
22 24
23#include "debug_utils.h" 25#include "debug_utils.h"
24 26
27using nihstro::DVLBHeader;
28using nihstro::DVLEHeader;
29using nihstro::DVLPHeader;
30
25namespace Pica { 31namespace Pica {
26 32
27void DebugContext::OnEvent(Event event, void* data) { 33void DebugContext::OnEvent(Event event, void* data) {
@@ -98,65 +104,6 @@ void GeometryDumper::Dump() {
98 } 104 }
99} 105}
100 106
101#pragma pack(1)
102struct DVLBHeader {
103 enum : u32 {
104 MAGIC_WORD = 0x424C5644, // "DVLB"
105 };
106
107 u32 magic_word;
108 u32 num_programs;
109// u32 dvle_offset_table[];
110};
111static_assert(sizeof(DVLBHeader) == 0x8, "Incorrect structure size");
112
113struct DVLPHeader {
114 enum : u32 {
115 MAGIC_WORD = 0x504C5644, // "DVLP"
116 };
117
118 u32 magic_word;
119 u32 version;
120 u32 binary_offset; // relative to DVLP start
121 u32 binary_size_words;
122 u32 swizzle_patterns_offset;
123 u32 swizzle_patterns_num_entries;
124 u32 unk2;
125};
126static_assert(sizeof(DVLPHeader) == 0x1C, "Incorrect structure size");
127
128struct DVLEHeader {
129 enum : u32 {
130 MAGIC_WORD = 0x454c5644, // "DVLE"
131 };
132
133 enum class ShaderType : u8 {
134 VERTEX = 0,
135 GEOMETRY = 1,
136 };
137
138 u32 magic_word;
139 u16 pad1;
140 ShaderType type;
141 u8 pad2;
142 u32 main_offset_words; // offset within binary blob
143 u32 endmain_offset_words;
144 u32 pad3;
145 u32 pad4;
146 u32 constant_table_offset;
147 u32 constant_table_size; // number of entries
148 u32 label_table_offset;
149 u32 label_table_size;
150 u32 output_register_table_offset;
151 u32 output_register_table_size;
152 u32 uniform_table_offset;
153 u32 uniform_table_size;
154 u32 symbol_table_offset;
155 u32 symbol_table_size;
156
157};
158static_assert(sizeof(DVLEHeader) == 0x40, "Incorrect structure size");
159#pragma pack()
160 107
161void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, 108void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size,
162 u32 main_offset, const Regs::VSOutputAttributes* output_attributes) 109 u32 main_offset, const Regs::VSOutputAttributes* output_attributes)
@@ -276,8 +223,8 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data
276 dvlp.binary_size_words = binary_size; 223 dvlp.binary_size_words = binary_size;
277 QueueForWriting((u8*)binary_data, binary_size * sizeof(u32)); 224 QueueForWriting((u8*)binary_data, binary_size * sizeof(u32));
278 225
279 dvlp.swizzle_patterns_offset = write_offset - dvlp_offset; 226 dvlp.swizzle_info_offset = write_offset - dvlp_offset;
280 dvlp.swizzle_patterns_num_entries = swizzle_size; 227 dvlp.swizzle_info_num_entries = swizzle_size;
281 u32 dummy = 0; 228 u32 dummy = 0;
282 for (unsigned int i = 0; i < swizzle_size; ++i) { 229 for (unsigned int i = 0; i < swizzle_size; ++i) {
283 QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i])); 230 QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i]));
@@ -356,10 +303,29 @@ std::unique_ptr<PicaTrace> FinishPicaTracing()
356 return std::move(ret); 303 return std::move(ret);
357} 304}
358 305
359const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info) { 306const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) {
360 _dbg_assert_(Debug_GPU, info.format == Pica::Regs::TextureFormat::RGB8); 307
361 308 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
362 // Cf. rasterizer code for an explanation of this algorithm. 309 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
310 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
311 // texels are laid out in a 2x2 subtile like this:
312 // 2 3
313 // 0 1
314 //
315 // The full 8x8 tile has the texels arranged like this:
316 //
317 // 42 43 46 47 58 59 62 63
318 // 40 41 44 45 56 57 60 61
319 // 34 35 38 39 50 51 54 55
320 // 32 33 36 37 48 49 52 53
321 // 10 11 14 15 26 27 30 31
322 // 08 09 12 13 24 25 28 29
323 // 02 03 06 07 18 19 22 23
324 // 00 01 04 05 16 17 20 21
325
326 // TODO(neobrain): Not sure if this swizzling pattern is used for all textures.
327 // To be flexible in case different but similar patterns are used, we keep this
328 // somewhat inefficient code around for now.
363 int texel_index_within_tile = 0; 329 int texel_index_within_tile = 0;
364 for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { 330 for (int block_size_index = 0; block_size_index < 3; ++block_size_index) {
365 int sub_tile_width = 1 << block_size_index; 331 int sub_tile_width = 1 << block_size_index;
@@ -376,19 +342,134 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
376 int coarse_x = (x / block_width) * block_width; 342 int coarse_x = (x / block_width) * block_width;
377 int coarse_y = (y / block_height) * block_height; 343 int coarse_y = (y / block_height) * block_height;
378 344
379 const u8* source_ptr = source + coarse_x * block_height * 3 + coarse_y * info.stride + texel_index_within_tile * 3; 345 switch (info.format) {
380 return { source_ptr[2], source_ptr[1], source_ptr[0], 255 }; 346 case Regs::TextureFormat::RGBA8:
347 {
348 const u8* source_ptr = source + coarse_x * block_height * 4 + coarse_y * info.stride + texel_index_within_tile * 4;
349 return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] };
350 }
351
352 case Regs::TextureFormat::RGB8:
353 {
354 const u8* source_ptr = source + coarse_x * block_height * 3 + coarse_y * info.stride + texel_index_within_tile * 3;
355 return { source_ptr[2], source_ptr[1], source_ptr[0], 255 };
356 }
357
358 case Regs::TextureFormat::RGBA5551:
359 {
360 const u16 source_ptr = *(const u16*)(source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2);
361 u8 r = (source_ptr >> 11) & 0x1F;
362 u8 g = ((source_ptr) >> 6) & 0x1F;
363 u8 b = (source_ptr >> 1) & 0x1F;
364 u8 a = source_ptr & 1;
365 return Math::MakeVec<u8>((r << 3) | (r >> 2), (g << 3) | (g >> 2), (b << 3) | (b >> 2), disable_alpha ? 255 : (a * 255));
366 }
367
368 case Regs::TextureFormat::RGB565:
369 {
370 const u16 source_ptr = *(const u16*)(source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2);
371 u8 r = (source_ptr >> 11) & 0x1F;
372 u8 g = ((source_ptr) >> 5) & 0x3F;
373 u8 b = (source_ptr) & 0x1F;
374 return Math::MakeVec<u8>((r << 3) | (r >> 2), (g << 2) | (g >> 4), (b << 3) | (b >> 2), 255);
375 }
376
377 case Regs::TextureFormat::RGBA4:
378 {
379 const u8* source_ptr = source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2;
380 u8 r = source_ptr[1] >> 4;
381 u8 g = source_ptr[1] & 0xFF;
382 u8 b = source_ptr[0] >> 4;
383 u8 a = source_ptr[0] & 0xFF;
384 r = (r << 4) | r;
385 g = (g << 4) | g;
386 b = (b << 4) | b;
387 a = (a << 4) | a;
388 return { r, g, b, disable_alpha ? (u8)255 : a };
389 }
390
391 case Regs::TextureFormat::IA8:
392 {
393 const u8* source_ptr = source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2;
394
395 // TODO: component order not verified
396
397 if (disable_alpha) {
398 // Show intensity as red, alpha as green
399 return { source_ptr[0], source_ptr[1], 0, 255 };
400 } else {
401 return { source_ptr[0], source_ptr[0], source_ptr[0], source_ptr[1]};
402 }
403 }
404
405 case Regs::TextureFormat::I8:
406 {
407 const u8* source_ptr = source + coarse_x * block_height + coarse_y * info.stride + texel_index_within_tile;
408 return { *source_ptr, *source_ptr, *source_ptr, 255 };
409 }
410
411 case Regs::TextureFormat::A8:
412 {
413 const u8* source_ptr = source + coarse_x * block_height + coarse_y * info.stride + texel_index_within_tile;
414
415 if (disable_alpha) {
416 return { *source_ptr, *source_ptr, *source_ptr, 255 };
417 } else {
418 return { 0, 0, 0, *source_ptr };
419 }
420 }
421
422 case Regs::TextureFormat::IA4:
423 {
424 const u8* source_ptr = source + coarse_x * block_height / 2 + coarse_y * info.stride + texel_index_within_tile / 2;
425
426 // TODO: component order not verified
427
428 u8 i = (*source_ptr) & 0xF;
429 u8 a = ((*source_ptr) & 0xF0) >> 4;
430 a |= a << 4;
431 i |= i << 4;
432
433 if (disable_alpha) {
434 // Show intensity as red, alpha as green
435 return { i, a, 0, 255 };
436 } else {
437 return { i, i, i, a };
438 }
439 }
440
441 case Regs::TextureFormat::A4:
442 {
443 const u8* source_ptr = source + coarse_x * block_height / 2 + coarse_y * info.stride + texel_index_within_tile / 2;
444
445 // TODO: component order not verified
446
447 u8 a = (coarse_x % 2) ? ((*source_ptr)&0xF) : (((*source_ptr) & 0xF0) >> 4);
448 a |= a << 4;
449
450 if (disable_alpha) {
451 return { *source_ptr, *source_ptr, *source_ptr, 255 };
452 } else {
453 return { 0, 0, 0, *source_ptr };
454 }
455 }
456
457 default:
458 LOG_ERROR(HW_GPU, "Unknown texture format: %x", (u32)info.format);
459 _dbg_assert_(HW_GPU, 0);
460 return {};
461 }
381} 462}
382 463
383TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config, 464TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config,
384 const Regs::TextureFormat& format) 465 const Regs::TextureFormat& format)
385{ 466{
386 TextureInfo info; 467 TextureInfo info;
387 info.address = config.GetPhysicalAddress(); 468 info.physical_address = config.GetPhysicalAddress();
388 info.width = config.width; 469 info.width = config.width;
389 info.height = config.height; 470 info.height = config.height;
390 info.format = format; 471 info.format = format;
391 info.stride = Pica::Regs::BytesPerPixel(info.format) * info.width; 472 info.stride = Pica::Regs::NibblesPerPixel(info.format) * info.width / 2;
392 return info; 473 return info;
393} 474}
394 475
@@ -499,26 +580,32 @@ void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages)
499 for (size_t index = 0; index < stages.size(); ++index) { 580 for (size_t index = 0; index < stages.size(); ++index) {
500 const auto& tev_stage = stages[index]; 581 const auto& tev_stage = stages[index];
501 582
502 const std::map<Source, std::string> source_map = { 583 static const std::map<Source, std::string> source_map = {
503 { Source::PrimaryColor, "PrimaryColor" }, 584 { Source::PrimaryColor, "PrimaryColor" },
504 { Source::Texture0, "Texture0" }, 585 { Source::Texture0, "Texture0" },
586 { Source::Texture1, "Texture1" },
587 { Source::Texture2, "Texture2" },
505 { Source::Constant, "Constant" }, 588 { Source::Constant, "Constant" },
506 { Source::Previous, "Previous" }, 589 { Source::Previous, "Previous" },
507 }; 590 };
508 591
509 const std::map<ColorModifier, std::string> color_modifier_map = { 592 static const std::map<ColorModifier, std::string> color_modifier_map = {
510 { ColorModifier::SourceColor, { "%source.rgb" } } 593 { ColorModifier::SourceColor, { "%source.rgb" } },
594 { ColorModifier::SourceAlpha, { "%source.aaa" } },
511 }; 595 };
512 const std::map<AlphaModifier, std::string> alpha_modifier_map = { 596 static const std::map<AlphaModifier, std::string> alpha_modifier_map = {
513 { AlphaModifier::SourceAlpha, "%source.a" } 597 { AlphaModifier::SourceAlpha, "%source.a" },
598 { AlphaModifier::OneMinusSourceAlpha, "(255 - %source.a)" },
514 }; 599 };
515 600
516 std::map<Operation, std::string> combiner_map = { 601 static const std::map<Operation, std::string> combiner_map = {
517 { Operation::Replace, "%source1" }, 602 { Operation::Replace, "%source1" },
518 { Operation::Modulate, "(%source1 * %source2) / 255" }, 603 { Operation::Modulate, "(%source1 * %source2) / 255" },
604 { Operation::Add, "(%source1 + %source2)" },
605 { Operation::Lerp, "lerp(%source1, %source2, %source3)" },
519 }; 606 };
520 607
521 auto ReplacePattern = 608 static auto ReplacePattern =
522 [](const std::string& input, const std::string& pattern, const std::string& replacement) -> std::string { 609 [](const std::string& input, const std::string& pattern, const std::string& replacement) -> std::string {
523 size_t start = input.find(pattern); 610 size_t start = input.find(pattern);
524 if (start == std::string::npos) 611 if (start == std::string::npos)
@@ -528,8 +615,8 @@ void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages)
528 ret.replace(start, pattern.length(), replacement); 615 ret.replace(start, pattern.length(), replacement);
529 return ret; 616 return ret;
530 }; 617 };
531 auto GetColorSourceStr = 618 static auto GetColorSourceStr =
532 [&source_map,&color_modifier_map,&ReplacePattern](const Source& src, const ColorModifier& modifier) { 619 [](const Source& src, const ColorModifier& modifier) {
533 auto src_it = source_map.find(src); 620 auto src_it = source_map.find(src);
534 std::string src_str = "Unknown"; 621 std::string src_str = "Unknown";
535 if (src_it != source_map.end()) 622 if (src_it != source_map.end())
@@ -542,8 +629,8 @@ void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages)
542 629
543 return ReplacePattern(modifier_str, "%source", src_str); 630 return ReplacePattern(modifier_str, "%source", src_str);
544 }; 631 };
545 auto GetColorCombinerStr = 632 static auto GetColorCombinerStr =
546 [&](const Regs::TevStageConfig& tev_stage) { 633 [](const Regs::TevStageConfig& tev_stage) {
547 auto op_it = combiner_map.find(tev_stage.color_op); 634 auto op_it = combiner_map.find(tev_stage.color_op);
548 std::string op_str = "Unknown op (%source1, %source2, %source3)"; 635 std::string op_str = "Unknown op (%source1, %source2, %source3)";
549 if (op_it != combiner_map.end()) 636 if (op_it != combiner_map.end())
@@ -553,8 +640,8 @@ void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages)
553 op_str = ReplacePattern(op_str, "%source2", GetColorSourceStr(tev_stage.color_source2, tev_stage.color_modifier2)); 640 op_str = ReplacePattern(op_str, "%source2", GetColorSourceStr(tev_stage.color_source2, tev_stage.color_modifier2));
554 return ReplacePattern(op_str, "%source3", GetColorSourceStr(tev_stage.color_source3, tev_stage.color_modifier3)); 641 return ReplacePattern(op_str, "%source3", GetColorSourceStr(tev_stage.color_source3, tev_stage.color_modifier3));
555 }; 642 };
556 auto GetAlphaSourceStr = 643 static auto GetAlphaSourceStr =
557 [&source_map,&alpha_modifier_map,&ReplacePattern](const Source& src, const AlphaModifier& modifier) { 644 [](const Source& src, const AlphaModifier& modifier) {
558 auto src_it = source_map.find(src); 645 auto src_it = source_map.find(src);
559 std::string src_str = "Unknown"; 646 std::string src_str = "Unknown";
560 if (src_it != source_map.end()) 647 if (src_it != source_map.end())
@@ -567,8 +654,8 @@ void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages)
567 654
568 return ReplacePattern(modifier_str, "%source", src_str); 655 return ReplacePattern(modifier_str, "%source", src_str);
569 }; 656 };
570 auto GetAlphaCombinerStr = 657 static auto GetAlphaCombinerStr =
571 [&](const Regs::TevStageConfig& tev_stage) { 658 [](const Regs::TevStageConfig& tev_stage) {
572 auto op_it = combiner_map.find(tev_stage.alpha_op); 659 auto op_it = combiner_map.find(tev_stage.alpha_op);
573 std::string op_str = "Unknown op (%source1, %source2, %source3)"; 660 std::string op_str = "Unknown op (%source1, %source2, %source3)";
574 if (op_it != combiner_map.end()) 661 if (op_it != combiner_map.end())
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index 51f14f12f..f361a5385 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -26,6 +26,7 @@ public:
26 CommandProcessed, 26 CommandProcessed,
27 IncomingPrimitiveBatch, 27 IncomingPrimitiveBatch,
28 FinishedPrimitiveBatch, 28 FinishedPrimitiveBatch,
29 VertexLoaded,
29 30
30 NumEvents 31 NumEvents
31 }; 32 };
@@ -192,7 +193,7 @@ void OnPicaRegWrite(u32 id, u32 value);
192std::unique_ptr<PicaTrace> FinishPicaTracing(); 193std::unique_ptr<PicaTrace> FinishPicaTracing();
193 194
194struct TextureInfo { 195struct TextureInfo {
195 unsigned int address; 196 PAddr physical_address;
196 int width; 197 int width;
197 int height; 198 int height;
198 int stride; 199 int stride;
@@ -202,7 +203,17 @@ struct TextureInfo {
202 const Pica::Regs::TextureFormat& format); 203 const Pica::Regs::TextureFormat& format);
203}; 204};
204 205
205const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info); 206/**
207 * Lookup texel located at the given coordinates and return an RGBA vector of its color.
208 * @param source Source pointer to read data from
209 * @param s,t Texture coordinates to read from
210 * @param info TextureInfo object describing the texture setup
211 * @param disable_alpha This is used for debug widgets which use this method to display textures without providing a good way to visualize alpha by themselves. If true, this will return 255 for the alpha component, and either drop the information entirely or store it in an "unused" color channel.
212 * @todo Eventually we should get rid of the disable_alpha parameter.
213 */
214const Math::Vec4<u8> LookupTexture(const u8* source, int s, int t, const TextureInfo& info,
215 bool disable_alpha = false);
216
206void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data); 217void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data);
207 218
208void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages); 219void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages);
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 4c3791ad9..06552a3ef 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -8,6 +8,7 @@
8#include <cstddef> 8#include <cstddef>
9#include <initializer_list> 9#include <initializer_list>
10#include <map> 10#include <map>
11#include <vector>
11 12
12#include "common/bit_field.h" 13#include "common/bit_field.h"
13#include "common/common_types.h" 14#include "common/common_types.h"
@@ -104,6 +105,11 @@ struct Regs {
104 INSERT_PADDING_WORDS(0x17); 105 INSERT_PADDING_WORDS(0x17);
105 106
106 struct TextureConfig { 107 struct TextureConfig {
108 enum WrapMode : u32 {
109 ClampToEdge = 0,
110 Repeat = 2,
111 };
112
107 INSERT_PADDING_WORDS(0x1); 113 INSERT_PADDING_WORDS(0x1);
108 114
109 union { 115 union {
@@ -111,12 +117,17 @@ struct Regs {
111 BitField<16, 16, u32> width; 117 BitField<16, 16, u32> width;
112 }; 118 };
113 119
114 INSERT_PADDING_WORDS(0x2); 120 union {
121 BitField< 8, 2, WrapMode> wrap_s;
122 BitField<11, 2, WrapMode> wrap_t;
123 };
124
125 INSERT_PADDING_WORDS(0x1);
115 126
116 u32 address; 127 u32 address;
117 128
118 u32 GetPhysicalAddress() const { 129 u32 GetPhysicalAddress() const {
119 return DecodeAddressRegister(address) - Memory::FCRAM_PADDR + Memory::HEAP_LINEAR_VADDR; 130 return DecodeAddressRegister(address);
120 } 131 }
121 132
122 // texture1 and texture2 store the texture format directly after the address 133 // texture1 and texture2 store the texture format directly after the address
@@ -131,36 +142,70 @@ struct Regs {
131 RGBA5551 = 2, 142 RGBA5551 = 2,
132 RGB565 = 3, 143 RGB565 = 3,
133 RGBA4 = 4, 144 RGBA4 = 4,
145 IA8 = 5,
146
147 I8 = 7,
148 A8 = 8,
149 IA4 = 9,
134 150
151 A4 = 11,
135 // TODO: Support for the other formats is not implemented, yet. 152 // TODO: Support for the other formats is not implemented, yet.
136 // Seems like they are luminance formats and compressed textures. 153 // Seems like they are luminance formats and compressed textures.
137 }; 154 };
138 155
139 static unsigned BytesPerPixel(TextureFormat format) { 156 static unsigned NibblesPerPixel(TextureFormat format) {
140 switch (format) { 157 switch (format) {
141 case TextureFormat::RGBA8: 158 case TextureFormat::RGBA8:
142 return 4; 159 return 8;
143 160
144 case TextureFormat::RGB8: 161 case TextureFormat::RGB8:
145 return 3; 162 return 6;
146 163
147 case TextureFormat::RGBA5551: 164 case TextureFormat::RGBA5551:
148 case TextureFormat::RGB565: 165 case TextureFormat::RGB565:
149 case TextureFormat::RGBA4: 166 case TextureFormat::RGBA4:
150 return 2; 167 case TextureFormat::IA8:
168 return 4;
151 169
152 default: 170 case TextureFormat::A4:
153 // placeholder for yet unknown formats
154 return 1; 171 return 1;
172
173 case TextureFormat::I8:
174 case TextureFormat::A8:
175 case TextureFormat::IA4:
176 default: // placeholder for yet unknown formats
177 return 2;
155 } 178 }
156 } 179 }
157 180
158 BitField< 0, 1, u32> texturing_enable; 181 union {
182 BitField< 0, 1, u32> texture0_enable;
183 BitField< 1, 1, u32> texture1_enable;
184 BitField< 2, 1, u32> texture2_enable;
185 };
159 TextureConfig texture0; 186 TextureConfig texture0;
160 INSERT_PADDING_WORDS(0x8); 187 INSERT_PADDING_WORDS(0x8);
161 BitField<0, 4, TextureFormat> texture0_format; 188 BitField<0, 4, TextureFormat> texture0_format;
162 189 INSERT_PADDING_WORDS(0x2);
163 INSERT_PADDING_WORDS(0x31); 190 TextureConfig texture1;
191 BitField<0, 4, TextureFormat> texture1_format;
192 INSERT_PADDING_WORDS(0x2);
193 TextureConfig texture2;
194 BitField<0, 4, TextureFormat> texture2_format;
195 INSERT_PADDING_WORDS(0x21);
196
197 struct FullTextureConfig {
198 const bool enabled;
199 const TextureConfig config;
200 const TextureFormat format;
201 };
202 const std::array<FullTextureConfig, 3> GetTextures() const {
203 return {{
204 { static_cast<bool>(texture0_enable), texture0, texture0_format },
205 { static_cast<bool>(texture1_enable), texture1, texture1_format },
206 { static_cast<bool>(texture2_enable), texture2, texture2_format }
207 }};
208 }
164 209
165 // 0xc0-0xff: Texture Combiner (akin to glTexEnv) 210 // 0xc0-0xff: Texture Combiner (akin to glTexEnv)
166 struct TevStageConfig { 211 struct TevStageConfig {
@@ -282,11 +327,11 @@ struct Regs {
282 327
283 INSERT_PADDING_WORDS(0x1); 328 INSERT_PADDING_WORDS(0x1);
284 329
285 inline u32 GetColorBufferAddress() const { 330 inline u32 GetColorBufferPhysicalAddress() const {
286 return Memory::PhysicalToVirtualAddress(DecodeAddressRegister(color_buffer_address)); 331 return DecodeAddressRegister(color_buffer_address);
287 } 332 }
288 inline u32 GetDepthBufferAddress() const { 333 inline u32 GetDepthBufferPhysicalAddress() const {
289 return Memory::PhysicalToVirtualAddress(DecodeAddressRegister(depth_buffer_address)); 334 return DecodeAddressRegister(depth_buffer_address);
290 } 335 }
291 336
292 inline u32 GetWidth() const { 337 inline u32 GetWidth() const {
@@ -310,9 +355,8 @@ struct Regs {
310 355
311 BitField<0, 29, u32> base_address; 356 BitField<0, 29, u32> base_address;
312 357
313 inline u32 GetBaseAddress() const { 358 u32 GetPhysicalBaseAddress() const {
314 // TODO: Ugly, should fix PhysicalToVirtualAddress instead 359 return DecodeAddressRegister(base_address);
315 return DecodeAddressRegister(base_address) - Memory::FCRAM_PADDR + Memory::HEAP_LINEAR_VADDR;
316 } 360 }
317 361
318 // Descriptor for internal vertex attributes 362 // Descriptor for internal vertex attributes
@@ -448,7 +492,11 @@ struct Regs {
448 492
449 BitField<8, 2, TriangleTopology> triangle_topology; 493 BitField<8, 2, TriangleTopology> triangle_topology;
450 494
451 INSERT_PADDING_WORDS(0x5b); 495 INSERT_PADDING_WORDS(0x51);
496
497 BitField<0, 16, u32> vs_bool_uniforms;
498
499 INSERT_PADDING_WORDS(0x9);
452 500
453 // Offset to shader program entry point (in words) 501 // Offset to shader program entry point (in words)
454 BitField<0, 16, u32> vs_main_offset; 502 BitField<0, 16, u32> vs_main_offset;
@@ -556,9 +604,13 @@ struct Regs {
556 ADD_FIELD(viewport_depth_range); 604 ADD_FIELD(viewport_depth_range);
557 ADD_FIELD(viewport_depth_far_plane); 605 ADD_FIELD(viewport_depth_far_plane);
558 ADD_FIELD(viewport_corner); 606 ADD_FIELD(viewport_corner);
559 ADD_FIELD(texturing_enable); 607 ADD_FIELD(texture0_enable);
560 ADD_FIELD(texture0); 608 ADD_FIELD(texture0);
561 ADD_FIELD(texture0_format); 609 ADD_FIELD(texture0_format);
610 ADD_FIELD(texture1);
611 ADD_FIELD(texture1_format);
612 ADD_FIELD(texture2);
613 ADD_FIELD(texture2_format);
562 ADD_FIELD(tev_stage0); 614 ADD_FIELD(tev_stage0);
563 ADD_FIELD(tev_stage1); 615 ADD_FIELD(tev_stage1);
564 ADD_FIELD(tev_stage2); 616 ADD_FIELD(tev_stage2);
@@ -572,6 +624,7 @@ struct Regs {
572 ADD_FIELD(trigger_draw); 624 ADD_FIELD(trigger_draw);
573 ADD_FIELD(trigger_draw_indexed); 625 ADD_FIELD(trigger_draw_indexed);
574 ADD_FIELD(triangle_topology); 626 ADD_FIELD(triangle_topology);
627 ADD_FIELD(vs_bool_uniforms);
575 ADD_FIELD(vs_main_offset); 628 ADD_FIELD(vs_main_offset);
576 ADD_FIELD(vs_input_register_map); 629 ADD_FIELD(vs_input_register_map);
577 ADD_FIELD(vs_uniform_setup); 630 ADD_FIELD(vs_uniform_setup);
@@ -622,9 +675,13 @@ ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e);
622ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); 675ASSERT_REG_POSITION(vs_output_attributes[0], 0x50);
623ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); 676ASSERT_REG_POSITION(vs_output_attributes[1], 0x51);
624ASSERT_REG_POSITION(viewport_corner, 0x68); 677ASSERT_REG_POSITION(viewport_corner, 0x68);
625ASSERT_REG_POSITION(texturing_enable, 0x80); 678ASSERT_REG_POSITION(texture0_enable, 0x80);
626ASSERT_REG_POSITION(texture0, 0x81); 679ASSERT_REG_POSITION(texture0, 0x81);
627ASSERT_REG_POSITION(texture0_format, 0x8e); 680ASSERT_REG_POSITION(texture0_format, 0x8e);
681ASSERT_REG_POSITION(texture1, 0x91);
682ASSERT_REG_POSITION(texture1_format, 0x96);
683ASSERT_REG_POSITION(texture2, 0x99);
684ASSERT_REG_POSITION(texture2_format, 0x9e);
628ASSERT_REG_POSITION(tev_stage0, 0xc0); 685ASSERT_REG_POSITION(tev_stage0, 0xc0);
629ASSERT_REG_POSITION(tev_stage1, 0xc8); 686ASSERT_REG_POSITION(tev_stage1, 0xc8);
630ASSERT_REG_POSITION(tev_stage2, 0xd0); 687ASSERT_REG_POSITION(tev_stage2, 0xd0);
@@ -638,6 +695,7 @@ ASSERT_REG_POSITION(num_vertices, 0x228);
638ASSERT_REG_POSITION(trigger_draw, 0x22e); 695ASSERT_REG_POSITION(trigger_draw, 0x22e);
639ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); 696ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
640ASSERT_REG_POSITION(triangle_topology, 0x25e); 697ASSERT_REG_POSITION(triangle_topology, 0x25e);
698ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0);
641ASSERT_REG_POSITION(vs_main_offset, 0x2ba); 699ASSERT_REG_POSITION(vs_main_offset, 0x2ba);
642ASSERT_REG_POSITION(vs_input_register_map, 0x2bb); 700ASSERT_REG_POSITION(vs_input_register_map, 0x2bb);
643ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0); 701ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0);
@@ -719,6 +777,14 @@ struct float24 {
719 return ToFloat32() <= flt.ToFloat32(); 777 return ToFloat32() <= flt.ToFloat32();
720 } 778 }
721 779
780 bool operator == (const float24& flt) const {
781 return ToFloat32() == flt.ToFloat32();
782 }
783
784 bool operator != (const float24& flt) const {
785 return ToFloat32() != flt.ToFloat32();
786 }
787
722private: 788private:
723 // Stored as a regular float, merely for convenience 789 // Stored as a regular float, merely for convenience
724 // TODO: Perform proper arithmetic on this! 790 // TODO: Perform proper arithmetic on this!
@@ -736,5 +802,15 @@ union CommandHeader {
736 BitField<31, 1, u32> group_commands; 802 BitField<31, 1, u32> group_commands;
737}; 803};
738 804
805// TODO: Ugly, should fix PhysicalToVirtualAddress instead
806inline static u32 PAddrToVAddr(u32 addr) {
807 if (addr >= Memory::VRAM_PADDR && addr < Memory::VRAM_PADDR + Memory::VRAM_SIZE) {
808 return addr - Memory::VRAM_PADDR + Memory::VRAM_VADDR;
809 } else if (addr >= Memory::FCRAM_PADDR && addr < Memory::FCRAM_PADDR + Memory::FCRAM_SIZE) {
810 return addr - Memory::FCRAM_PADDR + Memory::HEAP_LINEAR_VADDR;
811 } else {
812 return 0;
813 }
814}
739 815
740} // namespace 816} // namespace
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp
index 102693ed9..ff46c7b52 100644
--- a/src/video_core/primitive_assembly.cpp
+++ b/src/video_core/primitive_assembly.cpp
@@ -30,20 +30,27 @@ void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandl
30 } 30 }
31 break; 31 break;
32 32
33 case Regs::TriangleTopology::Strip:
33 case Regs::TriangleTopology::Fan: 34 case Regs::TriangleTopology::Fan:
34 if (buffer_index == 2) { 35 if (strip_ready) {
35 buffer_index = 0; 36 // TODO: Should be "buffer[0], buffer[1], vtx" instead!
36 37 // Not quite sure why we need this order for things to show up properly.
37 triangle_handler(buffer[0], buffer[1], vtx); 38 // Maybe a bug in the rasterizer?
39 triangle_handler(buffer[1], buffer[0], vtx);
40 }
41 buffer[buffer_index] = vtx;
38 42
39 buffer[1] = vtx; 43 if (topology == Regs::TriangleTopology::Strip) {
40 } else { 44 strip_ready |= (buffer_index == 1);
41 buffer[buffer_index++] = vtx; 45 buffer_index = !buffer_index;
46 } else if (topology == Regs::TriangleTopology::Fan) {
47 buffer_index = 1;
48 strip_ready = true;
42 } 49 }
43 break; 50 break;
44 51
45 default: 52 default:
46 LOG_ERROR(Render_Software, "Unknown triangle topology %x:", (int)topology); 53 LOG_ERROR(HW_GPU, "Unknown triangle topology %x:", (int)topology);
47 break; 54 break;
48 } 55 }
49} 56}
diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h
index ea2e2f61e..decf0fd64 100644
--- a/src/video_core/primitive_assembly.h
+++ b/src/video_core/primitive_assembly.h
@@ -37,6 +37,7 @@ private:
37 37
38 int buffer_index; 38 int buffer_index;
39 VertexType buffer[2]; 39 VertexType buffer[2];
40 bool strip_ready = false;
40}; 41};
41 42
42 43
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index b7e04a560..bf9c36661 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -18,7 +18,7 @@ namespace Pica {
18namespace Rasterizer { 18namespace Rasterizer {
19 19
20static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { 20static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
21 u32* color_buffer = (u32*)Memory::GetPointer(registers.framebuffer.GetColorBufferAddress()); 21 u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress())));
22 u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); 22 u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();
23 23
24 // Assuming RGBA8 format until actual framebuffer format handling is implemented 24 // Assuming RGBA8 format until actual framebuffer format handling is implemented
@@ -26,14 +26,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
26} 26}
27 27
28static u32 GetDepth(int x, int y) { 28static u32 GetDepth(int x, int y) {
29 u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress()); 29 u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
30 30
31 // Assuming 16-bit depth buffer format until actual format handling is implemented 31 // Assuming 16-bit depth buffer format until actual format handling is implemented
32 return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); 32 return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
33} 33}
34 34
35static void SetDepth(int x, int y, u16 value) { 35static void SetDepth(int x, int y, u16 value) {
36 u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress()); 36 u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
37 37
38 // Assuming 16-bit depth buffer format until actual format handling is implemented 38 // Assuming 16-bit depth buffer format until actual format handling is implemented
39 *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; 39 *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
@@ -167,60 +167,48 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
167 (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) 167 (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)
168 }; 168 };
169 169
170 Math::Vec4<u8> texture_color{}; 170 Math::Vec2<float24> uv[3];
171 float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); 171 uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
172 float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); 172 uv[0].v() = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
173 if (registers.texturing_enable) { 173 uv[1].u() = GetInterpolatedAttribute(v0.tc1.u(), v1.tc1.u(), v2.tc1.u());
174 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each 174 uv[1].v() = GetInterpolatedAttribute(v0.tc1.v(), v1.tc1.v(), v2.tc1.v());
175 // of which is composed of four 2x2 subtiles each of which is composed of four texels. 175 uv[2].u() = GetInterpolatedAttribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u());
176 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. 176 uv[2].v() = GetInterpolatedAttribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v());
177 // texels are laid out in a 2x2 subtile like this: 177
178 // 2 3 178 Math::Vec4<u8> texture_color[3]{};
179 // 0 1 179 for (int i = 0; i < 3; ++i) {
180 // 180 auto texture = registers.GetTextures()[i];
181 // The full 8x8 tile has the texels arranged like this: 181 if (!texture.enabled)
182 // 182 continue;
183 // 42 43 46 47 58 59 62 63 183
184 // 40 41 44 45 56 57 60 61 184 _dbg_assert_(HW_GPU, 0 != texture.config.address);
185 // 34 35 38 39 50 51 54 55 185
186 // 32 33 36 37 48 49 52 53 186 int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32();
187 // 10 11 14 15 26 27 30 31 187 int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32();
188 // 08 09 12 13 24 25 28 29 188 auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) {
189 // 02 03 06 07 18 19 22 23 189 switch (mode) {
190 // 00 01 04 05 16 17 20 21 190 case Regs::TextureConfig::ClampToEdge:
191 191 val = std::max(val, 0);
192 // TODO: This is currently hardcoded for RGB8 192 val = std::min(val, (int)size - 1);
193 u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress()); 193 return val;
194 194
195 // TODO(neobrain): Not sure if this swizzling pattern is used for all textures. 195 case Regs::TextureConfig::Repeat:
196 // To be flexible in case different but similar patterns are used, we keep this 196 return (int)(((unsigned)val) % size);
197 // somewhat inefficient code around for now. 197
198 int s = (int)(u * float24::FromFloat32(static_cast<float>(registers.texture0.width))).ToFloat32(); 198 default:
199 int t = (int)(v * float24::FromFloat32(static_cast<float>(registers.texture0.height))).ToFloat32(); 199 LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x\n", (int)mode);
200 int texel_index_within_tile = 0; 200 _dbg_assert_(HW_GPU, 0);
201 for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { 201 return 0;
202 int sub_tile_width = 1 << block_size_index; 202 }
203 int sub_tile_height = 1 << block_size_index; 203 };
204 204 s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width);
205 int sub_tile_index = (s & sub_tile_width) << block_size_index; 205 t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height);
206 sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index); 206
207 texel_index_within_tile += sub_tile_index; 207 u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress()));
208 } 208 auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
209 209
210 const int block_width = 8; 210 texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info);
211 const int block_height = 8; 211 DebugUtils::DumpTexture(texture.config, texture_data);
212
213 int coarse_s = (s / block_width) * block_width;
214 int coarse_t = (t / block_height) * block_height;
215
216 const int row_stride = registers.texture0.width * 3;
217 u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3;
218 texture_color.r() = source_ptr[2];
219 texture_color.g() = source_ptr[1];
220 texture_color.b() = source_ptr[0];
221 texture_color.a() = 0xFF;
222
223 DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data);
224 } 212 }
225 213
226 // Texture environment - consists of 6 stages of color and alpha combining. 214 // Texture environment - consists of 6 stages of color and alpha combining.
@@ -237,22 +225,29 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
237 using AlphaModifier = Regs::TevStageConfig::AlphaModifier; 225 using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
238 using Operation = Regs::TevStageConfig::Operation; 226 using Operation = Regs::TevStageConfig::Operation;
239 227
240 auto GetColorSource = [&](Source source) -> Math::Vec3<u8> { 228 auto GetColorSource = [&](Source source) -> Math::Vec4<u8> {
241 switch (source) { 229 switch (source) {
242 case Source::PrimaryColor: 230 case Source::PrimaryColor:
243 return primary_color.rgb(); 231 return primary_color;
244 232
245 case Source::Texture0: 233 case Source::Texture0:
246 return texture_color.rgb(); 234 return texture_color[0];
235
236 case Source::Texture1:
237 return texture_color[1];
238
239 case Source::Texture2:
240 return texture_color[2];
247 241
248 case Source::Constant: 242 case Source::Constant:
249 return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b}; 243 return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a};
250 244
251 case Source::Previous: 245 case Source::Previous:
252 return combiner_output.rgb(); 246 return combiner_output;
253 247
254 default: 248 default:
255 LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source); 249 LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source);
250 _dbg_assert_(HW_GPU, 0);
256 return {}; 251 return {};
257 } 252 }
258 }; 253 };
@@ -263,7 +258,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
263 return primary_color.a(); 258 return primary_color.a();
264 259
265 case Source::Texture0: 260 case Source::Texture0:
266 return texture_color.a(); 261 return texture_color[0].a();
262
263 case Source::Texture1:
264 return texture_color[1].a();
265
266 case Source::Texture2:
267 return texture_color[2].a();
267 268
268 case Source::Constant: 269 case Source::Constant:
269 return tev_stage.const_a; 270 return tev_stage.const_a;
@@ -273,17 +274,23 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
273 274
274 default: 275 default:
275 LOG_ERROR(HW_GPU, "Unknown alpha combiner source %d\n", (int)source); 276 LOG_ERROR(HW_GPU, "Unknown alpha combiner source %d\n", (int)source);
277 _dbg_assert_(HW_GPU, 0);
276 return 0; 278 return 0;
277 } 279 }
278 }; 280 };
279 281
280 auto GetColorModifier = [](ColorModifier factor, const Math::Vec3<u8>& values) -> Math::Vec3<u8> { 282 auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
281 switch (factor) 283 switch (factor)
282 { 284 {
283 case ColorModifier::SourceColor: 285 case ColorModifier::SourceColor:
284 return values; 286 return values.rgb();
287
288 case ColorModifier::SourceAlpha:
289 return { values.a(), values.a(), values.a() };
290
285 default: 291 default:
286 LOG_ERROR(HW_GPU, "Unknown color factor %d\n", (int)factor); 292 LOG_ERROR(HW_GPU, "Unknown color factor %d\n", (int)factor);
293 _dbg_assert_(HW_GPU, 0);
287 return {}; 294 return {};
288 } 295 }
289 }; 296 };
@@ -292,8 +299,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
292 switch (factor) { 299 switch (factor) {
293 case AlphaModifier::SourceAlpha: 300 case AlphaModifier::SourceAlpha:
294 return value; 301 return value;
302
303 case AlphaModifier::OneMinusSourceAlpha:
304 return 255 - value;
305
295 default: 306 default:
296 LOG_ERROR(HW_GPU, "Unknown color factor %d\n", (int)factor); 307 LOG_ERROR(HW_GPU, "Unknown alpha factor %d\n", (int)factor);
308 _dbg_assert_(HW_GPU, 0);
297 return 0; 309 return 0;
298 } 310 }
299 }; 311 };
@@ -306,8 +318,21 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
306 case Operation::Modulate: 318 case Operation::Modulate:
307 return ((input[0] * input[1]) / 255).Cast<u8>(); 319 return ((input[0] * input[1]) / 255).Cast<u8>();
308 320
321 case Operation::Add:
322 {
323 auto result = input[0] + input[1];
324 result.r() = std::min(255, result.r());
325 result.g() = std::min(255, result.g());
326 result.b() = std::min(255, result.b());
327 return result.Cast<u8>();
328 }
329
330 case Operation::Lerp:
331 return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
332
309 default: 333 default:
310 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); 334 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
335 _dbg_assert_(HW_GPU, 0);
311 return {}; 336 return {};
312 } 337 }
313 }; 338 };
@@ -320,8 +345,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
320 case Operation::Modulate: 345 case Operation::Modulate:
321 return input[0] * input[1] / 255; 346 return input[0] * input[1] / 255;
322 347
348 case Operation::Add:
349 return std::min(255, input[0] + input[1]);
350
351 case Operation::Lerp:
352 return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
353
323 default: 354 default:
324 LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op); 355 LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op);
356 _dbg_assert_(HW_GPU, 0);
325 return 0; 357 return 0;
326 } 358 }
327 }; 359 };
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index e2caeeb8f..e20d7adb7 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -240,14 +240,14 @@ MathUtil::Rectangle<unsigned> RendererOpenGL::GetViewportExtent() {
240 MathUtil::Rectangle<unsigned> viewport_extent; 240 MathUtil::Rectangle<unsigned> viewport_extent;
241 if (window_aspect_ratio > emulation_aspect_ratio) { 241 if (window_aspect_ratio > emulation_aspect_ratio) {
242 // Window is narrower than the emulation content => apply borders to the top and bottom 242 // Window is narrower than the emulation content => apply borders to the top and bottom
243 unsigned viewport_height = std::round(emulation_aspect_ratio * framebuffer_width); 243 unsigned viewport_height = static_cast<unsigned>(std::round(emulation_aspect_ratio * framebuffer_width));
244 viewport_extent.left = 0; 244 viewport_extent.left = 0;
245 viewport_extent.top = (framebuffer_height - viewport_height) / 2; 245 viewport_extent.top = (framebuffer_height - viewport_height) / 2;
246 viewport_extent.right = viewport_extent.left + framebuffer_width; 246 viewport_extent.right = viewport_extent.left + framebuffer_width;
247 viewport_extent.bottom = viewport_extent.top + viewport_height; 247 viewport_extent.bottom = viewport_extent.top + viewport_height;
248 } else { 248 } else {
249 // Otherwise, apply borders to the left and right sides of the window. 249 // Otherwise, apply borders to the left and right sides of the window.
250 unsigned viewport_width = std::round(framebuffer_height / emulation_aspect_ratio); 250 unsigned viewport_width = static_cast<unsigned>(std::round(framebuffer_height / emulation_aspect_ratio));
251 viewport_extent.left = (framebuffer_width - viewport_width) / 2; 251 viewport_extent.left = (framebuffer_width - viewport_width) / 2;
252 viewport_extent.top = 0; 252 viewport_extent.top = 0;
253 viewport_extent.right = viewport_extent.left + viewport_width; 253 viewport_extent.right = viewport_extent.left + viewport_width;
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 477e78cfe..4ba69fa51 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -2,16 +2,25 @@
2// Licensed under GPLv2 2// Licensed under GPLv2
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <stack>
6
5#include <boost/range/algorithm.hpp> 7#include <boost/range/algorithm.hpp>
6 8
7#include <common/file_util.h> 9#include <common/file_util.h>
8 10
9#include <core/mem_map.h> 11#include <core/mem_map.h>
10 12
11#include "debug_utils/debug_utils.h" 13#include <nihstro/shader_bytecode.h>
14
12 15
13#include "pica.h" 16#include "pica.h"
14#include "vertex_shader.h" 17#include "vertex_shader.h"
18#include "debug_utils/debug_utils.h"
19
20using nihstro::Instruction;
21using nihstro::RegisterType;
22using nihstro::SourceRegister;
23using nihstro::SwizzlePattern;
15 24
16namespace Pica { 25namespace Pica {
17 26
@@ -19,13 +28,14 @@ namespace VertexShader {
19 28
20static struct { 29static struct {
21 Math::Vec4<float24> f[96]; 30 Math::Vec4<float24> f[96];
22} shader_uniforms;
23 31
32 std::array<bool,16> b;
33} shader_uniforms;
24 34
25// TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! 35// TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
26// For now, we just keep these local arrays around. 36// For now, we just keep these local arrays around.
27static u32 shader_memory[1024]; 37static std::array<u32, 1024> shader_memory;
28static u32 swizzle_data[1024]; 38static std::array<u32, 1024> swizzle_data;
29 39
30void SubmitShaderMemoryChange(u32 addr, u32 value) 40void SubmitShaderMemoryChange(u32 addr, u32 value)
31{ 41{
@@ -42,6 +52,21 @@ Math::Vec4<float24>& GetFloatUniform(u32 index)
42 return shader_uniforms.f[index]; 52 return shader_uniforms.f[index];
43} 53}
44 54
55bool& GetBoolUniform(u32 index)
56{
57 return shader_uniforms.b[index];
58}
59
60const std::array<u32, 1024>& GetShaderBinary()
61{
62 return shader_memory;
63}
64
65const std::array<u32, 1024>& GetSwizzlePatterns()
66{
67 return swizzle_data;
68}
69
45struct VertexShaderState { 70struct VertexShaderState {
46 u32* program_counter; 71 u32* program_counter;
47 72
@@ -49,13 +74,23 @@ struct VertexShaderState {
49 float24* output_register_table[7*4]; 74 float24* output_register_table[7*4];
50 75
51 Math::Vec4<float24> temporary_registers[16]; 76 Math::Vec4<float24> temporary_registers[16];
52 bool status_registers[2]; 77 bool conditional_code[2];
78
79 // Two Address registers and one loop counter
80 // TODO: How many bits do these actually have?
81 s32 address_registers[3];
53 82
54 enum { 83 enum {
55 INVALID_ADDRESS = 0xFFFFFFFF 84 INVALID_ADDRESS = 0xFFFFFFFF
56 }; 85 };
57 u32 call_stack[8]; // TODO: What is the maximal call stack depth? 86
58 u32* call_stack_pointer; 87 struct CallStackElement {
88 u32 final_address;
89 u32 return_address;
90 };
91
92 // TODO: Is there a maximal size for this?
93 std::stack<CallStackElement> call_stack;
59 94
60 struct { 95 struct {
61 u32 max_offset; // maximum program counter ever reached 96 u32 max_offset; // maximum program counter ever reached
@@ -64,49 +99,105 @@ struct VertexShaderState {
64}; 99};
65 100
66static void ProcessShaderCode(VertexShaderState& state) { 101static void ProcessShaderCode(VertexShaderState& state) {
102
103 // Placeholder for invalid inputs
104 static float24 dummy_vec4_float24[4];
105
67 while (true) { 106 while (true) {
68 bool increment_pc = true; 107 if (!state.call_stack.empty()) {
108 if (state.program_counter - shader_memory.data() == state.call_stack.top().final_address) {
109 state.program_counter = &shader_memory[state.call_stack.top().return_address];
110 state.call_stack.pop();
111
112 // TODO: Is "trying again" accurate to hardware?
113 continue;
114 }
115 }
116
69 bool exit_loop = false; 117 bool exit_loop = false;
70 const Instruction& instr = *(const Instruction*)state.program_counter; 118 const Instruction& instr = *(const Instruction*)state.program_counter;
71 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory));
72
73 const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()]
74 : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x
75 : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x
76 : nullptr;
77 const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2.GetIndex()]
78 : &state.temporary_registers[instr.common.src2.GetIndex()].x;
79 float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()]
80 : (instr.common.dest < 0x10) ? nullptr
81 : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0]
82 : nullptr;
83
84 const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; 119 const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];
85 const bool negate_src1 = (swizzle.negate != 0);
86 120
87 float24 src1[4] = { 121 auto call = [&](VertexShaderState& state, u32 offset, u32 num_instructions, u32 return_offset) {
88 src1_[(int)swizzle.GetSelectorSrc1(0)], 122 state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
89 src1_[(int)swizzle.GetSelectorSrc1(1)], 123 state.call_stack.push({ offset + num_instructions, return_offset });
90 src1_[(int)swizzle.GetSelectorSrc1(2)],
91 src1_[(int)swizzle.GetSelectorSrc1(3)],
92 }; 124 };
93 if (negate_src1) { 125 u32 binary_offset = state.program_counter - shader_memory.data();
94 src1[0] = src1[0] * float24::FromFloat32(-1); 126
95 src1[1] = src1[1] * float24::FromFloat32(-1); 127 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + binary_offset);
96 src1[2] = src1[2] * float24::FromFloat32(-1); 128
97 src1[3] = src1[3] * float24::FromFloat32(-1); 129 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
98 } 130 switch (source_reg.GetRegisterType()) {
99 const float24 src2[4] = { 131 case RegisterType::Input:
100 src2_[(int)swizzle.GetSelectorSrc2(0)], 132 return state.input_register_table[source_reg.GetIndex()];
101 src2_[(int)swizzle.GetSelectorSrc2(1)], 133
102 src2_[(int)swizzle.GetSelectorSrc2(2)], 134 case RegisterType::Temporary:
103 src2_[(int)swizzle.GetSelectorSrc2(3)], 135 return &state.temporary_registers[source_reg.GetIndex()].x;
136
137 case RegisterType::FloatUniform:
138 return &shader_uniforms.f[source_reg.GetIndex()].x;
139
140 default:
141 return dummy_vec4_float24;
142 }
104 }; 143 };
105 144
106 switch (instr.opcode) { 145 switch (instr.opcode.GetInfo().type) {
146 case Instruction::OpCodeType::Arithmetic:
147 {
148 bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed);
149 if (is_inverted) {
150 // TODO: We don't really support this properly: For instance, the address register
151 // offset needs to be applied to SRC2 instead, etc.
152 // For now, we just abort in this situation.
153 LOG_CRITICAL(HW_GPU, "Bad condition...");
154 exit(0);
155 }
156
157 const int address_offset = (instr.common.address_register_index == 0)
158 ? 0 : state.address_registers[instr.common.address_register_index - 1];
159
160 const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset);
161 const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted));
162
163 const bool negate_src1 = (swizzle.negate_src1 != false);
164 const bool negate_src2 = (swizzle.negate_src2 != false);
165
166 float24 src1[4] = {
167 src1_[(int)swizzle.GetSelectorSrc1(0)],
168 src1_[(int)swizzle.GetSelectorSrc1(1)],
169 src1_[(int)swizzle.GetSelectorSrc1(2)],
170 src1_[(int)swizzle.GetSelectorSrc1(3)],
171 };
172 if (negate_src1) {
173 src1[0] = src1[0] * float24::FromFloat32(-1);
174 src1[1] = src1[1] * float24::FromFloat32(-1);
175 src1[2] = src1[2] * float24::FromFloat32(-1);
176 src1[3] = src1[3] * float24::FromFloat32(-1);
177 }
178 float24 src2[4] = {
179 src2_[(int)swizzle.GetSelectorSrc2(0)],
180 src2_[(int)swizzle.GetSelectorSrc2(1)],
181 src2_[(int)swizzle.GetSelectorSrc2(2)],
182 src2_[(int)swizzle.GetSelectorSrc2(3)],
183 };
184 if (negate_src2) {
185 src2[0] = src2[0] * float24::FromFloat32(-1);
186 src2[1] = src2[1] * float24::FromFloat32(-1);
187 src2[2] = src2[2] * float24::FromFloat32(-1);
188 src2[3] = src2[3] * float24::FromFloat32(-1);
189 }
190
191 float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()]
192 : (instr.common.dest < 0x10) ? dummy_vec4_float24
193 : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0]
194 : dummy_vec4_float24;
195
196 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
197
198 switch (instr.opcode.EffectiveOpCode()) {
107 case Instruction::OpCode::ADD: 199 case Instruction::OpCode::ADD:
108 { 200 {
109 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
110 for (int i = 0; i < 4; ++i) { 201 for (int i = 0; i < 4; ++i) {
111 if (!swizzle.DestComponentEnabled(i)) 202 if (!swizzle.DestComponentEnabled(i))
112 continue; 203 continue;
@@ -119,7 +210,6 @@ static void ProcessShaderCode(VertexShaderState& state) {
119 210
120 case Instruction::OpCode::MUL: 211 case Instruction::OpCode::MUL:
121 { 212 {
122 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
123 for (int i = 0; i < 4; ++i) { 213 for (int i = 0; i < 4; ++i) {
124 if (!swizzle.DestComponentEnabled(i)) 214 if (!swizzle.DestComponentEnabled(i))
125 continue; 215 continue;
@@ -130,10 +220,18 @@ static void ProcessShaderCode(VertexShaderState& state) {
130 break; 220 break;
131 } 221 }
132 222
223 case Instruction::OpCode::MAX:
224 for (int i = 0; i < 4; ++i) {
225 if (!swizzle.DestComponentEnabled(i))
226 continue;
227
228 dest[i] = std::max(src1[i], src2[i]);
229 }
230 break;
231
133 case Instruction::OpCode::DP3: 232 case Instruction::OpCode::DP3:
134 case Instruction::OpCode::DP4: 233 case Instruction::OpCode::DP4:
135 { 234 {
136 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
137 float24 dot = float24::FromFloat32(0.f); 235 float24 dot = float24::FromFloat32(0.f);
138 int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; 236 int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
139 for (int i = 0; i < num_components; ++i) 237 for (int i = 0; i < num_components; ++i)
@@ -151,7 +249,6 @@ static void ProcessShaderCode(VertexShaderState& state) {
151 // Reciprocal 249 // Reciprocal
152 case Instruction::OpCode::RCP: 250 case Instruction::OpCode::RCP:
153 { 251 {
154 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
155 for (int i = 0; i < 4; ++i) { 252 for (int i = 0; i < 4; ++i) {
156 if (!swizzle.DestComponentEnabled(i)) 253 if (!swizzle.DestComponentEnabled(i))
157 continue; 254 continue;
@@ -167,7 +264,6 @@ static void ProcessShaderCode(VertexShaderState& state) {
167 // Reciprocal Square Root 264 // Reciprocal Square Root
168 case Instruction::OpCode::RSQ: 265 case Instruction::OpCode::RSQ:
169 { 266 {
170 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
171 for (int i = 0; i < 4; ++i) { 267 for (int i = 0; i < 4; ++i) {
172 if (!swizzle.DestComponentEnabled(i)) 268 if (!swizzle.DestComponentEnabled(i))
173 continue; 269 continue;
@@ -180,9 +276,21 @@ static void ProcessShaderCode(VertexShaderState& state) {
180 break; 276 break;
181 } 277 }
182 278
279 case Instruction::OpCode::MOVA:
280 {
281 for (int i = 0; i < 2; ++i) {
282 if (!swizzle.DestComponentEnabled(i))
283 continue;
284
285 // TODO: Figure out how the rounding is done on hardware
286 state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32());
287 }
288
289 break;
290 }
291
183 case Instruction::OpCode::MOV: 292 case Instruction::OpCode::MOV:
184 { 293 {
185 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
186 for (int i = 0; i < 4; ++i) { 294 for (int i = 0; i < 4; ++i) {
187 if (!swizzle.DestComponentEnabled(i)) 295 if (!swizzle.DestComponentEnabled(i))
188 continue; 296 continue;
@@ -192,39 +300,137 @@ static void ProcessShaderCode(VertexShaderState& state) {
192 break; 300 break;
193 } 301 }
194 302
195 case Instruction::OpCode::RET: 303 case Instruction::OpCode::CMP:
196 if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { 304 for (int i = 0; i < 2; ++i) {
197 exit_loop = true; 305 // TODO: Can you restrict to one compare via dest masking?
198 } else { 306
199 // Jump back to call stack position, invalidate call stack entry, move up call stack pointer 307 auto compare_op = instr.common.compare_op;
200 state.program_counter = &shader_memory[*state.call_stack_pointer]; 308 auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value();
201 *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS; 309
310 switch (op) {
311 case compare_op.Equal:
312 state.conditional_code[i] = (src1[i] == src2[i]);
313 break;
314
315 case compare_op.NotEqual:
316 state.conditional_code[i] = (src1[i] != src2[i]);
317 break;
318
319 case compare_op.LessThan:
320 state.conditional_code[i] = (src1[i] < src2[i]);
321 break;
322
323 case compare_op.LessEqual:
324 state.conditional_code[i] = (src1[i] <= src2[i]);
325 break;
326
327 case compare_op.GreaterThan:
328 state.conditional_code[i] = (src1[i] > src2[i]);
329 break;
330
331 case compare_op.GreaterEqual:
332 state.conditional_code[i] = (src1[i] >= src2[i]);
333 break;
334
335 default:
336 LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op));
337 break;
338 }
202 } 339 }
340 break;
203 341
342 default:
343 LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
344 (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex);
345 _dbg_assert_(HW_GPU, 0);
346 break;
347 }
348
349 break;
350 }
351 default:
352 // Handle each instruction on its own
353 switch (instr.opcode) {
354 case Instruction::OpCode::END:
355 exit_loop = true;
204 break; 356 break;
205 357
206 case Instruction::OpCode::CALL: 358 case Instruction::OpCode::CALL:
207 increment_pc = false; 359 call(state,
360 instr.flow_control.dest_offset,
361 instr.flow_control.num_instructions,
362 binary_offset + 1);
363 break;
364
365 case Instruction::OpCode::NOP:
366 break;
208 367
209 _dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); 368 case Instruction::OpCode::IFU:
369 if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) {
370 call(state,
371 binary_offset + 1,
372 instr.flow_control.dest_offset - binary_offset - 1,
373 instr.flow_control.dest_offset + instr.flow_control.num_instructions);
374 } else {
375 call(state,
376 instr.flow_control.dest_offset,
377 instr.flow_control.num_instructions,
378 instr.flow_control.dest_offset + instr.flow_control.num_instructions);
379 }
210 380
211 *++state.call_stack_pointer = state.program_counter - shader_memory;
212 // TODO: Does this offset refer to the beginning of shader memory?
213 state.program_counter = &shader_memory[instr.flow_control.offset_words];
214 break; 381 break;
215 382
216 case Instruction::OpCode::FLS: 383 case Instruction::OpCode::IFC:
217 // TODO: Do whatever needs to be done here? 384 {
385 // TODO: Do we need to consider swizzlers here?
386
387 auto flow_control = instr.flow_control;
388 bool results[3] = { flow_control.refx == state.conditional_code[0],
389 flow_control.refy == state.conditional_code[1] };
390
391 switch (flow_control.op) {
392 case flow_control.Or:
393 results[2] = results[0] || results[1];
394 break;
395
396 case flow_control.And:
397 results[2] = results[0] && results[1];
398 break;
399
400 case flow_control.JustX:
401 results[2] = results[0];
402 break;
403
404 case flow_control.JustY:
405 results[2] = results[1];
406 break;
407 }
408
409 if (results[2]) {
410 call(state,
411 binary_offset + 1,
412 instr.flow_control.dest_offset - binary_offset - 1,
413 instr.flow_control.dest_offset + instr.flow_control.num_instructions);
414 } else {
415 call(state,
416 instr.flow_control.dest_offset,
417 instr.flow_control.num_instructions,
418 instr.flow_control.dest_offset + instr.flow_control.num_instructions);
419 }
420
218 break; 421 break;
422 }
219 423
220 default: 424 default:
221 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", 425 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
222 (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex); 426 (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex);
223 break; 427 break;
428 }
429
430 break;
224 } 431 }
225 432
226 if (increment_pc) 433 ++state.program_counter;
227 ++state.program_counter;
228 434
229 if (exit_loop) 435 if (exit_loop)
230 break; 436 break;
@@ -275,13 +481,11 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes)
275 state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; 481 state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp];
276 } 482 }
277 483
278 state.status_registers[0] = false; 484 state.conditional_code[0] = false;
279 state.status_registers[1] = false; 485 state.conditional_code[1] = false;
280 boost::fill(state.call_stack, VertexShaderState::INVALID_ADDRESS);
281 state.call_stack_pointer = &state.call_stack[0];
282 486
283 ProcessShaderCode(state); 487 ProcessShaderCode(state);
284 DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data, 488 DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
285 state.debug.max_opdesc_id, registers.vs_main_offset, 489 state.debug.max_opdesc_id, registers.vs_main_offset,
286 registers.vs_output_attributes); 490 registers.vs_output_attributes);
287 491
diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h
index bfb6fb6e3..047dde046 100644
--- a/src/video_core/vertex_shader.h
+++ b/src/video_core/vertex_shader.h
@@ -27,15 +27,18 @@ struct OutputVertex {
27 Math::Vec4<float24> dummy; // quaternions (not implemented, yet) 27 Math::Vec4<float24> dummy; // quaternions (not implemented, yet)
28 Math::Vec4<float24> color; 28 Math::Vec4<float24> color;
29 Math::Vec2<float24> tc0; 29 Math::Vec2<float24> tc0;
30 Math::Vec2<float24> tc1;
31 float24 pad[6];
32 Math::Vec2<float24> tc2;
30 33
31 // Padding for optimal alignment 34 // Padding for optimal alignment
32 float24 pad[14]; 35 float24 pad2[4];
33 36
34 // Attributes used to store intermediate results 37 // Attributes used to store intermediate results
35 38
36 // position after perspective divide 39 // position after perspective divide
37 Math::Vec3<float24> screenpos; 40 Math::Vec3<float24> screenpos;
38 float24 pad2; 41 float24 pad3;
39 42
40 // Linear interpolation 43 // Linear interpolation
41 // factor: 0=this, 1=vtx 44 // factor: 0=this, 1=vtx
@@ -44,6 +47,8 @@ struct OutputVertex {
44 47
45 // TODO: Should perform perspective correct interpolation here... 48 // TODO: Should perform perspective correct interpolation here...
46 tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); 49 tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);
50 tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor);
51 tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor);
47 52
48 screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); 53 screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);
49 54
@@ -61,222 +66,16 @@ struct OutputVertex {
61static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); 66static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
62static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); 67static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
63 68
64union Instruction {
65 enum class OpCode : u32 {
66 ADD = 0x0,
67 DP3 = 0x1,
68 DP4 = 0x2,
69
70 MUL = 0x8,
71
72 MAX = 0xC,
73 MIN = 0xD,
74 RCP = 0xE,
75 RSQ = 0xF,
76
77 MOV = 0x13,
78
79 RET = 0x21,
80 FLS = 0x22, // Flush
81 CALL = 0x24,
82 };
83
84 std::string GetOpCodeName() const {
85 std::map<OpCode, std::string> map = {
86 { OpCode::ADD, "ADD" },
87 { OpCode::DP3, "DP3" },
88 { OpCode::DP4, "DP4" },
89 { OpCode::MUL, "MUL" },
90 { OpCode::MAX, "MAX" },
91 { OpCode::MIN, "MIN" },
92 { OpCode::RCP, "RCP" },
93 { OpCode::RSQ, "RSQ" },
94 { OpCode::MOV, "MOV" },
95 { OpCode::RET, "RET" },
96 { OpCode::FLS, "FLS" },
97 };
98 auto it = map.find(opcode);
99 if (it == map.end())
100 return "UNK";
101 else
102 return it->second;
103 }
104
105 u32 hex;
106
107 BitField<0x1a, 0x6, OpCode> opcode;
108
109 // General notes:
110 //
111 // When two input registers are used, one of them uses a 5-bit index while the other
112 // one uses a 7-bit index. This is because at most one floating point uniform may be used
113 // as an input.
114
115
116 // Format used e.g. by arithmetic instructions and comparisons
117 // "src1" and "src2" specify register indices (i.e. indices referring to groups of 4 floats),
118 // while "dest" addresses individual floats.
119 union {
120 BitField<0x00, 0x5, u32> operand_desc_id;
121
122 template<class BitFieldType>
123 struct SourceRegister : BitFieldType {
124 enum RegisterType {
125 Input,
126 Temporary,
127 FloatUniform
128 };
129
130 RegisterType GetRegisterType() const {
131 if (BitFieldType::Value() < 0x10)
132 return Input;
133 else if (BitFieldType::Value() < 0x20)
134 return Temporary;
135 else
136 return FloatUniform;
137 }
138
139 int GetIndex() const {
140 if (GetRegisterType() == Input)
141 return BitFieldType::Value();
142 else if (GetRegisterType() == Temporary)
143 return BitFieldType::Value() - 0x10;
144 else // if (GetRegisterType() == FloatUniform)
145 return BitFieldType::Value() - 0x20;
146 }
147
148 std::string GetRegisterName() const {
149 std::map<RegisterType, std::string> type = {
150 { Input, "i" },
151 { Temporary, "t" },
152 { FloatUniform, "f" },
153 };
154 return type[GetRegisterType()] + std::to_string(GetIndex());
155 }
156 };
157
158 SourceRegister<BitField<0x07, 0x5, u32>> src2;
159 SourceRegister<BitField<0x0c, 0x7, u32>> src1;
160
161 struct : BitField<0x15, 0x5, u32>
162 {
163 enum RegisterType {
164 Output,
165 Temporary,
166 Unknown
167 };
168 RegisterType GetRegisterType() const {
169 if (Value() < 0x8)
170 return Output;
171 else if (Value() < 0x10)
172 return Unknown;
173 else
174 return Temporary;
175 }
176 int GetIndex() const {
177 if (GetRegisterType() == Output)
178 return Value();
179 else if (GetRegisterType() == Temporary)
180 return Value() - 0x10;
181 else
182 return Value();
183 }
184 std::string GetRegisterName() const {
185 std::map<RegisterType, std::string> type = {
186 { Output, "o" },
187 { Temporary, "t" },
188 { Unknown, "u" }
189 };
190 return type[GetRegisterType()] + std::to_string(GetIndex());
191 }
192 } dest;
193 } common;
194
195 // Format used for flow control instructions ("if")
196 union {
197 BitField<0x00, 0x8, u32> num_instructions;
198 BitField<0x0a, 0xc, u32> offset_words;
199 } flow_control;
200};
201static_assert(std::is_standard_layout<Instruction>::value, "Structure is not using standard layout!");
202
203union SwizzlePattern {
204 u32 hex;
205
206 enum class Selector : u32 {
207 x = 0,
208 y = 1,
209 z = 2,
210 w = 3
211 };
212
213 Selector GetSelectorSrc1(int comp) const {
214 Selector selectors[] = {
215 src1_selector_0, src1_selector_1, src1_selector_2, src1_selector_3
216 };
217 return selectors[comp];
218 }
219
220 Selector GetSelectorSrc2(int comp) const {
221 Selector selectors[] = {
222 src2_selector_0, src2_selector_1, src2_selector_2, src2_selector_3
223 };
224 return selectors[comp];
225 }
226
227 bool DestComponentEnabled(int i) const {
228 return (dest_mask & (0x8 >> i)) != 0;
229 }
230
231 std::string SelectorToString(bool src2) const {
232 std::map<Selector, std::string> map = {
233 { Selector::x, "x" },
234 { Selector::y, "y" },
235 { Selector::z, "z" },
236 { Selector::w, "w" }
237 };
238 std::string ret;
239 for (int i = 0; i < 4; ++i) {
240 ret += map.at(src2 ? GetSelectorSrc2(i) : GetSelectorSrc1(i));
241 }
242 return ret;
243 }
244
245 std::string DestMaskToString() const {
246 std::string ret;
247 for (int i = 0; i < 4; ++i) {
248 if (!DestComponentEnabled(i))
249 ret += "_";
250 else
251 ret += "xyzw"[i];
252 }
253 return ret;
254 }
255
256 // Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x
257 BitField< 0, 4, u32> dest_mask;
258
259 BitField< 4, 1, u32> negate; // negates src1
260
261 BitField< 5, 2, Selector> src1_selector_3;
262 BitField< 7, 2, Selector> src1_selector_2;
263 BitField< 9, 2, Selector> src1_selector_1;
264 BitField<11, 2, Selector> src1_selector_0;
265
266 BitField<14, 2, Selector> src2_selector_3;
267 BitField<16, 2, Selector> src2_selector_2;
268 BitField<18, 2, Selector> src2_selector_1;
269 BitField<20, 2, Selector> src2_selector_0;
270
271 BitField<31, 1, u32> flag; // not sure what this means, maybe it's the sign?
272};
273
274void SubmitShaderMemoryChange(u32 addr, u32 value); 69void SubmitShaderMemoryChange(u32 addr, u32 value);
275void SubmitSwizzleDataChange(u32 addr, u32 value); 70void SubmitSwizzleDataChange(u32 addr, u32 value);
276 71
277OutputVertex RunShader(const InputVertex& input, int num_attributes); 72OutputVertex RunShader(const InputVertex& input, int num_attributes);
278 73
279Math::Vec4<float24>& GetFloatUniform(u32 index); 74Math::Vec4<float24>& GetFloatUniform(u32 index);
75bool& GetBoolUniform(u32 index);
76
77const std::array<u32, 1024>& GetShaderBinary();
78const std::array<u32, 1024>& GetSwizzlePatterns();
280 79
281} // namespace 80} // namespace
282 81