summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/citra_qt/debugger/graphics_cmdlists.cpp42
-rw-r--r--src/citra_qt/debugger/graphics_vertex_shader.cpp256
-rw-r--r--src/citra_qt/debugger/graphics_vertex_shader.h51
-rw-r--r--src/core/loader/loader.cpp6
-rw-r--r--src/video_core/command_processor.cpp33
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp99
-rw-r--r--src/video_core/debug_utils/debug_utils.h19
-rw-r--r--src/video_core/pica.h11
-rw-r--r--src/video_core/shader/shader.cpp55
-rw-r--r--src/video_core/shader/shader.h197
-rw-r--r--src/video_core/shader/shader_interpreter.cpp104
-rw-r--r--src/video_core/shader/shader_interpreter.h3
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp8
13 files changed, 735 insertions, 149 deletions
diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp
index 29b4a04a0..35a3140b2 100644
--- a/src/citra_qt/debugger/graphics_cmdlists.cpp
+++ b/src/citra_qt/debugger/graphics_cmdlists.cpp
@@ -175,29 +175,29 @@ int GPUCommandListModel::rowCount(const QModelIndex& parent) const {
175} 175}
176 176
177int GPUCommandListModel::columnCount(const QModelIndex& parent) const { 177int GPUCommandListModel::columnCount(const QModelIndex& parent) const {
178 return 3; 178 return 4;
179} 179}
180 180
181QVariant GPUCommandListModel::data(const QModelIndex& index, int role) const { 181QVariant GPUCommandListModel::data(const QModelIndex& index, int role) const {
182 if (!index.isValid()) 182 if (!index.isValid())
183 return QVariant(); 183 return QVariant();
184 184
185 const auto& writes = pica_trace.writes; 185 const auto& write = pica_trace.writes[index.row()];
186 const Pica::CommandProcessor::CommandHeader cmd{writes[index.row()].Id()};
187 const u32 val{writes[index.row()].Value()};
188 186
189 if (role == Qt::DisplayRole) { 187 if (role == Qt::DisplayRole) {
190 QString content; 188 QString content;
191 switch ( index.column() ) { 189 switch ( index.column() ) {
192 case 0: 190 case 0:
193 return QString::fromLatin1(Pica::Regs::GetCommandName(cmd.cmd_id).c_str()); 191 return QString::fromLatin1(Pica::Regs::GetCommandName(write.cmd_id).c_str());
194 case 1: 192 case 1:
195 return QString("%1").arg(cmd.cmd_id, 3, 16, QLatin1Char('0')); 193 return QString("%1").arg(write.cmd_id, 3, 16, QLatin1Char('0'));
196 case 2: 194 case 2:
197 return QString("%1").arg(val, 8, 16, QLatin1Char('0')); 195 return QString("%1").arg(write.mask, 4, 2, QLatin1Char('0'));
196 case 3:
197 return QString("%1").arg(write.value, 8, 16, QLatin1Char('0'));
198 } 198 }
199 } else if (role == CommandIdRole) { 199 } else if (role == CommandIdRole) {
200 return QVariant::fromValue<int>(cmd.cmd_id.Value()); 200 return QVariant::fromValue<int>(write.cmd_id);
201 } 201 }
202 202
203 return QVariant(); 203 return QVariant();
@@ -213,6 +213,8 @@ QVariant GPUCommandListModel::headerData(int section, Qt::Orientation orientatio
213 case 1: 213 case 1:
214 return tr("Register"); 214 return tr("Register");
215 case 2: 215 case 2:
216 return tr("Mask");
217 case 3:
216 return tr("New Value"); 218 return tr("New Value");
217 } 219 }
218 220
@@ -260,7 +262,7 @@ void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) {
260} 262}
261 263
262void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) { 264void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) {
263 QWidget* new_info_widget; 265 QWidget* new_info_widget = nullptr;
264 266
265 const unsigned int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toUInt(); 267 const unsigned int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toUInt();
266 if (COMMAND_IN_RANGE(command_id, texture0) || 268 if (COMMAND_IN_RANGE(command_id, texture0) ||
@@ -281,14 +283,15 @@ void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) {
281 auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format); 283 auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format);
282 u8* src = Memory::GetPhysicalPointer(config.GetPhysicalAddress()); 284 u8* src = Memory::GetPhysicalPointer(config.GetPhysicalAddress());
283 new_info_widget = new TextureInfoWidget(src, info); 285 new_info_widget = new TextureInfoWidget(src, info);
284 } else {
285 new_info_widget = new QWidget;
286 } 286 }
287 287 if (command_info_widget) {
288 widget()->layout()->removeWidget(command_info_widget); 288 delete command_info_widget;
289 delete command_info_widget; 289 command_info_widget = nullptr;
290 widget()->layout()->addWidget(new_info_widget); 290 }
291 command_info_widget = new_info_widget; 291 if (new_info_widget) {
292 widget()->layout()->addWidget(new_info_widget);
293 command_info_widget = new_info_widget;
294 }
292} 295}
293#undef COMMAND_IN_RANGE 296#undef COMMAND_IN_RANGE
294 297
@@ -300,7 +303,9 @@ GPUCommandListWidget::GPUCommandListWidget(QWidget* parent) : QDockWidget(tr("Pi
300 303
301 list_widget = new QTreeView; 304 list_widget = new QTreeView;
302 list_widget->setModel(model); 305 list_widget->setModel(model);
303 list_widget->setFont(QFont("monospace")); 306 QFont font("monospace");
307 font.setStyleHint(QFont::Monospace); // Automatic fallback to a monospace font on on platforms without a font called "monospace"
308 list_widget->setFont(font);
304 list_widget->setRootIsDecorated(false); 309 list_widget->setRootIsDecorated(false);
305 list_widget->setUniformRowHeights(true); 310 list_widget->setUniformRowHeights(true);
306 311
@@ -324,7 +329,7 @@ GPUCommandListWidget::GPUCommandListWidget(QWidget* parent) : QDockWidget(tr("Pi
324 329
325 connect(copy_all, SIGNAL(clicked()), this, SLOT(CopyAllToClipboard())); 330 connect(copy_all, SIGNAL(clicked()), this, SLOT(CopyAllToClipboard()));
326 331
327 command_info_widget = new QWidget; 332 command_info_widget = nullptr;
328 333
329 QVBoxLayout* main_layout = new QVBoxLayout; 334 QVBoxLayout* main_layout = new QVBoxLayout;
330 main_layout->addWidget(list_widget); 335 main_layout->addWidget(list_widget);
@@ -334,7 +339,6 @@ GPUCommandListWidget::GPUCommandListWidget(QWidget* parent) : QDockWidget(tr("Pi
334 sub_layout->addWidget(copy_all); 339 sub_layout->addWidget(copy_all);
335 main_layout->addLayout(sub_layout); 340 main_layout->addLayout(sub_layout);
336 } 341 }
337 main_layout->addWidget(command_info_widget);
338 main_widget->setLayout(main_layout); 342 main_widget->setLayout(main_layout);
339 343
340 setWidget(main_widget); 344 setWidget(main_widget);
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp
index 302e22d7a..0c17edee0 100644
--- a/src/citra_qt/debugger/graphics_vertex_shader.cpp
+++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp
@@ -6,9 +6,16 @@
6#include <sstream> 6#include <sstream>
7 7
8#include <QBoxLayout> 8#include <QBoxLayout>
9#include <QFileDialog>
10#include <QGroupBox>
11#include <QLabel>
12#include <QLineEdit>
13#include <QPushButton>
14#include <QSignalMapper>
15#include <QSpinBox>
9#include <QTreeView> 16#include <QTreeView>
10 17
11#include "video_core/shader/shader_interpreter.h" 18#include "video_core/shader/shader.h"
12 19
13#include "graphics_vertex_shader.h" 20#include "graphics_vertex_shader.h"
14 21
@@ -17,7 +24,7 @@ using nihstro::Instruction;
17using nihstro::SourceRegister; 24using nihstro::SourceRegister;
18using nihstro::SwizzlePattern; 25using nihstro::SwizzlePattern;
19 26
20GraphicsVertexShaderModel::GraphicsVertexShaderModel(QObject* parent): QAbstractItemModel(parent) { 27GraphicsVertexShaderModel::GraphicsVertexShaderModel(GraphicsVertexShaderWidget* parent): QAbstractItemModel(parent), par(parent) {
21 28
22} 29}
23 30
@@ -34,7 +41,7 @@ int GraphicsVertexShaderModel::columnCount(const QModelIndex& parent) const {
34} 41}
35 42
36int GraphicsVertexShaderModel::rowCount(const QModelIndex& parent) const { 43int GraphicsVertexShaderModel::rowCount(const QModelIndex& parent) const {
37 return static_cast<int>(info.code.size()); 44 return static_cast<int>(par->info.code.size());
38} 45}
39 46
40QVariant GraphicsVertexShaderModel::headerData(int section, Qt::Orientation orientation, int role) const { 47QVariant GraphicsVertexShaderModel::headerData(int section, Qt::Orientation orientation, int role) const {
@@ -62,21 +69,21 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
62 { 69 {
63 switch (index.column()) { 70 switch (index.column()) {
64 case 0: 71 case 0:
65 if (info.HasLabel(index.row())) 72 if (par->info.HasLabel(index.row()))
66 return QString::fromStdString(info.GetLabel(index.row())); 73 return QString::fromStdString(par->info.GetLabel(index.row()));
67 74
68 return QString("%1").arg(4*index.row(), 4, 16, QLatin1Char('0')); 75 return QString("%1").arg(4*index.row(), 4, 16, QLatin1Char('0'));
69 76
70 case 1: 77 case 1:
71 return QString("%1").arg(info.code[index.row()].hex, 8, 16, QLatin1Char('0')); 78 return QString("%1").arg(par->info.code[index.row()].hex, 8, 16, QLatin1Char('0'));
72 79
73 case 2: 80 case 2:
74 { 81 {
75 std::stringstream output; 82 std::stringstream output;
76 output.flags(std::ios::hex); 83 output.flags(std::ios::hex);
77 84
78 Instruction instr = info.code[index.row()]; 85 Instruction instr = par->info.code[index.row()];
79 const SwizzlePattern& swizzle = info.swizzle_info[instr.common.operand_desc_id].pattern; 86 const SwizzlePattern& swizzle = par->info.swizzle_info[instr.common.operand_desc_id].pattern;
80 87
81 // longest known instruction name: "setemit " 88 // longest known instruction name: "setemit "
82 output << std::setw(8) << std::left << instr.opcode.Value().GetInfo().name; 89 output << std::setw(8) << std::left << instr.opcode.Value().GetInfo().name;
@@ -130,13 +137,13 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
130 137
131 print_input_indexed_compact(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false).substr(0,1), instr.common.AddressRegisterName()); 138 print_input_indexed_compact(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false).substr(0,1), instr.common.AddressRegisterName());
132 output << " " << instr.common.compare_op.ToString(instr.common.compare_op.x) << " "; 139 output << " " << instr.common.compare_op.ToString(instr.common.compare_op.x) << " ";
133 print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false).substr(0,1)); 140 print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(true).substr(0,1));
134 141
135 output << ", "; 142 output << ", ";
136 143
137 print_input_indexed_compact(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false).substr(1,1), instr.common.AddressRegisterName()); 144 print_input_indexed_compact(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false).substr(1,1), instr.common.AddressRegisterName());
138 output << " " << instr.common.compare_op.ToString(instr.common.compare_op.y) << " "; 145 output << " " << instr.common.compare_op.ToString(instr.common.compare_op.y) << " ";
139 print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false).substr(1,1)); 146 print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(true).substr(1,1));
140 147
141 break; 148 break;
142 } 149 }
@@ -167,7 +174,7 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
167 // TODO: In some cases, the Address Register is used as an index for SRC2 instead of SRC1 174 // TODO: In some cases, the Address Register is used as an index for SRC2 instead of SRC1
168 if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::Src2) { 175 if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::Src2) {
169 SourceRegister src2 = instr.common.GetSrc2(src_is_inverted); 176 SourceRegister src2 = instr.common.GetSrc2(src_is_inverted);
170 print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false)); 177 print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(true));
171 } 178 }
172 break; 179 break;
173 } 180 }
@@ -240,6 +247,18 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
240 case Qt::FontRole: 247 case Qt::FontRole:
241 return QFont("monospace"); 248 return QFont("monospace");
242 249
250 case Qt::BackgroundRole:
251 // Highlight instructions which have no debug data associated to them
252 for (const auto& record : par->debug_data.records)
253 if (index.row() == record.instruction_offset)
254 return QVariant();
255
256 return QBrush(QColor(255, 255, 127));
257
258
259 // TODO: Draw arrows for each "reachable" instruction to visualize control flow
260
261
243 default: 262 default:
244 break; 263 break;
245 } 264 }
@@ -247,53 +266,232 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
247 return QVariant(); 266 return QVariant();
248} 267}
249 268
250void GraphicsVertexShaderModel::OnUpdate() 269void GraphicsVertexShaderWidget::DumpShader() {
251{ 270 QString filename = QFileDialog::getSaveFileName(this, tr("Save Shader Dump"), "shader_dump.shbin",
252 beginResetModel(); 271 tr("Shader Binary (*.shbin)"));
253
254 info.Clear();
255
256 for (auto instr : Pica::g_state.vs.program_code)
257 info.code.push_back({instr});
258 272
259 for (auto pattern : Pica::g_state.vs.swizzle_data) 273 if (filename.isEmpty()) {
260 info.swizzle_info.push_back({pattern}); 274 // If the user canceled the dialog, don't dump anything.
275 return;
276 }
261 277
262 info.labels.insert({ Pica::g_state.regs.vs.main_offset, "main" }); 278 auto& setup = Pica::g_state.vs;
279 auto& config = Pica::g_state.regs.vs;
263 280
264 endResetModel(); 281 Pica::DebugUtils::DumpShader(filename.toStdString(), config, setup, Pica::g_state.regs.vs_output_attributes);
265} 282}
266 283
267
268GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::DebugContext > debug_context, 284GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::DebugContext > debug_context,
269 QWidget* parent) 285 QWidget* parent)
270 : BreakPointObserverDock(debug_context, "Pica Vertex Shader", parent) { 286 : BreakPointObserverDock(debug_context, "Pica Vertex Shader", parent) {
271 setObjectName("PicaVertexShader"); 287 setObjectName("PicaVertexShader");
272 288
273 auto binary_model = new GraphicsVertexShaderModel(this); 289 auto input_data_mapper = new QSignalMapper(this);
274 auto binary_list = new QTreeView; 290
275 binary_list->setModel(binary_model); 291 // TODO: Support inputting data in hexadecimal raw format
292 for (unsigned i = 0; i < ARRAY_SIZE(input_data); ++i) {
293 input_data[i] = new QLineEdit;
294 input_data[i]->setValidator(new QDoubleValidator(input_data[i]));
295 }
296
297 breakpoint_warning = new QLabel(tr("(data only available at VertexLoaded breakpoints)"));
298
299 // TODO: Add some button for jumping to the shader entry point
300
301 model = new GraphicsVertexShaderModel(this);
302 binary_list = new QTreeView;
303 binary_list->setModel(model);
276 binary_list->setRootIsDecorated(false); 304 binary_list->setRootIsDecorated(false);
277 binary_list->setAlternatingRowColors(true); 305 binary_list->setAlternatingRowColors(true);
278 306
279 connect(this, SIGNAL(Update()), binary_model, SLOT(OnUpdate())); 307 auto dump_shader = new QPushButton(QIcon::fromTheme("document-save"), tr("Dump"));
308
309 instruction_description = new QLabel;
310
311 cycle_index = new QSpinBox;
312
313 connect(this, SIGNAL(SelectCommand(const QModelIndex&, QItemSelectionModel::SelectionFlags)),
314 binary_list->selectionModel(), SLOT(select(const QModelIndex&, QItemSelectionModel::SelectionFlags)));
315
316 connect(dump_shader, SIGNAL(clicked()), this, SLOT(DumpShader()));
317
318 connect(cycle_index, SIGNAL(valueChanged(int)), this, SLOT(OnCycleIndexChanged(int)));
319
320 for (unsigned i = 0; i < ARRAY_SIZE(input_data); ++i) {
321 connect(input_data[i], SIGNAL(textEdited(const QString&)), input_data_mapper, SLOT(map()));
322 input_data_mapper->setMapping(input_data[i], i);
323 }
324 connect(input_data_mapper, SIGNAL(mapped(int)), this, SLOT(OnInputAttributeChanged(int)));
280 325
281 auto main_widget = new QWidget; 326 auto main_widget = new QWidget;
282 auto main_layout = new QVBoxLayout; 327 auto main_layout = new QVBoxLayout;
283 { 328 {
329 auto input_data_group = new QGroupBox(tr("Input Data"));
330
331 // For each vertex attribute, add a QHBoxLayout consisting of:
332 // - A QLabel denoting the source attribute index
333 // - Four QLineEdits for showing and manipulating attribute data
334 // - A QLabel denoting the shader input attribute index
335 auto sub_layout = new QVBoxLayout;
336 for (unsigned i = 0; i < 16; ++i) {
337 // Create an HBoxLayout to store the widgets used to specify a particular attribute
338 // and store it in a QWidget to allow for easy hiding and unhiding.
339 auto row_layout = new QHBoxLayout;
340 row_layout->addWidget(new QLabel(tr("Attribute %1").arg(i, 2)));
341 for (unsigned comp = 0; comp < 4; ++comp)
342 row_layout->addWidget(input_data[4 * i + comp]);
343
344 row_layout->addWidget(input_data_mapping[i] = new QLabel);
345
346 input_data_container[i] = new QWidget;
347 input_data_container[i]->setLayout(row_layout);
348 input_data_container[i]->hide();
349
350 sub_layout->addWidget(input_data_container[i]);
351 }
352
353 sub_layout->addWidget(breakpoint_warning);
354 breakpoint_warning->hide();
355
356 input_data_group->setLayout(sub_layout);
357 main_layout->addWidget(input_data_group);
358 }
359 {
284 auto sub_layout = new QHBoxLayout; 360 auto sub_layout = new QHBoxLayout;
285 sub_layout->addWidget(binary_list); 361 sub_layout->addWidget(binary_list);
286 main_layout->addLayout(sub_layout); 362 main_layout->addLayout(sub_layout);
287 } 363 }
364 main_layout->addWidget(dump_shader);
365 {
366 auto sub_layout = new QHBoxLayout;
367 sub_layout->addWidget(new QLabel(tr("Cycle Index:")));
368 sub_layout->addWidget(cycle_index);
369 main_layout->addLayout(sub_layout);
370 }
371 main_layout->addWidget(instruction_description);
372 main_layout->addStretch();
288 main_widget->setLayout(main_layout); 373 main_widget->setLayout(main_layout);
289 setWidget(main_widget); 374 setWidget(main_widget);
375
376 widget()->setEnabled(false);
290} 377}
291 378
292void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { 379void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) {
293 emit Update(); 380 auto input = static_cast<Pica::Shader::InputVertex*>(data);
381 if (event == Pica::DebugContext::Event::VertexLoaded) {
382 Reload(true, data);
383 } else {
384 // No vertex data is retrievable => invalidate currently stored vertex data
385 Reload(true, nullptr);
386 }
294 widget()->setEnabled(true); 387 widget()->setEnabled(true);
295} 388}
296 389
390void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_data) {
391 model->beginResetModel();
392
393 if (replace_vertex_data) {
394 if (vertex_data) {
395 memcpy(&input_vertex, vertex_data, sizeof(input_vertex));
396 for (unsigned attr = 0; attr < 16; ++attr) {
397 for (unsigned comp = 0; comp < 4; ++comp) {
398 input_data[4 * attr + comp]->setText(QString("%1").arg(input_vertex.attr[attr][comp].ToFloat32()));
399 }
400 }
401 breakpoint_warning->hide();
402 } else {
403 for (unsigned attr = 0; attr < 16; ++attr) {
404 for (unsigned comp = 0; comp < 4; ++comp) {
405 input_data[4 * attr + comp]->setText(QString("???"));
406 }
407 }
408 breakpoint_warning->show();
409 }
410 }
411
412 // Reload shader code
413 info.Clear();
414
415 auto& shader_setup = Pica::g_state.vs;
416 auto& shader_config = Pica::g_state.regs.vs;
417 for (auto instr : shader_setup.program_code)
418 info.code.push_back({instr});
419
420 for (auto pattern : shader_setup.swizzle_data)
421 info.swizzle_info.push_back({pattern});
422
423 u32 entry_point = Pica::g_state.regs.vs.main_offset;
424 info.labels.insert({ entry_point, "main" });
425
426 // Generate debug information
427 debug_data = Pica::Shader::ProduceDebugInfo(input_vertex, 1, shader_config, shader_setup);
428
429 // Reload widget state
430
431 // Only show input attributes which are used as input to the shader
432 for (unsigned int attr = 0; attr < 16; ++attr) {
433 input_data_container[attr]->setVisible(false);
434 }
435 for (unsigned int attr = 0; attr < Pica::g_state.regs.vertex_attributes.GetNumTotalAttributes(); ++attr) {
436 unsigned source_attr = shader_config.input_register_map.GetRegisterForAttribute(attr);
437 input_data_mapping[source_attr]->setText(QString("-> v%1").arg(attr));
438 input_data_container[source_attr]->setVisible(true);
439 }
440
441 // Initialize debug info text for current cycle count
442 cycle_index->setMaximum(debug_data.records.size() - 1);
443 OnCycleIndexChanged(cycle_index->value());
444
445 model->endResetModel();
446}
447
297void GraphicsVertexShaderWidget::OnResumed() { 448void GraphicsVertexShaderWidget::OnResumed() {
298 widget()->setEnabled(false); 449 widget()->setEnabled(false);
299} 450}
451
452void GraphicsVertexShaderWidget::OnInputAttributeChanged(int index) {
453 float value = input_data[index]->text().toFloat();
454 Reload();
455}
456
457void GraphicsVertexShaderWidget::OnCycleIndexChanged(int index) {
458 QString text;
459
460 auto& record = debug_data.records[index];
461 if (record.mask & Pica::Shader::DebugDataRecord::SRC1)
462 text += tr("SRC1: %1, %2, %3, %4\n").arg(record.src1.x.ToFloat32()).arg(record.src1.y.ToFloat32()).arg(record.src1.z.ToFloat32()).arg(record.src1.w.ToFloat32());
463 if (record.mask & Pica::Shader::DebugDataRecord::SRC2)
464 text += tr("SRC2: %1, %2, %3, %4\n").arg(record.src2.x.ToFloat32()).arg(record.src2.y.ToFloat32()).arg(record.src2.z.ToFloat32()).arg(record.src2.w.ToFloat32());
465 if (record.mask & Pica::Shader::DebugDataRecord::SRC3)
466 text += tr("SRC3: %1, %2, %3, %4\n").arg(record.src3.x.ToFloat32()).arg(record.src3.y.ToFloat32()).arg(record.src3.z.ToFloat32()).arg(record.src3.w.ToFloat32());
467 if (record.mask & Pica::Shader::DebugDataRecord::DEST_IN)
468 text += tr("DEST_IN: %1, %2, %3, %4\n").arg(record.dest_in.x.ToFloat32()).arg(record.dest_in.y.ToFloat32()).arg(record.dest_in.z.ToFloat32()).arg(record.dest_in.w.ToFloat32());
469 if (record.mask & Pica::Shader::DebugDataRecord::DEST_OUT)
470 text += tr("DEST_OUT: %1, %2, %3, %4\n").arg(record.dest_out.x.ToFloat32()).arg(record.dest_out.y.ToFloat32()).arg(record.dest_out.z.ToFloat32()).arg(record.dest_out.w.ToFloat32());
471
472 if (record.mask & Pica::Shader::DebugDataRecord::ADDR_REG_OUT)
473 text += tr("Addres Registers: %1, %2\n").arg(record.address_registers[0]).arg(record.address_registers[1]);
474 if (record.mask & Pica::Shader::DebugDataRecord::CMP_RESULT)
475 text += tr("Compare Result: %1, %2\n").arg(record.conditional_code[0] ? "true" : "false").arg(record.conditional_code[1] ? "true" : "false");
476
477 if (record.mask & Pica::Shader::DebugDataRecord::COND_BOOL_IN)
478 text += tr("Static Condition: %1\n").arg(record.cond_bool ? "true" : "false");
479 if (record.mask & Pica::Shader::DebugDataRecord::COND_CMP_IN)
480 text += tr("Dynamic Conditions: %1, %2\n").arg(record.cond_cmp[0] ? "true" : "false").arg(record.cond_cmp[1] ? "true" : "false");
481 if (record.mask & Pica::Shader::DebugDataRecord::LOOP_INT_IN)
482 text += tr("Loop Parameters: %1 (repeats), %2 (initializer), %3 (increment), %4\n").arg(record.loop_int.x).arg(record.loop_int.y).arg(record.loop_int.z).arg(record.loop_int.w);
483
484 text += tr("Instruction offset: 0x%1").arg(4 * record.instruction_offset, 4, 16, QLatin1Char('0'));
485 if (record.mask & Pica::Shader::DebugDataRecord::NEXT_INSTR) {
486 text += tr(" -> 0x%2").arg(4 * record.next_instruction, 4, 16, QLatin1Char('0'));
487 } else {
488 text += tr(" (last instruction)");
489 }
490
491 instruction_description->setText(text);
492
493 // Scroll to current instruction
494 const QModelIndex& instr_index = model->index(record.instruction_offset, 0);
495 emit SelectCommand(instr_index, QItemSelectionModel::ClearAndSelect | QItemSelectionModel::Rows);
496 binary_list->scrollTo(instr_index, QAbstractItemView::EnsureVisible);
497}
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.h b/src/citra_qt/debugger/graphics_vertex_shader.h
index 38339dc05..1b3f1f7ec 100644
--- a/src/citra_qt/debugger/graphics_vertex_shader.h
+++ b/src/citra_qt/debugger/graphics_vertex_shader.h
@@ -10,11 +10,18 @@
10 10
11#include "nihstro/parser_shbin.h" 11#include "nihstro/parser_shbin.h"
12 12
13#include "video_core/shader/shader.h"
14
15class QLabel;
16class QSpinBox;
17
18class GraphicsVertexShaderWidget;
19
13class GraphicsVertexShaderModel : public QAbstractItemModel { 20class GraphicsVertexShaderModel : public QAbstractItemModel {
14 Q_OBJECT 21 Q_OBJECT
15 22
16public: 23public:
17 GraphicsVertexShaderModel(QObject* parent); 24 GraphicsVertexShaderModel(GraphicsVertexShaderWidget* parent);
18 25
19 QModelIndex index(int row, int column, const QModelIndex& parent = QModelIndex()) const override; 26 QModelIndex index(int row, int column, const QModelIndex& parent = QModelIndex()) const override;
20 QModelIndex parent(const QModelIndex& child) const override; 27 QModelIndex parent(const QModelIndex& child) const override;
@@ -23,11 +30,10 @@ public:
23 QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override; 30 QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override;
24 QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const override; 31 QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const override;
25 32
26public slots:
27 void OnUpdate();
28
29private: 33private:
30 nihstro::ShaderInfo info; 34 GraphicsVertexShaderWidget* par;
35
36 friend class GraphicsVertexShaderWidget;
31}; 37};
32 38
33class GraphicsVertexShaderWidget : public BreakPointObserverDock { 39class GraphicsVertexShaderWidget : public BreakPointObserverDock {
@@ -43,9 +49,42 @@ private slots:
43 void OnBreakPointHit(Pica::DebugContext::Event event, void* data) override; 49 void OnBreakPointHit(Pica::DebugContext::Event event, void* data) override;
44 void OnResumed() override; 50 void OnResumed() override;
45 51
52 void OnInputAttributeChanged(int index);
53
54 void OnCycleIndexChanged(int index);
55
56 void DumpShader();
57
58 /**
59 * Reload widget based on the current PICA200 state
60 * @param replace_vertex_data If true, invalidate all current vertex data
61 * @param vertex_data New vertex data to use, as passed to OnBreakPointHit. May be nullptr to specify that no valid vertex data can be retrieved currently. Only used if replace_vertex_data is true.
62 */
63 void Reload(bool replace_vertex_data = false, void* vertex_data = nullptr);
64
65
46signals: 66signals:
47 void Update(); 67 // Call this to change the current command selection in the disassembly view
68 void SelectCommand(const QModelIndex&, QItemSelectionModel::SelectionFlags);
48 69
49private: 70private:
71 QLabel* instruction_description;
72 QTreeView* binary_list;
73 GraphicsVertexShaderModel* model;
74
75 /// TODO: Move these into a single struct
76 std::array<QLineEdit*, 4*16> input_data; // A text box for each of the 4 components of up to 16 vertex attributes
77 std::array<QWidget*, 16> input_data_container; // QWidget containing the QLayout containing each vertex attribute
78 std::array<QLabel*, 16> input_data_mapping; // A QLabel denoting the shader input attribute which the vertex attribute maps to
79
80 // Text to be shown when input vertex data is not retrievable
81 QLabel* breakpoint_warning;
82
83 QSpinBox* cycle_index;
84
85 nihstro::ShaderInfo info;
86 Pica::Shader::DebugData<true> debug_data;
87 Pica::Shader::InputVertex input_vertex;
50 88
89 friend class GraphicsVertexShaderModel;
51}; 90};
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp
index f5b349a77..062291006 100644
--- a/src/core/loader/loader.cpp
+++ b/src/core/loader/loader.cpp
@@ -77,6 +77,8 @@ static const char* GetFileTypeString(FileType type) {
77 return "NCSD"; 77 return "NCSD";
78 case FileType::CXI: 78 case FileType::CXI:
79 return "NCCH"; 79 return "NCCH";
80 case FileType::CIA:
81 return "CIA";
80 case FileType::ELF: 82 case FileType::ELF:
81 return "ELF"; 83 return "ELF";
82 case FileType::THREEDSX: 84 case FileType::THREEDSX:
@@ -134,6 +136,10 @@ ResultStatus LoadFile(const std::string& filename) {
134 break; 136 break;
135 } 137 }
136 138
139 // CIA file format...
140 case FileType::CIA:
141 return ResultStatus::ErrorNotImplemented;
142
137 // Error occurred during IdentifyFile... 143 // Error occurred during IdentifyFile...
138 case FileType::Error: 144 case FileType::Error:
139 145
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 374c4748d..d82e20f86 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -35,7 +35,15 @@ static u32 default_attr_write_buffer[3];
35 35
36Common::Profiling::TimingCategory category_drawing("Drawing"); 36Common::Profiling::TimingCategory category_drawing("Drawing");
37 37
38static inline void WritePicaReg(u32 id, u32 value, u32 mask) { 38// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
39static const u32 expand_bits_to_bytes[] = {
40 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff,
41 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff,
42 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff,
43 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff
44};
45
46static void WritePicaReg(u32 id, u32 value, u32 mask) {
39 auto& regs = g_state.regs; 47 auto& regs = g_state.regs;
40 48
41 if (id >= regs.NumIds()) 49 if (id >= regs.NumIds())
@@ -47,13 +55,16 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
47 55
48 // TODO: Figure out how register masking acts on e.g. vs.uniform_setup.set_value 56 // TODO: Figure out how register masking acts on e.g. vs.uniform_setup.set_value
49 u32 old_value = regs[id]; 57 u32 old_value = regs[id];
50 regs[id] = (old_value & ~mask) | (value & mask); 58
59 const u32 write_mask = expand_bits_to_bytes[mask];
60
61 regs[id] = (old_value & ~write_mask) | (value & write_mask);
62
63 DebugUtils::OnPicaRegWrite({ (u16)id, (u16)mask, regs[id] });
51 64
52 if (g_debug_context) 65 if (g_debug_context)
53 g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded, reinterpret_cast<void*>(&id)); 66 g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded, reinterpret_cast<void*>(&id));
54 67
55 DebugUtils::OnPicaRegWrite(id, regs[id]);
56
57 switch(id) { 68 switch(id) {
58 // Trigger IRQ 69 // Trigger IRQ
59 case PICA_REG_INDEX(trigger_irq): 70 case PICA_REG_INDEX(trigger_irq):
@@ -215,7 +226,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
215 unsigned int vertex_cache_pos = 0; 226 unsigned int vertex_cache_pos = 0;
216 vertex_cache_ids.fill(-1); 227 vertex_cache_ids.fill(-1);
217 228
218 Shader::UnitState shader_unit; 229 Shader::UnitState<false> shader_unit;
219 Shader::Setup(shader_unit); 230 Shader::Setup(shader_unit);
220 231
221 for (unsigned int index = 0; index < regs.num_vertices; ++index) 232 for (unsigned int index = 0; index < regs.num_vertices; ++index)
@@ -469,13 +480,6 @@ void ProcessCommandList(const u32* list, u32 size) {
469 g_state.cmd_list.length = size / sizeof(u32); 480 g_state.cmd_list.length = size / sizeof(u32);
470 481
471 while (g_state.cmd_list.current_ptr < g_state.cmd_list.head_ptr + g_state.cmd_list.length) { 482 while (g_state.cmd_list.current_ptr < g_state.cmd_list.head_ptr + g_state.cmd_list.length) {
472 // Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
473 static const u32 expand_bits_to_bytes[] = {
474 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff,
475 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff,
476 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff,
477 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff
478 };
479 483
480 // Align read pointer to 8 bytes 484 // Align read pointer to 8 bytes
481 if ((g_state.cmd_list.head_ptr - g_state.cmd_list.current_ptr) % 2 != 0) 485 if ((g_state.cmd_list.head_ptr - g_state.cmd_list.current_ptr) % 2 != 0)
@@ -483,14 +487,13 @@ void ProcessCommandList(const u32* list, u32 size) {
483 487
484 u32 value = *g_state.cmd_list.current_ptr++; 488 u32 value = *g_state.cmd_list.current_ptr++;
485 const CommandHeader header = { *g_state.cmd_list.current_ptr++ }; 489 const CommandHeader header = { *g_state.cmd_list.current_ptr++ };
486 const u32 write_mask = expand_bits_to_bytes[header.parameter_mask];
487 u32 cmd = header.cmd_id; 490 u32 cmd = header.cmd_id;
488 491
489 WritePicaReg(cmd, value, write_mask); 492 WritePicaReg(cmd, value, header.parameter_mask);
490 493
491 for (unsigned i = 0; i < header.extra_data_length; ++i) { 494 for (unsigned i = 0; i < header.extra_data_length; ++i) {
492 u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0); 495 u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0);
493 WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, write_mask); 496 WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, header.parameter_mask);
494 } 497 }
495 } 498 }
496} 499}
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 6d6b65286..8ad77f0c8 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -4,9 +4,10 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <condition_variable> 6#include <condition_variable>
7#include <cstring>
8#include <fstream>
7#include <list> 9#include <list>
8#include <map> 10#include <map>
9#include <fstream>
10#include <mutex> 11#include <mutex>
11#include <string> 12#include <string>
12 13
@@ -14,6 +15,7 @@
14#include <png.h> 15#include <png.h>
15#endif 16#endif
16 17
18#include <nihstro/float24.h>
17#include <nihstro/shader_binary.h> 19#include <nihstro/shader_binary.h>
18 20
19#include "common/assert.h" 21#include "common/assert.h"
@@ -63,7 +65,7 @@ void DebugContext::OnEvent(Event event, void* data) {
63 65
64void DebugContext::Resume() { 66void DebugContext::Resume() {
65 { 67 {
66 std::unique_lock<std::mutex> lock(breakpoint_mutex); 68 std::lock_guard<std::mutex> lock(breakpoint_mutex);
67 69
68 // Tell all observers that we are about to resume 70 // Tell all observers that we are about to resume
69 for (auto& breakpoint_observer : breakpoint_observers) { 71 for (auto& breakpoint_observer : breakpoint_observers) {
@@ -110,8 +112,7 @@ void GeometryDumper::Dump() {
110} 112}
111 113
112 114
113void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, 115void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes)
114 u32 main_offset, const Regs::VSOutputAttributes* output_attributes)
115{ 116{
116 struct StuffToWrite { 117 struct StuffToWrite {
117 u8* pointer; 118 u8* pointer;
@@ -131,11 +132,14 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data
131 // into shbin format (separate type and component mask). 132 // into shbin format (separate type and component mask).
132 union OutputRegisterInfo { 133 union OutputRegisterInfo {
133 enum Type : u64 { 134 enum Type : u64 {
134 POSITION = 0, 135 POSITION = 0,
135 COLOR = 2, 136 QUATERNION = 1,
136 TEXCOORD0 = 3, 137 COLOR = 2,
137 TEXCOORD1 = 5, 138 TEXCOORD0 = 3,
138 TEXCOORD2 = 6, 139 TEXCOORD1 = 5,
140 TEXCOORD2 = 6,
141
142 VIEW = 8,
139 }; 143 };
140 144
141 BitField< 0, 64, u64> hex; 145 BitField< 0, 64, u64> hex;
@@ -157,6 +161,10 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data
157 { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} }, 161 { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} },
158 { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} }, 162 { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} },
159 { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} }, 163 { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} },
164 { OutputAttributes::QUATERNION_X, { OutputRegisterInfo::QUATERNION, 1} },
165 { OutputAttributes::QUATERNION_Y, { OutputRegisterInfo::QUATERNION, 2} },
166 { OutputAttributes::QUATERNION_Z, { OutputRegisterInfo::QUATERNION, 4} },
167 { OutputAttributes::QUATERNION_W, { OutputRegisterInfo::QUATERNION, 8} },
160 { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} }, 168 { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} },
161 { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} }, 169 { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} },
162 { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} }, 170 { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} },
@@ -166,7 +174,10 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data
166 { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} }, 174 { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} },
167 { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} }, 175 { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} },
168 { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} }, 176 { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} },
169 { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} } 177 { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} },
178 { OutputAttributes::VIEW_X, { OutputRegisterInfo::VIEW, 1} },
179 { OutputAttributes::VIEW_Y, { OutputRegisterInfo::VIEW, 2} },
180 { OutputAttributes::VIEW_Z, { OutputRegisterInfo::VIEW, 4} }
170 }; 181 };
171 182
172 for (const auto& semantic : std::vector<OutputAttributes::Semantic>{ 183 for (const auto& semantic : std::vector<OutputAttributes::Semantic>{
@@ -221,28 +232,69 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data
221 232
222 // TODO: Reduce the amount of binary code written to relevant portions 233 // TODO: Reduce the amount of binary code written to relevant portions
223 dvlp.binary_offset = write_offset - dvlp_offset; 234 dvlp.binary_offset = write_offset - dvlp_offset;
224 dvlp.binary_size_words = binary_size; 235 dvlp.binary_size_words = setup.program_code.size();
225 QueueForWriting((u8*)binary_data, binary_size * sizeof(u32)); 236 QueueForWriting((u8*)setup.program_code.data(), setup.program_code.size() * sizeof(u32));
226 237
227 dvlp.swizzle_info_offset = write_offset - dvlp_offset; 238 dvlp.swizzle_info_offset = write_offset - dvlp_offset;
228 dvlp.swizzle_info_num_entries = swizzle_size; 239 dvlp.swizzle_info_num_entries = setup.swizzle_data.size();
229 u32 dummy = 0; 240 u32 dummy = 0;
230 for (unsigned int i = 0; i < swizzle_size; ++i) { 241 for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) {
231 QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i])); 242 QueueForWriting((u8*)&setup.swizzle_data[i], sizeof(setup.swizzle_data[i]));
232 QueueForWriting((u8*)&dummy, sizeof(dummy)); 243 QueueForWriting((u8*)&dummy, sizeof(dummy));
233 } 244 }
234 245
235 dvle.main_offset_words = main_offset; 246 dvle.main_offset_words = config.main_offset;
236 dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; 247 dvle.output_register_table_offset = write_offset - dvlb.dvle_offset;
237 dvle.output_register_table_size = static_cast<u32>(output_info_table.size()); 248 dvle.output_register_table_size = static_cast<u32>(output_info_table.size());
238 QueueForWriting((u8*)output_info_table.data(), static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo))); 249 QueueForWriting((u8*)output_info_table.data(), static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo)));
239 250
240 // TODO: Create a label table for "main" 251 // TODO: Create a label table for "main"
241 252
253 std::vector<nihstro::ConstantInfo> constant_table;
254 for (unsigned i = 0; i < setup.uniforms.b.size(); ++i) {
255 nihstro::ConstantInfo constant;
256 memset(&constant, 0, sizeof(constant));
257 constant.type = nihstro::ConstantInfo::Bool;
258 constant.regid = i;
259 constant.b = setup.uniforms.b[i];
260 constant_table.emplace_back(constant);
261 }
262 for (unsigned i = 0; i < setup.uniforms.i.size(); ++i) {
263 nihstro::ConstantInfo constant;
264 memset(&constant, 0, sizeof(constant));
265 constant.type = nihstro::ConstantInfo::Int;
266 constant.regid = i;
267 constant.i.x = setup.uniforms.i[i].x;
268 constant.i.y = setup.uniforms.i[i].y;
269 constant.i.z = setup.uniforms.i[i].z;
270 constant.i.w = setup.uniforms.i[i].w;
271 constant_table.emplace_back(constant);
272 }
273 for (unsigned i = 0; i < sizeof(setup.uniforms.f) / sizeof(setup.uniforms.f[0]); ++i) {
274 nihstro::ConstantInfo constant;
275 memset(&constant, 0, sizeof(constant));
276 constant.type = nihstro::ConstantInfo::Float;
277 constant.regid = i;
278 constant.f.x = nihstro::to_float24(setup.uniforms.f[i].x.ToFloat32());
279 constant.f.y = nihstro::to_float24(setup.uniforms.f[i].y.ToFloat32());
280 constant.f.z = nihstro::to_float24(setup.uniforms.f[i].z.ToFloat32());
281 constant.f.w = nihstro::to_float24(setup.uniforms.f[i].w.ToFloat32());
282
283 // Store constant only if at least one of its components is different from zero.
284 if (setup.uniforms.f[i].x.ToFloat32() != 0.0 ||
285 setup.uniforms.f[i].y.ToFloat32() != 0.0 ||
286 setup.uniforms.f[i].z.ToFloat32() != 0.0 ||
287 setup.uniforms.f[i].w.ToFloat32() != 0.0)
288 constant_table.emplace_back(constant);
289 }
290 dvle.constant_table_offset = write_offset - dvlb.dvle_offset;
291 dvle.constant_table_size = constant_table.size();
292 for (const auto& constant : constant_table) {
293 QueueForWriting((uint8_t*)&constant, sizeof(constant));
294 }
242 295
243 // Write data to file 296 // Write data to file
244 static int dump_index = 0; 297 static int dump_index = 0;
245 std::string filename = std::string("shader_dump") + std::to_string(++dump_index) + std::string(".shbin");
246 std::ofstream file(filename, std::ios_base::out | std::ios_base::binary); 298 std::ofstream file(filename, std::ios_base::out | std::ios_base::binary);
247 299
248 for (auto& chunk : writing_queue) { 300 for (auto& chunk : writing_queue) {
@@ -261,11 +313,10 @@ void StartPicaTracing()
261 return; 313 return;
262 } 314 }
263 315
264 pica_trace_mutex.lock(); 316 std::lock_guard<std::mutex> lock(pica_trace_mutex);
265 pica_trace = std::unique_ptr<PicaTrace>(new PicaTrace); 317 pica_trace = std::unique_ptr<PicaTrace>(new PicaTrace);
266 318
267 is_pica_tracing = true; 319 is_pica_tracing = true;
268 pica_trace_mutex.unlock();
269} 320}
270 321
271bool IsPicaTracing() 322bool IsPicaTracing()
@@ -273,18 +324,18 @@ bool IsPicaTracing()
273 return is_pica_tracing != 0; 324 return is_pica_tracing != 0;
274} 325}
275 326
276void OnPicaRegWrite(u32 id, u32 value) 327void OnPicaRegWrite(PicaTrace::Write write)
277{ 328{
278 // Double check for is_pica_tracing to avoid pointless locking overhead 329 // Double check for is_pica_tracing to avoid pointless locking overhead
279 if (!is_pica_tracing) 330 if (!is_pica_tracing)
280 return; 331 return;
281 332
282 std::unique_lock<std::mutex> lock(pica_trace_mutex); 333 std::lock_guard<std::mutex> lock(pica_trace_mutex);
283 334
284 if (!is_pica_tracing) 335 if (!is_pica_tracing)
285 return; 336 return;
286 337
287 pica_trace->writes.emplace_back(id, value); 338 pica_trace->writes.push_back(write);
288} 339}
289 340
290std::unique_ptr<PicaTrace> FinishPicaTracing() 341std::unique_ptr<PicaTrace> FinishPicaTracing()
@@ -298,9 +349,9 @@ std::unique_ptr<PicaTrace> FinishPicaTracing()
298 is_pica_tracing = false; 349 is_pica_tracing = false;
299 350
300 // Wait until running tracing is finished 351 // Wait until running tracing is finished
301 pica_trace_mutex.lock(); 352 std::lock_guard<std::mutex> lock(pica_trace_mutex);
302 std::unique_ptr<PicaTrace> ret(std::move(pica_trace)); 353 std::unique_ptr<PicaTrace> ret(std::move(pica_trace));
303 pica_trace_mutex.unlock(); 354
304 return std::move(ret); 355 return std::move(ret);
305} 356}
306 357
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index 81eea30a9..85762f5b4 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -158,7 +158,6 @@ extern std::shared_ptr<DebugContext> g_debug_context; // TODO: Get rid of this g
158namespace DebugUtils { 158namespace DebugUtils {
159 159
160#define PICA_DUMP_GEOMETRY 0 160#define PICA_DUMP_GEOMETRY 0
161#define PICA_DUMP_SHADERS 0
162#define PICA_DUMP_TEXTURES 0 161#define PICA_DUMP_TEXTURES 0
163#define PICA_LOG_TEV 0 162#define PICA_LOG_TEV 0
164 163
@@ -182,27 +181,23 @@ private:
182 std::vector<Face> faces; 181 std::vector<Face> faces;
183}; 182};
184 183
185void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, 184void DumpShader(const std::string& filename, const Regs::ShaderConfig& config,
186 u32 main_offset, const Regs::VSOutputAttributes* output_attributes); 185 const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes);
187 186
188 187
189// Utility class to log Pica commands. 188// Utility class to log Pica commands.
190struct PicaTrace { 189struct PicaTrace {
191 struct Write : public std::pair<u32,u32> { 190 struct Write {
192 Write(u32 id, u32 value) : std::pair<u32,u32>(id, value) {} 191 u16 cmd_id;
193 192 u16 mask;
194 u32& Id() { return first; } 193 u32 value;
195 const u32& Id() const { return first; }
196
197 u32& Value() { return second; }
198 const u32& Value() const { return second; }
199 }; 194 };
200 std::vector<Write> writes; 195 std::vector<Write> writes;
201}; 196};
202 197
203void StartPicaTracing(); 198void StartPicaTracing();
204bool IsPicaTracing(); 199bool IsPicaTracing();
205void OnPicaRegWrite(u32 id, u32 value); 200void OnPicaRegWrite(PicaTrace::Write write);
206std::unique_ptr<PicaTrace> FinishPicaTracing(); 201std::unique_ptr<PicaTrace> FinishPicaTracing();
207 202
208struct TextureInfo { 203struct TextureInfo {
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index a5ec5ee9f..58b924f9e 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -80,6 +80,11 @@ struct Regs {
80 POSITION_Z = 2, 80 POSITION_Z = 2,
81 POSITION_W = 3, 81 POSITION_W = 3,
82 82
83 QUATERNION_X = 4,
84 QUATERNION_Y = 5,
85 QUATERNION_Z = 6,
86 QUATERNION_W = 7,
87
83 COLOR_R = 8, 88 COLOR_R = 8,
84 COLOR_G = 9, 89 COLOR_G = 9,
85 COLOR_B = 10, 90 COLOR_B = 10,
@@ -89,6 +94,12 @@ struct Regs {
89 TEXCOORD0_V = 13, 94 TEXCOORD0_V = 13,
90 TEXCOORD1_U = 14, 95 TEXCOORD1_U = 14,
91 TEXCOORD1_V = 15, 96 TEXCOORD1_V = 15,
97
98 // TODO: Not verified
99 VIEW_X = 18,
100 VIEW_Y = 19,
101 VIEW_Z = 20,
102
92 TEXCOORD2_U = 22, 103 TEXCOORD2_U = 22,
93 TEXCOORD2_V = 23, 104 TEXCOORD2_V = 23,
94 105
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 6a27a8015..4e9836c80 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -5,6 +5,8 @@
5#include <memory> 5#include <memory>
6#include <unordered_map> 6#include <unordered_map>
7 7
8#include <boost/range/algorithm/fill.hpp>
9
8#include "common/hash.h" 10#include "common/hash.h"
9#include "common/make_unique.h" 11#include "common/make_unique.h"
10#include "common/profiler.h" 12#include "common/profiler.h"
@@ -30,7 +32,7 @@ static JitCompiler jit;
30static CompiledShader* jit_shader; 32static CompiledShader* jit_shader;
31#endif // ARCHITECTURE_x86_64 33#endif // ARCHITECTURE_x86_64
32 34
33void Setup(UnitState& state) { 35void Setup(UnitState<false>& state) {
34#ifdef ARCHITECTURE_x86_64 36#ifdef ARCHITECTURE_x86_64
35 if (VideoCore::g_shader_jit_enabled) { 37 if (VideoCore::g_shader_jit_enabled) {
36 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ 38 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
@@ -54,9 +56,8 @@ void Shutdown() {
54 56
55static Common::Profiling::TimingCategory shader_category("Vertex Shader"); 57static Common::Profiling::TimingCategory shader_category("Vertex Shader");
56 58
57OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) { 59OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
58 auto& config = g_state.regs.vs; 60 auto& config = g_state.regs.vs;
59 auto& setup = g_state.vs;
60 61
61 Common::Profiling::ScopeTimer timer(shader_category); 62 Common::Profiling::ScopeTimer timer(shader_category);
62 63
@@ -67,6 +68,8 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes)
67 // Setup input register table 68 // Setup input register table
68 const auto& attribute_register_map = config.input_register_map; 69 const auto& attribute_register_map = config.input_register_map;
69 70
71 // TODO: Instead of this cumbersome logic, just load the input data directly like
72 // for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; }
70 if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; 73 if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0];
71 if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1]; 74 if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1];
72 if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2]; 75 if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2];
@@ -96,12 +99,6 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes)
96 RunInterpreter(state); 99 RunInterpreter(state);
97#endif // ARCHITECTURE_x86_64 100#endif // ARCHITECTURE_x86_64
98 101
99#if PICA_DUMP_SHADERS
100 DebugUtils::DumpShader(setup.program_code.data(), state.debug.max_offset, setup.swizzle_data.data(),
101 state.debug.max_opdesc_id, config.main_offset,
102 g_state.regs.vs_output_attributes); // TODO: Don't hardcode VS here
103#endif
104
105 // Setup output data 102 // Setup output data
106 OutputVertex ret; 103 OutputVertex ret;
107 // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to 104 // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
@@ -132,14 +129,52 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes)
132 std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); 129 std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
133 } 130 }
134 131
135 LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", 132 LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
136 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), 133 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
134 ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(),
137 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), 135 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
138 ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); 136 ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32());
139 137
140 return ret; 138 return ret;
141} 139}
142 140
141DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) {
142 UnitState<true> state;
143
144 const auto& shader_memory = setup.program_code;
145 state.program_counter = config.main_offset;
146 state.debug.max_offset = 0;
147 state.debug.max_opdesc_id = 0;
148
149 // Setup input register table
150 const auto& attribute_register_map = config.input_register_map;
151 float24 dummy_register;
152 boost::fill(state.registers.input, &dummy_register);
153
154 if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = &input.attr[0].x;
155 if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = &input.attr[1].x;
156 if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = &input.attr[2].x;
157 if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = &input.attr[3].x;
158 if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = &input.attr[4].x;
159 if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = &input.attr[5].x;
160 if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = &input.attr[6].x;
161 if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = &input.attr[7].x;
162 if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = &input.attr[8].x;
163 if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = &input.attr[9].x;
164 if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = &input.attr[10].x;
165 if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = &input.attr[11].x;
166 if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = &input.attr[12].x;
167 if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = &input.attr[13].x;
168 if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = &input.attr[14].x;
169 if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = &input.attr[15].x;
170
171 state.conditional_code[0] = false;
172 state.conditional_code[1] = false;
173
174 RunInterpreter(state);
175 return state.debug;
176}
177
143} // namespace Shader 178} // namespace Shader
144 179
145} // namespace Pica 180} // namespace Pica
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 2007a2844..bac51ddd8 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -4,7 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <vector>
8
7#include <boost/container/static_vector.hpp> 9#include <boost/container/static_vector.hpp>
10
8#include <nihstro/shader_binary.h> 11#include <nihstro/shader_binary.h>
9 12
10#include "common/common_funcs.h" 13#include "common/common_funcs.h"
@@ -30,7 +33,7 @@ struct OutputVertex {
30 33
31 // VS output attributes 34 // VS output attributes
32 Math::Vec4<float24> pos; 35 Math::Vec4<float24> pos;
33 Math::Vec4<float24> dummy; // quaternions (not implemented, yet) 36 Math::Vec4<float24> quat;
34 Math::Vec4<float24> color; 37 Math::Vec4<float24> color;
35 Math::Vec2<float24> tc0; 38 Math::Vec2<float24> tc0;
36 Math::Vec2<float24> tc1; 39 Math::Vec2<float24> tc1;
@@ -72,12 +75,185 @@ struct OutputVertex {
72static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); 75static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
73static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); 76static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
74 77
78
79// Helper structure used to keep track of data useful for inspection of shader emulation
80template<bool full_debugging>
81struct DebugData;
82
83template<>
84struct DebugData<false> {
85 // TODO: Hide these behind an interface and move them to DebugData<true>
86 u32 max_offset; // maximum program counter ever reached
87 u32 max_opdesc_id; // maximum swizzle pattern index ever used
88};
89
90template<>
91struct DebugData<true> {
92 // Records store the input and output operands of a particular instruction.
93 struct Record {
94 enum Type {
95 // Floating point arithmetic operands
96 SRC1 = 0x1,
97 SRC2 = 0x2,
98 SRC3 = 0x4,
99
100 // Initial and final output operand value
101 DEST_IN = 0x8,
102 DEST_OUT = 0x10,
103
104 // Current and next instruction offset (in words)
105 CUR_INSTR = 0x20,
106 NEXT_INSTR = 0x40,
107
108 // Output address register value
109 ADDR_REG_OUT = 0x80,
110
111 // Result of a comparison instruction
112 CMP_RESULT = 0x100,
113
114 // Input values for conditional flow control instructions
115 COND_BOOL_IN = 0x200,
116 COND_CMP_IN = 0x400,
117
118 // Input values for a loop
119 LOOP_INT_IN = 0x800,
120 };
121
122 Math::Vec4<float24> src1;
123 Math::Vec4<float24> src2;
124 Math::Vec4<float24> src3;
125
126 Math::Vec4<float24> dest_in;
127 Math::Vec4<float24> dest_out;
128
129 s32 address_registers[2];
130 bool conditional_code[2];
131 bool cond_bool;
132 bool cond_cmp[2];
133 Math::Vec4<u8> loop_int;
134
135 u32 instruction_offset;
136 u32 next_instruction;
137
138 // set of enabled fields (as a combination of Type flags)
139 unsigned mask = 0;
140 };
141
142 u32 max_offset; // maximum program counter ever reached
143 u32 max_opdesc_id; // maximum swizzle pattern index ever used
144
145 // List of records for each executed shader instruction
146 std::vector<DebugData<true>::Record> records;
147};
148
149// Type alias for better readability
150using DebugDataRecord = DebugData<true>::Record;
151
152// Helper function to set a DebugData<true>::Record field based on the template enum parameter.
153template<DebugDataRecord::Type type, typename ValueType>
154inline void SetField(DebugDataRecord& record, ValueType value);
155
156template<>
157inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) {
158 record.src1.x = value[0];
159 record.src1.y = value[1];
160 record.src1.z = value[2];
161 record.src1.w = value[3];
162}
163
164template<>
165inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) {
166 record.src2.x = value[0];
167 record.src2.y = value[1];
168 record.src2.z = value[2];
169 record.src2.w = value[3];
170}
171
172template<>
173inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) {
174 record.src3.x = value[0];
175 record.src3.y = value[1];
176 record.src3.z = value[2];
177 record.src3.w = value[3];
178}
179
180template<>
181inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) {
182 record.dest_in.x = value[0];
183 record.dest_in.y = value[1];
184 record.dest_in.z = value[2];
185 record.dest_in.w = value[3];
186}
187
188template<>
189inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) {
190 record.dest_out.x = value[0];
191 record.dest_out.y = value[1];
192 record.dest_out.z = value[2];
193 record.dest_out.w = value[3];
194}
195
196template<>
197inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) {
198 record.address_registers[0] = value[0];
199 record.address_registers[1] = value[1];
200}
201
202template<>
203inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) {
204 record.conditional_code[0] = value[0];
205 record.conditional_code[1] = value[1];
206}
207
208template<>
209inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) {
210 record.cond_bool = value;
211}
212
213template<>
214inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) {
215 record.cond_cmp[0] = value[0];
216 record.cond_cmp[1] = value[1];
217}
218
219template<>
220inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) {
221 record.loop_int = value;
222}
223
224template<>
225inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) {
226 record.instruction_offset = value;
227}
228
229template<>
230inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) {
231 record.next_instruction = value;
232}
233
234// Helper function to set debug information on the current shader iteration.
235template<DebugDataRecord::Type type, typename ValueType>
236inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) {
237 // Debugging disabled => nothing to do
238}
239
240template<DebugDataRecord::Type type, typename ValueType>
241inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) {
242 if (offset >= debug_data.records.size())
243 debug_data.records.resize(offset + 1);
244
245 SetField<type, ValueType>(debug_data.records[offset], value);
246 debug_data.records[offset].mask |= type;
247}
248
249
75/** 250/**
76 * This structure contains the state information that needs to be unique for a shader unit. The 3DS 251 * This structure contains the state information that needs to be unique for a shader unit. The 3DS
77 * has four shader units that process shaders in parallel. At the present, Citra only implements a 252 * has four shader units that process shaders in parallel. At the present, Citra only implements a
78 * single shader unit that processes all shaders serially. Putting the state information in a struct 253 * single shader unit that processes all shaders serially. Putting the state information in a struct
79 * here will make it easier for us to parallelize the shader processing later. 254 * here will make it easier for us to parallelize the shader processing later.
80 */ 255 */
256template<bool Debug>
81struct UnitState { 257struct UnitState {
82 struct Registers { 258 struct Registers {
83 // The registers are accessed by the shader JIT using SSE instructions, and are therefore 259 // The registers are accessed by the shader JIT using SSE instructions, and are therefore
@@ -111,10 +287,7 @@ struct UnitState {
111 // TODO: Is there a maximal size for this? 287 // TODO: Is there a maximal size for this?
112 boost::container::static_vector<CallStackElement, 16> call_stack; 288 boost::container::static_vector<CallStackElement, 16> call_stack;
113 289
114 struct { 290 DebugData<Debug> debug;
115 u32 max_offset; // maximum program counter ever reached
116 u32 max_opdesc_id; // maximum swizzle pattern index ever used
117 } debug;
118 291
119 static int InputOffset(const SourceRegister& reg) { 292 static int InputOffset(const SourceRegister& reg) {
120 switch (reg.GetRegisterType()) { 293 switch (reg.GetRegisterType()) {
@@ -150,7 +323,7 @@ struct UnitState {
150 * vertex, which would happen within the `Run` function). 323 * vertex, which would happen within the `Run` function).
151 * @param state Shader unit state, must be setup per shader and per shader unit 324 * @param state Shader unit state, must be setup per shader and per shader unit
152 */ 325 */
153void Setup(UnitState& state); 326void Setup(UnitState<false>& state);
154 327
155/// Performs any cleanup when the emulator is shutdown 328/// Performs any cleanup when the emulator is shutdown
156void Shutdown(); 329void Shutdown();
@@ -162,7 +335,17 @@ void Shutdown();
162 * @param num_attributes The number of vertex shader attributes 335 * @param num_attributes The number of vertex shader attributes
163 * @return The output vertex, after having been processed by the vertex shader 336 * @return The output vertex, after having been processed by the vertex shader
164 */ 337 */
165OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes); 338OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes);
339
340/**
341 * Produce debug information based on the given shader and input vertex
342 * @param input Input vertex into the shader
343 * @param num_attributes The number of vertex shader attributes
344 * @param config Configuration object for the shader pipeline
345 * @param setup Setup object for the shader pipeline
346 * @return Debug information for this shader with regards to the given vertex
347 */
348DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup);
166 349
167} // namespace Shader 350} // namespace Shader
168 351
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index c8489f920..e14de0768 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -21,7 +21,8 @@ namespace Pica {
21 21
22namespace Shader { 22namespace Shader {
23 23
24void RunInterpreter(UnitState& state) { 24template<bool Debug>
25void RunInterpreter(UnitState<Debug>& state) {
25 const auto& uniforms = g_state.vs.uniforms; 26 const auto& uniforms = g_state.vs.uniforms;
26 const auto& swizzle_data = g_state.vs.swizzle_data; 27 const auto& swizzle_data = g_state.vs.swizzle_data;
27 const auto& program_code = g_state.vs.program_code; 28 const auto& program_code = g_state.vs.program_code;
@@ -29,7 +30,9 @@ void RunInterpreter(UnitState& state) {
29 // Placeholder for invalid inputs 30 // Placeholder for invalid inputs
30 static float24 dummy_vec4_float24[4]; 31 static float24 dummy_vec4_float24[4];
31 32
32 while (true) { 33 unsigned iteration = 0;
34 bool exit_loop = false;
35 while (!exit_loop) {
33 if (!state.call_stack.empty()) { 36 if (!state.call_stack.empty()) {
34 auto& top = state.call_stack.back(); 37 auto& top = state.call_stack.back();
35 if (state.program_counter == top.final_address) { 38 if (state.program_counter == top.final_address) {
@@ -47,16 +50,19 @@ void RunInterpreter(UnitState& state) {
47 } 50 }
48 } 51 }
49 52
50 bool exit_loop = false;
51 const Instruction instr = { program_code[state.program_counter] }; 53 const Instruction instr = { program_code[state.program_counter] };
52 const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; 54 const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] };
53 55
54 static auto call = [](UnitState& state, u32 offset, u32 num_instructions, 56 static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions,
55 u32 return_offset, u8 repeat_count, u8 loop_increment) { 57 u32 return_offset, u8 repeat_count, u8 loop_increment) {
56 state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset 58 state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
57 ASSERT(state.call_stack.size() < state.call_stack.capacity()); 59 ASSERT(state.call_stack.size() < state.call_stack.capacity());
58 state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); 60 state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset });
59 }; 61 };
62 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter);
63 if (iteration > 0)
64 Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter);
65
60 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); 66 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter);
61 67
62 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { 68 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
@@ -123,58 +129,78 @@ void RunInterpreter(UnitState& state) {
123 switch (instr.opcode.Value().EffectiveOpCode()) { 129 switch (instr.opcode.Value().EffectiveOpCode()) {
124 case OpCode::Id::ADD: 130 case OpCode::Id::ADD:
125 { 131 {
132 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
133 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
134 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
126 for (int i = 0; i < 4; ++i) { 135 for (int i = 0; i < 4; ++i) {
127 if (!swizzle.DestComponentEnabled(i)) 136 if (!swizzle.DestComponentEnabled(i))
128 continue; 137 continue;
129 138
130 dest[i] = src1[i] + src2[i]; 139 dest[i] = src1[i] + src2[i];
131 } 140 }
132 141 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
133 break; 142 break;
134 } 143 }
135 144
136 case OpCode::Id::MUL: 145 case OpCode::Id::MUL:
137 { 146 {
147 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
148 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
149 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
138 for (int i = 0; i < 4; ++i) { 150 for (int i = 0; i < 4; ++i) {
139 if (!swizzle.DestComponentEnabled(i)) 151 if (!swizzle.DestComponentEnabled(i))
140 continue; 152 continue;
141 153
142 dest[i] = src1[i] * src2[i]; 154 dest[i] = src1[i] * src2[i];
143 } 155 }
144 156 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
145 break; 157 break;
146 } 158 }
147 159
148 case OpCode::Id::FLR: 160 case OpCode::Id::FLR:
161 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
162 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
149 for (int i = 0; i < 4; ++i) { 163 for (int i = 0; i < 4; ++i) {
150 if (!swizzle.DestComponentEnabled(i)) 164 if (!swizzle.DestComponentEnabled(i))
151 continue; 165 continue;
152 166
153 dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); 167 dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32()));
154 } 168 }
169 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
155 break; 170 break;
156 171
157 case OpCode::Id::MAX: 172 case OpCode::Id::MAX:
173 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
174 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
175 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
158 for (int i = 0; i < 4; ++i) { 176 for (int i = 0; i < 4; ++i) {
159 if (!swizzle.DestComponentEnabled(i)) 177 if (!swizzle.DestComponentEnabled(i))
160 continue; 178 continue;
161 179
162 dest[i] = std::max(src1[i], src2[i]); 180 dest[i] = std::max(src1[i], src2[i]);
163 } 181 }
182 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
164 break; 183 break;
165 184
166 case OpCode::Id::MIN: 185 case OpCode::Id::MIN:
186 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
187 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
188 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
167 for (int i = 0; i < 4; ++i) { 189 for (int i = 0; i < 4; ++i) {
168 if (!swizzle.DestComponentEnabled(i)) 190 if (!swizzle.DestComponentEnabled(i))
169 continue; 191 continue;
170 192
171 dest[i] = std::min(src1[i], src2[i]); 193 dest[i] = std::min(src1[i], src2[i]);
172 } 194 }
195 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
173 break; 196 break;
174 197
175 case OpCode::Id::DP3: 198 case OpCode::Id::DP3:
176 case OpCode::Id::DP4: 199 case OpCode::Id::DP4:
177 { 200 {
201 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
202 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
203 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
178 float24 dot = float24::FromFloat32(0.f); 204 float24 dot = float24::FromFloat32(0.f);
179 int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; 205 int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4;
180 for (int i = 0; i < num_components; ++i) 206 for (int i = 0; i < num_components; ++i)
@@ -186,12 +212,15 @@ void RunInterpreter(UnitState& state) {
186 212
187 dest[i] = dot; 213 dest[i] = dot;
188 } 214 }
215 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
189 break; 216 break;
190 } 217 }
191 218
192 // Reciprocal 219 // Reciprocal
193 case OpCode::Id::RCP: 220 case OpCode::Id::RCP:
194 { 221 {
222 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
223 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
195 for (int i = 0; i < 4; ++i) { 224 for (int i = 0; i < 4; ++i) {
196 if (!swizzle.DestComponentEnabled(i)) 225 if (!swizzle.DestComponentEnabled(i))
197 continue; 226 continue;
@@ -200,13 +229,15 @@ void RunInterpreter(UnitState& state) {
200 // TODO: I think this might be wrong... we should only use one component here 229 // TODO: I think this might be wrong... we should only use one component here
201 dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32()); 230 dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32());
202 } 231 }
203 232 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
204 break; 233 break;
205 } 234 }
206 235
207 // Reciprocal Square Root 236 // Reciprocal Square Root
208 case OpCode::Id::RSQ: 237 case OpCode::Id::RSQ:
209 { 238 {
239 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
240 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
210 for (int i = 0; i < 4; ++i) { 241 for (int i = 0; i < 4; ++i) {
211 if (!swizzle.DestComponentEnabled(i)) 242 if (!swizzle.DestComponentEnabled(i))
212 continue; 243 continue;
@@ -215,12 +246,13 @@ void RunInterpreter(UnitState& state) {
215 // TODO: I think this might be wrong... we should only use one component here 246 // TODO: I think this might be wrong... we should only use one component here
216 dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32())); 247 dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32()));
217 } 248 }
218 249 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
219 break; 250 break;
220 } 251 }
221 252
222 case OpCode::Id::MOVA: 253 case OpCode::Id::MOVA:
223 { 254 {
255 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
224 for (int i = 0; i < 2; ++i) { 256 for (int i = 0; i < 2; ++i) {
225 if (!swizzle.DestComponentEnabled(i)) 257 if (!swizzle.DestComponentEnabled(i))
226 continue; 258 continue;
@@ -228,32 +260,41 @@ void RunInterpreter(UnitState& state) {
228 // TODO: Figure out how the rounding is done on hardware 260 // TODO: Figure out how the rounding is done on hardware
229 state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); 261 state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32());
230 } 262 }
231 263 Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, state.address_registers);
232 break; 264 break;
233 } 265 }
234 266
235 case OpCode::Id::MOV: 267 case OpCode::Id::MOV:
236 { 268 {
269 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
270 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
237 for (int i = 0; i < 4; ++i) { 271 for (int i = 0; i < 4; ++i) {
238 if (!swizzle.DestComponentEnabled(i)) 272 if (!swizzle.DestComponentEnabled(i))
239 continue; 273 continue;
240 274
241 dest[i] = src1[i]; 275 dest[i] = src1[i];
242 } 276 }
277 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
243 break; 278 break;
244 } 279 }
245 280
246 case OpCode::Id::SLT: 281 case OpCode::Id::SLT:
247 case OpCode::Id::SLTI: 282 case OpCode::Id::SLTI:
283 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
284 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
285 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
248 for (int i = 0; i < 4; ++i) { 286 for (int i = 0; i < 4; ++i) {
249 if (!swizzle.DestComponentEnabled(i)) 287 if (!swizzle.DestComponentEnabled(i))
250 continue; 288 continue;
251 289
252 dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); 290 dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
253 } 291 }
292 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
254 break; 293 break;
255 294
256 case OpCode::Id::CMP: 295 case OpCode::Id::CMP:
296 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
297 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
257 for (int i = 0; i < 2; ++i) { 298 for (int i = 0; i < 2; ++i) {
258 // TODO: Can you restrict to one compare via dest masking? 299 // TODO: Can you restrict to one compare via dest masking?
259 300
@@ -261,27 +302,27 @@ void RunInterpreter(UnitState& state) {
261 auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); 302 auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value();
262 303
263 switch (op) { 304 switch (op) {
264 case compare_op.Equal: 305 case Instruction::Common::CompareOpType::Equal:
265 state.conditional_code[i] = (src1[i] == src2[i]); 306 state.conditional_code[i] = (src1[i] == src2[i]);
266 break; 307 break;
267 308
268 case compare_op.NotEqual: 309 case Instruction::Common::CompareOpType::NotEqual:
269 state.conditional_code[i] = (src1[i] != src2[i]); 310 state.conditional_code[i] = (src1[i] != src2[i]);
270 break; 311 break;
271 312
272 case compare_op.LessThan: 313 case Instruction::Common::CompareOpType::LessThan:
273 state.conditional_code[i] = (src1[i] < src2[i]); 314 state.conditional_code[i] = (src1[i] < src2[i]);
274 break; 315 break;
275 316
276 case compare_op.LessEqual: 317 case Instruction::Common::CompareOpType::LessEqual:
277 state.conditional_code[i] = (src1[i] <= src2[i]); 318 state.conditional_code[i] = (src1[i] <= src2[i]);
278 break; 319 break;
279 320
280 case compare_op.GreaterThan: 321 case Instruction::Common::CompareOpType::GreaterThan:
281 state.conditional_code[i] = (src1[i] > src2[i]); 322 state.conditional_code[i] = (src1[i] > src2[i]);
282 break; 323 break;
283 324
284 case compare_op.GreaterEqual: 325 case Instruction::Common::CompareOpType::GreaterEqual:
285 state.conditional_code[i] = (src1[i] >= src2[i]); 326 state.conditional_code[i] = (src1[i] >= src2[i]);
286 break; 327 break;
287 328
@@ -290,6 +331,7 @@ void RunInterpreter(UnitState& state) {
290 break; 331 break;
291 } 332 }
292 } 333 }
334 Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code);
293 break; 335 break;
294 336
295 default: 337 default:
@@ -359,12 +401,17 @@ void RunInterpreter(UnitState& state) {
359 : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] 401 : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
360 : dummy_vec4_float24; 402 : dummy_vec4_float24;
361 403
404 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
405 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
406 Record<DebugDataRecord::SRC3>(state.debug, iteration, src3);
407 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
362 for (int i = 0; i < 4; ++i) { 408 for (int i = 0; i < 4; ++i) {
363 if (!swizzle.DestComponentEnabled(i)) 409 if (!swizzle.DestComponentEnabled(i))
364 continue; 410 continue;
365 411
366 dest[i] = src1[i] * src2[i] + src3[i]; 412 dest[i] = src1[i] * src2[i] + src3[i];
367 } 413 }
414 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
368 } else { 415 } else {
369 LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", 416 LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x",
370 (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); 417 (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
@@ -374,7 +421,7 @@ void RunInterpreter(UnitState& state) {
374 421
375 default: 422 default:
376 { 423 {
377 static auto evaluate_condition = [](const UnitState& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { 424 static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, Instruction::FlowControlType flow_control) {
378 bool results[2] = { refx == state.conditional_code[0], 425 bool results[2] = { refx == state.conditional_code[0],
379 refy == state.conditional_code[1] }; 426 refy == state.conditional_code[1] };
380 427
@@ -400,12 +447,14 @@ void RunInterpreter(UnitState& state) {
400 break; 447 break;
401 448
402 case OpCode::Id::JMPC: 449 case OpCode::Id::JMPC:
450 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
403 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 451 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
404 state.program_counter = instr.flow_control.dest_offset - 1; 452 state.program_counter = instr.flow_control.dest_offset - 1;
405 } 453 }
406 break; 454 break;
407 455
408 case OpCode::Id::JMPU: 456 case OpCode::Id::JMPU:
457 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
409 if (uniforms.b[instr.flow_control.bool_uniform_id]) { 458 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
410 state.program_counter = instr.flow_control.dest_offset - 1; 459 state.program_counter = instr.flow_control.dest_offset - 1;
411 } 460 }
@@ -419,6 +468,7 @@ void RunInterpreter(UnitState& state) {
419 break; 468 break;
420 469
421 case OpCode::Id::CALLU: 470 case OpCode::Id::CALLU:
471 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
422 if (uniforms.b[instr.flow_control.bool_uniform_id]) { 472 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
423 call(state, 473 call(state,
424 instr.flow_control.dest_offset, 474 instr.flow_control.dest_offset,
@@ -428,6 +478,7 @@ void RunInterpreter(UnitState& state) {
428 break; 478 break;
429 479
430 case OpCode::Id::CALLC: 480 case OpCode::Id::CALLC:
481 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
431 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 482 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
432 call(state, 483 call(state,
433 instr.flow_control.dest_offset, 484 instr.flow_control.dest_offset,
@@ -440,6 +491,7 @@ void RunInterpreter(UnitState& state) {
440 break; 491 break;
441 492
442 case OpCode::Id::IFU: 493 case OpCode::Id::IFU:
494 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
443 if (uniforms.b[instr.flow_control.bool_uniform_id]) { 495 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
444 call(state, 496 call(state,
445 state.program_counter + 1, 497 state.program_counter + 1,
@@ -458,6 +510,7 @@ void RunInterpreter(UnitState& state) {
458 { 510 {
459 // TODO: Do we need to consider swizzlers here? 511 // TODO: Do we need to consider swizzlers here?
460 512
513 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
461 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 514 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
462 call(state, 515 call(state,
463 state.program_counter + 1, 516 state.program_counter + 1,
@@ -475,14 +528,19 @@ void RunInterpreter(UnitState& state) {
475 528
476 case OpCode::Id::LOOP: 529 case OpCode::Id::LOOP:
477 { 530 {
478 state.address_registers[2] = uniforms.i[instr.flow_control.int_uniform_id].y; 531 Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x,
532 uniforms.i[instr.flow_control.int_uniform_id].y,
533 uniforms.i[instr.flow_control.int_uniform_id].z,
534 uniforms.i[instr.flow_control.int_uniform_id].w);
535 state.address_registers[2] = loop_param.y;
479 536
537 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param);
480 call(state, 538 call(state,
481 state.program_counter + 1, 539 state.program_counter + 1,
482 instr.flow_control.dest_offset - state.program_counter + 1, 540 instr.flow_control.dest_offset - state.program_counter + 1,
483 instr.flow_control.dest_offset + 1, 541 instr.flow_control.dest_offset + 1,
484 uniforms.i[instr.flow_control.int_uniform_id].x, 542 loop_param.x,
485 uniforms.i[instr.flow_control.int_uniform_id].z); 543 loop_param.z);
486 break; 544 break;
487 } 545 }
488 546
@@ -497,12 +555,14 @@ void RunInterpreter(UnitState& state) {
497 } 555 }
498 556
499 ++state.program_counter; 557 ++state.program_counter;
500 558 ++iteration;
501 if (exit_loop)
502 break;
503 } 559 }
504} 560}
505 561
562// Explicit instantiation
563template void RunInterpreter(UnitState<false>& state);
564template void RunInterpreter(UnitState<true>& state);
565
506} // namespace 566} // namespace
507 567
508} // namespace 568} // namespace
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h
index ad6e58e39..71bcad5ac 100644
--- a/src/video_core/shader/shader_interpreter.h
+++ b/src/video_core/shader/shader_interpreter.h
@@ -12,7 +12,8 @@ namespace Pica {
12 12
13namespace Shader { 13namespace Shader {
14 14
15void RunInterpreter(UnitState& state); 15template<bool Debug>
16void RunInterpreter(UnitState<Debug>& state);
16 17
17} // namespace 18} // namespace
18 19
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index ce47774d5..836942c6b 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -141,7 +141,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source
141 src_offset = src_reg.GetIndex() * sizeof(float24) * 4; 141 src_offset = src_reg.GetIndex() * sizeof(float24) * 4;
142 } else { 142 } else {
143 src_ptr = REGISTERS; 143 src_ptr = REGISTERS;
144 src_offset = UnitState::InputOffset(src_reg); 144 src_offset = UnitState<false>::InputOffset(src_reg);
145 } 145 }
146 146
147 unsigned operand_desc_id; 147 unsigned operand_desc_id;
@@ -217,11 +217,11 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
217 // If all components are enabled, write the result to the destination register 217 // If all components are enabled, write the result to the destination register
218 if (swiz.dest_mask == NO_DEST_REG_MASK) { 218 if (swiz.dest_mask == NO_DEST_REG_MASK) {
219 // Store dest back to memory 219 // Store dest back to memory
220 MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), src); 220 MOVAPS(MDisp(REGISTERS, UnitState<false>::OutputOffset(dest)), src);
221 221
222 } else { 222 } else {
223 // Not all components are enabled, so mask the result when storing to the destination register... 223 // Not all components are enabled, so mask the result when storing to the destination register...
224 MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState::OutputOffset(dest))); 224 MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState<false>::OutputOffset(dest)));
225 225
226 if (Common::GetCPUCaps().sse4_1) { 226 if (Common::GetCPUCaps().sse4_1) {
227 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); 227 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
@@ -240,7 +240,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
240 } 240 }
241 241
242 // Store dest back to memory 242 // Store dest back to memory
243 MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), SCRATCH); 243 MOVAPS(MDisp(REGISTERS, UnitState<false>::OutputOffset(dest)), SCRATCH);
244 } 244 }
245} 245}
246 246