diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/citra_qt/debugger/graphics_vertex_shader.cpp | 251 | ||||
| -rw-r--r-- | src/citra_qt/debugger/graphics_vertex_shader.h | 52 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.h | 3 | ||||
| -rw-r--r-- | src/video_core/shader/shader.cpp | 49 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 195 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 104 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.h | 3 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 8 |
10 files changed, 587 insertions, 83 deletions
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp index b1657620e..359193226 100644 --- a/src/citra_qt/debugger/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp | |||
| @@ -6,11 +6,16 @@ | |||
| 6 | #include <sstream> | 6 | #include <sstream> |
| 7 | 7 | ||
| 8 | #include <QBoxLayout> | 8 | #include <QBoxLayout> |
| 9 | #include <QFileDialog> | ||
| 10 | #include <QGroupBox> | ||
| 9 | #include <QLabel> | 11 | #include <QLabel> |
| 12 | #include <QLineEdit> | ||
| 10 | #include <QPushButton> | 13 | #include <QPushButton> |
| 14 | #include <QSignalMapper> | ||
| 15 | #include <QSpinBox> | ||
| 11 | #include <QTreeView> | 16 | #include <QTreeView> |
| 12 | 17 | ||
| 13 | #include "video_core/shader/shader_interpreter.h" | 18 | #include "video_core/shader/shader.h" |
| 14 | 19 | ||
| 15 | #include "graphics_vertex_shader.h" | 20 | #include "graphics_vertex_shader.h" |
| 16 | 21 | ||
| @@ -19,7 +24,7 @@ using nihstro::Instruction; | |||
| 19 | using nihstro::SourceRegister; | 24 | using nihstro::SourceRegister; |
| 20 | using nihstro::SwizzlePattern; | 25 | using nihstro::SwizzlePattern; |
| 21 | 26 | ||
| 22 | GraphicsVertexShaderModel::GraphicsVertexShaderModel(QObject* parent): QAbstractItemModel(parent) { | 27 | GraphicsVertexShaderModel::GraphicsVertexShaderModel(GraphicsVertexShaderWidget* parent): QAbstractItemModel(parent), par(parent) { |
| 23 | 28 | ||
| 24 | } | 29 | } |
| 25 | 30 | ||
| @@ -36,7 +41,7 @@ int GraphicsVertexShaderModel::columnCount(const QModelIndex& parent) const { | |||
| 36 | } | 41 | } |
| 37 | 42 | ||
| 38 | int GraphicsVertexShaderModel::rowCount(const QModelIndex& parent) const { | 43 | int GraphicsVertexShaderModel::rowCount(const QModelIndex& parent) const { |
| 39 | return static_cast<int>(info.code.size()); | 44 | return static_cast<int>(par->info.code.size()); |
| 40 | } | 45 | } |
| 41 | 46 | ||
| 42 | QVariant GraphicsVertexShaderModel::headerData(int section, Qt::Orientation orientation, int role) const { | 47 | QVariant GraphicsVertexShaderModel::headerData(int section, Qt::Orientation orientation, int role) const { |
| @@ -64,21 +69,21 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | |||
| 64 | { | 69 | { |
| 65 | switch (index.column()) { | 70 | switch (index.column()) { |
| 66 | case 0: | 71 | case 0: |
| 67 | if (info.HasLabel(index.row())) | 72 | if (par->info.HasLabel(index.row())) |
| 68 | return QString::fromStdString(info.GetLabel(index.row())); | 73 | return QString::fromStdString(par->info.GetLabel(index.row())); |
| 69 | 74 | ||
| 70 | return QString("%1").arg(4*index.row(), 4, 16, QLatin1Char('0')); | 75 | return QString("%1").arg(4*index.row(), 4, 16, QLatin1Char('0')); |
| 71 | 76 | ||
| 72 | case 1: | 77 | case 1: |
| 73 | return QString("%1").arg(info.code[index.row()].hex, 8, 16, QLatin1Char('0')); | 78 | return QString("%1").arg(par->info.code[index.row()].hex, 8, 16, QLatin1Char('0')); |
| 74 | 79 | ||
| 75 | case 2: | 80 | case 2: |
| 76 | { | 81 | { |
| 77 | std::stringstream output; | 82 | std::stringstream output; |
| 78 | output.flags(std::ios::hex); | 83 | output.flags(std::ios::hex); |
| 79 | 84 | ||
| 80 | Instruction instr = info.code[index.row()]; | 85 | Instruction instr = par->info.code[index.row()]; |
| 81 | const SwizzlePattern& swizzle = info.swizzle_info[instr.common.operand_desc_id].pattern; | 86 | const SwizzlePattern& swizzle = par->info.swizzle_info[instr.common.operand_desc_id].pattern; |
| 82 | 87 | ||
| 83 | // longest known instruction name: "setemit " | 88 | // longest known instruction name: "setemit " |
| 84 | output << std::setw(8) << std::left << instr.opcode.Value().GetInfo().name; | 89 | output << std::setw(8) << std::left << instr.opcode.Value().GetInfo().name; |
| @@ -242,6 +247,18 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | |||
| 242 | case Qt::FontRole: | 247 | case Qt::FontRole: |
| 243 | return QFont("monospace"); | 248 | return QFont("monospace"); |
| 244 | 249 | ||
| 250 | case Qt::BackgroundRole: | ||
| 251 | // Highlight instructions which have no debug data associated to them | ||
| 252 | for (const auto& record : par->debug_data.records) | ||
| 253 | if (index.row() == record.instruction_offset) | ||
| 254 | return QVariant(); | ||
| 255 | |||
| 256 | return QBrush(QColor(255, 255, 127)); | ||
| 257 | |||
| 258 | |||
| 259 | // TODO: Draw arrows for each "reachable" instruction to visualize control flow | ||
| 260 | |||
| 261 | |||
| 245 | default: | 262 | default: |
| 246 | break; | 263 | break; |
| 247 | } | 264 | } |
| @@ -249,30 +266,19 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | |||
| 249 | return QVariant(); | 266 | return QVariant(); |
| 250 | } | 267 | } |
| 251 | 268 | ||
| 252 | void GraphicsVertexShaderModel::OnUpdate() | 269 | void GraphicsVertexShaderWidget::DumpShader() { |
| 253 | { | 270 | QString filename = QFileDialog::getSaveFileName(this, tr("Save Shader Dump"), "shader_dump.shbin", |
| 254 | beginResetModel(); | 271 | tr("Shader Binary (*.shbin)")); |
| 255 | |||
| 256 | info.Clear(); | ||
| 257 | |||
| 258 | auto& shader_setup = Pica::g_state.vs; | ||
| 259 | for (auto instr : shader_setup.program_code) | ||
| 260 | info.code.push_back({instr}); | ||
| 261 | |||
| 262 | for (auto pattern : shader_setup.swizzle_data) | ||
| 263 | info.swizzle_info.push_back({pattern}); | ||
| 264 | |||
| 265 | u32 entry_point = Pica::g_state.regs.vs.main_offset; | ||
| 266 | info.labels.insert({ entry_point, "main" }); | ||
| 267 | 272 | ||
| 268 | endResetModel(); | 273 | if (filename.isEmpty()) { |
| 269 | } | 274 | // If the user canceled the dialog, don't dump anything. |
| 275 | return; | ||
| 276 | } | ||
| 270 | 277 | ||
| 271 | void GraphicsVertexShaderModel::DumpShader() { | ||
| 272 | auto& setup = Pica::g_state.vs; | 278 | auto& setup = Pica::g_state.vs; |
| 273 | auto& config = Pica::g_state.regs.vs; | 279 | auto& config = Pica::g_state.regs.vs; |
| 274 | 280 | ||
| 275 | Pica::DebugUtils::DumpShader(config, setup, Pica::g_state.regs.vs_output_attributes); | 281 | Pica::DebugUtils::DumpShader(filename.toStdString(), config, setup, Pica::g_state.regs.vs_output_attributes); |
| 276 | } | 282 | } |
| 277 | 283 | ||
| 278 | GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::DebugContext > debug_context, | 284 | GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::DebugContext > debug_context, |
| @@ -280,34 +286,211 @@ GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::De | |||
| 280 | : BreakPointObserverDock(debug_context, "Pica Vertex Shader", parent) { | 286 | : BreakPointObserverDock(debug_context, "Pica Vertex Shader", parent) { |
| 281 | setObjectName("PicaVertexShader"); | 287 | setObjectName("PicaVertexShader"); |
| 282 | 288 | ||
| 283 | auto binary_model = new GraphicsVertexShaderModel(this); | 289 | auto input_data_mapper = new QSignalMapper(this); |
| 284 | auto binary_list = new QTreeView; | 290 | |
| 285 | binary_list->setModel(binary_model); | 291 | // TODO: Support inputting data in hexadecimal raw format |
| 292 | for (unsigned i = 0; i < ARRAY_SIZE(input_data); ++i) { | ||
| 293 | input_data[i] = new QLineEdit; | ||
| 294 | input_data[i]->setValidator(new QDoubleValidator(input_data[i])); | ||
| 295 | } | ||
| 296 | |||
| 297 | breakpoint_warning = new QLabel(tr("(data only available at VertexLoaded breakpoints)")); | ||
| 298 | |||
| 299 | // TODO: Add some button for jumping to the shader entry point | ||
| 300 | |||
| 301 | model = new GraphicsVertexShaderModel(this); | ||
| 302 | binary_list = new QTreeView; | ||
| 303 | binary_list->setModel(model); | ||
| 286 | binary_list->setRootIsDecorated(false); | 304 | binary_list->setRootIsDecorated(false); |
| 287 | binary_list->setAlternatingRowColors(true); | 305 | binary_list->setAlternatingRowColors(true); |
| 288 | 306 | ||
| 289 | auto dump_shader = new QPushButton(tr("Dump")); | 307 | auto dump_shader = new QPushButton(QIcon::fromTheme("document-save"), tr("Dump")); |
| 308 | |||
| 309 | instruction_description = new QLabel; | ||
| 290 | 310 | ||
| 291 | connect(dump_shader, SIGNAL(clicked()), binary_model, SLOT(DumpShader())); | 311 | iteration_index = new QSpinBox; |
| 292 | connect(this, SIGNAL(Update()), binary_model, SLOT(OnUpdate())); | 312 | |
| 313 | connect(this, SIGNAL(SelectCommand(const QModelIndex&, QItemSelectionModel::SelectionFlags)), | ||
| 314 | binary_list->selectionModel(), SLOT(select(const QModelIndex&, QItemSelectionModel::SelectionFlags))); | ||
| 315 | |||
| 316 | connect(dump_shader, SIGNAL(clicked()), this, SLOT(DumpShader())); | ||
| 317 | |||
| 318 | connect(iteration_index, SIGNAL(valueChanged(int)), this, SLOT(OnIterationIndexChanged(int))); | ||
| 319 | |||
| 320 | for (unsigned i = 0; i < ARRAY_SIZE(input_data); ++i) { | ||
| 321 | connect(input_data[i], SIGNAL(textEdited(const QString&)), input_data_mapper, SLOT(map())); | ||
| 322 | input_data_mapper->setMapping(input_data[i], i); | ||
| 323 | } | ||
| 324 | connect(input_data_mapper, SIGNAL(mapped(int)), this, SLOT(OnInputAttributeChanged(int))); | ||
| 293 | 325 | ||
| 294 | auto main_widget = new QWidget; | 326 | auto main_widget = new QWidget; |
| 295 | auto main_layout = new QVBoxLayout; | 327 | auto main_layout = new QVBoxLayout; |
| 296 | { | 328 | { |
| 329 | auto input_data_group = new QGroupBox(tr("Input Data")); | ||
| 330 | |||
| 331 | // For each vertex attribute, add a QHBoxLayout consisting of: | ||
| 332 | // - A QLabel denoting the source attribute index | ||
| 333 | // - Four QLineEdits for showing and manipulating attribute data | ||
| 334 | // - A QLabel denoting the shader input attribute index | ||
| 335 | auto sub_layout = new QVBoxLayout; | ||
| 336 | for (unsigned i = 0; i < 16; ++i) { | ||
| 337 | // Create an HBoxLayout to store the widgets used to specify a particular attribute | ||
| 338 | // and store it in a QWidget to allow for easy hiding and unhiding. | ||
| 339 | auto row_layout = new QHBoxLayout; | ||
| 340 | row_layout->addWidget(new QLabel(tr("Attribute %1").arg(i, 2))); | ||
| 341 | for (unsigned comp = 0; comp < 4; ++comp) | ||
| 342 | row_layout->addWidget(input_data[4 * i + comp]); | ||
| 343 | |||
| 344 | row_layout->addWidget(input_data_mapping[i] = new QLabel); | ||
| 345 | |||
| 346 | input_data_container[i] = new QWidget; | ||
| 347 | input_data_container[i]->setLayout(row_layout); | ||
| 348 | input_data_container[i]->hide(); | ||
| 349 | |||
| 350 | sub_layout->addWidget(input_data_container[i]); | ||
| 351 | } | ||
| 352 | |||
| 353 | sub_layout->addWidget(breakpoint_warning); | ||
| 354 | breakpoint_warning->hide(); | ||
| 355 | |||
| 356 | input_data_group->setLayout(sub_layout); | ||
| 357 | main_layout->addWidget(input_data_group); | ||
| 358 | } | ||
| 359 | { | ||
| 297 | auto sub_layout = new QHBoxLayout; | 360 | auto sub_layout = new QHBoxLayout; |
| 298 | sub_layout->addWidget(binary_list); | 361 | sub_layout->addWidget(binary_list); |
| 299 | main_layout->addLayout(sub_layout); | 362 | main_layout->addLayout(sub_layout); |
| 300 | } | 363 | } |
| 301 | main_layout->addWidget(dump_shader); | 364 | main_layout->addWidget(dump_shader); |
| 365 | { | ||
| 366 | auto sub_layout = new QHBoxLayout; | ||
| 367 | sub_layout->addWidget(new QLabel(tr("Iteration Index:"))); | ||
| 368 | sub_layout->addWidget(iteration_index); | ||
| 369 | main_layout->addLayout(sub_layout); | ||
| 370 | } | ||
| 371 | main_layout->addWidget(instruction_description); | ||
| 302 | main_widget->setLayout(main_layout); | 372 | main_widget->setLayout(main_layout); |
| 303 | setWidget(main_widget); | 373 | setWidget(main_widget); |
| 374 | |||
| 375 | widget()->setEnabled(false); | ||
| 304 | } | 376 | } |
| 305 | 377 | ||
| 306 | void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { | 378 | void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { |
| 307 | emit Update(); | 379 | auto input = static_cast<Pica::Shader::InputVertex*>(data); |
| 380 | if (event == Pica::DebugContext::Event::VertexLoaded) { | ||
| 381 | Reload(true, data); | ||
| 382 | } else { | ||
| 383 | // No vertex data is retrievable => invalidate currently stored vertex data | ||
| 384 | Reload(true, nullptr); | ||
| 385 | } | ||
| 308 | widget()->setEnabled(true); | 386 | widget()->setEnabled(true); |
| 309 | } | 387 | } |
| 310 | 388 | ||
| 389 | void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_data) { | ||
| 390 | model->beginResetModel(); | ||
| 391 | |||
| 392 | if (replace_vertex_data) { | ||
| 393 | if (vertex_data) { | ||
| 394 | memcpy(&input_vertex, vertex_data, sizeof(input_vertex)); | ||
| 395 | for (unsigned attr = 0; attr < 16; ++attr) { | ||
| 396 | for (unsigned comp = 0; comp < 4; ++comp) { | ||
| 397 | input_data[4 * attr + comp]->setText(QString("%1").arg(input_vertex.attr[attr][comp].ToFloat32())); | ||
| 398 | } | ||
| 399 | } | ||
| 400 | breakpoint_warning->hide(); | ||
| 401 | } else { | ||
| 402 | for (unsigned attr = 0; attr < 16; ++attr) { | ||
| 403 | for (unsigned comp = 0; comp < 4; ++comp) { | ||
| 404 | input_data[4 * attr + comp]->setText(QString("???")); | ||
| 405 | } | ||
| 406 | } | ||
| 407 | breakpoint_warning->show(); | ||
| 408 | } | ||
| 409 | } | ||
| 410 | |||
| 411 | // Reload shader code | ||
| 412 | info.Clear(); | ||
| 413 | |||
| 414 | auto& shader_setup = Pica::g_state.vs; | ||
| 415 | auto& shader_config = Pica::g_state.regs.vs; | ||
| 416 | for (auto instr : shader_setup.program_code) | ||
| 417 | info.code.push_back({instr}); | ||
| 418 | |||
| 419 | for (auto pattern : shader_setup.swizzle_data) | ||
| 420 | info.swizzle_info.push_back({pattern}); | ||
| 421 | |||
| 422 | u32 entry_point = Pica::g_state.regs.vs.main_offset; | ||
| 423 | info.labels.insert({ entry_point, "main" }); | ||
| 424 | |||
| 425 | // Generate debug information | ||
| 426 | debug_data = Pica::Shader::ProduceDebugInfo(input_vertex, 1, shader_config, shader_setup); | ||
| 427 | |||
| 428 | // Reload widget state | ||
| 429 | |||
| 430 | // Only show input attributes which are used as input to the shader | ||
| 431 | for (unsigned int attr = 0; attr < 16; ++attr) { | ||
| 432 | input_data_container[attr]->setVisible(false); | ||
| 433 | } | ||
| 434 | for (unsigned int attr = 0; attr < Pica::g_state.regs.vertex_attributes.GetNumTotalAttributes(); ++attr) { | ||
| 435 | unsigned source_attr = shader_config.input_register_map.GetRegisterForAttribute(attr); | ||
| 436 | input_data_mapping[source_attr]->setText(QString("-> v%1").arg(attr)); | ||
| 437 | input_data_container[source_attr]->setVisible(true); | ||
| 438 | } | ||
| 439 | |||
| 440 | // Initialize debug info text for current iteration count | ||
| 441 | iteration_index->setMaximum(debug_data.records.size() - 1); | ||
| 442 | OnIterationIndexChanged(iteration_index->value()); | ||
| 443 | |||
| 444 | model->endResetModel(); | ||
| 445 | } | ||
| 446 | |||
| 311 | void GraphicsVertexShaderWidget::OnResumed() { | 447 | void GraphicsVertexShaderWidget::OnResumed() { |
| 312 | widget()->setEnabled(false); | 448 | widget()->setEnabled(false); |
| 313 | } | 449 | } |
| 450 | |||
| 451 | void GraphicsVertexShaderWidget::OnInputAttributeChanged(int index) { | ||
| 452 | float value = input_data[index]->text().toFloat(); | ||
| 453 | Reload(); | ||
| 454 | } | ||
| 455 | |||
| 456 | void GraphicsVertexShaderWidget::OnIterationIndexChanged(int index) { | ||
| 457 | QString text; | ||
| 458 | |||
| 459 | auto& record = debug_data.records[index]; | ||
| 460 | if (record.mask & Pica::Shader::DebugDataRecord::SRC1) | ||
| 461 | text += tr("SRC1: %1, %2, %3, %4\n").arg(record.src1.x.ToFloat32()).arg(record.src1.y.ToFloat32()).arg(record.src1.z.ToFloat32()).arg(record.src1.w.ToFloat32()); | ||
| 462 | if (record.mask & Pica::Shader::DebugDataRecord::SRC2) | ||
| 463 | text += tr("SRC2: %1, %2, %3, %4\n").arg(record.src2.x.ToFloat32()).arg(record.src2.y.ToFloat32()).arg(record.src2.z.ToFloat32()).arg(record.src2.w.ToFloat32()); | ||
| 464 | if (record.mask & Pica::Shader::DebugDataRecord::SRC3) | ||
| 465 | text += tr("SRC3: %1, %2, %3, %4\n").arg(record.src3.x.ToFloat32()).arg(record.src3.y.ToFloat32()).arg(record.src3.z.ToFloat32()).arg(record.src3.w.ToFloat32()); | ||
| 466 | if (record.mask & Pica::Shader::DebugDataRecord::DEST_IN) | ||
| 467 | text += tr("DEST_IN: %1, %2, %3, %4\n").arg(record.dest_in.x.ToFloat32()).arg(record.dest_in.y.ToFloat32()).arg(record.dest_in.z.ToFloat32()).arg(record.dest_in.w.ToFloat32()); | ||
| 468 | if (record.mask & Pica::Shader::DebugDataRecord::DEST_OUT) | ||
| 469 | text += tr("DEST_OUT: %1, %2, %3, %4\n").arg(record.dest_out.x.ToFloat32()).arg(record.dest_out.y.ToFloat32()).arg(record.dest_out.z.ToFloat32()).arg(record.dest_out.w.ToFloat32()); | ||
| 470 | |||
| 471 | if (record.mask & Pica::Shader::DebugDataRecord::ADDR_REG_OUT) | ||
| 472 | text += tr("Addres Registers: %1, %2\n").arg(record.address_registers[0]).arg(record.address_registers[1]); | ||
| 473 | if (record.mask & Pica::Shader::DebugDataRecord::CMP_RESULT) | ||
| 474 | text += tr("Compare Result: %1, %2\n").arg(record.conditional_code[0] ? "true" : "false").arg(record.conditional_code[1] ? "true" : "false"); | ||
| 475 | |||
| 476 | if (record.mask & Pica::Shader::DebugDataRecord::COND_BOOL_IN) | ||
| 477 | text += tr("Static Condition: %1\n").arg(record.cond_bool ? "true" : "false"); | ||
| 478 | if (record.mask & Pica::Shader::DebugDataRecord::COND_CMP_IN) | ||
| 479 | text += tr("Dynamic Conditions: %1, %2\n").arg(record.cond_cmp[0] ? "true" : "false").arg(record.cond_cmp[1] ? "true" : "false"); | ||
| 480 | if (record.mask & Pica::Shader::DebugDataRecord::LOOP_INT_IN) | ||
| 481 | text += tr("Loop Parameters: %1 (repeats), %2 (initializer), %3 (increment), %4\n").arg(record.loop_int.x).arg(record.loop_int.y).arg(record.loop_int.z).arg(record.loop_int.w); | ||
| 482 | |||
| 483 | text += tr("Instruction offset: 0x%1").arg(4 * record.instruction_offset, 4, 16, QLatin1Char('0')); | ||
| 484 | if (record.mask & Pica::Shader::DebugDataRecord::NEXT_INSTR) { | ||
| 485 | text += tr(" -> 0x%2").arg(4 * record.next_instruction, 4, 16, QLatin1Char('0')); | ||
| 486 | } else { | ||
| 487 | text += tr(" (last instruction)"); | ||
| 488 | } | ||
| 489 | |||
| 490 | instruction_description->setText(text); | ||
| 491 | |||
| 492 | // Scroll to current instruction | ||
| 493 | const QModelIndex& instr_index = model->index(record.instruction_offset, 0); | ||
| 494 | emit SelectCommand(instr_index, QItemSelectionModel::ClearAndSelect | QItemSelectionModel::Rows); | ||
| 495 | binary_list->scrollTo(instr_index, QAbstractItemView::EnsureVisible); | ||
| 496 | } | ||
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.h b/src/citra_qt/debugger/graphics_vertex_shader.h index 5dc9e3703..1b46aa0d9 100644 --- a/src/citra_qt/debugger/graphics_vertex_shader.h +++ b/src/citra_qt/debugger/graphics_vertex_shader.h | |||
| @@ -10,11 +10,18 @@ | |||
| 10 | 10 | ||
| 11 | #include "nihstro/parser_shbin.h" | 11 | #include "nihstro/parser_shbin.h" |
| 12 | 12 | ||
| 13 | #include "video_core/shader/shader.h" | ||
| 14 | |||
| 15 | class QLabel; | ||
| 16 | class QSpinBox; | ||
| 17 | |||
| 18 | class GraphicsVertexShaderWidget; | ||
| 19 | |||
| 13 | class GraphicsVertexShaderModel : public QAbstractItemModel { | 20 | class GraphicsVertexShaderModel : public QAbstractItemModel { |
| 14 | Q_OBJECT | 21 | Q_OBJECT |
| 15 | 22 | ||
| 16 | public: | 23 | public: |
| 17 | GraphicsVertexShaderModel(QObject* parent); | 24 | GraphicsVertexShaderModel(GraphicsVertexShaderWidget* parent); |
| 18 | 25 | ||
| 19 | QModelIndex index(int row, int column, const QModelIndex& parent = QModelIndex()) const override; | 26 | QModelIndex index(int row, int column, const QModelIndex& parent = QModelIndex()) const override; |
| 20 | QModelIndex parent(const QModelIndex& child) const override; | 27 | QModelIndex parent(const QModelIndex& child) const override; |
| @@ -23,13 +30,10 @@ public: | |||
| 23 | QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override; | 30 | QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override; |
| 24 | QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const override; | 31 | QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const override; |
| 25 | 32 | ||
| 26 | public slots: | ||
| 27 | void OnUpdate(); | ||
| 28 | |||
| 29 | void DumpShader(); | ||
| 30 | |||
| 31 | private: | 33 | private: |
| 32 | nihstro::ShaderInfo info; | 34 | GraphicsVertexShaderWidget* par; |
| 35 | |||
| 36 | friend class GraphicsVertexShaderWidget; | ||
| 33 | }; | 37 | }; |
| 34 | 38 | ||
| 35 | class GraphicsVertexShaderWidget : public BreakPointObserverDock { | 39 | class GraphicsVertexShaderWidget : public BreakPointObserverDock { |
| @@ -45,9 +49,41 @@ private slots: | |||
| 45 | void OnBreakPointHit(Pica::DebugContext::Event event, void* data) override; | 49 | void OnBreakPointHit(Pica::DebugContext::Event event, void* data) override; |
| 46 | void OnResumed() override; | 50 | void OnResumed() override; |
| 47 | 51 | ||
| 52 | void OnInputAttributeChanged(int index); | ||
| 53 | |||
| 54 | void OnIterationIndexChanged(int index); | ||
| 55 | |||
| 56 | void DumpShader(); | ||
| 57 | |||
| 58 | /** Reload widget based on the current PICA200 state | ||
| 59 | * @param replace_vertex_data If true, invalidate all current vertex data | ||
| 60 | * @param vertex_data New vertex data to use, as passed to OnBreakPointHit. May be nullptr to specify that no valid vertex data can be retrieved currently. Only used if replace_vertex_data is true. | ||
| 61 | */ | ||
| 62 | void Reload(bool replace_vertex_data = false, void* vertex_data = nullptr); | ||
| 63 | |||
| 64 | |||
| 48 | signals: | 65 | signals: |
| 49 | void Update(); | 66 | // Call this to change the current command selection in the disassembly view |
| 67 | void SelectCommand(const QModelIndex&, QItemSelectionModel::SelectionFlags); | ||
| 50 | 68 | ||
| 51 | private: | 69 | private: |
| 70 | QLabel* instruction_description; | ||
| 71 | QTreeView* binary_list; | ||
| 72 | GraphicsVertexShaderModel* model; | ||
| 73 | |||
| 74 | // TODO: Move these into a single struct | ||
| 75 | std::array<QLineEdit*, 4*16> input_data; // A text box for each of the 4 components of up to 16 vertex attributes | ||
| 76 | std::array<QWidget*, 16> input_data_container; // QWidget containing the QLayout containing each vertex attribute | ||
| 77 | std::array<QLabel*, 16> input_data_mapping; // A QLabel denoting the shader input attribute which the vertex attribute maps to | ||
| 78 | |||
| 79 | // Text to be shown when input vertex data is not retrievable | ||
| 80 | QLabel* breakpoint_warning; | ||
| 81 | |||
| 82 | QSpinBox* iteration_index; | ||
| 83 | |||
| 84 | nihstro::ShaderInfo info; | ||
| 85 | Pica::Shader::DebugData<true> debug_data; | ||
| 86 | Pica::Shader::InputVertex input_vertex; | ||
| 52 | 87 | ||
| 88 | friend class GraphicsVertexShaderModel; | ||
| 53 | }; | 89 | }; |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 374c4748d..8c741f31f 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -215,7 +215,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 215 | unsigned int vertex_cache_pos = 0; | 215 | unsigned int vertex_cache_pos = 0; |
| 216 | vertex_cache_ids.fill(-1); | 216 | vertex_cache_ids.fill(-1); |
| 217 | 217 | ||
| 218 | Shader::UnitState shader_unit; | 218 | Shader::UnitState<false> shader_unit; |
| 219 | Shader::Setup(shader_unit); | 219 | Shader::Setup(shader_unit); |
| 220 | 220 | ||
| 221 | for (unsigned int index = 0; index < regs.num_vertices; ++index) | 221 | for (unsigned int index = 0; index < regs.num_vertices; ++index) |
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index a79d90ef1..ac071790a 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp | |||
| @@ -111,7 +111,7 @@ void GeometryDumper::Dump() { | |||
| 111 | } | 111 | } |
| 112 | 112 | ||
| 113 | 113 | ||
| 114 | void DumpShader(const Regs::ShaderConfig& config, const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes) | 114 | void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes) |
| 115 | { | 115 | { |
| 116 | struct StuffToWrite { | 116 | struct StuffToWrite { |
| 117 | u8* pointer; | 117 | u8* pointer; |
| @@ -294,7 +294,6 @@ void DumpShader(const Regs::ShaderConfig& config, const State::ShaderSetup& setu | |||
| 294 | 294 | ||
| 295 | // Write data to file | 295 | // Write data to file |
| 296 | static int dump_index = 0; | 296 | static int dump_index = 0; |
| 297 | std::string filename = std::string("shader_dump") + std::to_string(++dump_index) + std::string(".shbin"); | ||
| 298 | std::ofstream file(filename, std::ios_base::out | std::ios_base::binary); | 297 | std::ofstream file(filename, std::ios_base::out | std::ios_base::binary); |
| 299 | 298 | ||
| 300 | for (auto& chunk : writing_queue) { | 299 | for (auto& chunk : writing_queue) { |
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 1450e5bf3..0b30d7ffa 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h | |||
| @@ -181,7 +181,8 @@ private: | |||
| 181 | std::vector<Face> faces; | 181 | std::vector<Face> faces; |
| 182 | }; | 182 | }; |
| 183 | 183 | ||
| 184 | void DumpShader(const Regs::ShaderConfig& config, const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes); | 184 | void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, |
| 185 | const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes); | ||
| 185 | 186 | ||
| 186 | 187 | ||
| 187 | // Utility class to log Pica commands. | 188 | // Utility class to log Pica commands. |
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 2692b91e4..4e9836c80 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -5,6 +5,8 @@ | |||
| 5 | #include <memory> | 5 | #include <memory> |
| 6 | #include <unordered_map> | 6 | #include <unordered_map> |
| 7 | 7 | ||
| 8 | #include <boost/range/algorithm/fill.hpp> | ||
| 9 | |||
| 8 | #include "common/hash.h" | 10 | #include "common/hash.h" |
| 9 | #include "common/make_unique.h" | 11 | #include "common/make_unique.h" |
| 10 | #include "common/profiler.h" | 12 | #include "common/profiler.h" |
| @@ -30,7 +32,7 @@ static JitCompiler jit; | |||
| 30 | static CompiledShader* jit_shader; | 32 | static CompiledShader* jit_shader; |
| 31 | #endif // ARCHITECTURE_x86_64 | 33 | #endif // ARCHITECTURE_x86_64 |
| 32 | 34 | ||
| 33 | void Setup(UnitState& state) { | 35 | void Setup(UnitState<false>& state) { |
| 34 | #ifdef ARCHITECTURE_x86_64 | 36 | #ifdef ARCHITECTURE_x86_64 |
| 35 | if (VideoCore::g_shader_jit_enabled) { | 37 | if (VideoCore::g_shader_jit_enabled) { |
| 36 | u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ | 38 | u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ |
| @@ -54,9 +56,8 @@ void Shutdown() { | |||
| 54 | 56 | ||
| 55 | static Common::Profiling::TimingCategory shader_category("Vertex Shader"); | 57 | static Common::Profiling::TimingCategory shader_category("Vertex Shader"); |
| 56 | 58 | ||
| 57 | OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) { | 59 | OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { |
| 58 | auto& config = g_state.regs.vs; | 60 | auto& config = g_state.regs.vs; |
| 59 | auto& setup = g_state.vs; | ||
| 60 | 61 | ||
| 61 | Common::Profiling::ScopeTimer timer(shader_category); | 62 | Common::Profiling::ScopeTimer timer(shader_category); |
| 62 | 63 | ||
| @@ -67,6 +68,8 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | |||
| 67 | // Setup input register table | 68 | // Setup input register table |
| 68 | const auto& attribute_register_map = config.input_register_map; | 69 | const auto& attribute_register_map = config.input_register_map; |
| 69 | 70 | ||
| 71 | // TODO: Instead of this cumbersome logic, just load the input data directly like | ||
| 72 | // for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; } | ||
| 70 | if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; | 73 | if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; |
| 71 | if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1]; | 74 | if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1]; |
| 72 | if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2]; | 75 | if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2]; |
| @@ -126,14 +129,52 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | |||
| 126 | std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); | 129 | std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); |
| 127 | } | 130 | } |
| 128 | 131 | ||
| 129 | LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", | 132 | LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", |
| 130 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | 133 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), |
| 134 | ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), | ||
| 131 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | 135 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), |
| 132 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); | 136 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); |
| 133 | 137 | ||
| 134 | return ret; | 138 | return ret; |
| 135 | } | 139 | } |
| 136 | 140 | ||
| 141 | DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) { | ||
| 142 | UnitState<true> state; | ||
| 143 | |||
| 144 | const auto& shader_memory = setup.program_code; | ||
| 145 | state.program_counter = config.main_offset; | ||
| 146 | state.debug.max_offset = 0; | ||
| 147 | state.debug.max_opdesc_id = 0; | ||
| 148 | |||
| 149 | // Setup input register table | ||
| 150 | const auto& attribute_register_map = config.input_register_map; | ||
| 151 | float24 dummy_register; | ||
| 152 | boost::fill(state.registers.input, &dummy_register); | ||
| 153 | |||
| 154 | if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = &input.attr[0].x; | ||
| 155 | if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = &input.attr[1].x; | ||
| 156 | if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = &input.attr[2].x; | ||
| 157 | if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = &input.attr[3].x; | ||
| 158 | if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = &input.attr[4].x; | ||
| 159 | if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = &input.attr[5].x; | ||
| 160 | if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = &input.attr[6].x; | ||
| 161 | if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = &input.attr[7].x; | ||
| 162 | if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = &input.attr[8].x; | ||
| 163 | if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = &input.attr[9].x; | ||
| 164 | if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = &input.attr[10].x; | ||
| 165 | if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = &input.attr[11].x; | ||
| 166 | if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = &input.attr[12].x; | ||
| 167 | if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = &input.attr[13].x; | ||
| 168 | if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = &input.attr[14].x; | ||
| 169 | if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = &input.attr[15].x; | ||
| 170 | |||
| 171 | state.conditional_code[0] = false; | ||
| 172 | state.conditional_code[1] = false; | ||
| 173 | |||
| 174 | RunInterpreter(state); | ||
| 175 | return state.debug; | ||
| 176 | } | ||
| 177 | |||
| 137 | } // namespace Shader | 178 | } // namespace Shader |
| 138 | 179 | ||
| 139 | } // namespace Pica | 180 | } // namespace Pica |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 2007a2844..58d21f7cd 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -4,7 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <vector> | ||
| 8 | |||
| 7 | #include <boost/container/static_vector.hpp> | 9 | #include <boost/container/static_vector.hpp> |
| 10 | |||
| 8 | #include <nihstro/shader_binary.h> | 11 | #include <nihstro/shader_binary.h> |
| 9 | 12 | ||
| 10 | #include "common/common_funcs.h" | 13 | #include "common/common_funcs.h" |
| @@ -72,12 +75,185 @@ struct OutputVertex { | |||
| 72 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | 75 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
| 73 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | 76 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); |
| 74 | 77 | ||
| 78 | |||
| 79 | // Helper structure used to keep track of data useful for inspection of shader emulation | ||
| 80 | template<bool full_debugging> | ||
| 81 | struct DebugData; | ||
| 82 | |||
| 83 | template<> | ||
| 84 | struct DebugData<false> { | ||
| 85 | // TODO: Hide these behind an interface and move them to DebugData<true> | ||
| 86 | u32 max_offset; // maximum program counter ever reached | ||
| 87 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||
| 88 | }; | ||
| 89 | |||
| 90 | template<> | ||
| 91 | struct DebugData<true> { | ||
| 92 | // Records store the input and output operands of a particular instruction. | ||
| 93 | struct Record { | ||
| 94 | enum Type { | ||
| 95 | // Floating point arithmetic operands | ||
| 96 | SRC1 = 0x1, | ||
| 97 | SRC2 = 0x2, | ||
| 98 | SRC3 = 0x4, | ||
| 99 | |||
| 100 | // Initial and final output operand value | ||
| 101 | DEST_IN = 0x8, | ||
| 102 | DEST_OUT = 0x10, | ||
| 103 | |||
| 104 | // Current and next instruction offset (in words) | ||
| 105 | CUR_INSTR = 0x20, | ||
| 106 | NEXT_INSTR = 0x40, | ||
| 107 | |||
| 108 | // Output address register value | ||
| 109 | ADDR_REG_OUT = 0x80, | ||
| 110 | |||
| 111 | // Result of a comparison instruction | ||
| 112 | CMP_RESULT = 0x100, | ||
| 113 | |||
| 114 | // Input values for conditional flow control instructions | ||
| 115 | COND_BOOL_IN = 0x200, | ||
| 116 | COND_CMP_IN = 0x400, | ||
| 117 | |||
| 118 | // Input values for a loop | ||
| 119 | LOOP_INT_IN = 0x800, | ||
| 120 | }; | ||
| 121 | |||
| 122 | Math::Vec4<float24> src1; | ||
| 123 | Math::Vec4<float24> src2; | ||
| 124 | Math::Vec4<float24> src3; | ||
| 125 | |||
| 126 | Math::Vec4<float24> dest_in; | ||
| 127 | Math::Vec4<float24> dest_out; | ||
| 128 | |||
| 129 | s32 address_registers[2]; | ||
| 130 | bool conditional_code[2]; | ||
| 131 | bool cond_bool; | ||
| 132 | bool cond_cmp[2]; | ||
| 133 | Math::Vec4<u8> loop_int; | ||
| 134 | |||
| 135 | u32 instruction_offset; | ||
| 136 | u32 next_instruction; | ||
| 137 | |||
| 138 | // set of enabled fields (as a combination of Type flags) | ||
| 139 | unsigned mask = 0; | ||
| 140 | }; | ||
| 141 | |||
| 142 | u32 max_offset; // maximum program counter ever reached | ||
| 143 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||
| 144 | |||
| 145 | // List of records for each executed shader instruction | ||
| 146 | std::vector<DebugData<true>::Record> records; | ||
| 147 | }; | ||
| 148 | |||
| 149 | // Type alias for better readability | ||
| 150 | using DebugDataRecord = DebugData<true>::Record; | ||
| 151 | |||
| 152 | // Helper function to set a DebugData<true>::Record field based on the template enum parameter. | ||
| 153 | template<DebugDataRecord::Type type, typename ValueType> | ||
| 154 | inline void SetField(DebugDataRecord& record, ValueType value); | ||
| 155 | |||
| 156 | template<> | ||
| 157 | inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { | ||
| 158 | record.src1.x = value[0]; | ||
| 159 | record.src1.y = value[1]; | ||
| 160 | record.src1.z = value[2]; | ||
| 161 | record.src1.w = value[3]; | ||
| 162 | } | ||
| 163 | |||
| 164 | template<> | ||
| 165 | inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { | ||
| 166 | record.src2.x = value[0]; | ||
| 167 | record.src2.y = value[1]; | ||
| 168 | record.src2.z = value[2]; | ||
| 169 | record.src2.w = value[3]; | ||
| 170 | } | ||
| 171 | |||
| 172 | template<> | ||
| 173 | inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { | ||
| 174 | record.src3.x = value[0]; | ||
| 175 | record.src3.y = value[1]; | ||
| 176 | record.src3.z = value[2]; | ||
| 177 | record.src3.w = value[3]; | ||
| 178 | } | ||
| 179 | |||
| 180 | template<> | ||
| 181 | inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { | ||
| 182 | record.dest_in.x = value[0]; | ||
| 183 | record.dest_in.y = value[1]; | ||
| 184 | record.dest_in.z = value[2]; | ||
| 185 | record.dest_in.w = value[3]; | ||
| 186 | } | ||
| 187 | |||
| 188 | template<> | ||
| 189 | inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { | ||
| 190 | record.dest_out.x = value[0]; | ||
| 191 | record.dest_out.y = value[1]; | ||
| 192 | record.dest_out.z = value[2]; | ||
| 193 | record.dest_out.w = value[3]; | ||
| 194 | } | ||
| 195 | |||
| 196 | template<> | ||
| 197 | inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { | ||
| 198 | record.address_registers[0] = value[0]; | ||
| 199 | record.address_registers[1] = value[1]; | ||
| 200 | } | ||
| 201 | |||
| 202 | template<> | ||
| 203 | inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { | ||
| 204 | record.conditional_code[0] = value[0]; | ||
| 205 | record.conditional_code[1] = value[1]; | ||
| 206 | } | ||
| 207 | |||
| 208 | template<> | ||
| 209 | inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { | ||
| 210 | record.cond_bool = value; | ||
| 211 | } | ||
| 212 | |||
| 213 | template<> | ||
| 214 | inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { | ||
| 215 | record.cond_cmp[0] = value[0]; | ||
| 216 | record.cond_cmp[1] = value[1]; | ||
| 217 | } | ||
| 218 | |||
| 219 | template<> | ||
| 220 | inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { | ||
| 221 | record.loop_int = value; | ||
| 222 | } | ||
| 223 | |||
| 224 | template<> | ||
| 225 | inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { | ||
| 226 | record.instruction_offset = value; | ||
| 227 | } | ||
| 228 | |||
| 229 | template<> | ||
| 230 | inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { | ||
| 231 | record.next_instruction = value; | ||
| 232 | } | ||
| 233 | |||
| 234 | // Helper function to set debug information on the current shader iteration. | ||
| 235 | template<DebugDataRecord::Type type, typename ValueType> | ||
| 236 | inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { | ||
| 237 | // Debugging disabled => nothing to do | ||
| 238 | } | ||
| 239 | |||
| 240 | template<DebugDataRecord::Type type, typename ValueType> | ||
| 241 | inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { | ||
| 242 | if (offset >= debug_data.records.size()) | ||
| 243 | debug_data.records.resize(offset + 1); | ||
| 244 | |||
| 245 | SetField<type, ValueType>(debug_data.records[offset], value); | ||
| 246 | debug_data.records[offset].mask |= type; | ||
| 247 | } | ||
| 248 | |||
| 249 | |||
| 75 | /** | 250 | /** |
| 76 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS | 251 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS |
| 77 | * has four shader units that process shaders in parallel. At the present, Citra only implements a | 252 | * has four shader units that process shaders in parallel. At the present, Citra only implements a |
| 78 | * single shader unit that processes all shaders serially. Putting the state information in a struct | 253 | * single shader unit that processes all shaders serially. Putting the state information in a struct |
| 79 | * here will make it easier for us to parallelize the shader processing later. | 254 | * here will make it easier for us to parallelize the shader processing later. |
| 80 | */ | 255 | */ |
| 256 | template<bool Debug> | ||
| 81 | struct UnitState { | 257 | struct UnitState { |
| 82 | struct Registers { | 258 | struct Registers { |
| 83 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore | 259 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore |
| @@ -111,10 +287,7 @@ struct UnitState { | |||
| 111 | // TODO: Is there a maximal size for this? | 287 | // TODO: Is there a maximal size for this? |
| 112 | boost::container::static_vector<CallStackElement, 16> call_stack; | 288 | boost::container::static_vector<CallStackElement, 16> call_stack; |
| 113 | 289 | ||
| 114 | struct { | 290 | DebugData<Debug> debug; |
| 115 | u32 max_offset; // maximum program counter ever reached | ||
| 116 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||
| 117 | } debug; | ||
| 118 | 291 | ||
| 119 | static int InputOffset(const SourceRegister& reg) { | 292 | static int InputOffset(const SourceRegister& reg) { |
| 120 | switch (reg.GetRegisterType()) { | 293 | switch (reg.GetRegisterType()) { |
| @@ -150,7 +323,7 @@ struct UnitState { | |||
| 150 | * vertex, which would happen within the `Run` function). | 323 | * vertex, which would happen within the `Run` function). |
| 151 | * @param state Shader unit state, must be setup per shader and per shader unit | 324 | * @param state Shader unit state, must be setup per shader and per shader unit |
| 152 | */ | 325 | */ |
| 153 | void Setup(UnitState& state); | 326 | void Setup(UnitState<false>& state); |
| 154 | 327 | ||
| 155 | /// Performs any cleanup when the emulator is shutdown | 328 | /// Performs any cleanup when the emulator is shutdown |
| 156 | void Shutdown(); | 329 | void Shutdown(); |
| @@ -162,7 +335,17 @@ void Shutdown(); | |||
| 162 | * @param num_attributes The number of vertex shader attributes | 335 | * @param num_attributes The number of vertex shader attributes |
| 163 | * @return The output vertex, after having been processed by the vertex shader | 336 | * @return The output vertex, after having been processed by the vertex shader |
| 164 | */ | 337 | */ |
| 165 | OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes); | 338 | OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes); |
| 339 | |||
| 340 | /** | ||
| 341 | * Produce debug information based on the given shader and input vertex | ||
| 342 | * @param input Input vertex into the shader | ||
| 343 | * @param num_attributes The number of vertex shader attributes | ||
| 344 | * @param config Configuration object for the shader pipeline | ||
| 345 | * @param setup Setup object for the shader pipeline | ||
| 346 | * @return Debug information for this shader with regard to the given vertex | ||
| 347 | */ | ||
| 348 | DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup); | ||
| 166 | 349 | ||
| 167 | } // namespace Shader | 350 | } // namespace Shader |
| 168 | 351 | ||
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index c8489f920..e14de0768 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -21,7 +21,8 @@ namespace Pica { | |||
| 21 | 21 | ||
| 22 | namespace Shader { | 22 | namespace Shader { |
| 23 | 23 | ||
| 24 | void RunInterpreter(UnitState& state) { | 24 | template<bool Debug> |
| 25 | void RunInterpreter(UnitState<Debug>& state) { | ||
| 25 | const auto& uniforms = g_state.vs.uniforms; | 26 | const auto& uniforms = g_state.vs.uniforms; |
| 26 | const auto& swizzle_data = g_state.vs.swizzle_data; | 27 | const auto& swizzle_data = g_state.vs.swizzle_data; |
| 27 | const auto& program_code = g_state.vs.program_code; | 28 | const auto& program_code = g_state.vs.program_code; |
| @@ -29,7 +30,9 @@ void RunInterpreter(UnitState& state) { | |||
| 29 | // Placeholder for invalid inputs | 30 | // Placeholder for invalid inputs |
| 30 | static float24 dummy_vec4_float24[4]; | 31 | static float24 dummy_vec4_float24[4]; |
| 31 | 32 | ||
| 32 | while (true) { | 33 | unsigned iteration = 0; |
| 34 | bool exit_loop = false; | ||
| 35 | while (!exit_loop) { | ||
| 33 | if (!state.call_stack.empty()) { | 36 | if (!state.call_stack.empty()) { |
| 34 | auto& top = state.call_stack.back(); | 37 | auto& top = state.call_stack.back(); |
| 35 | if (state.program_counter == top.final_address) { | 38 | if (state.program_counter == top.final_address) { |
| @@ -47,16 +50,19 @@ void RunInterpreter(UnitState& state) { | |||
| 47 | } | 50 | } |
| 48 | } | 51 | } |
| 49 | 52 | ||
| 50 | bool exit_loop = false; | ||
| 51 | const Instruction instr = { program_code[state.program_counter] }; | 53 | const Instruction instr = { program_code[state.program_counter] }; |
| 52 | const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; | 54 | const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; |
| 53 | 55 | ||
| 54 | static auto call = [](UnitState& state, u32 offset, u32 num_instructions, | 56 | static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions, |
| 55 | u32 return_offset, u8 repeat_count, u8 loop_increment) { | 57 | u32 return_offset, u8 repeat_count, u8 loop_increment) { |
| 56 | state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset | 58 | state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset |
| 57 | ASSERT(state.call_stack.size() < state.call_stack.capacity()); | 59 | ASSERT(state.call_stack.size() < state.call_stack.capacity()); |
| 58 | state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); | 60 | state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); |
| 59 | }; | 61 | }; |
| 62 | Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter); | ||
| 63 | if (iteration > 0) | ||
| 64 | Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter); | ||
| 65 | |||
| 60 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); | 66 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); |
| 61 | 67 | ||
| 62 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { | 68 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { |
| @@ -123,58 +129,78 @@ void RunInterpreter(UnitState& state) { | |||
| 123 | switch (instr.opcode.Value().EffectiveOpCode()) { | 129 | switch (instr.opcode.Value().EffectiveOpCode()) { |
| 124 | case OpCode::Id::ADD: | 130 | case OpCode::Id::ADD: |
| 125 | { | 131 | { |
| 132 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 133 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 134 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 126 | for (int i = 0; i < 4; ++i) { | 135 | for (int i = 0; i < 4; ++i) { |
| 127 | if (!swizzle.DestComponentEnabled(i)) | 136 | if (!swizzle.DestComponentEnabled(i)) |
| 128 | continue; | 137 | continue; |
| 129 | 138 | ||
| 130 | dest[i] = src1[i] + src2[i]; | 139 | dest[i] = src1[i] + src2[i]; |
| 131 | } | 140 | } |
| 132 | 141 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | |
| 133 | break; | 142 | break; |
| 134 | } | 143 | } |
| 135 | 144 | ||
| 136 | case OpCode::Id::MUL: | 145 | case OpCode::Id::MUL: |
| 137 | { | 146 | { |
| 147 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 148 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 149 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 138 | for (int i = 0; i < 4; ++i) { | 150 | for (int i = 0; i < 4; ++i) { |
| 139 | if (!swizzle.DestComponentEnabled(i)) | 151 | if (!swizzle.DestComponentEnabled(i)) |
| 140 | continue; | 152 | continue; |
| 141 | 153 | ||
| 142 | dest[i] = src1[i] * src2[i]; | 154 | dest[i] = src1[i] * src2[i]; |
| 143 | } | 155 | } |
| 144 | 156 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | |
| 145 | break; | 157 | break; |
| 146 | } | 158 | } |
| 147 | 159 | ||
| 148 | case OpCode::Id::FLR: | 160 | case OpCode::Id::FLR: |
| 161 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 162 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 149 | for (int i = 0; i < 4; ++i) { | 163 | for (int i = 0; i < 4; ++i) { |
| 150 | if (!swizzle.DestComponentEnabled(i)) | 164 | if (!swizzle.DestComponentEnabled(i)) |
| 151 | continue; | 165 | continue; |
| 152 | 166 | ||
| 153 | dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); | 167 | dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); |
| 154 | } | 168 | } |
| 169 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 155 | break; | 170 | break; |
| 156 | 171 | ||
| 157 | case OpCode::Id::MAX: | 172 | case OpCode::Id::MAX: |
| 173 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 174 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 175 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 158 | for (int i = 0; i < 4; ++i) { | 176 | for (int i = 0; i < 4; ++i) { |
| 159 | if (!swizzle.DestComponentEnabled(i)) | 177 | if (!swizzle.DestComponentEnabled(i)) |
| 160 | continue; | 178 | continue; |
| 161 | 179 | ||
| 162 | dest[i] = std::max(src1[i], src2[i]); | 180 | dest[i] = std::max(src1[i], src2[i]); |
| 163 | } | 181 | } |
| 182 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 164 | break; | 183 | break; |
| 165 | 184 | ||
| 166 | case OpCode::Id::MIN: | 185 | case OpCode::Id::MIN: |
| 186 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 187 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 188 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 167 | for (int i = 0; i < 4; ++i) { | 189 | for (int i = 0; i < 4; ++i) { |
| 168 | if (!swizzle.DestComponentEnabled(i)) | 190 | if (!swizzle.DestComponentEnabled(i)) |
| 169 | continue; | 191 | continue; |
| 170 | 192 | ||
| 171 | dest[i] = std::min(src1[i], src2[i]); | 193 | dest[i] = std::min(src1[i], src2[i]); |
| 172 | } | 194 | } |
| 195 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 173 | break; | 196 | break; |
| 174 | 197 | ||
| 175 | case OpCode::Id::DP3: | 198 | case OpCode::Id::DP3: |
| 176 | case OpCode::Id::DP4: | 199 | case OpCode::Id::DP4: |
| 177 | { | 200 | { |
| 201 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 202 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 203 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 178 | float24 dot = float24::FromFloat32(0.f); | 204 | float24 dot = float24::FromFloat32(0.f); |
| 179 | int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; | 205 | int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; |
| 180 | for (int i = 0; i < num_components; ++i) | 206 | for (int i = 0; i < num_components; ++i) |
| @@ -186,12 +212,15 @@ void RunInterpreter(UnitState& state) { | |||
| 186 | 212 | ||
| 187 | dest[i] = dot; | 213 | dest[i] = dot; |
| 188 | } | 214 | } |
| 215 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 189 | break; | 216 | break; |
| 190 | } | 217 | } |
| 191 | 218 | ||
| 192 | // Reciprocal | 219 | // Reciprocal |
| 193 | case OpCode::Id::RCP: | 220 | case OpCode::Id::RCP: |
| 194 | { | 221 | { |
| 222 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 223 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 195 | for (int i = 0; i < 4; ++i) { | 224 | for (int i = 0; i < 4; ++i) { |
| 196 | if (!swizzle.DestComponentEnabled(i)) | 225 | if (!swizzle.DestComponentEnabled(i)) |
| 197 | continue; | 226 | continue; |
| @@ -200,13 +229,15 @@ void RunInterpreter(UnitState& state) { | |||
| 200 | // TODO: I think this might be wrong... we should only use one component here | 229 | // TODO: I think this might be wrong... we should only use one component here |
| 201 | dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32()); | 230 | dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32()); |
| 202 | } | 231 | } |
| 203 | 232 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | |
| 204 | break; | 233 | break; |
| 205 | } | 234 | } |
| 206 | 235 | ||
| 207 | // Reciprocal Square Root | 236 | // Reciprocal Square Root |
| 208 | case OpCode::Id::RSQ: | 237 | case OpCode::Id::RSQ: |
| 209 | { | 238 | { |
| 239 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 240 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 210 | for (int i = 0; i < 4; ++i) { | 241 | for (int i = 0; i < 4; ++i) { |
| 211 | if (!swizzle.DestComponentEnabled(i)) | 242 | if (!swizzle.DestComponentEnabled(i)) |
| 212 | continue; | 243 | continue; |
| @@ -215,12 +246,13 @@ void RunInterpreter(UnitState& state) { | |||
| 215 | // TODO: I think this might be wrong... we should only use one component here | 246 | // TODO: I think this might be wrong... we should only use one component here |
| 216 | dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32())); | 247 | dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32())); |
| 217 | } | 248 | } |
| 218 | 249 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | |
| 219 | break; | 250 | break; |
| 220 | } | 251 | } |
| 221 | 252 | ||
| 222 | case OpCode::Id::MOVA: | 253 | case OpCode::Id::MOVA: |
| 223 | { | 254 | { |
| 255 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 224 | for (int i = 0; i < 2; ++i) { | 256 | for (int i = 0; i < 2; ++i) { |
| 225 | if (!swizzle.DestComponentEnabled(i)) | 257 | if (!swizzle.DestComponentEnabled(i)) |
| 226 | continue; | 258 | continue; |
| @@ -228,32 +260,41 @@ void RunInterpreter(UnitState& state) { | |||
| 228 | // TODO: Figure out how the rounding is done on hardware | 260 | // TODO: Figure out how the rounding is done on hardware |
| 229 | state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); | 261 | state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); |
| 230 | } | 262 | } |
| 231 | 263 | Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, state.address_registers); | |
| 232 | break; | 264 | break; |
| 233 | } | 265 | } |
| 234 | 266 | ||
| 235 | case OpCode::Id::MOV: | 267 | case OpCode::Id::MOV: |
| 236 | { | 268 | { |
| 269 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 270 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 237 | for (int i = 0; i < 4; ++i) { | 271 | for (int i = 0; i < 4; ++i) { |
| 238 | if (!swizzle.DestComponentEnabled(i)) | 272 | if (!swizzle.DestComponentEnabled(i)) |
| 239 | continue; | 273 | continue; |
| 240 | 274 | ||
| 241 | dest[i] = src1[i]; | 275 | dest[i] = src1[i]; |
| 242 | } | 276 | } |
| 277 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 243 | break; | 278 | break; |
| 244 | } | 279 | } |
| 245 | 280 | ||
| 246 | case OpCode::Id::SLT: | 281 | case OpCode::Id::SLT: |
| 247 | case OpCode::Id::SLTI: | 282 | case OpCode::Id::SLTI: |
| 283 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 284 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 285 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 248 | for (int i = 0; i < 4; ++i) { | 286 | for (int i = 0; i < 4; ++i) { |
| 249 | if (!swizzle.DestComponentEnabled(i)) | 287 | if (!swizzle.DestComponentEnabled(i)) |
| 250 | continue; | 288 | continue; |
| 251 | 289 | ||
| 252 | dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); | 290 | dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); |
| 253 | } | 291 | } |
| 292 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 254 | break; | 293 | break; |
| 255 | 294 | ||
| 256 | case OpCode::Id::CMP: | 295 | case OpCode::Id::CMP: |
| 296 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 297 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 257 | for (int i = 0; i < 2; ++i) { | 298 | for (int i = 0; i < 2; ++i) { |
| 258 | // TODO: Can you restrict to one compare via dest masking? | 299 | // TODO: Can you restrict to one compare via dest masking? |
| 259 | 300 | ||
| @@ -261,27 +302,27 @@ void RunInterpreter(UnitState& state) { | |||
| 261 | auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); | 302 | auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); |
| 262 | 303 | ||
| 263 | switch (op) { | 304 | switch (op) { |
| 264 | case compare_op.Equal: | 305 | case Instruction::Common::CompareOpType::Equal: |
| 265 | state.conditional_code[i] = (src1[i] == src2[i]); | 306 | state.conditional_code[i] = (src1[i] == src2[i]); |
| 266 | break; | 307 | break; |
| 267 | 308 | ||
| 268 | case compare_op.NotEqual: | 309 | case Instruction::Common::CompareOpType::NotEqual: |
| 269 | state.conditional_code[i] = (src1[i] != src2[i]); | 310 | state.conditional_code[i] = (src1[i] != src2[i]); |
| 270 | break; | 311 | break; |
| 271 | 312 | ||
| 272 | case compare_op.LessThan: | 313 | case Instruction::Common::CompareOpType::LessThan: |
| 273 | state.conditional_code[i] = (src1[i] < src2[i]); | 314 | state.conditional_code[i] = (src1[i] < src2[i]); |
| 274 | break; | 315 | break; |
| 275 | 316 | ||
| 276 | case compare_op.LessEqual: | 317 | case Instruction::Common::CompareOpType::LessEqual: |
| 277 | state.conditional_code[i] = (src1[i] <= src2[i]); | 318 | state.conditional_code[i] = (src1[i] <= src2[i]); |
| 278 | break; | 319 | break; |
| 279 | 320 | ||
| 280 | case compare_op.GreaterThan: | 321 | case Instruction::Common::CompareOpType::GreaterThan: |
| 281 | state.conditional_code[i] = (src1[i] > src2[i]); | 322 | state.conditional_code[i] = (src1[i] > src2[i]); |
| 282 | break; | 323 | break; |
| 283 | 324 | ||
| 284 | case compare_op.GreaterEqual: | 325 | case Instruction::Common::CompareOpType::GreaterEqual: |
| 285 | state.conditional_code[i] = (src1[i] >= src2[i]); | 326 | state.conditional_code[i] = (src1[i] >= src2[i]); |
| 286 | break; | 327 | break; |
| 287 | 328 | ||
| @@ -290,6 +331,7 @@ void RunInterpreter(UnitState& state) { | |||
| 290 | break; | 331 | break; |
| 291 | } | 332 | } |
| 292 | } | 333 | } |
| 334 | Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); | ||
| 293 | break; | 335 | break; |
| 294 | 336 | ||
| 295 | default: | 337 | default: |
| @@ -359,12 +401,17 @@ void RunInterpreter(UnitState& state) { | |||
| 359 | : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] | 401 | : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] |
| 360 | : dummy_vec4_float24; | 402 | : dummy_vec4_float24; |
| 361 | 403 | ||
| 404 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 405 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 406 | Record<DebugDataRecord::SRC3>(state.debug, iteration, src3); | ||
| 407 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 362 | for (int i = 0; i < 4; ++i) { | 408 | for (int i = 0; i < 4; ++i) { |
| 363 | if (!swizzle.DestComponentEnabled(i)) | 409 | if (!swizzle.DestComponentEnabled(i)) |
| 364 | continue; | 410 | continue; |
| 365 | 411 | ||
| 366 | dest[i] = src1[i] * src2[i] + src3[i]; | 412 | dest[i] = src1[i] * src2[i] + src3[i]; |
| 367 | } | 413 | } |
| 414 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 368 | } else { | 415 | } else { |
| 369 | LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", | 416 | LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", |
| 370 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); | 417 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); |
| @@ -374,7 +421,7 @@ void RunInterpreter(UnitState& state) { | |||
| 374 | 421 | ||
| 375 | default: | 422 | default: |
| 376 | { | 423 | { |
| 377 | static auto evaluate_condition = [](const UnitState& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { | 424 | static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { |
| 378 | bool results[2] = { refx == state.conditional_code[0], | 425 | bool results[2] = { refx == state.conditional_code[0], |
| 379 | refy == state.conditional_code[1] }; | 426 | refy == state.conditional_code[1] }; |
| 380 | 427 | ||
| @@ -400,12 +447,14 @@ void RunInterpreter(UnitState& state) { | |||
| 400 | break; | 447 | break; |
| 401 | 448 | ||
| 402 | case OpCode::Id::JMPC: | 449 | case OpCode::Id::JMPC: |
| 450 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | ||
| 403 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 451 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 404 | state.program_counter = instr.flow_control.dest_offset - 1; | 452 | state.program_counter = instr.flow_control.dest_offset - 1; |
| 405 | } | 453 | } |
| 406 | break; | 454 | break; |
| 407 | 455 | ||
| 408 | case OpCode::Id::JMPU: | 456 | case OpCode::Id::JMPU: |
| 457 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||
| 409 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 458 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 410 | state.program_counter = instr.flow_control.dest_offset - 1; | 459 | state.program_counter = instr.flow_control.dest_offset - 1; |
| 411 | } | 460 | } |
| @@ -419,6 +468,7 @@ void RunInterpreter(UnitState& state) { | |||
| 419 | break; | 468 | break; |
| 420 | 469 | ||
| 421 | case OpCode::Id::CALLU: | 470 | case OpCode::Id::CALLU: |
| 471 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||
| 422 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 472 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 423 | call(state, | 473 | call(state, |
| 424 | instr.flow_control.dest_offset, | 474 | instr.flow_control.dest_offset, |
| @@ -428,6 +478,7 @@ void RunInterpreter(UnitState& state) { | |||
| 428 | break; | 478 | break; |
| 429 | 479 | ||
| 430 | case OpCode::Id::CALLC: | 480 | case OpCode::Id::CALLC: |
| 481 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | ||
| 431 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 482 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 432 | call(state, | 483 | call(state, |
| 433 | instr.flow_control.dest_offset, | 484 | instr.flow_control.dest_offset, |
| @@ -440,6 +491,7 @@ void RunInterpreter(UnitState& state) { | |||
| 440 | break; | 491 | break; |
| 441 | 492 | ||
| 442 | case OpCode::Id::IFU: | 493 | case OpCode::Id::IFU: |
| 494 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||
| 443 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 495 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 444 | call(state, | 496 | call(state, |
| 445 | state.program_counter + 1, | 497 | state.program_counter + 1, |
| @@ -458,6 +510,7 @@ void RunInterpreter(UnitState& state) { | |||
| 458 | { | 510 | { |
| 459 | // TODO: Do we need to consider swizzlers here? | 511 | // TODO: Do we need to consider swizzlers here? |
| 460 | 512 | ||
| 513 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | ||
| 461 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 514 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 462 | call(state, | 515 | call(state, |
| 463 | state.program_counter + 1, | 516 | state.program_counter + 1, |
| @@ -475,14 +528,19 @@ void RunInterpreter(UnitState& state) { | |||
| 475 | 528 | ||
| 476 | case OpCode::Id::LOOP: | 529 | case OpCode::Id::LOOP: |
| 477 | { | 530 | { |
| 478 | state.address_registers[2] = uniforms.i[instr.flow_control.int_uniform_id].y; | 531 | Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x, |
| 532 | uniforms.i[instr.flow_control.int_uniform_id].y, | ||
| 533 | uniforms.i[instr.flow_control.int_uniform_id].z, | ||
| 534 | uniforms.i[instr.flow_control.int_uniform_id].w); | ||
| 535 | state.address_registers[2] = loop_param.y; | ||
| 479 | 536 | ||
| 537 | Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); | ||
| 480 | call(state, | 538 | call(state, |
| 481 | state.program_counter + 1, | 539 | state.program_counter + 1, |
| 482 | instr.flow_control.dest_offset - state.program_counter + 1, | 540 | instr.flow_control.dest_offset - state.program_counter + 1, |
| 483 | instr.flow_control.dest_offset + 1, | 541 | instr.flow_control.dest_offset + 1, |
| 484 | uniforms.i[instr.flow_control.int_uniform_id].x, | 542 | loop_param.x, |
| 485 | uniforms.i[instr.flow_control.int_uniform_id].z); | 543 | loop_param.z); |
| 486 | break; | 544 | break; |
| 487 | } | 545 | } |
| 488 | 546 | ||
| @@ -497,12 +555,14 @@ void RunInterpreter(UnitState& state) { | |||
| 497 | } | 555 | } |
| 498 | 556 | ||
| 499 | ++state.program_counter; | 557 | ++state.program_counter; |
| 500 | 558 | ++iteration; | |
| 501 | if (exit_loop) | ||
| 502 | break; | ||
| 503 | } | 559 | } |
| 504 | } | 560 | } |
| 505 | 561 | ||
| 562 | // Explicit instantiation | ||
| 563 | template void RunInterpreter(UnitState<false>& state); | ||
| 564 | template void RunInterpreter(UnitState<true>& state); | ||
| 565 | |||
| 506 | } // namespace | 566 | } // namespace |
| 507 | 567 | ||
| 508 | } // namespace | 568 | } // namespace |
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index ad6e58e39..71bcad5ac 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h | |||
| @@ -12,7 +12,8 @@ namespace Pica { | |||
| 12 | 12 | ||
| 13 | namespace Shader { | 13 | namespace Shader { |
| 14 | 14 | ||
| 15 | void RunInterpreter(UnitState& state); | 15 | template<bool Debug> |
| 16 | void RunInterpreter(UnitState<Debug>& state); | ||
| 16 | 17 | ||
| 17 | } // namespace | 18 | } // namespace |
| 18 | 19 | ||
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index ce47774d5..836942c6b 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -141,7 +141,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source | |||
| 141 | src_offset = src_reg.GetIndex() * sizeof(float24) * 4; | 141 | src_offset = src_reg.GetIndex() * sizeof(float24) * 4; |
| 142 | } else { | 142 | } else { |
| 143 | src_ptr = REGISTERS; | 143 | src_ptr = REGISTERS; |
| 144 | src_offset = UnitState::InputOffset(src_reg); | 144 | src_offset = UnitState<false>::InputOffset(src_reg); |
| 145 | } | 145 | } |
| 146 | 146 | ||
| 147 | unsigned operand_desc_id; | 147 | unsigned operand_desc_id; |
| @@ -217,11 +217,11 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 217 | // If all components are enabled, write the result to the destination register | 217 | // If all components are enabled, write the result to the destination register |
| 218 | if (swiz.dest_mask == NO_DEST_REG_MASK) { | 218 | if (swiz.dest_mask == NO_DEST_REG_MASK) { |
| 219 | // Store dest back to memory | 219 | // Store dest back to memory |
| 220 | MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), src); | 220 | MOVAPS(MDisp(REGISTERS, UnitState<false>::OutputOffset(dest)), src); |
| 221 | 221 | ||
| 222 | } else { | 222 | } else { |
| 223 | // Not all components are enabled, so mask the result when storing to the destination register... | 223 | // Not all components are enabled, so mask the result when storing to the destination register... |
| 224 | MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState::OutputOffset(dest))); | 224 | MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState<false>::OutputOffset(dest))); |
| 225 | 225 | ||
| 226 | if (Common::GetCPUCaps().sse4_1) { | 226 | if (Common::GetCPUCaps().sse4_1) { |
| 227 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | 227 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); |
| @@ -240,7 +240,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 240 | } | 240 | } |
| 241 | 241 | ||
| 242 | // Store dest back to memory | 242 | // Store dest back to memory |
| 243 | MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), SCRATCH); | 243 | MOVAPS(MDisp(REGISTERS, UnitState<false>::OutputOffset(dest)), SCRATCH); |
| 244 | } | 244 | } |
| 245 | } | 245 | } |
| 246 | 246 | ||