diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/citra_qt/debugger/graphics_cmdlists.cpp | 42 | ||||
| -rw-r--r-- | src/citra_qt/debugger/graphics_vertex_shader.cpp | 256 | ||||
| -rw-r--r-- | src/citra_qt/debugger/graphics_vertex_shader.h | 51 | ||||
| -rw-r--r-- | src/core/loader/loader.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 33 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 99 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.h | 19 | ||||
| -rw-r--r-- | src/video_core/pica.h | 11 | ||||
| -rw-r--r-- | src/video_core/shader/shader.cpp | 55 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 197 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 104 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.h | 3 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 8 |
13 files changed, 735 insertions, 149 deletions
diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp index 29b4a04a0..35a3140b2 100644 --- a/src/citra_qt/debugger/graphics_cmdlists.cpp +++ b/src/citra_qt/debugger/graphics_cmdlists.cpp | |||
| @@ -175,29 +175,29 @@ int GPUCommandListModel::rowCount(const QModelIndex& parent) const { | |||
| 175 | } | 175 | } |
| 176 | 176 | ||
| 177 | int GPUCommandListModel::columnCount(const QModelIndex& parent) const { | 177 | int GPUCommandListModel::columnCount(const QModelIndex& parent) const { |
| 178 | return 3; | 178 | return 4; |
| 179 | } | 179 | } |
| 180 | 180 | ||
| 181 | QVariant GPUCommandListModel::data(const QModelIndex& index, int role) const { | 181 | QVariant GPUCommandListModel::data(const QModelIndex& index, int role) const { |
| 182 | if (!index.isValid()) | 182 | if (!index.isValid()) |
| 183 | return QVariant(); | 183 | return QVariant(); |
| 184 | 184 | ||
| 185 | const auto& writes = pica_trace.writes; | 185 | const auto& write = pica_trace.writes[index.row()]; |
| 186 | const Pica::CommandProcessor::CommandHeader cmd{writes[index.row()].Id()}; | ||
| 187 | const u32 val{writes[index.row()].Value()}; | ||
| 188 | 186 | ||
| 189 | if (role == Qt::DisplayRole) { | 187 | if (role == Qt::DisplayRole) { |
| 190 | QString content; | 188 | QString content; |
| 191 | switch ( index.column() ) { | 189 | switch ( index.column() ) { |
| 192 | case 0: | 190 | case 0: |
| 193 | return QString::fromLatin1(Pica::Regs::GetCommandName(cmd.cmd_id).c_str()); | 191 | return QString::fromLatin1(Pica::Regs::GetCommandName(write.cmd_id).c_str()); |
| 194 | case 1: | 192 | case 1: |
| 195 | return QString("%1").arg(cmd.cmd_id, 3, 16, QLatin1Char('0')); | 193 | return QString("%1").arg(write.cmd_id, 3, 16, QLatin1Char('0')); |
| 196 | case 2: | 194 | case 2: |
| 197 | return QString("%1").arg(val, 8, 16, QLatin1Char('0')); | 195 | return QString("%1").arg(write.mask, 4, 2, QLatin1Char('0')); |
| 196 | case 3: | ||
| 197 | return QString("%1").arg(write.value, 8, 16, QLatin1Char('0')); | ||
| 198 | } | 198 | } |
| 199 | } else if (role == CommandIdRole) { | 199 | } else if (role == CommandIdRole) { |
| 200 | return QVariant::fromValue<int>(cmd.cmd_id.Value()); | 200 | return QVariant::fromValue<int>(write.cmd_id); |
| 201 | } | 201 | } |
| 202 | 202 | ||
| 203 | return QVariant(); | 203 | return QVariant(); |
| @@ -213,6 +213,8 @@ QVariant GPUCommandListModel::headerData(int section, Qt::Orientation orientatio | |||
| 213 | case 1: | 213 | case 1: |
| 214 | return tr("Register"); | 214 | return tr("Register"); |
| 215 | case 2: | 215 | case 2: |
| 216 | return tr("Mask"); | ||
| 217 | case 3: | ||
| 216 | return tr("New Value"); | 218 | return tr("New Value"); |
| 217 | } | 219 | } |
| 218 | 220 | ||
| @@ -260,7 +262,7 @@ void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) { | |||
| 260 | } | 262 | } |
| 261 | 263 | ||
| 262 | void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) { | 264 | void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) { |
| 263 | QWidget* new_info_widget; | 265 | QWidget* new_info_widget = nullptr; |
| 264 | 266 | ||
| 265 | const unsigned int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toUInt(); | 267 | const unsigned int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toUInt(); |
| 266 | if (COMMAND_IN_RANGE(command_id, texture0) || | 268 | if (COMMAND_IN_RANGE(command_id, texture0) || |
| @@ -281,14 +283,15 @@ void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) { | |||
| 281 | auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format); | 283 | auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format); |
| 282 | u8* src = Memory::GetPhysicalPointer(config.GetPhysicalAddress()); | 284 | u8* src = Memory::GetPhysicalPointer(config.GetPhysicalAddress()); |
| 283 | new_info_widget = new TextureInfoWidget(src, info); | 285 | new_info_widget = new TextureInfoWidget(src, info); |
| 284 | } else { | ||
| 285 | new_info_widget = new QWidget; | ||
| 286 | } | 286 | } |
| 287 | 287 | if (command_info_widget) { | |
| 288 | widget()->layout()->removeWidget(command_info_widget); | 288 | delete command_info_widget; |
| 289 | delete command_info_widget; | 289 | command_info_widget = nullptr; |
| 290 | widget()->layout()->addWidget(new_info_widget); | 290 | } |
| 291 | command_info_widget = new_info_widget; | 291 | if (new_info_widget) { |
| 292 | widget()->layout()->addWidget(new_info_widget); | ||
| 293 | command_info_widget = new_info_widget; | ||
| 294 | } | ||
| 292 | } | 295 | } |
| 293 | #undef COMMAND_IN_RANGE | 296 | #undef COMMAND_IN_RANGE |
| 294 | 297 | ||
| @@ -300,7 +303,9 @@ GPUCommandListWidget::GPUCommandListWidget(QWidget* parent) : QDockWidget(tr("Pi | |||
| 300 | 303 | ||
| 301 | list_widget = new QTreeView; | 304 | list_widget = new QTreeView; |
| 302 | list_widget->setModel(model); | 305 | list_widget->setModel(model); |
| 303 | list_widget->setFont(QFont("monospace")); | 306 | QFont font("monospace"); |
| 307 | font.setStyleHint(QFont::Monospace); // Automatic fallback to a monospace font on platforms without a font called "monospace" | ||
| 308 | list_widget->setFont(font); | ||
| 304 | list_widget->setRootIsDecorated(false); | 309 | list_widget->setRootIsDecorated(false); |
| 305 | list_widget->setUniformRowHeights(true); | 310 | list_widget->setUniformRowHeights(true); |
| 306 | 311 | ||
| @@ -324,7 +329,7 @@ GPUCommandListWidget::GPUCommandListWidget(QWidget* parent) : QDockWidget(tr("Pi | |||
| 324 | 329 | ||
| 325 | connect(copy_all, SIGNAL(clicked()), this, SLOT(CopyAllToClipboard())); | 330 | connect(copy_all, SIGNAL(clicked()), this, SLOT(CopyAllToClipboard())); |
| 326 | 331 | ||
| 327 | command_info_widget = new QWidget; | 332 | command_info_widget = nullptr; |
| 328 | 333 | ||
| 329 | QVBoxLayout* main_layout = new QVBoxLayout; | 334 | QVBoxLayout* main_layout = new QVBoxLayout; |
| 330 | main_layout->addWidget(list_widget); | 335 | main_layout->addWidget(list_widget); |
| @@ -334,7 +339,6 @@ GPUCommandListWidget::GPUCommandListWidget(QWidget* parent) : QDockWidget(tr("Pi | |||
| 334 | sub_layout->addWidget(copy_all); | 339 | sub_layout->addWidget(copy_all); |
| 335 | main_layout->addLayout(sub_layout); | 340 | main_layout->addLayout(sub_layout); |
| 336 | } | 341 | } |
| 337 | main_layout->addWidget(command_info_widget); | ||
| 338 | main_widget->setLayout(main_layout); | 342 | main_widget->setLayout(main_layout); |
| 339 | 343 | ||
| 340 | setWidget(main_widget); | 344 | setWidget(main_widget); |
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp index 302e22d7a..0c17edee0 100644 --- a/src/citra_qt/debugger/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp | |||
| @@ -6,9 +6,16 @@ | |||
| 6 | #include <sstream> | 6 | #include <sstream> |
| 7 | 7 | ||
| 8 | #include <QBoxLayout> | 8 | #include <QBoxLayout> |
| 9 | #include <QFileDialog> | ||
| 10 | #include <QGroupBox> | ||
| 11 | #include <QLabel> | ||
| 12 | #include <QLineEdit> | ||
| 13 | #include <QPushButton> | ||
| 14 | #include <QSignalMapper> | ||
| 15 | #include <QSpinBox> | ||
| 9 | #include <QTreeView> | 16 | #include <QTreeView> |
| 10 | 17 | ||
| 11 | #include "video_core/shader/shader_interpreter.h" | 18 | #include "video_core/shader/shader.h" |
| 12 | 19 | ||
| 13 | #include "graphics_vertex_shader.h" | 20 | #include "graphics_vertex_shader.h" |
| 14 | 21 | ||
| @@ -17,7 +24,7 @@ using nihstro::Instruction; | |||
| 17 | using nihstro::SourceRegister; | 24 | using nihstro::SourceRegister; |
| 18 | using nihstro::SwizzlePattern; | 25 | using nihstro::SwizzlePattern; |
| 19 | 26 | ||
| 20 | GraphicsVertexShaderModel::GraphicsVertexShaderModel(QObject* parent): QAbstractItemModel(parent) { | 27 | GraphicsVertexShaderModel::GraphicsVertexShaderModel(GraphicsVertexShaderWidget* parent): QAbstractItemModel(parent), par(parent) { |
| 21 | 28 | ||
| 22 | } | 29 | } |
| 23 | 30 | ||
| @@ -34,7 +41,7 @@ int GraphicsVertexShaderModel::columnCount(const QModelIndex& parent) const { | |||
| 34 | } | 41 | } |
| 35 | 42 | ||
| 36 | int GraphicsVertexShaderModel::rowCount(const QModelIndex& parent) const { | 43 | int GraphicsVertexShaderModel::rowCount(const QModelIndex& parent) const { |
| 37 | return static_cast<int>(info.code.size()); | 44 | return static_cast<int>(par->info.code.size()); |
| 38 | } | 45 | } |
| 39 | 46 | ||
| 40 | QVariant GraphicsVertexShaderModel::headerData(int section, Qt::Orientation orientation, int role) const { | 47 | QVariant GraphicsVertexShaderModel::headerData(int section, Qt::Orientation orientation, int role) const { |
| @@ -62,21 +69,21 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | |||
| 62 | { | 69 | { |
| 63 | switch (index.column()) { | 70 | switch (index.column()) { |
| 64 | case 0: | 71 | case 0: |
| 65 | if (info.HasLabel(index.row())) | 72 | if (par->info.HasLabel(index.row())) |
| 66 | return QString::fromStdString(info.GetLabel(index.row())); | 73 | return QString::fromStdString(par->info.GetLabel(index.row())); |
| 67 | 74 | ||
| 68 | return QString("%1").arg(4*index.row(), 4, 16, QLatin1Char('0')); | 75 | return QString("%1").arg(4*index.row(), 4, 16, QLatin1Char('0')); |
| 69 | 76 | ||
| 70 | case 1: | 77 | case 1: |
| 71 | return QString("%1").arg(info.code[index.row()].hex, 8, 16, QLatin1Char('0')); | 78 | return QString("%1").arg(par->info.code[index.row()].hex, 8, 16, QLatin1Char('0')); |
| 72 | 79 | ||
| 73 | case 2: | 80 | case 2: |
| 74 | { | 81 | { |
| 75 | std::stringstream output; | 82 | std::stringstream output; |
| 76 | output.flags(std::ios::hex); | 83 | output.flags(std::ios::hex); |
| 77 | 84 | ||
| 78 | Instruction instr = info.code[index.row()]; | 85 | Instruction instr = par->info.code[index.row()]; |
| 79 | const SwizzlePattern& swizzle = info.swizzle_info[instr.common.operand_desc_id].pattern; | 86 | const SwizzlePattern& swizzle = par->info.swizzle_info[instr.common.operand_desc_id].pattern; |
| 80 | 87 | ||
| 81 | // longest known instruction name: "setemit " | 88 | // longest known instruction name: "setemit " |
| 82 | output << std::setw(8) << std::left << instr.opcode.Value().GetInfo().name; | 89 | output << std::setw(8) << std::left << instr.opcode.Value().GetInfo().name; |
| @@ -130,13 +137,13 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | |||
| 130 | 137 | ||
| 131 | print_input_indexed_compact(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false).substr(0,1), instr.common.AddressRegisterName()); | 138 | print_input_indexed_compact(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false).substr(0,1), instr.common.AddressRegisterName()); |
| 132 | output << " " << instr.common.compare_op.ToString(instr.common.compare_op.x) << " "; | 139 | output << " " << instr.common.compare_op.ToString(instr.common.compare_op.x) << " "; |
| 133 | print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false).substr(0,1)); | 140 | print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(true).substr(0,1)); |
| 134 | 141 | ||
| 135 | output << ", "; | 142 | output << ", "; |
| 136 | 143 | ||
| 137 | print_input_indexed_compact(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false).substr(1,1), instr.common.AddressRegisterName()); | 144 | print_input_indexed_compact(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false).substr(1,1), instr.common.AddressRegisterName()); |
| 138 | output << " " << instr.common.compare_op.ToString(instr.common.compare_op.y) << " "; | 145 | output << " " << instr.common.compare_op.ToString(instr.common.compare_op.y) << " "; |
| 139 | print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false).substr(1,1)); | 146 | print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(true).substr(1,1)); |
| 140 | 147 | ||
| 141 | break; | 148 | break; |
| 142 | } | 149 | } |
| @@ -167,7 +174,7 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | |||
| 167 | // TODO: In some cases, the Address Register is used as an index for SRC2 instead of SRC1 | 174 | // TODO: In some cases, the Address Register is used as an index for SRC2 instead of SRC1 |
| 168 | if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::Src2) { | 175 | if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::Src2) { |
| 169 | SourceRegister src2 = instr.common.GetSrc2(src_is_inverted); | 176 | SourceRegister src2 = instr.common.GetSrc2(src_is_inverted); |
| 170 | print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false)); | 177 | print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(true)); |
| 171 | } | 178 | } |
| 172 | break; | 179 | break; |
| 173 | } | 180 | } |
| @@ -240,6 +247,18 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | |||
| 240 | case Qt::FontRole: | 247 | case Qt::FontRole: |
| 241 | return QFont("monospace"); | 248 | return QFont("monospace"); |
| 242 | 249 | ||
| 250 | case Qt::BackgroundRole: | ||
| 251 | // Highlight instructions which have no debug data associated to them | ||
| 252 | for (const auto& record : par->debug_data.records) | ||
| 253 | if (index.row() == record.instruction_offset) | ||
| 254 | return QVariant(); | ||
| 255 | |||
| 256 | return QBrush(QColor(255, 255, 127)); | ||
| 257 | |||
| 258 | |||
| 259 | // TODO: Draw arrows for each "reachable" instruction to visualize control flow | ||
| 260 | |||
| 261 | |||
| 243 | default: | 262 | default: |
| 244 | break; | 263 | break; |
| 245 | } | 264 | } |
| @@ -247,53 +266,232 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | |||
| 247 | return QVariant(); | 266 | return QVariant(); |
| 248 | } | 267 | } |
| 249 | 268 | ||
| 250 | void GraphicsVertexShaderModel::OnUpdate() | 269 | void GraphicsVertexShaderWidget::DumpShader() { |
| 251 | { | 270 | QString filename = QFileDialog::getSaveFileName(this, tr("Save Shader Dump"), "shader_dump.shbin", |
| 252 | beginResetModel(); | 271 | tr("Shader Binary (*.shbin)")); |
| 253 | |||
| 254 | info.Clear(); | ||
| 255 | |||
| 256 | for (auto instr : Pica::g_state.vs.program_code) | ||
| 257 | info.code.push_back({instr}); | ||
| 258 | 272 | ||
| 259 | for (auto pattern : Pica::g_state.vs.swizzle_data) | 273 | if (filename.isEmpty()) { |
| 260 | info.swizzle_info.push_back({pattern}); | 274 | // If the user canceled the dialog, don't dump anything. |
| 275 | return; | ||
| 276 | } | ||
| 261 | 277 | ||
| 262 | info.labels.insert({ Pica::g_state.regs.vs.main_offset, "main" }); | 278 | auto& setup = Pica::g_state.vs; |
| 279 | auto& config = Pica::g_state.regs.vs; | ||
| 263 | 280 | ||
| 264 | endResetModel(); | 281 | Pica::DebugUtils::DumpShader(filename.toStdString(), config, setup, Pica::g_state.regs.vs_output_attributes); |
| 265 | } | 282 | } |
| 266 | 283 | ||
| 267 | |||
| 268 | GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::DebugContext > debug_context, | 284 | GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::DebugContext > debug_context, |
| 269 | QWidget* parent) | 285 | QWidget* parent) |
| 270 | : BreakPointObserverDock(debug_context, "Pica Vertex Shader", parent) { | 286 | : BreakPointObserverDock(debug_context, "Pica Vertex Shader", parent) { |
| 271 | setObjectName("PicaVertexShader"); | 287 | setObjectName("PicaVertexShader"); |
| 272 | 288 | ||
| 273 | auto binary_model = new GraphicsVertexShaderModel(this); | 289 | auto input_data_mapper = new QSignalMapper(this); |
| 274 | auto binary_list = new QTreeView; | 290 | |
| 275 | binary_list->setModel(binary_model); | 291 | // TODO: Support inputting data in hexadecimal raw format |
| 292 | for (unsigned i = 0; i < ARRAY_SIZE(input_data); ++i) { | ||
| 293 | input_data[i] = new QLineEdit; | ||
| 294 | input_data[i]->setValidator(new QDoubleValidator(input_data[i])); | ||
| 295 | } | ||
| 296 | |||
| 297 | breakpoint_warning = new QLabel(tr("(data only available at VertexLoaded breakpoints)")); | ||
| 298 | |||
| 299 | // TODO: Add some button for jumping to the shader entry point | ||
| 300 | |||
| 301 | model = new GraphicsVertexShaderModel(this); | ||
| 302 | binary_list = new QTreeView; | ||
| 303 | binary_list->setModel(model); | ||
| 276 | binary_list->setRootIsDecorated(false); | 304 | binary_list->setRootIsDecorated(false); |
| 277 | binary_list->setAlternatingRowColors(true); | 305 | binary_list->setAlternatingRowColors(true); |
| 278 | 306 | ||
| 279 | connect(this, SIGNAL(Update()), binary_model, SLOT(OnUpdate())); | 307 | auto dump_shader = new QPushButton(QIcon::fromTheme("document-save"), tr("Dump")); |
| 308 | |||
| 309 | instruction_description = new QLabel; | ||
| 310 | |||
| 311 | cycle_index = new QSpinBox; | ||
| 312 | |||
| 313 | connect(this, SIGNAL(SelectCommand(const QModelIndex&, QItemSelectionModel::SelectionFlags)), | ||
| 314 | binary_list->selectionModel(), SLOT(select(const QModelIndex&, QItemSelectionModel::SelectionFlags))); | ||
| 315 | |||
| 316 | connect(dump_shader, SIGNAL(clicked()), this, SLOT(DumpShader())); | ||
| 317 | |||
| 318 | connect(cycle_index, SIGNAL(valueChanged(int)), this, SLOT(OnCycleIndexChanged(int))); | ||
| 319 | |||
| 320 | for (unsigned i = 0; i < ARRAY_SIZE(input_data); ++i) { | ||
| 321 | connect(input_data[i], SIGNAL(textEdited(const QString&)), input_data_mapper, SLOT(map())); | ||
| 322 | input_data_mapper->setMapping(input_data[i], i); | ||
| 323 | } | ||
| 324 | connect(input_data_mapper, SIGNAL(mapped(int)), this, SLOT(OnInputAttributeChanged(int))); | ||
| 280 | 325 | ||
| 281 | auto main_widget = new QWidget; | 326 | auto main_widget = new QWidget; |
| 282 | auto main_layout = new QVBoxLayout; | 327 | auto main_layout = new QVBoxLayout; |
| 283 | { | 328 | { |
| 329 | auto input_data_group = new QGroupBox(tr("Input Data")); | ||
| 330 | |||
| 331 | // For each vertex attribute, add a QHBoxLayout consisting of: | ||
| 332 | // - A QLabel denoting the source attribute index | ||
| 333 | // - Four QLineEdits for showing and manipulating attribute data | ||
| 334 | // - A QLabel denoting the shader input attribute index | ||
| 335 | auto sub_layout = new QVBoxLayout; | ||
| 336 | for (unsigned i = 0; i < 16; ++i) { | ||
| 337 | // Create an HBoxLayout to store the widgets used to specify a particular attribute | ||
| 338 | // and store it in a QWidget to allow for easy hiding and unhiding. | ||
| 339 | auto row_layout = new QHBoxLayout; | ||
| 340 | row_layout->addWidget(new QLabel(tr("Attribute %1").arg(i, 2))); | ||
| 341 | for (unsigned comp = 0; comp < 4; ++comp) | ||
| 342 | row_layout->addWidget(input_data[4 * i + comp]); | ||
| 343 | |||
| 344 | row_layout->addWidget(input_data_mapping[i] = new QLabel); | ||
| 345 | |||
| 346 | input_data_container[i] = new QWidget; | ||
| 347 | input_data_container[i]->setLayout(row_layout); | ||
| 348 | input_data_container[i]->hide(); | ||
| 349 | |||
| 350 | sub_layout->addWidget(input_data_container[i]); | ||
| 351 | } | ||
| 352 | |||
| 353 | sub_layout->addWidget(breakpoint_warning); | ||
| 354 | breakpoint_warning->hide(); | ||
| 355 | |||
| 356 | input_data_group->setLayout(sub_layout); | ||
| 357 | main_layout->addWidget(input_data_group); | ||
| 358 | } | ||
| 359 | { | ||
| 284 | auto sub_layout = new QHBoxLayout; | 360 | auto sub_layout = new QHBoxLayout; |
| 285 | sub_layout->addWidget(binary_list); | 361 | sub_layout->addWidget(binary_list); |
| 286 | main_layout->addLayout(sub_layout); | 362 | main_layout->addLayout(sub_layout); |
| 287 | } | 363 | } |
| 364 | main_layout->addWidget(dump_shader); | ||
| 365 | { | ||
| 366 | auto sub_layout = new QHBoxLayout; | ||
| 367 | sub_layout->addWidget(new QLabel(tr("Cycle Index:"))); | ||
| 368 | sub_layout->addWidget(cycle_index); | ||
| 369 | main_layout->addLayout(sub_layout); | ||
| 370 | } | ||
| 371 | main_layout->addWidget(instruction_description); | ||
| 372 | main_layout->addStretch(); | ||
| 288 | main_widget->setLayout(main_layout); | 373 | main_widget->setLayout(main_layout); |
| 289 | setWidget(main_widget); | 374 | setWidget(main_widget); |
| 375 | |||
| 376 | widget()->setEnabled(false); | ||
| 290 | } | 377 | } |
| 291 | 378 | ||
| 292 | void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { | 379 | void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { |
| 293 | emit Update(); | 380 | auto input = static_cast<Pica::Shader::InputVertex*>(data); |
| 381 | if (event == Pica::DebugContext::Event::VertexLoaded) { | ||
| 382 | Reload(true, data); | ||
| 383 | } else { | ||
| 384 | // No vertex data is retrievable => invalidate currently stored vertex data | ||
| 385 | Reload(true, nullptr); | ||
| 386 | } | ||
| 294 | widget()->setEnabled(true); | 387 | widget()->setEnabled(true); |
| 295 | } | 388 | } |
| 296 | 389 | ||
| 390 | void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_data) { | ||
| 391 | model->beginResetModel(); | ||
| 392 | |||
| 393 | if (replace_vertex_data) { | ||
| 394 | if (vertex_data) { | ||
| 395 | memcpy(&input_vertex, vertex_data, sizeof(input_vertex)); | ||
| 396 | for (unsigned attr = 0; attr < 16; ++attr) { | ||
| 397 | for (unsigned comp = 0; comp < 4; ++comp) { | ||
| 398 | input_data[4 * attr + comp]->setText(QString("%1").arg(input_vertex.attr[attr][comp].ToFloat32())); | ||
| 399 | } | ||
| 400 | } | ||
| 401 | breakpoint_warning->hide(); | ||
| 402 | } else { | ||
| 403 | for (unsigned attr = 0; attr < 16; ++attr) { | ||
| 404 | for (unsigned comp = 0; comp < 4; ++comp) { | ||
| 405 | input_data[4 * attr + comp]->setText(QString("???")); | ||
| 406 | } | ||
| 407 | } | ||
| 408 | breakpoint_warning->show(); | ||
| 409 | } | ||
| 410 | } | ||
| 411 | |||
| 412 | // Reload shader code | ||
| 413 | info.Clear(); | ||
| 414 | |||
| 415 | auto& shader_setup = Pica::g_state.vs; | ||
| 416 | auto& shader_config = Pica::g_state.regs.vs; | ||
| 417 | for (auto instr : shader_setup.program_code) | ||
| 418 | info.code.push_back({instr}); | ||
| 419 | |||
| 420 | for (auto pattern : shader_setup.swizzle_data) | ||
| 421 | info.swizzle_info.push_back({pattern}); | ||
| 422 | |||
| 423 | u32 entry_point = Pica::g_state.regs.vs.main_offset; | ||
| 424 | info.labels.insert({ entry_point, "main" }); | ||
| 425 | |||
| 426 | // Generate debug information | ||
| 427 | debug_data = Pica::Shader::ProduceDebugInfo(input_vertex, 1, shader_config, shader_setup); | ||
| 428 | |||
| 429 | // Reload widget state | ||
| 430 | |||
| 431 | // Only show input attributes which are used as input to the shader | ||
| 432 | for (unsigned int attr = 0; attr < 16; ++attr) { | ||
| 433 | input_data_container[attr]->setVisible(false); | ||
| 434 | } | ||
| 435 | for (unsigned int attr = 0; attr < Pica::g_state.regs.vertex_attributes.GetNumTotalAttributes(); ++attr) { | ||
| 436 | unsigned source_attr = shader_config.input_register_map.GetRegisterForAttribute(attr); | ||
| 437 | input_data_mapping[source_attr]->setText(QString("-> v%1").arg(attr)); | ||
| 438 | input_data_container[source_attr]->setVisible(true); | ||
| 439 | } | ||
| 440 | |||
| 441 | // Initialize debug info text for current cycle count | ||
| 442 | cycle_index->setMaximum(debug_data.records.size() - 1); | ||
| 443 | OnCycleIndexChanged(cycle_index->value()); | ||
| 444 | |||
| 445 | model->endResetModel(); | ||
| 446 | } | ||
| 447 | |||
| 297 | void GraphicsVertexShaderWidget::OnResumed() { | 448 | void GraphicsVertexShaderWidget::OnResumed() { |
| 298 | widget()->setEnabled(false); | 449 | widget()->setEnabled(false); |
| 299 | } | 450 | } |
| 451 | |||
| 452 | void GraphicsVertexShaderWidget::OnInputAttributeChanged(int index) { | ||
| 453 | float value = input_data[index]->text().toFloat(); | ||
| 454 | Reload(); | ||
| 455 | } | ||
| 456 | |||
| 457 | void GraphicsVertexShaderWidget::OnCycleIndexChanged(int index) { | ||
| 458 | QString text; | ||
| 459 | |||
| 460 | auto& record = debug_data.records[index]; | ||
| 461 | if (record.mask & Pica::Shader::DebugDataRecord::SRC1) | ||
| 462 | text += tr("SRC1: %1, %2, %3, %4\n").arg(record.src1.x.ToFloat32()).arg(record.src1.y.ToFloat32()).arg(record.src1.z.ToFloat32()).arg(record.src1.w.ToFloat32()); | ||
| 463 | if (record.mask & Pica::Shader::DebugDataRecord::SRC2) | ||
| 464 | text += tr("SRC2: %1, %2, %3, %4\n").arg(record.src2.x.ToFloat32()).arg(record.src2.y.ToFloat32()).arg(record.src2.z.ToFloat32()).arg(record.src2.w.ToFloat32()); | ||
| 465 | if (record.mask & Pica::Shader::DebugDataRecord::SRC3) | ||
| 466 | text += tr("SRC3: %1, %2, %3, %4\n").arg(record.src3.x.ToFloat32()).arg(record.src3.y.ToFloat32()).arg(record.src3.z.ToFloat32()).arg(record.src3.w.ToFloat32()); | ||
| 467 | if (record.mask & Pica::Shader::DebugDataRecord::DEST_IN) | ||
| 468 | text += tr("DEST_IN: %1, %2, %3, %4\n").arg(record.dest_in.x.ToFloat32()).arg(record.dest_in.y.ToFloat32()).arg(record.dest_in.z.ToFloat32()).arg(record.dest_in.w.ToFloat32()); | ||
| 469 | if (record.mask & Pica::Shader::DebugDataRecord::DEST_OUT) | ||
| 470 | text += tr("DEST_OUT: %1, %2, %3, %4\n").arg(record.dest_out.x.ToFloat32()).arg(record.dest_out.y.ToFloat32()).arg(record.dest_out.z.ToFloat32()).arg(record.dest_out.w.ToFloat32()); | ||
| 471 | |||
| 472 | if (record.mask & Pica::Shader::DebugDataRecord::ADDR_REG_OUT) | ||
| 473 | text += tr("Addres Registers: %1, %2\n").arg(record.address_registers[0]).arg(record.address_registers[1]); | ||
| 474 | if (record.mask & Pica::Shader::DebugDataRecord::CMP_RESULT) | ||
| 475 | text += tr("Compare Result: %1, %2\n").arg(record.conditional_code[0] ? "true" : "false").arg(record.conditional_code[1] ? "true" : "false"); | ||
| 476 | |||
| 477 | if (record.mask & Pica::Shader::DebugDataRecord::COND_BOOL_IN) | ||
| 478 | text += tr("Static Condition: %1\n").arg(record.cond_bool ? "true" : "false"); | ||
| 479 | if (record.mask & Pica::Shader::DebugDataRecord::COND_CMP_IN) | ||
| 480 | text += tr("Dynamic Conditions: %1, %2\n").arg(record.cond_cmp[0] ? "true" : "false").arg(record.cond_cmp[1] ? "true" : "false"); | ||
| 481 | if (record.mask & Pica::Shader::DebugDataRecord::LOOP_INT_IN) | ||
| 482 | text += tr("Loop Parameters: %1 (repeats), %2 (initializer), %3 (increment), %4\n").arg(record.loop_int.x).arg(record.loop_int.y).arg(record.loop_int.z).arg(record.loop_int.w); | ||
| 483 | |||
| 484 | text += tr("Instruction offset: 0x%1").arg(4 * record.instruction_offset, 4, 16, QLatin1Char('0')); | ||
| 485 | if (record.mask & Pica::Shader::DebugDataRecord::NEXT_INSTR) { | ||
| 486 | text += tr(" -> 0x%2").arg(4 * record.next_instruction, 4, 16, QLatin1Char('0')); | ||
| 487 | } else { | ||
| 488 | text += tr(" (last instruction)"); | ||
| 489 | } | ||
| 490 | |||
| 491 | instruction_description->setText(text); | ||
| 492 | |||
| 493 | // Scroll to current instruction | ||
| 494 | const QModelIndex& instr_index = model->index(record.instruction_offset, 0); | ||
| 495 | emit SelectCommand(instr_index, QItemSelectionModel::ClearAndSelect | QItemSelectionModel::Rows); | ||
| 496 | binary_list->scrollTo(instr_index, QAbstractItemView::EnsureVisible); | ||
| 497 | } | ||
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.h b/src/citra_qt/debugger/graphics_vertex_shader.h index 38339dc05..1b3f1f7ec 100644 --- a/src/citra_qt/debugger/graphics_vertex_shader.h +++ b/src/citra_qt/debugger/graphics_vertex_shader.h | |||
| @@ -10,11 +10,18 @@ | |||
| 10 | 10 | ||
| 11 | #include "nihstro/parser_shbin.h" | 11 | #include "nihstro/parser_shbin.h" |
| 12 | 12 | ||
| 13 | #include "video_core/shader/shader.h" | ||
| 14 | |||
| 15 | class QLabel; | ||
| 16 | class QSpinBox; | ||
| 17 | |||
| 18 | class GraphicsVertexShaderWidget; | ||
| 19 | |||
| 13 | class GraphicsVertexShaderModel : public QAbstractItemModel { | 20 | class GraphicsVertexShaderModel : public QAbstractItemModel { |
| 14 | Q_OBJECT | 21 | Q_OBJECT |
| 15 | 22 | ||
| 16 | public: | 23 | public: |
| 17 | GraphicsVertexShaderModel(QObject* parent); | 24 | GraphicsVertexShaderModel(GraphicsVertexShaderWidget* parent); |
| 18 | 25 | ||
| 19 | QModelIndex index(int row, int column, const QModelIndex& parent = QModelIndex()) const override; | 26 | QModelIndex index(int row, int column, const QModelIndex& parent = QModelIndex()) const override; |
| 20 | QModelIndex parent(const QModelIndex& child) const override; | 27 | QModelIndex parent(const QModelIndex& child) const override; |
| @@ -23,11 +30,10 @@ public: | |||
| 23 | QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override; | 30 | QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override; |
| 24 | QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const override; | 31 | QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const override; |
| 25 | 32 | ||
| 26 | public slots: | ||
| 27 | void OnUpdate(); | ||
| 28 | |||
| 29 | private: | 33 | private: |
| 30 | nihstro::ShaderInfo info; | 34 | GraphicsVertexShaderWidget* par; |
| 35 | |||
| 36 | friend class GraphicsVertexShaderWidget; | ||
| 31 | }; | 37 | }; |
| 32 | 38 | ||
| 33 | class GraphicsVertexShaderWidget : public BreakPointObserverDock { | 39 | class GraphicsVertexShaderWidget : public BreakPointObserverDock { |
| @@ -43,9 +49,42 @@ private slots: | |||
| 43 | void OnBreakPointHit(Pica::DebugContext::Event event, void* data) override; | 49 | void OnBreakPointHit(Pica::DebugContext::Event event, void* data) override; |
| 44 | void OnResumed() override; | 50 | void OnResumed() override; |
| 45 | 51 | ||
| 52 | void OnInputAttributeChanged(int index); | ||
| 53 | |||
| 54 | void OnCycleIndexChanged(int index); | ||
| 55 | |||
| 56 | void DumpShader(); | ||
| 57 | |||
| 58 | /** | ||
| 59 | * Reload widget based on the current PICA200 state | ||
| 60 | * @param replace_vertex_data If true, invalidate all current vertex data | ||
| 61 | * @param vertex_data New vertex data to use, as passed to OnBreakPointHit. May be nullptr to specify that no valid vertex data can be retrieved currently. Only used if replace_vertex_data is true. | ||
| 62 | */ | ||
| 63 | void Reload(bool replace_vertex_data = false, void* vertex_data = nullptr); | ||
| 64 | |||
| 65 | |||
| 46 | signals: | 66 | signals: |
| 47 | void Update(); | 67 | // Call this to change the current command selection in the disassembly view |
| 68 | void SelectCommand(const QModelIndex&, QItemSelectionModel::SelectionFlags); | ||
| 48 | 69 | ||
| 49 | private: | 70 | private: |
| 71 | QLabel* instruction_description; | ||
| 72 | QTreeView* binary_list; | ||
| 73 | GraphicsVertexShaderModel* model; | ||
| 74 | |||
| 75 | /// TODO: Move these into a single struct | ||
| 76 | std::array<QLineEdit*, 4*16> input_data; // A text box for each of the 4 components of up to 16 vertex attributes | ||
| 77 | std::array<QWidget*, 16> input_data_container; // QWidget containing the QLayout containing each vertex attribute | ||
| 78 | std::array<QLabel*, 16> input_data_mapping; // A QLabel denoting the shader input attribute which the vertex attribute maps to | ||
| 79 | |||
| 80 | // Text to be shown when input vertex data is not retrievable | ||
| 81 | QLabel* breakpoint_warning; | ||
| 82 | |||
| 83 | QSpinBox* cycle_index; | ||
| 84 | |||
| 85 | nihstro::ShaderInfo info; | ||
| 86 | Pica::Shader::DebugData<true> debug_data; | ||
| 87 | Pica::Shader::InputVertex input_vertex; | ||
| 50 | 88 | ||
| 89 | friend class GraphicsVertexShaderModel; | ||
| 51 | }; | 90 | }; |
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp index f5b349a77..062291006 100644 --- a/src/core/loader/loader.cpp +++ b/src/core/loader/loader.cpp | |||
| @@ -77,6 +77,8 @@ static const char* GetFileTypeString(FileType type) { | |||
| 77 | return "NCSD"; | 77 | return "NCSD"; |
| 78 | case FileType::CXI: | 78 | case FileType::CXI: |
| 79 | return "NCCH"; | 79 | return "NCCH"; |
| 80 | case FileType::CIA: | ||
| 81 | return "CIA"; | ||
| 80 | case FileType::ELF: | 82 | case FileType::ELF: |
| 81 | return "ELF"; | 83 | return "ELF"; |
| 82 | case FileType::THREEDSX: | 84 | case FileType::THREEDSX: |
| @@ -134,6 +136,10 @@ ResultStatus LoadFile(const std::string& filename) { | |||
| 134 | break; | 136 | break; |
| 135 | } | 137 | } |
| 136 | 138 | ||
| 139 | // CIA file format... | ||
| 140 | case FileType::CIA: | ||
| 141 | return ResultStatus::ErrorNotImplemented; | ||
| 142 | |||
| 137 | // Error occurred during IdentifyFile... | 143 | // Error occurred during IdentifyFile... |
| 138 | case FileType::Error: | 144 | case FileType::Error: |
| 139 | 145 | ||
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 374c4748d..d82e20f86 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -35,7 +35,15 @@ static u32 default_attr_write_buffer[3]; | |||
| 35 | 35 | ||
| 36 | Common::Profiling::TimingCategory category_drawing("Drawing"); | 36 | Common::Profiling::TimingCategory category_drawing("Drawing"); |
| 37 | 37 | ||
| 38 | static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | 38 | // Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF |
| 39 | static const u32 expand_bits_to_bytes[] = { | ||
| 40 | 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, | ||
| 41 | 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff, | ||
| 42 | 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, | ||
| 43 | 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff | ||
| 44 | }; | ||
| 45 | |||
| 46 | static void WritePicaReg(u32 id, u32 value, u32 mask) { | ||
| 39 | auto& regs = g_state.regs; | 47 | auto& regs = g_state.regs; |
| 40 | 48 | ||
| 41 | if (id >= regs.NumIds()) | 49 | if (id >= regs.NumIds()) |
| @@ -47,13 +55,16 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 47 | 55 | ||
| 48 | // TODO: Figure out how register masking acts on e.g. vs.uniform_setup.set_value | 56 | // TODO: Figure out how register masking acts on e.g. vs.uniform_setup.set_value |
| 49 | u32 old_value = regs[id]; | 57 | u32 old_value = regs[id]; |
| 50 | regs[id] = (old_value & ~mask) | (value & mask); | 58 | |
| 59 | const u32 write_mask = expand_bits_to_bytes[mask]; | ||
| 60 | |||
| 61 | regs[id] = (old_value & ~write_mask) | (value & write_mask); | ||
| 62 | |||
| 63 | DebugUtils::OnPicaRegWrite({ (u16)id, (u16)mask, regs[id] }); | ||
| 51 | 64 | ||
| 52 | if (g_debug_context) | 65 | if (g_debug_context) |
| 53 | g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded, reinterpret_cast<void*>(&id)); | 66 | g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded, reinterpret_cast<void*>(&id)); |
| 54 | 67 | ||
| 55 | DebugUtils::OnPicaRegWrite(id, regs[id]); | ||
| 56 | |||
| 57 | switch(id) { | 68 | switch(id) { |
| 58 | // Trigger IRQ | 69 | // Trigger IRQ |
| 59 | case PICA_REG_INDEX(trigger_irq): | 70 | case PICA_REG_INDEX(trigger_irq): |
| @@ -215,7 +226,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 215 | unsigned int vertex_cache_pos = 0; | 226 | unsigned int vertex_cache_pos = 0; |
| 216 | vertex_cache_ids.fill(-1); | 227 | vertex_cache_ids.fill(-1); |
| 217 | 228 | ||
| 218 | Shader::UnitState shader_unit; | 229 | Shader::UnitState<false> shader_unit; |
| 219 | Shader::Setup(shader_unit); | 230 | Shader::Setup(shader_unit); |
| 220 | 231 | ||
| 221 | for (unsigned int index = 0; index < regs.num_vertices; ++index) | 232 | for (unsigned int index = 0; index < regs.num_vertices; ++index) |
| @@ -469,13 +480,6 @@ void ProcessCommandList(const u32* list, u32 size) { | |||
| 469 | g_state.cmd_list.length = size / sizeof(u32); | 480 | g_state.cmd_list.length = size / sizeof(u32); |
| 470 | 481 | ||
| 471 | while (g_state.cmd_list.current_ptr < g_state.cmd_list.head_ptr + g_state.cmd_list.length) { | 482 | while (g_state.cmd_list.current_ptr < g_state.cmd_list.head_ptr + g_state.cmd_list.length) { |
| 472 | // Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF | ||
| 473 | static const u32 expand_bits_to_bytes[] = { | ||
| 474 | 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, | ||
| 475 | 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff, | ||
| 476 | 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, | ||
| 477 | 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff | ||
| 478 | }; | ||
| 479 | 483 | ||
| 480 | // Align read pointer to 8 bytes | 484 | // Align read pointer to 8 bytes |
| 481 | if ((g_state.cmd_list.head_ptr - g_state.cmd_list.current_ptr) % 2 != 0) | 485 | if ((g_state.cmd_list.head_ptr - g_state.cmd_list.current_ptr) % 2 != 0) |
| @@ -483,14 +487,13 @@ void ProcessCommandList(const u32* list, u32 size) { | |||
| 483 | 487 | ||
| 484 | u32 value = *g_state.cmd_list.current_ptr++; | 488 | u32 value = *g_state.cmd_list.current_ptr++; |
| 485 | const CommandHeader header = { *g_state.cmd_list.current_ptr++ }; | 489 | const CommandHeader header = { *g_state.cmd_list.current_ptr++ }; |
| 486 | const u32 write_mask = expand_bits_to_bytes[header.parameter_mask]; | ||
| 487 | u32 cmd = header.cmd_id; | 490 | u32 cmd = header.cmd_id; |
| 488 | 491 | ||
| 489 | WritePicaReg(cmd, value, write_mask); | 492 | WritePicaReg(cmd, value, header.parameter_mask); |
| 490 | 493 | ||
| 491 | for (unsigned i = 0; i < header.extra_data_length; ++i) { | 494 | for (unsigned i = 0; i < header.extra_data_length; ++i) { |
| 492 | u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0); | 495 | u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0); |
| 493 | WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, write_mask); | 496 | WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, header.parameter_mask); |
| 494 | } | 497 | } |
| 495 | } | 498 | } |
| 496 | } | 499 | } |
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 6d6b65286..8ad77f0c8 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp | |||
| @@ -4,9 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <condition_variable> | 6 | #include <condition_variable> |
| 7 | #include <cstring> | ||
| 8 | #include <fstream> | ||
| 7 | #include <list> | 9 | #include <list> |
| 8 | #include <map> | 10 | #include <map> |
| 9 | #include <fstream> | ||
| 10 | #include <mutex> | 11 | #include <mutex> |
| 11 | #include <string> | 12 | #include <string> |
| 12 | 13 | ||
| @@ -14,6 +15,7 @@ | |||
| 14 | #include <png.h> | 15 | #include <png.h> |
| 15 | #endif | 16 | #endif |
| 16 | 17 | ||
| 18 | #include <nihstro/float24.h> | ||
| 17 | #include <nihstro/shader_binary.h> | 19 | #include <nihstro/shader_binary.h> |
| 18 | 20 | ||
| 19 | #include "common/assert.h" | 21 | #include "common/assert.h" |
| @@ -63,7 +65,7 @@ void DebugContext::OnEvent(Event event, void* data) { | |||
| 63 | 65 | ||
| 64 | void DebugContext::Resume() { | 66 | void DebugContext::Resume() { |
| 65 | { | 67 | { |
| 66 | std::unique_lock<std::mutex> lock(breakpoint_mutex); | 68 | std::lock_guard<std::mutex> lock(breakpoint_mutex); |
| 67 | 69 | ||
| 68 | // Tell all observers that we are about to resume | 70 | // Tell all observers that we are about to resume |
| 69 | for (auto& breakpoint_observer : breakpoint_observers) { | 71 | for (auto& breakpoint_observer : breakpoint_observers) { |
| @@ -110,8 +112,7 @@ void GeometryDumper::Dump() { | |||
| 110 | } | 112 | } |
| 111 | 113 | ||
| 112 | 114 | ||
| 113 | void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, | 115 | void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes) |
| 114 | u32 main_offset, const Regs::VSOutputAttributes* output_attributes) | ||
| 115 | { | 116 | { |
| 116 | struct StuffToWrite { | 117 | struct StuffToWrite { |
| 117 | u8* pointer; | 118 | u8* pointer; |
| @@ -131,11 +132,14 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data | |||
| 131 | // into shbin format (separate type and component mask). | 132 | // into shbin format (separate type and component mask). |
| 132 | union OutputRegisterInfo { | 133 | union OutputRegisterInfo { |
| 133 | enum Type : u64 { | 134 | enum Type : u64 { |
| 134 | POSITION = 0, | 135 | POSITION = 0, |
| 135 | COLOR = 2, | 136 | QUATERNION = 1, |
| 136 | TEXCOORD0 = 3, | 137 | COLOR = 2, |
| 137 | TEXCOORD1 = 5, | 138 | TEXCOORD0 = 3, |
| 138 | TEXCOORD2 = 6, | 139 | TEXCOORD1 = 5, |
| 140 | TEXCOORD2 = 6, | ||
| 141 | |||
| 142 | VIEW = 8, | ||
| 139 | }; | 143 | }; |
| 140 | 144 | ||
| 141 | BitField< 0, 64, u64> hex; | 145 | BitField< 0, 64, u64> hex; |
| @@ -157,6 +161,10 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data | |||
| 157 | { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} }, | 161 | { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} }, |
| 158 | { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} }, | 162 | { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} }, |
| 159 | { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} }, | 163 | { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} }, |
| 164 | { OutputAttributes::QUATERNION_X, { OutputRegisterInfo::QUATERNION, 1} }, | ||
| 165 | { OutputAttributes::QUATERNION_Y, { OutputRegisterInfo::QUATERNION, 2} }, | ||
| 166 | { OutputAttributes::QUATERNION_Z, { OutputRegisterInfo::QUATERNION, 4} }, | ||
| 167 | { OutputAttributes::QUATERNION_W, { OutputRegisterInfo::QUATERNION, 8} }, | ||
| 160 | { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} }, | 168 | { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} }, |
| 161 | { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} }, | 169 | { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} }, |
| 162 | { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} }, | 170 | { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} }, |
| @@ -166,7 +174,10 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data | |||
| 166 | { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} }, | 174 | { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} }, |
| 167 | { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} }, | 175 | { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} }, |
| 168 | { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} }, | 176 | { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} }, |
| 169 | { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} } | 177 | { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} }, |
| 178 | { OutputAttributes::VIEW_X, { OutputRegisterInfo::VIEW, 1} }, | ||
| 179 | { OutputAttributes::VIEW_Y, { OutputRegisterInfo::VIEW, 2} }, | ||
| 180 | { OutputAttributes::VIEW_Z, { OutputRegisterInfo::VIEW, 4} } | ||
| 170 | }; | 181 | }; |
| 171 | 182 | ||
| 172 | for (const auto& semantic : std::vector<OutputAttributes::Semantic>{ | 183 | for (const auto& semantic : std::vector<OutputAttributes::Semantic>{ |
| @@ -221,28 +232,69 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data | |||
| 221 | 232 | ||
| 222 | // TODO: Reduce the amount of binary code written to relevant portions | 233 | // TODO: Reduce the amount of binary code written to relevant portions |
| 223 | dvlp.binary_offset = write_offset - dvlp_offset; | 234 | dvlp.binary_offset = write_offset - dvlp_offset; |
| 224 | dvlp.binary_size_words = binary_size; | 235 | dvlp.binary_size_words = setup.program_code.size(); |
| 225 | QueueForWriting((u8*)binary_data, binary_size * sizeof(u32)); | 236 | QueueForWriting((u8*)setup.program_code.data(), setup.program_code.size() * sizeof(u32)); |
| 226 | 237 | ||
| 227 | dvlp.swizzle_info_offset = write_offset - dvlp_offset; | 238 | dvlp.swizzle_info_offset = write_offset - dvlp_offset; |
| 228 | dvlp.swizzle_info_num_entries = swizzle_size; | 239 | dvlp.swizzle_info_num_entries = setup.swizzle_data.size(); |
| 229 | u32 dummy = 0; | 240 | u32 dummy = 0; |
| 230 | for (unsigned int i = 0; i < swizzle_size; ++i) { | 241 | for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) { |
| 231 | QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i])); | 242 | QueueForWriting((u8*)&setup.swizzle_data[i], sizeof(setup.swizzle_data[i])); |
| 232 | QueueForWriting((u8*)&dummy, sizeof(dummy)); | 243 | QueueForWriting((u8*)&dummy, sizeof(dummy)); |
| 233 | } | 244 | } |
| 234 | 245 | ||
| 235 | dvle.main_offset_words = main_offset; | 246 | dvle.main_offset_words = config.main_offset; |
| 236 | dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; | 247 | dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; |
| 237 | dvle.output_register_table_size = static_cast<u32>(output_info_table.size()); | 248 | dvle.output_register_table_size = static_cast<u32>(output_info_table.size()); |
| 238 | QueueForWriting((u8*)output_info_table.data(), static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo))); | 249 | QueueForWriting((u8*)output_info_table.data(), static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo))); |
| 239 | 250 | ||
| 240 | // TODO: Create a label table for "main" | 251 | // TODO: Create a label table for "main" |
| 241 | 252 | ||
| 253 | std::vector<nihstro::ConstantInfo> constant_table; | ||
| 254 | for (unsigned i = 0; i < setup.uniforms.b.size(); ++i) { | ||
| 255 | nihstro::ConstantInfo constant; | ||
| 256 | memset(&constant, 0, sizeof(constant)); | ||
| 257 | constant.type = nihstro::ConstantInfo::Bool; | ||
| 258 | constant.regid = i; | ||
| 259 | constant.b = setup.uniforms.b[i]; | ||
| 260 | constant_table.emplace_back(constant); | ||
| 261 | } | ||
| 262 | for (unsigned i = 0; i < setup.uniforms.i.size(); ++i) { | ||
| 263 | nihstro::ConstantInfo constant; | ||
| 264 | memset(&constant, 0, sizeof(constant)); | ||
| 265 | constant.type = nihstro::ConstantInfo::Int; | ||
| 266 | constant.regid = i; | ||
| 267 | constant.i.x = setup.uniforms.i[i].x; | ||
| 268 | constant.i.y = setup.uniforms.i[i].y; | ||
| 269 | constant.i.z = setup.uniforms.i[i].z; | ||
| 270 | constant.i.w = setup.uniforms.i[i].w; | ||
| 271 | constant_table.emplace_back(constant); | ||
| 272 | } | ||
| 273 | for (unsigned i = 0; i < sizeof(setup.uniforms.f) / sizeof(setup.uniforms.f[0]); ++i) { | ||
| 274 | nihstro::ConstantInfo constant; | ||
| 275 | memset(&constant, 0, sizeof(constant)); | ||
| 276 | constant.type = nihstro::ConstantInfo::Float; | ||
| 277 | constant.regid = i; | ||
| 278 | constant.f.x = nihstro::to_float24(setup.uniforms.f[i].x.ToFloat32()); | ||
| 279 | constant.f.y = nihstro::to_float24(setup.uniforms.f[i].y.ToFloat32()); | ||
| 280 | constant.f.z = nihstro::to_float24(setup.uniforms.f[i].z.ToFloat32()); | ||
| 281 | constant.f.w = nihstro::to_float24(setup.uniforms.f[i].w.ToFloat32()); | ||
| 282 | |||
| 283 | // Store constant if it's different from zero.. | ||
| 284 | if (setup.uniforms.f[i].x.ToFloat32() != 0.0 || | ||
| 285 | setup.uniforms.f[i].y.ToFloat32() != 0.0 || | ||
| 286 | setup.uniforms.f[i].z.ToFloat32() != 0.0 || | ||
| 287 | setup.uniforms.f[i].w.ToFloat32() != 0.0) | ||
| 288 | constant_table.emplace_back(constant); | ||
| 289 | } | ||
| 290 | dvle.constant_table_offset = write_offset - dvlb.dvle_offset; | ||
| 291 | dvle.constant_table_size = constant_table.size(); | ||
| 292 | for (const auto& constant : constant_table) { | ||
| 293 | QueueForWriting((uint8_t*)&constant, sizeof(constant)); | ||
| 294 | } | ||
| 242 | 295 | ||
| 243 | // Write data to file | 296 | // Write data to file |
| 244 | static int dump_index = 0; | 297 | static int dump_index = 0; |
| 245 | std::string filename = std::string("shader_dump") + std::to_string(++dump_index) + std::string(".shbin"); | ||
| 246 | std::ofstream file(filename, std::ios_base::out | std::ios_base::binary); | 298 | std::ofstream file(filename, std::ios_base::out | std::ios_base::binary); |
| 247 | 299 | ||
| 248 | for (auto& chunk : writing_queue) { | 300 | for (auto& chunk : writing_queue) { |
| @@ -261,11 +313,10 @@ void StartPicaTracing() | |||
| 261 | return; | 313 | return; |
| 262 | } | 314 | } |
| 263 | 315 | ||
| 264 | pica_trace_mutex.lock(); | 316 | std::lock_guard<std::mutex> lock(pica_trace_mutex); |
| 265 | pica_trace = std::unique_ptr<PicaTrace>(new PicaTrace); | 317 | pica_trace = std::unique_ptr<PicaTrace>(new PicaTrace); |
| 266 | 318 | ||
| 267 | is_pica_tracing = true; | 319 | is_pica_tracing = true; |
| 268 | pica_trace_mutex.unlock(); | ||
| 269 | } | 320 | } |
| 270 | 321 | ||
| 271 | bool IsPicaTracing() | 322 | bool IsPicaTracing() |
| @@ -273,18 +324,18 @@ bool IsPicaTracing() | |||
| 273 | return is_pica_tracing != 0; | 324 | return is_pica_tracing != 0; |
| 274 | } | 325 | } |
| 275 | 326 | ||
| 276 | void OnPicaRegWrite(u32 id, u32 value) | 327 | void OnPicaRegWrite(PicaTrace::Write write) |
| 277 | { | 328 | { |
| 278 | // Double check for is_pica_tracing to avoid pointless locking overhead | 329 | // Double check for is_pica_tracing to avoid pointless locking overhead |
| 279 | if (!is_pica_tracing) | 330 | if (!is_pica_tracing) |
| 280 | return; | 331 | return; |
| 281 | 332 | ||
| 282 | std::unique_lock<std::mutex> lock(pica_trace_mutex); | 333 | std::lock_guard<std::mutex> lock(pica_trace_mutex); |
| 283 | 334 | ||
| 284 | if (!is_pica_tracing) | 335 | if (!is_pica_tracing) |
| 285 | return; | 336 | return; |
| 286 | 337 | ||
| 287 | pica_trace->writes.emplace_back(id, value); | 338 | pica_trace->writes.push_back(write); |
| 288 | } | 339 | } |
| 289 | 340 | ||
| 290 | std::unique_ptr<PicaTrace> FinishPicaTracing() | 341 | std::unique_ptr<PicaTrace> FinishPicaTracing() |
| @@ -298,9 +349,9 @@ std::unique_ptr<PicaTrace> FinishPicaTracing() | |||
| 298 | is_pica_tracing = false; | 349 | is_pica_tracing = false; |
| 299 | 350 | ||
| 300 | // Wait until running tracing is finished | 351 | // Wait until running tracing is finished |
| 301 | pica_trace_mutex.lock(); | 352 | std::lock_guard<std::mutex> lock(pica_trace_mutex); |
| 302 | std::unique_ptr<PicaTrace> ret(std::move(pica_trace)); | 353 | std::unique_ptr<PicaTrace> ret(std::move(pica_trace)); |
| 303 | pica_trace_mutex.unlock(); | 354 | |
| 304 | return std::move(ret); | 355 | return std::move(ret); |
| 305 | } | 356 | } |
| 306 | 357 | ||
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 81eea30a9..85762f5b4 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h | |||
| @@ -158,7 +158,6 @@ extern std::shared_ptr<DebugContext> g_debug_context; // TODO: Get rid of this g | |||
| 158 | namespace DebugUtils { | 158 | namespace DebugUtils { |
| 159 | 159 | ||
| 160 | #define PICA_DUMP_GEOMETRY 0 | 160 | #define PICA_DUMP_GEOMETRY 0 |
| 161 | #define PICA_DUMP_SHADERS 0 | ||
| 162 | #define PICA_DUMP_TEXTURES 0 | 161 | #define PICA_DUMP_TEXTURES 0 |
| 163 | #define PICA_LOG_TEV 0 | 162 | #define PICA_LOG_TEV 0 |
| 164 | 163 | ||
| @@ -182,27 +181,23 @@ private: | |||
| 182 | std::vector<Face> faces; | 181 | std::vector<Face> faces; |
| 183 | }; | 182 | }; |
| 184 | 183 | ||
| 185 | void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, | 184 | void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, |
| 186 | u32 main_offset, const Regs::VSOutputAttributes* output_attributes); | 185 | const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes); |
| 187 | 186 | ||
| 188 | 187 | ||
| 189 | // Utility class to log Pica commands. | 188 | // Utility class to log Pica commands. |
| 190 | struct PicaTrace { | 189 | struct PicaTrace { |
| 191 | struct Write : public std::pair<u32,u32> { | 190 | struct Write { |
| 192 | Write(u32 id, u32 value) : std::pair<u32,u32>(id, value) {} | 191 | u16 cmd_id; |
| 193 | 192 | u16 mask; | |
| 194 | u32& Id() { return first; } | 193 | u32 value; |
| 195 | const u32& Id() const { return first; } | ||
| 196 | |||
| 197 | u32& Value() { return second; } | ||
| 198 | const u32& Value() const { return second; } | ||
| 199 | }; | 194 | }; |
| 200 | std::vector<Write> writes; | 195 | std::vector<Write> writes; |
| 201 | }; | 196 | }; |
| 202 | 197 | ||
| 203 | void StartPicaTracing(); | 198 | void StartPicaTracing(); |
| 204 | bool IsPicaTracing(); | 199 | bool IsPicaTracing(); |
| 205 | void OnPicaRegWrite(u32 id, u32 value); | 200 | void OnPicaRegWrite(PicaTrace::Write write); |
| 206 | std::unique_ptr<PicaTrace> FinishPicaTracing(); | 201 | std::unique_ptr<PicaTrace> FinishPicaTracing(); |
| 207 | 202 | ||
| 208 | struct TextureInfo { | 203 | struct TextureInfo { |
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index a5ec5ee9f..58b924f9e 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h | |||
| @@ -80,6 +80,11 @@ struct Regs { | |||
| 80 | POSITION_Z = 2, | 80 | POSITION_Z = 2, |
| 81 | POSITION_W = 3, | 81 | POSITION_W = 3, |
| 82 | 82 | ||
| 83 | QUATERNION_X = 4, | ||
| 84 | QUATERNION_Y = 5, | ||
| 85 | QUATERNION_Z = 6, | ||
| 86 | QUATERNION_W = 7, | ||
| 87 | |||
| 83 | COLOR_R = 8, | 88 | COLOR_R = 8, |
| 84 | COLOR_G = 9, | 89 | COLOR_G = 9, |
| 85 | COLOR_B = 10, | 90 | COLOR_B = 10, |
| @@ -89,6 +94,12 @@ struct Regs { | |||
| 89 | TEXCOORD0_V = 13, | 94 | TEXCOORD0_V = 13, |
| 90 | TEXCOORD1_U = 14, | 95 | TEXCOORD1_U = 14, |
| 91 | TEXCOORD1_V = 15, | 96 | TEXCOORD1_V = 15, |
| 97 | |||
| 98 | // TODO: Not verified | ||
| 99 | VIEW_X = 18, | ||
| 100 | VIEW_Y = 19, | ||
| 101 | VIEW_Z = 20, | ||
| 102 | |||
| 92 | TEXCOORD2_U = 22, | 103 | TEXCOORD2_U = 22, |
| 93 | TEXCOORD2_V = 23, | 104 | TEXCOORD2_V = 23, |
| 94 | 105 | ||
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 6a27a8015..4e9836c80 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -5,6 +5,8 @@ | |||
| 5 | #include <memory> | 5 | #include <memory> |
| 6 | #include <unordered_map> | 6 | #include <unordered_map> |
| 7 | 7 | ||
| 8 | #include <boost/range/algorithm/fill.hpp> | ||
| 9 | |||
| 8 | #include "common/hash.h" | 10 | #include "common/hash.h" |
| 9 | #include "common/make_unique.h" | 11 | #include "common/make_unique.h" |
| 10 | #include "common/profiler.h" | 12 | #include "common/profiler.h" |
| @@ -30,7 +32,7 @@ static JitCompiler jit; | |||
| 30 | static CompiledShader* jit_shader; | 32 | static CompiledShader* jit_shader; |
| 31 | #endif // ARCHITECTURE_x86_64 | 33 | #endif // ARCHITECTURE_x86_64 |
| 32 | 34 | ||
| 33 | void Setup(UnitState& state) { | 35 | void Setup(UnitState<false>& state) { |
| 34 | #ifdef ARCHITECTURE_x86_64 | 36 | #ifdef ARCHITECTURE_x86_64 |
| 35 | if (VideoCore::g_shader_jit_enabled) { | 37 | if (VideoCore::g_shader_jit_enabled) { |
| 36 | u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ | 38 | u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ |
| @@ -54,9 +56,8 @@ void Shutdown() { | |||
| 54 | 56 | ||
| 55 | static Common::Profiling::TimingCategory shader_category("Vertex Shader"); | 57 | static Common::Profiling::TimingCategory shader_category("Vertex Shader"); |
| 56 | 58 | ||
| 57 | OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) { | 59 | OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { |
| 58 | auto& config = g_state.regs.vs; | 60 | auto& config = g_state.regs.vs; |
| 59 | auto& setup = g_state.vs; | ||
| 60 | 61 | ||
| 61 | Common::Profiling::ScopeTimer timer(shader_category); | 62 | Common::Profiling::ScopeTimer timer(shader_category); |
| 62 | 63 | ||
| @@ -67,6 +68,8 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | |||
| 67 | // Setup input register table | 68 | // Setup input register table |
| 68 | const auto& attribute_register_map = config.input_register_map; | 69 | const auto& attribute_register_map = config.input_register_map; |
| 69 | 70 | ||
| 71 | // TODO: Instead of this cumbersome logic, just load the input data directly like | ||
| 72 | // for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; } | ||
| 70 | if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; | 73 | if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; |
| 71 | if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1]; | 74 | if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1]; |
| 72 | if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2]; | 75 | if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2]; |
| @@ -96,12 +99,6 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | |||
| 96 | RunInterpreter(state); | 99 | RunInterpreter(state); |
| 97 | #endif // ARCHITECTURE_x86_64 | 100 | #endif // ARCHITECTURE_x86_64 |
| 98 | 101 | ||
| 99 | #if PICA_DUMP_SHADERS | ||
| 100 | DebugUtils::DumpShader(setup.program_code.data(), state.debug.max_offset, setup.swizzle_data.data(), | ||
| 101 | state.debug.max_opdesc_id, config.main_offset, | ||
| 102 | g_state.regs.vs_output_attributes); // TODO: Don't hardcode VS here | ||
| 103 | #endif | ||
| 104 | |||
| 105 | // Setup output data | 102 | // Setup output data |
| 106 | OutputVertex ret; | 103 | OutputVertex ret; |
| 107 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to | 104 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to |
| @@ -132,14 +129,52 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | |||
| 132 | std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); | 129 | std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); |
| 133 | } | 130 | } |
| 134 | 131 | ||
| 135 | LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", | 132 | LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", |
| 136 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | 133 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), |
| 134 | ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), | ||
| 137 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | 135 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), |
| 138 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); | 136 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); |
| 139 | 137 | ||
| 140 | return ret; | 138 | return ret; |
| 141 | } | 139 | } |
| 142 | 140 | ||
| 141 | DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) { | ||
| 142 | UnitState<true> state; | ||
| 143 | |||
| 144 | const auto& shader_memory = setup.program_code; | ||
| 145 | state.program_counter = config.main_offset; | ||
| 146 | state.debug.max_offset = 0; | ||
| 147 | state.debug.max_opdesc_id = 0; | ||
| 148 | |||
| 149 | // Setup input register table | ||
| 150 | const auto& attribute_register_map = config.input_register_map; | ||
| 151 | float24 dummy_register; | ||
| 152 | boost::fill(state.registers.input, &dummy_register); | ||
| 153 | |||
| 154 | if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = &input.attr[0].x; | ||
| 155 | if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = &input.attr[1].x; | ||
| 156 | if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = &input.attr[2].x; | ||
| 157 | if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = &input.attr[3].x; | ||
| 158 | if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = &input.attr[4].x; | ||
| 159 | if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = &input.attr[5].x; | ||
| 160 | if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = &input.attr[6].x; | ||
| 161 | if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = &input.attr[7].x; | ||
| 162 | if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = &input.attr[8].x; | ||
| 163 | if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = &input.attr[9].x; | ||
| 164 | if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = &input.attr[10].x; | ||
| 165 | if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = &input.attr[11].x; | ||
| 166 | if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = &input.attr[12].x; | ||
| 167 | if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = &input.attr[13].x; | ||
| 168 | if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = &input.attr[14].x; | ||
| 169 | if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = &input.attr[15].x; | ||
| 170 | |||
| 171 | state.conditional_code[0] = false; | ||
| 172 | state.conditional_code[1] = false; | ||
| 173 | |||
| 174 | RunInterpreter(state); | ||
| 175 | return state.debug; | ||
| 176 | } | ||
| 177 | |||
| 143 | } // namespace Shader | 178 | } // namespace Shader |
| 144 | 179 | ||
| 145 | } // namespace Pica | 180 | } // namespace Pica |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 2007a2844..bac51ddd8 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -4,7 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <vector> | ||
| 8 | |||
| 7 | #include <boost/container/static_vector.hpp> | 9 | #include <boost/container/static_vector.hpp> |
| 10 | |||
| 8 | #include <nihstro/shader_binary.h> | 11 | #include <nihstro/shader_binary.h> |
| 9 | 12 | ||
| 10 | #include "common/common_funcs.h" | 13 | #include "common/common_funcs.h" |
| @@ -30,7 +33,7 @@ struct OutputVertex { | |||
| 30 | 33 | ||
| 31 | // VS output attributes | 34 | // VS output attributes |
| 32 | Math::Vec4<float24> pos; | 35 | Math::Vec4<float24> pos; |
| 33 | Math::Vec4<float24> dummy; // quaternions (not implemented, yet) | 36 | Math::Vec4<float24> quat; |
| 34 | Math::Vec4<float24> color; | 37 | Math::Vec4<float24> color; |
| 35 | Math::Vec2<float24> tc0; | 38 | Math::Vec2<float24> tc0; |
| 36 | Math::Vec2<float24> tc1; | 39 | Math::Vec2<float24> tc1; |
| @@ -72,12 +75,185 @@ struct OutputVertex { | |||
| 72 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | 75 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
| 73 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | 76 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); |
| 74 | 77 | ||
| 78 | |||
| 79 | // Helper structure used to keep track of data useful for inspection of shader emulation | ||
| 80 | template<bool full_debugging> | ||
| 81 | struct DebugData; | ||
| 82 | |||
| 83 | template<> | ||
| 84 | struct DebugData<false> { | ||
| 85 | // TODO: Hide these behind an interface and move them to DebugData<true> | ||
| 86 | u32 max_offset; // maximum program counter ever reached | ||
| 87 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||
| 88 | }; | ||
| 89 | |||
| 90 | template<> | ||
| 91 | struct DebugData<true> { | ||
| 92 | // Records store the input and output operands of a particular instruction. | ||
| 93 | struct Record { | ||
| 94 | enum Type { | ||
| 95 | // Floating point arithmetic operands | ||
| 96 | SRC1 = 0x1, | ||
| 97 | SRC2 = 0x2, | ||
| 98 | SRC3 = 0x4, | ||
| 99 | |||
| 100 | // Initial and final output operand value | ||
| 101 | DEST_IN = 0x8, | ||
| 102 | DEST_OUT = 0x10, | ||
| 103 | |||
| 104 | // Current and next instruction offset (in words) | ||
| 105 | CUR_INSTR = 0x20, | ||
| 106 | NEXT_INSTR = 0x40, | ||
| 107 | |||
| 108 | // Output address register value | ||
| 109 | ADDR_REG_OUT = 0x80, | ||
| 110 | |||
| 111 | // Result of a comparison instruction | ||
| 112 | CMP_RESULT = 0x100, | ||
| 113 | |||
| 114 | // Input values for conditional flow control instructions | ||
| 115 | COND_BOOL_IN = 0x200, | ||
| 116 | COND_CMP_IN = 0x400, | ||
| 117 | |||
| 118 | // Input values for a loop | ||
| 119 | LOOP_INT_IN = 0x800, | ||
| 120 | }; | ||
| 121 | |||
| 122 | Math::Vec4<float24> src1; | ||
| 123 | Math::Vec4<float24> src2; | ||
| 124 | Math::Vec4<float24> src3; | ||
| 125 | |||
| 126 | Math::Vec4<float24> dest_in; | ||
| 127 | Math::Vec4<float24> dest_out; | ||
| 128 | |||
| 129 | s32 address_registers[2]; | ||
| 130 | bool conditional_code[2]; | ||
| 131 | bool cond_bool; | ||
| 132 | bool cond_cmp[2]; | ||
| 133 | Math::Vec4<u8> loop_int; | ||
| 134 | |||
| 135 | u32 instruction_offset; | ||
| 136 | u32 next_instruction; | ||
| 137 | |||
| 138 | // set of enabled fields (as a combination of Type flags) | ||
| 139 | unsigned mask = 0; | ||
| 140 | }; | ||
| 141 | |||
| 142 | u32 max_offset; // maximum program counter ever reached | ||
| 143 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||
| 144 | |||
| 145 | // List of records for each executed shader instruction | ||
| 146 | std::vector<DebugData<true>::Record> records; | ||
| 147 | }; | ||
| 148 | |||
| 149 | // Type alias for better readability | ||
| 150 | using DebugDataRecord = DebugData<true>::Record; | ||
| 151 | |||
| 152 | // Helper function to set a DebugData<true>::Record field based on the template enum parameter. | ||
| 153 | template<DebugDataRecord::Type type, typename ValueType> | ||
| 154 | inline void SetField(DebugDataRecord& record, ValueType value); | ||
| 155 | |||
| 156 | template<> | ||
| 157 | inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { | ||
| 158 | record.src1.x = value[0]; | ||
| 159 | record.src1.y = value[1]; | ||
| 160 | record.src1.z = value[2]; | ||
| 161 | record.src1.w = value[3]; | ||
| 162 | } | ||
| 163 | |||
| 164 | template<> | ||
| 165 | inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { | ||
| 166 | record.src2.x = value[0]; | ||
| 167 | record.src2.y = value[1]; | ||
| 168 | record.src2.z = value[2]; | ||
| 169 | record.src2.w = value[3]; | ||
| 170 | } | ||
| 171 | |||
| 172 | template<> | ||
| 173 | inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { | ||
| 174 | record.src3.x = value[0]; | ||
| 175 | record.src3.y = value[1]; | ||
| 176 | record.src3.z = value[2]; | ||
| 177 | record.src3.w = value[3]; | ||
| 178 | } | ||
| 179 | |||
| 180 | template<> | ||
| 181 | inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { | ||
| 182 | record.dest_in.x = value[0]; | ||
| 183 | record.dest_in.y = value[1]; | ||
| 184 | record.dest_in.z = value[2]; | ||
| 185 | record.dest_in.w = value[3]; | ||
| 186 | } | ||
| 187 | |||
| 188 | template<> | ||
| 189 | inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { | ||
| 190 | record.dest_out.x = value[0]; | ||
| 191 | record.dest_out.y = value[1]; | ||
| 192 | record.dest_out.z = value[2]; | ||
| 193 | record.dest_out.w = value[3]; | ||
| 194 | } | ||
| 195 | |||
| 196 | template<> | ||
| 197 | inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { | ||
| 198 | record.address_registers[0] = value[0]; | ||
| 199 | record.address_registers[1] = value[1]; | ||
| 200 | } | ||
| 201 | |||
| 202 | template<> | ||
| 203 | inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { | ||
| 204 | record.conditional_code[0] = value[0]; | ||
| 205 | record.conditional_code[1] = value[1]; | ||
| 206 | } | ||
| 207 | |||
| 208 | template<> | ||
| 209 | inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { | ||
| 210 | record.cond_bool = value; | ||
| 211 | } | ||
| 212 | |||
| 213 | template<> | ||
| 214 | inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { | ||
| 215 | record.cond_cmp[0] = value[0]; | ||
| 216 | record.cond_cmp[1] = value[1]; | ||
| 217 | } | ||
| 218 | |||
| 219 | template<> | ||
| 220 | inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { | ||
| 221 | record.loop_int = value; | ||
| 222 | } | ||
| 223 | |||
| 224 | template<> | ||
| 225 | inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { | ||
| 226 | record.instruction_offset = value; | ||
| 227 | } | ||
| 228 | |||
| 229 | template<> | ||
| 230 | inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { | ||
| 231 | record.next_instruction = value; | ||
| 232 | } | ||
| 233 | |||
| 234 | // Helper function to set debug information on the current shader iteration. | ||
| 235 | template<DebugDataRecord::Type type, typename ValueType> | ||
| 236 | inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { | ||
| 237 | // Debugging disabled => nothing to do | ||
| 238 | } | ||
| 239 | |||
| 240 | template<DebugDataRecord::Type type, typename ValueType> | ||
| 241 | inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { | ||
| 242 | if (offset >= debug_data.records.size()) | ||
| 243 | debug_data.records.resize(offset + 1); | ||
| 244 | |||
| 245 | SetField<type, ValueType>(debug_data.records[offset], value); | ||
| 246 | debug_data.records[offset].mask |= type; | ||
| 247 | } | ||
| 248 | |||
| 249 | |||
| 75 | /** | 250 | /** |
| 76 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS | 251 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS |
| 77 | * has four shader units that process shaders in parallel. At the present, Citra only implements a | 252 | * has four shader units that process shaders in parallel. At the present, Citra only implements a |
| 78 | * single shader unit that processes all shaders serially. Putting the state information in a struct | 253 | * single shader unit that processes all shaders serially. Putting the state information in a struct |
| 79 | * here will make it easier for us to parallelize the shader processing later. | 254 | * here will make it easier for us to parallelize the shader processing later. |
| 80 | */ | 255 | */ |
| 256 | template<bool Debug> | ||
| 81 | struct UnitState { | 257 | struct UnitState { |
| 82 | struct Registers { | 258 | struct Registers { |
| 83 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore | 259 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore |
| @@ -111,10 +287,7 @@ struct UnitState { | |||
| 111 | // TODO: Is there a maximal size for this? | 287 | // TODO: Is there a maximal size for this? |
| 112 | boost::container::static_vector<CallStackElement, 16> call_stack; | 288 | boost::container::static_vector<CallStackElement, 16> call_stack; |
| 113 | 289 | ||
| 114 | struct { | 290 | DebugData<Debug> debug; |
| 115 | u32 max_offset; // maximum program counter ever reached | ||
| 116 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||
| 117 | } debug; | ||
| 118 | 291 | ||
| 119 | static int InputOffset(const SourceRegister& reg) { | 292 | static int InputOffset(const SourceRegister& reg) { |
| 120 | switch (reg.GetRegisterType()) { | 293 | switch (reg.GetRegisterType()) { |
| @@ -150,7 +323,7 @@ struct UnitState { | |||
| 150 | * vertex, which would happen within the `Run` function). | 323 | * vertex, which would happen within the `Run` function). |
| 151 | * @param state Shader unit state, must be setup per shader and per shader unit | 324 | * @param state Shader unit state, must be setup per shader and per shader unit |
| 152 | */ | 325 | */ |
| 153 | void Setup(UnitState& state); | 326 | void Setup(UnitState<false>& state); |
| 154 | 327 | ||
| 155 | /// Performs any cleanup when the emulator is shut down | 328 | /// Performs any cleanup when the emulator is shut down |
| 156 | void Shutdown(); | 329 | void Shutdown(); |
| @@ -162,7 +335,17 @@ void Shutdown(); | |||
| 162 | * @param num_attributes The number of vertex shader attributes | 335 | * @param num_attributes The number of vertex shader attributes |
| 163 | * @return The output vertex, after having been processed by the vertex shader | 336 | * @return The output vertex, after having been processed by the vertex shader |
| 164 | */ | 337 | */ |
| 165 | OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes); | 338 | OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes); |
| 339 | |||
| 340 | /** | ||
| 341 | * Produce debug information based on the given shader and input vertex | ||
| 342 | * @param input Input vertex into the shader | ||
| 343 | * @param num_attributes The number of vertex shader attributes | ||
| 344 | * @param config Configuration object for the shader pipeline | ||
| 345 | * @param setup Setup object for the shader pipeline | ||
| 346 | * @return Debug information for this shader with regards to the given vertex | ||
| 347 | */ | ||
| 348 | DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup); | ||
| 166 | 349 | ||
| 167 | } // namespace Shader | 350 | } // namespace Shader |
| 168 | 351 | ||
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index c8489f920..e14de0768 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -21,7 +21,8 @@ namespace Pica { | |||
| 21 | 21 | ||
| 22 | namespace Shader { | 22 | namespace Shader { |
| 23 | 23 | ||
| 24 | void RunInterpreter(UnitState& state) { | 24 | template<bool Debug> |
| 25 | void RunInterpreter(UnitState<Debug>& state) { | ||
| 25 | const auto& uniforms = g_state.vs.uniforms; | 26 | const auto& uniforms = g_state.vs.uniforms; |
| 26 | const auto& swizzle_data = g_state.vs.swizzle_data; | 27 | const auto& swizzle_data = g_state.vs.swizzle_data; |
| 27 | const auto& program_code = g_state.vs.program_code; | 28 | const auto& program_code = g_state.vs.program_code; |
| @@ -29,7 +30,9 @@ void RunInterpreter(UnitState& state) { | |||
| 29 | // Placeholder for invalid inputs | 30 | // Placeholder for invalid inputs |
| 30 | static float24 dummy_vec4_float24[4]; | 31 | static float24 dummy_vec4_float24[4]; |
| 31 | 32 | ||
| 32 | while (true) { | 33 | unsigned iteration = 0; |
| 34 | bool exit_loop = false; | ||
| 35 | while (!exit_loop) { | ||
| 33 | if (!state.call_stack.empty()) { | 36 | if (!state.call_stack.empty()) { |
| 34 | auto& top = state.call_stack.back(); | 37 | auto& top = state.call_stack.back(); |
| 35 | if (state.program_counter == top.final_address) { | 38 | if (state.program_counter == top.final_address) { |
| @@ -47,16 +50,19 @@ void RunInterpreter(UnitState& state) { | |||
| 47 | } | 50 | } |
| 48 | } | 51 | } |
| 49 | 52 | ||
| 50 | bool exit_loop = false; | ||
| 51 | const Instruction instr = { program_code[state.program_counter] }; | 53 | const Instruction instr = { program_code[state.program_counter] }; |
| 52 | const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; | 54 | const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; |
| 53 | 55 | ||
| 54 | static auto call = [](UnitState& state, u32 offset, u32 num_instructions, | 56 | static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions, |
| 55 | u32 return_offset, u8 repeat_count, u8 loop_increment) { | 57 | u32 return_offset, u8 repeat_count, u8 loop_increment) { |
| 56 | state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset | 58 | state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset |
| 57 | ASSERT(state.call_stack.size() < state.call_stack.capacity()); | 59 | ASSERT(state.call_stack.size() < state.call_stack.capacity()); |
| 58 | state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); | 60 | state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); |
| 59 | }; | 61 | }; |
| 62 | Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter); | ||
| 63 | if (iteration > 0) | ||
| 64 | Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter); | ||
| 65 | |||
| 60 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); | 66 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); |
| 61 | 67 | ||
| 62 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { | 68 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { |
| @@ -123,58 +129,78 @@ void RunInterpreter(UnitState& state) { | |||
| 123 | switch (instr.opcode.Value().EffectiveOpCode()) { | 129 | switch (instr.opcode.Value().EffectiveOpCode()) { |
| 124 | case OpCode::Id::ADD: | 130 | case OpCode::Id::ADD: |
| 125 | { | 131 | { |
| 132 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 133 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 134 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 126 | for (int i = 0; i < 4; ++i) { | 135 | for (int i = 0; i < 4; ++i) { |
| 127 | if (!swizzle.DestComponentEnabled(i)) | 136 | if (!swizzle.DestComponentEnabled(i)) |
| 128 | continue; | 137 | continue; |
| 129 | 138 | ||
| 130 | dest[i] = src1[i] + src2[i]; | 139 | dest[i] = src1[i] + src2[i]; |
| 131 | } | 140 | } |
| 132 | 141 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | |
| 133 | break; | 142 | break; |
| 134 | } | 143 | } |
| 135 | 144 | ||
| 136 | case OpCode::Id::MUL: | 145 | case OpCode::Id::MUL: |
| 137 | { | 146 | { |
| 147 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 148 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 149 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 138 | for (int i = 0; i < 4; ++i) { | 150 | for (int i = 0; i < 4; ++i) { |
| 139 | if (!swizzle.DestComponentEnabled(i)) | 151 | if (!swizzle.DestComponentEnabled(i)) |
| 140 | continue; | 152 | continue; |
| 141 | 153 | ||
| 142 | dest[i] = src1[i] * src2[i]; | 154 | dest[i] = src1[i] * src2[i]; |
| 143 | } | 155 | } |
| 144 | 156 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | |
| 145 | break; | 157 | break; |
| 146 | } | 158 | } |
| 147 | 159 | ||
| 148 | case OpCode::Id::FLR: | 160 | case OpCode::Id::FLR: |
| 161 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 162 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 149 | for (int i = 0; i < 4; ++i) { | 163 | for (int i = 0; i < 4; ++i) { |
| 150 | if (!swizzle.DestComponentEnabled(i)) | 164 | if (!swizzle.DestComponentEnabled(i)) |
| 151 | continue; | 165 | continue; |
| 152 | 166 | ||
| 153 | dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); | 167 | dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); |
| 154 | } | 168 | } |
| 169 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 155 | break; | 170 | break; |
| 156 | 171 | ||
| 157 | case OpCode::Id::MAX: | 172 | case OpCode::Id::MAX: |
| 173 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 174 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 175 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 158 | for (int i = 0; i < 4; ++i) { | 176 | for (int i = 0; i < 4; ++i) { |
| 159 | if (!swizzle.DestComponentEnabled(i)) | 177 | if (!swizzle.DestComponentEnabled(i)) |
| 160 | continue; | 178 | continue; |
| 161 | 179 | ||
| 162 | dest[i] = std::max(src1[i], src2[i]); | 180 | dest[i] = std::max(src1[i], src2[i]); |
| 163 | } | 181 | } |
| 182 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 164 | break; | 183 | break; |
| 165 | 184 | ||
| 166 | case OpCode::Id::MIN: | 185 | case OpCode::Id::MIN: |
| 186 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 187 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 188 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 167 | for (int i = 0; i < 4; ++i) { | 189 | for (int i = 0; i < 4; ++i) { |
| 168 | if (!swizzle.DestComponentEnabled(i)) | 190 | if (!swizzle.DestComponentEnabled(i)) |
| 169 | continue; | 191 | continue; |
| 170 | 192 | ||
| 171 | dest[i] = std::min(src1[i], src2[i]); | 193 | dest[i] = std::min(src1[i], src2[i]); |
| 172 | } | 194 | } |
| 195 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 173 | break; | 196 | break; |
| 174 | 197 | ||
| 175 | case OpCode::Id::DP3: | 198 | case OpCode::Id::DP3: |
| 176 | case OpCode::Id::DP4: | 199 | case OpCode::Id::DP4: |
| 177 | { | 200 | { |
| 201 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 202 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 203 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 178 | float24 dot = float24::FromFloat32(0.f); | 204 | float24 dot = float24::FromFloat32(0.f); |
| 179 | int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; | 205 | int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; |
| 180 | for (int i = 0; i < num_components; ++i) | 206 | for (int i = 0; i < num_components; ++i) |
| @@ -186,12 +212,15 @@ void RunInterpreter(UnitState& state) { | |||
| 186 | 212 | ||
| 187 | dest[i] = dot; | 213 | dest[i] = dot; |
| 188 | } | 214 | } |
| 215 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 189 | break; | 216 | break; |
| 190 | } | 217 | } |
| 191 | 218 | ||
| 192 | // Reciprocal | 219 | // Reciprocal |
| 193 | case OpCode::Id::RCP: | 220 | case OpCode::Id::RCP: |
| 194 | { | 221 | { |
| 222 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 223 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 195 | for (int i = 0; i < 4; ++i) { | 224 | for (int i = 0; i < 4; ++i) { |
| 196 | if (!swizzle.DestComponentEnabled(i)) | 225 | if (!swizzle.DestComponentEnabled(i)) |
| 197 | continue; | 226 | continue; |
| @@ -200,13 +229,15 @@ void RunInterpreter(UnitState& state) { | |||
| 200 | // TODO: I think this might be wrong... we should only use one component here | 229 | // TODO: I think this might be wrong... we should only use one component here |
| 201 | dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32()); | 230 | dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32()); |
| 202 | } | 231 | } |
| 203 | 232 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | |
| 204 | break; | 233 | break; |
| 205 | } | 234 | } |
| 206 | 235 | ||
| 207 | // Reciprocal Square Root | 236 | // Reciprocal Square Root |
| 208 | case OpCode::Id::RSQ: | 237 | case OpCode::Id::RSQ: |
| 209 | { | 238 | { |
| 239 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 240 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 210 | for (int i = 0; i < 4; ++i) { | 241 | for (int i = 0; i < 4; ++i) { |
| 211 | if (!swizzle.DestComponentEnabled(i)) | 242 | if (!swizzle.DestComponentEnabled(i)) |
| 212 | continue; | 243 | continue; |
| @@ -215,12 +246,13 @@ void RunInterpreter(UnitState& state) { | |||
| 215 | // TODO: I think this might be wrong... we should only use one component here | 246 | // TODO: I think this might be wrong... we should only use one component here |
| 216 | dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32())); | 247 | dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32())); |
| 217 | } | 248 | } |
| 218 | 249 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | |
| 219 | break; | 250 | break; |
| 220 | } | 251 | } |
| 221 | 252 | ||
| 222 | case OpCode::Id::MOVA: | 253 | case OpCode::Id::MOVA: |
| 223 | { | 254 | { |
| 255 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 224 | for (int i = 0; i < 2; ++i) { | 256 | for (int i = 0; i < 2; ++i) { |
| 225 | if (!swizzle.DestComponentEnabled(i)) | 257 | if (!swizzle.DestComponentEnabled(i)) |
| 226 | continue; | 258 | continue; |
| @@ -228,32 +260,41 @@ void RunInterpreter(UnitState& state) { | |||
| 228 | // TODO: Figure out how the rounding is done on hardware | 260 | // TODO: Figure out how the rounding is done on hardware |
| 229 | state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); | 261 | state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); |
| 230 | } | 262 | } |
| 231 | 263 | Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, state.address_registers); | |
| 232 | break; | 264 | break; |
| 233 | } | 265 | } |
| 234 | 266 | ||
| 235 | case OpCode::Id::MOV: | 267 | case OpCode::Id::MOV: |
| 236 | { | 268 | { |
| 269 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 270 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 237 | for (int i = 0; i < 4; ++i) { | 271 | for (int i = 0; i < 4; ++i) { |
| 238 | if (!swizzle.DestComponentEnabled(i)) | 272 | if (!swizzle.DestComponentEnabled(i)) |
| 239 | continue; | 273 | continue; |
| 240 | 274 | ||
| 241 | dest[i] = src1[i]; | 275 | dest[i] = src1[i]; |
| 242 | } | 276 | } |
| 277 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 243 | break; | 278 | break; |
| 244 | } | 279 | } |
| 245 | 280 | ||
| 246 | case OpCode::Id::SLT: | 281 | case OpCode::Id::SLT: |
| 247 | case OpCode::Id::SLTI: | 282 | case OpCode::Id::SLTI: |
| 283 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 284 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 285 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 248 | for (int i = 0; i < 4; ++i) { | 286 | for (int i = 0; i < 4; ++i) { |
| 249 | if (!swizzle.DestComponentEnabled(i)) | 287 | if (!swizzle.DestComponentEnabled(i)) |
| 250 | continue; | 288 | continue; |
| 251 | 289 | ||
| 252 | dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); | 290 | dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); |
| 253 | } | 291 | } |
| 292 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 254 | break; | 293 | break; |
| 255 | 294 | ||
| 256 | case OpCode::Id::CMP: | 295 | case OpCode::Id::CMP: |
| 296 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 297 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 257 | for (int i = 0; i < 2; ++i) { | 298 | for (int i = 0; i < 2; ++i) { |
| 258 | // TODO: Can you restrict to one compare via dest masking? | 299 | // TODO: Can you restrict to one compare via dest masking? |
| 259 | 300 | ||
| @@ -261,27 +302,27 @@ void RunInterpreter(UnitState& state) { | |||
| 261 | auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); | 302 | auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); |
| 262 | 303 | ||
| 263 | switch (op) { | 304 | switch (op) { |
| 264 | case compare_op.Equal: | 305 | case Instruction::Common::CompareOpType::Equal: |
| 265 | state.conditional_code[i] = (src1[i] == src2[i]); | 306 | state.conditional_code[i] = (src1[i] == src2[i]); |
| 266 | break; | 307 | break; |
| 267 | 308 | ||
| 268 | case compare_op.NotEqual: | 309 | case Instruction::Common::CompareOpType::NotEqual: |
| 269 | state.conditional_code[i] = (src1[i] != src2[i]); | 310 | state.conditional_code[i] = (src1[i] != src2[i]); |
| 270 | break; | 311 | break; |
| 271 | 312 | ||
| 272 | case compare_op.LessThan: | 313 | case Instruction::Common::CompareOpType::LessThan: |
| 273 | state.conditional_code[i] = (src1[i] < src2[i]); | 314 | state.conditional_code[i] = (src1[i] < src2[i]); |
| 274 | break; | 315 | break; |
| 275 | 316 | ||
| 276 | case compare_op.LessEqual: | 317 | case Instruction::Common::CompareOpType::LessEqual: |
| 277 | state.conditional_code[i] = (src1[i] <= src2[i]); | 318 | state.conditional_code[i] = (src1[i] <= src2[i]); |
| 278 | break; | 319 | break; |
| 279 | 320 | ||
| 280 | case compare_op.GreaterThan: | 321 | case Instruction::Common::CompareOpType::GreaterThan: |
| 281 | state.conditional_code[i] = (src1[i] > src2[i]); | 322 | state.conditional_code[i] = (src1[i] > src2[i]); |
| 282 | break; | 323 | break; |
| 283 | 324 | ||
| 284 | case compare_op.GreaterEqual: | 325 | case Instruction::Common::CompareOpType::GreaterEqual: |
| 285 | state.conditional_code[i] = (src1[i] >= src2[i]); | 326 | state.conditional_code[i] = (src1[i] >= src2[i]); |
| 286 | break; | 327 | break; |
| 287 | 328 | ||
| @@ -290,6 +331,7 @@ void RunInterpreter(UnitState& state) { | |||
| 290 | break; | 331 | break; |
| 291 | } | 332 | } |
| 292 | } | 333 | } |
| 334 | Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); | ||
| 293 | break; | 335 | break; |
| 294 | 336 | ||
| 295 | default: | 337 | default: |
| @@ -359,12 +401,17 @@ void RunInterpreter(UnitState& state) { | |||
| 359 | : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] | 401 | : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] |
| 360 | : dummy_vec4_float24; | 402 | : dummy_vec4_float24; |
| 361 | 403 | ||
| 404 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 405 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 406 | Record<DebugDataRecord::SRC3>(state.debug, iteration, src3); | ||
| 407 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 362 | for (int i = 0; i < 4; ++i) { | 408 | for (int i = 0; i < 4; ++i) { |
| 363 | if (!swizzle.DestComponentEnabled(i)) | 409 | if (!swizzle.DestComponentEnabled(i)) |
| 364 | continue; | 410 | continue; |
| 365 | 411 | ||
| 366 | dest[i] = src1[i] * src2[i] + src3[i]; | 412 | dest[i] = src1[i] * src2[i] + src3[i]; |
| 367 | } | 413 | } |
| 414 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 368 | } else { | 415 | } else { |
| 369 | LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", | 416 | LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", |
| 370 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); | 417 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); |
| @@ -374,7 +421,7 @@ void RunInterpreter(UnitState& state) { | |||
| 374 | 421 | ||
| 375 | default: | 422 | default: |
| 376 | { | 423 | { |
| 377 | static auto evaluate_condition = [](const UnitState& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { | 424 | static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { |
| 378 | bool results[2] = { refx == state.conditional_code[0], | 425 | bool results[2] = { refx == state.conditional_code[0], |
| 379 | refy == state.conditional_code[1] }; | 426 | refy == state.conditional_code[1] }; |
| 380 | 427 | ||
| @@ -400,12 +447,14 @@ void RunInterpreter(UnitState& state) { | |||
| 400 | break; | 447 | break; |
| 401 | 448 | ||
| 402 | case OpCode::Id::JMPC: | 449 | case OpCode::Id::JMPC: |
| 450 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | ||
| 403 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 451 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 404 | state.program_counter = instr.flow_control.dest_offset - 1; | 452 | state.program_counter = instr.flow_control.dest_offset - 1; |
| 405 | } | 453 | } |
| 406 | break; | 454 | break; |
| 407 | 455 | ||
| 408 | case OpCode::Id::JMPU: | 456 | case OpCode::Id::JMPU: |
| 457 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||
| 409 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 458 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 410 | state.program_counter = instr.flow_control.dest_offset - 1; | 459 | state.program_counter = instr.flow_control.dest_offset - 1; |
| 411 | } | 460 | } |
| @@ -419,6 +468,7 @@ void RunInterpreter(UnitState& state) { | |||
| 419 | break; | 468 | break; |
| 420 | 469 | ||
| 421 | case OpCode::Id::CALLU: | 470 | case OpCode::Id::CALLU: |
| 471 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||
| 422 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 472 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 423 | call(state, | 473 | call(state, |
| 424 | instr.flow_control.dest_offset, | 474 | instr.flow_control.dest_offset, |
| @@ -428,6 +478,7 @@ void RunInterpreter(UnitState& state) { | |||
| 428 | break; | 478 | break; |
| 429 | 479 | ||
| 430 | case OpCode::Id::CALLC: | 480 | case OpCode::Id::CALLC: |
| 481 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | ||
| 431 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 482 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 432 | call(state, | 483 | call(state, |
| 433 | instr.flow_control.dest_offset, | 484 | instr.flow_control.dest_offset, |
| @@ -440,6 +491,7 @@ void RunInterpreter(UnitState& state) { | |||
| 440 | break; | 491 | break; |
| 441 | 492 | ||
| 442 | case OpCode::Id::IFU: | 493 | case OpCode::Id::IFU: |
| 494 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||
| 443 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 495 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 444 | call(state, | 496 | call(state, |
| 445 | state.program_counter + 1, | 497 | state.program_counter + 1, |
| @@ -458,6 +510,7 @@ void RunInterpreter(UnitState& state) { | |||
| 458 | { | 510 | { |
| 459 | // TODO: Do we need to consider swizzlers here? | 511 | // TODO: Do we need to consider swizzlers here? |
| 460 | 512 | ||
| 513 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | ||
| 461 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 514 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 462 | call(state, | 515 | call(state, |
| 463 | state.program_counter + 1, | 516 | state.program_counter + 1, |
| @@ -475,14 +528,19 @@ void RunInterpreter(UnitState& state) { | |||
| 475 | 528 | ||
| 476 | case OpCode::Id::LOOP: | 529 | case OpCode::Id::LOOP: |
| 477 | { | 530 | { |
| 478 | state.address_registers[2] = uniforms.i[instr.flow_control.int_uniform_id].y; | 531 | Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x, |
| 532 | uniforms.i[instr.flow_control.int_uniform_id].y, | ||
| 533 | uniforms.i[instr.flow_control.int_uniform_id].z, | ||
| 534 | uniforms.i[instr.flow_control.int_uniform_id].w); | ||
| 535 | state.address_registers[2] = loop_param.y; | ||
| 479 | 536 | ||
| 537 | Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); | ||
| 480 | call(state, | 538 | call(state, |
| 481 | state.program_counter + 1, | 539 | state.program_counter + 1, |
| 482 | instr.flow_control.dest_offset - state.program_counter + 1, | 540 | instr.flow_control.dest_offset - state.program_counter + 1, |
| 483 | instr.flow_control.dest_offset + 1, | 541 | instr.flow_control.dest_offset + 1, |
| 484 | uniforms.i[instr.flow_control.int_uniform_id].x, | 542 | loop_param.x, |
| 485 | uniforms.i[instr.flow_control.int_uniform_id].z); | 543 | loop_param.z); |
| 486 | break; | 544 | break; |
| 487 | } | 545 | } |
| 488 | 546 | ||
| @@ -497,12 +555,14 @@ void RunInterpreter(UnitState& state) { | |||
| 497 | } | 555 | } |
| 498 | 556 | ||
| 499 | ++state.program_counter; | 557 | ++state.program_counter; |
| 500 | 558 | ++iteration; | |
| 501 | if (exit_loop) | ||
| 502 | break; | ||
| 503 | } | 559 | } |
| 504 | } | 560 | } |
| 505 | 561 | ||
| 562 | // Explicit instantiation | ||
| 563 | template void RunInterpreter(UnitState<false>& state); | ||
| 564 | template void RunInterpreter(UnitState<true>& state); | ||
| 565 | |||
| 506 | } // namespace | 566 | } // namespace |
| 507 | 567 | ||
| 508 | } // namespace | 568 | } // namespace |
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index ad6e58e39..71bcad5ac 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h | |||
| @@ -12,7 +12,8 @@ namespace Pica { | |||
| 12 | 12 | ||
| 13 | namespace Shader { | 13 | namespace Shader { |
| 14 | 14 | ||
| 15 | void RunInterpreter(UnitState& state); | 15 | template<bool Debug> |
| 16 | void RunInterpreter(UnitState<Debug>& state); | ||
| 16 | 17 | ||
| 17 | } // namespace | 18 | } // namespace |
| 18 | 19 | ||
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index ce47774d5..836942c6b 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -141,7 +141,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source | |||
| 141 | src_offset = src_reg.GetIndex() * sizeof(float24) * 4; | 141 | src_offset = src_reg.GetIndex() * sizeof(float24) * 4; |
| 142 | } else { | 142 | } else { |
| 143 | src_ptr = REGISTERS; | 143 | src_ptr = REGISTERS; |
| 144 | src_offset = UnitState::InputOffset(src_reg); | 144 | src_offset = UnitState<false>::InputOffset(src_reg); |
| 145 | } | 145 | } |
| 146 | 146 | ||
| 147 | unsigned operand_desc_id; | 147 | unsigned operand_desc_id; |
| @@ -217,11 +217,11 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 217 | // If all components are enabled, write the result to the destination register | 217 | // If all components are enabled, write the result to the destination register |
| 218 | if (swiz.dest_mask == NO_DEST_REG_MASK) { | 218 | if (swiz.dest_mask == NO_DEST_REG_MASK) { |
| 219 | // Store dest back to memory | 219 | // Store dest back to memory |
| 220 | MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), src); | 220 | MOVAPS(MDisp(REGISTERS, UnitState<false>::OutputOffset(dest)), src); |
| 221 | 221 | ||
| 222 | } else { | 222 | } else { |
| 223 | // Not all components are enabled, so mask the result when storing to the destination register... | 223 | // Not all components are enabled, so mask the result when storing to the destination register... |
| 224 | MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState::OutputOffset(dest))); | 224 | MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState<false>::OutputOffset(dest))); |
| 225 | 225 | ||
| 226 | if (Common::GetCPUCaps().sse4_1) { | 226 | if (Common::GetCPUCaps().sse4_1) { |
| 227 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | 227 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); |
| @@ -240,7 +240,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 240 | } | 240 | } |
| 241 | 241 | ||
| 242 | // Store dest back to memory | 242 | // Store dest back to memory |
| 243 | MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), SCRATCH); | 243 | MOVAPS(MDisp(REGISTERS, UnitState<false>::OutputOffset(dest)), SCRATCH); |
| 244 | } | 244 | } |
| 245 | } | 245 | } |
| 246 | 246 | ||