summaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/command_processor.cpp44
-rw-r--r--src/video_core/command_processor.h13
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp522
-rw-r--r--src/video_core/debug_utils/debug_utils.h66
-rw-r--r--src/video_core/gpu_debugger.h63
-rw-r--r--src/video_core/math.h233
-rw-r--r--src/video_core/pica.h153
-rw-r--r--src/video_core/primitive_assembly.cpp28
-rw-r--r--src/video_core/primitive_assembly.h38
-rw-r--r--src/video_core/rasterizer.cpp222
-rw-r--r--src/video_core/vertex_shader.cpp51
-rw-r--r--src/video_core/vertex_shader.h81
-rw-r--r--src/video_core/video_core.vcxproj2
-rw-r--r--src/video_core/video_core.vcxproj.filters15
15 files changed, 1308 insertions, 225 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 8e7b93acb..71a1b5ecc 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -5,6 +5,7 @@ set(SRCS clipper.cpp
5 utils.cpp 5 utils.cpp
6 vertex_shader.cpp 6 vertex_shader.cpp
7 video_core.cpp 7 video_core.cpp
8 debug_utils/debug_utils.cpp
8 renderer_opengl/renderer_opengl.cpp) 9 renderer_opengl/renderer_opengl.cpp)
9 10
10set(HEADERS clipper.h 11set(HEADERS clipper.h
@@ -17,6 +18,7 @@ set(HEADERS clipper.h
17 renderer_base.h 18 renderer_base.h
18 vertex_shader.h 19 vertex_shader.h
19 video_core.h 20 video_core.h
21 debug_utils/debug_utils.h
20 renderer_opengl/renderer_opengl.h) 22 renderer_opengl/renderer_opengl.h)
21 23
22add_library(video_core STATIC ${SRCS} ${HEADERS}) 24add_library(video_core STATIC ${SRCS} ${HEADERS})
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 020a4da3f..9567a9849 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -2,12 +2,14 @@
2// Licensed under GPLv2 2// Licensed under GPLv2
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "clipper.h"
5#include "command_processor.h" 6#include "command_processor.h"
6#include "math.h" 7#include "math.h"
7#include "pica.h" 8#include "pica.h"
8#include "primitive_assembly.h" 9#include "primitive_assembly.h"
9#include "vertex_shader.h" 10#include "vertex_shader.h"
10 11
12#include "debug_utils/debug_utils.h"
11 13
12namespace Pica { 14namespace Pica {
13 15
@@ -23,15 +25,24 @@ static u32 uniform_write_buffer[4];
23static u32 vs_binary_write_offset = 0; 25static u32 vs_binary_write_offset = 0;
24static u32 vs_swizzle_write_offset = 0; 26static u32 vs_swizzle_write_offset = 0;
25 27
26static inline void WritePicaReg(u32 id, u32 value) { 28static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
29
30 if (id >= registers.NumIds())
31 return;
32
33 // TODO: Figure out how register masking acts on e.g. vs_uniform_setup.set_value
27 u32 old_value = registers[id]; 34 u32 old_value = registers[id];
28 registers[id] = value; 35 registers[id] = (old_value & ~mask) | (value & mask);
36
37 DebugUtils::OnPicaRegWrite(id, registers[id]);
29 38
30 switch(id) { 39 switch(id) {
31 // It seems like these trigger vertex rendering 40 // It seems like these trigger vertex rendering
32 case PICA_REG_INDEX(trigger_draw): 41 case PICA_REG_INDEX(trigger_draw):
33 case PICA_REG_INDEX(trigger_draw_indexed): 42 case PICA_REG_INDEX(trigger_draw_indexed):
34 { 43 {
44 DebugUtils::DumpTevStageConfig(registers.GetTevStages());
45
35 const auto& attribute_config = registers.vertex_attributes; 46 const auto& attribute_config = registers.vertex_attributes;
36 const u8* const base_address = Memory::GetPointer(attribute_config.GetBaseAddress()); 47 const u8* const base_address = Memory::GetPointer(attribute_config.GetBaseAddress());
37 48
@@ -68,6 +79,10 @@ static inline void WritePicaReg(u32 id, u32 value) {
68 const u16* index_address_16 = (u16*)index_address_8; 79 const u16* index_address_16 = (u16*)index_address_8;
69 bool index_u16 = (bool)index_info.format; 80 bool index_u16 = (bool)index_info.format;
70 81
82 DebugUtils::GeometryDumper geometry_dumper;
83 PrimitiveAssembler<VertexShader::OutputVertex> clipper_primitive_assembler(registers.triangle_topology.Value());
84 PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex> dumping_primitive_assembler(registers.triangle_topology.Value());
85
71 for (int index = 0; index < registers.num_vertices; ++index) 86 for (int index = 0; index < registers.num_vertices; ++index)
72 { 87 {
73 int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; 88 int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index;
@@ -95,14 +110,28 @@ static inline void WritePicaReg(u32 id, u32 value) {
95 input.attr[i][comp].ToFloat32()); 110 input.attr[i][comp].ToFloat32());
96 } 111 }
97 } 112 }
113
114 // NOTE: When dumping geometry, we simply assume that the first input attribute
115 // corresponds to the position for now.
116 DebugUtils::GeometryDumper::Vertex dumped_vertex = {
117 input.attr[0][0].ToFloat32(), input.attr[0][1].ToFloat32(), input.attr[0][2].ToFloat32()
118 };
119 using namespace std::placeholders;
120 dumping_primitive_assembler.SubmitVertex(dumped_vertex,
121 std::bind(&DebugUtils::GeometryDumper::AddTriangle,
122 &geometry_dumper, _1, _2, _3));
123
124 // Send to vertex shader
98 VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes()); 125 VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes());
99 126
100 if (is_indexed) { 127 if (is_indexed) {
101 // TODO: Add processed vertex to vertex cache! 128 // TODO: Add processed vertex to vertex cache!
102 } 129 }
103 130
104 PrimitiveAssembly::SubmitVertex(output); 131 // Send to triangle clipper
132 clipper_primitive_assembler.SubmitVertex(output, Clipper::ProcessTriangle);
105 } 133 }
134 geometry_dumper.Dump();
106 break; 135 break;
107 } 136 }
108 137
@@ -207,14 +236,17 @@ static std::ptrdiff_t ExecuteCommandBlock(const u32* first_command_word) {
207 236
208 u32* read_pointer = (u32*)first_command_word; 237 u32* read_pointer = (u32*)first_command_word;
209 238
210 // TODO: Take parameter mask into consideration! 239 const u32 write_mask = ((header.parameter_mask & 0x1) ? (0xFFu << 0) : 0u) |
240 ((header.parameter_mask & 0x2) ? (0xFFu << 8) : 0u) |
241 ((header.parameter_mask & 0x4) ? (0xFFu << 16) : 0u) |
242 ((header.parameter_mask & 0x8) ? (0xFFu << 24) : 0u);
211 243
212 WritePicaReg(header.cmd_id, *read_pointer); 244 WritePicaReg(header.cmd_id, *read_pointer, write_mask);
213 read_pointer += 2; 245 read_pointer += 2;
214 246
215 for (int i = 1; i < 1+header.extra_data_length; ++i) { 247 for (int i = 1; i < 1+header.extra_data_length; ++i) {
216 u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0); 248 u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0);
217 WritePicaReg(cmd, *read_pointer); 249 WritePicaReg(cmd, *read_pointer, write_mask);
218 ++read_pointer; 250 ++read_pointer;
219 } 251 }
220 252
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h
index 6b6241a25..955f9daec 100644
--- a/src/video_core/command_processor.h
+++ b/src/video_core/command_processor.h
@@ -17,11 +17,22 @@ union CommandHeader {
17 u32 hex; 17 u32 hex;
18 18
19 BitField< 0, 16, u32> cmd_id; 19 BitField< 0, 16, u32> cmd_id;
20
21 // parameter_mask:
22 // Mask applied to the input value to make it possible to update
23 // parts of a register without overwriting its other fields.
24 // first bit: 0x000000FF
25 // second bit: 0x0000FF00
26 // third bit: 0x00FF0000
27 // fourth bit: 0xFF000000
20 BitField<16, 4, u32> parameter_mask; 28 BitField<16, 4, u32> parameter_mask;
29
21 BitField<20, 11, u32> extra_data_length; 30 BitField<20, 11, u32> extra_data_length;
31
22 BitField<31, 1, u32> group_commands; 32 BitField<31, 1, u32> group_commands;
23}; 33};
24static_assert(std::is_standard_layout<CommandHeader>::value == true, "CommandHeader does not use standard layout"); 34static_assert(std::is_standard_layout<CommandHeader>::value == true,
35 "CommandHeader does not use standard layout");
25static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); 36static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
26 37
27void ProcessCommandList(const u32* list, u32 size); 38void ProcessCommandList(const u32* list, u32 size);
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
new file mode 100644
index 000000000..48e6dd182
--- /dev/null
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -0,0 +1,522 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
#include <algorithm>
#include <atomic>
#include <fstream>
#include <map>
#include <mutex>
#include <string>
#include <vector>
10
11#ifdef HAVE_PNG
12#include <png.h>
13#endif
14
15#include "common/file_util.h"
16
17#include "video_core/pica.h"
18
19#include "debug_utils.h"
20
21namespace Pica {
22
23namespace DebugUtils {
24
25void GeometryDumper::AddTriangle(Vertex& v0, Vertex& v1, Vertex& v2) {
26 vertices.push_back(v0);
27 vertices.push_back(v1);
28 vertices.push_back(v2);
29
30 int num_vertices = vertices.size();
31 faces.push_back({ num_vertices-3, num_vertices-2, num_vertices-1 });
32}
33
34void GeometryDumper::Dump() {
35 // NOTE: Permanently enabling this just trashes the hard disk for no reason.
36 // Hence, this is currently disabled.
37 return;
38
39 static int index = 0;
40 std::string filename = std::string("geometry_dump") + std::to_string(++index) + ".obj";
41
42 std::ofstream file(filename);
43
44 for (const auto& vertex : vertices) {
45 file << "v " << vertex.pos[0]
46 << " " << vertex.pos[1]
47 << " " << vertex.pos[2] << std::endl;
48 }
49
50 for (const Face& face : faces) {
51 file << "f " << 1+face.index[0]
52 << " " << 1+face.index[1]
53 << " " << 1+face.index[2] << std::endl;
54 }
55}
56
57#pragma pack(1)
58struct DVLBHeader {
59 enum : u32 {
60 MAGIC_WORD = 0x424C5644, // "DVLB"
61 };
62
63 u32 magic_word;
64 u32 num_programs;
65// u32 dvle_offset_table[];
66};
67static_assert(sizeof(DVLBHeader) == 0x8, "Incorrect structure size");
68
69struct DVLPHeader {
70 enum : u32 {
71 MAGIC_WORD = 0x504C5644, // "DVLP"
72 };
73
74 u32 magic_word;
75 u32 version;
76 u32 binary_offset; // relative to DVLP start
77 u32 binary_size_words;
78 u32 swizzle_patterns_offset;
79 u32 swizzle_patterns_num_entries;
80 u32 unk2;
81};
82static_assert(sizeof(DVLPHeader) == 0x1C, "Incorrect structure size");
83
84struct DVLEHeader {
85 enum : u32 {
86 MAGIC_WORD = 0x454c5644, // "DVLE"
87 };
88
89 enum class ShaderType : u8 {
90 VERTEX = 0,
91 GEOMETRY = 1,
92 };
93
94 u32 magic_word;
95 u16 pad1;
96 ShaderType type;
97 u8 pad2;
98 u32 main_offset_words; // offset within binary blob
99 u32 endmain_offset_words;
100 u32 pad3;
101 u32 pad4;
102 u32 constant_table_offset;
103 u32 constant_table_size; // number of entries
104 u32 label_table_offset;
105 u32 label_table_size;
106 u32 output_register_table_offset;
107 u32 output_register_table_size;
108 u32 uniform_table_offset;
109 u32 uniform_table_size;
110 u32 symbol_table_offset;
111 u32 symbol_table_size;
112
113};
114static_assert(sizeof(DVLEHeader) == 0x40, "Incorrect structure size");
115#pragma pack()
116
117void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size,
118 u32 main_offset, const Regs::VSOutputAttributes* output_attributes)
119{
120 // NOTE: Permanently enabling this just trashes hard disks for no reason.
121 // Hence, this is currently disabled.
122 return;
123
124 struct StuffToWrite {
125 u8* pointer;
126 u32 size;
127 };
128 std::vector<StuffToWrite> writing_queue;
129 u32 write_offset = 0;
130
131 auto QueueForWriting = [&writing_queue,&write_offset](u8* pointer, u32 size) {
132 writing_queue.push_back({pointer, size});
133 u32 old_write_offset = write_offset;
134 write_offset += size;
135 return old_write_offset;
136 };
137
138 // First off, try to translate Pica state (one enum for output attribute type and component)
139 // into shbin format (separate type and component mask).
140 union OutputRegisterInfo {
141 enum Type : u64 {
142 POSITION = 0,
143 COLOR = 2,
144 TEXCOORD0 = 3,
145 TEXCOORD1 = 5,
146 TEXCOORD2 = 6,
147 };
148
149 BitField< 0, 64, u64> hex;
150
151 BitField< 0, 16, Type> type;
152 BitField<16, 16, u64> id;
153 BitField<32, 4, u64> component_mask;
154 };
155
156 // This is put into a try-catch block to make sure we notice unknown configurations.
157 std::vector<OutputRegisterInfo> output_info_table;
158 for (int i = 0; i < 7; ++i) {
159 using OutputAttributes = Pica::Regs::VSOutputAttributes;
160
161 // TODO: It's still unclear how the attribute components map to the register!
162 // Once we know that, this code probably will not make much sense anymore.
163 std::map<OutputAttributes::Semantic, std::pair<OutputRegisterInfo::Type, u32> > map = {
164 { OutputAttributes::POSITION_X, { OutputRegisterInfo::POSITION, 1} },
165 { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} },
166 { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} },
167 { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} },
168 { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} },
169 { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} },
170 { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} },
171 { OutputAttributes::COLOR_A, { OutputRegisterInfo::COLOR, 8} },
172 { OutputAttributes::TEXCOORD0_U, { OutputRegisterInfo::TEXCOORD0, 1} },
173 { OutputAttributes::TEXCOORD0_V, { OutputRegisterInfo::TEXCOORD0, 2} },
174 { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} },
175 { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} },
176 { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} },
177 { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} }
178 };
179
180 for (const auto& semantic : std::vector<OutputAttributes::Semantic>{
181 output_attributes[i].map_x,
182 output_attributes[i].map_y,
183 output_attributes[i].map_z,
184 output_attributes[i].map_w }) {
185 if (semantic == OutputAttributes::INVALID)
186 continue;
187
188 try {
189 OutputRegisterInfo::Type type = map.at(semantic).first;
190 u32 component_mask = map.at(semantic).second;
191
192 auto it = std::find_if(output_info_table.begin(), output_info_table.end(),
193 [&i, &type](const OutputRegisterInfo& info) {
194 return info.id == i && info.type == type;
195 }
196 );
197
198 if (it == output_info_table.end()) {
199 output_info_table.push_back({});
200 output_info_table.back().type = type;
201 output_info_table.back().component_mask = component_mask;
202 output_info_table.back().id = i;
203 } else {
204 it->component_mask = it->component_mask | component_mask;
205 }
206 } catch (const std::out_of_range& oor) {
207 _dbg_assert_msg_(GPU, 0, "Unknown output attribute mapping");
208 ERROR_LOG(GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x",
209 (int)output_attributes[i].map_x.Value(),
210 (int)output_attributes[i].map_y.Value(),
211 (int)output_attributes[i].map_z.Value(),
212 (int)output_attributes[i].map_w.Value());
213 }
214 }
215 }
216
217
218 struct {
219 DVLBHeader header;
220 u32 dvle_offset;
221 } dvlb{ {DVLBHeader::MAGIC_WORD, 1 } }; // 1 DVLE
222
223 DVLPHeader dvlp{ DVLPHeader::MAGIC_WORD };
224 DVLEHeader dvle{ DVLEHeader::MAGIC_WORD };
225
226 QueueForWriting((u8*)&dvlb, sizeof(dvlb));
227 u32 dvlp_offset = QueueForWriting((u8*)&dvlp, sizeof(dvlp));
228 dvlb.dvle_offset = QueueForWriting((u8*)&dvle, sizeof(dvle));
229
230 // TODO: Reduce the amount of binary code written to relevant portions
231 dvlp.binary_offset = write_offset - dvlp_offset;
232 dvlp.binary_size_words = binary_size;
233 QueueForWriting((u8*)binary_data, binary_size * sizeof(u32));
234
235 dvlp.swizzle_patterns_offset = write_offset - dvlp_offset;
236 dvlp.swizzle_patterns_num_entries = swizzle_size;
237 u32 dummy = 0;
238 for (int i = 0; i < swizzle_size; ++i) {
239 QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i]));
240 QueueForWriting((u8*)&dummy, sizeof(dummy));
241 }
242
243 dvle.main_offset_words = main_offset;
244 dvle.output_register_table_offset = write_offset - dvlb.dvle_offset;
245 dvle.output_register_table_size = output_info_table.size();
246 QueueForWriting((u8*)output_info_table.data(), output_info_table.size() * sizeof(OutputRegisterInfo));
247
248 // TODO: Create a label table for "main"
249
250
251 // Write data to file
252 static int dump_index = 0;
253 std::string filename = std::string("shader_dump") + std::to_string(++dump_index) + std::string(".shbin");
254 std::ofstream file(filename, std::ios_base::out | std::ios_base::binary);
255
256 for (auto& chunk : writing_queue) {
257 file.write((char*)chunk.pointer, chunk.size);
258 }
259}
260
261static std::unique_ptr<PicaTrace> pica_trace;
262static std::mutex pica_trace_mutex;
263static int is_pica_tracing = false;
264
265void StartPicaTracing()
266{
267 if (is_pica_tracing) {
268 ERROR_LOG(GPU, "StartPicaTracing called even though tracing already running!");
269 return;
270 }
271
272 pica_trace_mutex.lock();
273 pica_trace = std::unique_ptr<PicaTrace>(new PicaTrace);
274
275 is_pica_tracing = true;
276 pica_trace_mutex.unlock();
277}
278
279bool IsPicaTracing()
280{
281 return is_pica_tracing;
282}
283
284void OnPicaRegWrite(u32 id, u32 value)
285{
286 // Double check for is_pica_tracing to avoid pointless locking overhead
287 if (!is_pica_tracing)
288 return;
289
290 std::unique_lock<std::mutex> lock(pica_trace_mutex);
291
292 if (!is_pica_tracing)
293 return;
294
295 pica_trace->writes.push_back({id, value});
296}
297
298std::unique_ptr<PicaTrace> FinishPicaTracing()
299{
300 if (!is_pica_tracing) {
301 ERROR_LOG(GPU, "FinishPicaTracing called even though tracing already running!");
302 return {};
303 }
304
305 // signalize that no further tracing should be performed
306 is_pica_tracing = false;
307
308 // Wait until running tracing is finished
309 pica_trace_mutex.lock();
310 std::unique_ptr<PicaTrace> ret(std::move(pica_trace));
311 pica_trace_mutex.unlock();
312 return std::move(ret);
313}
314
315void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
316 // NOTE: Permanently enabling this just trashes hard disks for no reason.
317 // Hence, this is currently disabled.
318 return;
319
320#ifndef HAVE_PNG
321 return;
322#else
323 if (!data)
324 return;
325
326 // Write data to file
327 static int dump_index = 0;
328 std::string filename = std::string("texture_dump") + std::to_string(++dump_index) + std::string(".png");
329 u32 row_stride = texture_config.width * 3;
330
331 u8* buf;
332
333 char title[] = "Citra texture dump";
334 char title_key[] = "Title";
335 png_structp png_ptr = nullptr;
336 png_infop info_ptr = nullptr;
337
338 // Open file for writing (binary mode)
339 File::IOFile fp(filename, "wb");
340
341 // Initialize write structure
342 png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
343 if (png_ptr == nullptr) {
344 ERROR_LOG(GPU, "Could not allocate write struct\n");
345 goto finalise;
346
347 }
348
349 // Initialize info structure
350 info_ptr = png_create_info_struct(png_ptr);
351 if (info_ptr == nullptr) {
352 ERROR_LOG(GPU, "Could not allocate info struct\n");
353 goto finalise;
354 }
355
356 // Setup Exception handling
357 if (setjmp(png_jmpbuf(png_ptr))) {
358 ERROR_LOG(GPU, "Error during png creation\n");
359 goto finalise;
360 }
361
362 png_init_io(png_ptr, fp.GetHandle());
363
364 // Write header (8 bit colour depth)
365 png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height,
366 8, PNG_COLOR_TYPE_RGB /*_ALPHA*/, PNG_INTERLACE_NONE,
367 PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
368
369 png_text title_text;
370 title_text.compression = PNG_TEXT_COMPRESSION_NONE;
371 title_text.key = title_key;
372 title_text.text = title;
373 png_set_text(png_ptr, info_ptr, &title_text, 1);
374
375 png_write_info(png_ptr, info_ptr);
376
377 buf = new u8[row_stride * texture_config.height];
378 for (int y = 0; y < texture_config.height; ++y) {
379 for (int x = 0; x < texture_config.width; ++x) {
380 // Cf. rasterizer code for an explanation of this algorithm.
381 int texel_index_within_tile = 0;
382 for (int block_size_index = 0; block_size_index < 3; ++block_size_index) {
383 int sub_tile_width = 1 << block_size_index;
384 int sub_tile_height = 1 << block_size_index;
385
386 int sub_tile_index = (x & sub_tile_width) << block_size_index;
387 sub_tile_index += 2 * ((y & sub_tile_height) << block_size_index);
388 texel_index_within_tile += sub_tile_index;
389 }
390
391 const int block_width = 8;
392 const int block_height = 8;
393
394 int coarse_x = (x / block_width) * block_width;
395 int coarse_y = (y / block_height) * block_height;
396
397 u8* source_ptr = (u8*)data + coarse_x * block_height * 3 + coarse_y * row_stride + texel_index_within_tile * 3;
398 buf[3 * x + y * row_stride ] = source_ptr[2];
399 buf[3 * x + y * row_stride + 1] = source_ptr[1];
400 buf[3 * x + y * row_stride + 2] = source_ptr[0];
401 }
402 }
403
404 // Write image data
405 for (auto y = 0; y < texture_config.height; ++y)
406 {
407 u8* row_ptr = (u8*)buf + y * row_stride;
408 u8* ptr = row_ptr;
409 png_write_row(png_ptr, row_ptr);
410 }
411
412 delete[] buf;
413
414 // End write
415 png_write_end(png_ptr, nullptr);
416
417finalise:
418 if (info_ptr != nullptr) png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1);
419 if (png_ptr != nullptr) png_destroy_write_struct(&png_ptr, (png_infopp)nullptr);
420#endif
421}
422
423void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages)
424{
425 using Source = Pica::Regs::TevStageConfig::Source;
426 using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier;
427 using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier;
428 using Operation = Pica::Regs::TevStageConfig::Operation;
429
430 std::string stage_info = "Tev setup:\n";
431 for (int index = 0; index < stages.size(); ++index) {
432 const auto& tev_stage = stages[index];
433
434 const std::map<Source, std::string> source_map = {
435 { Source::PrimaryColor, "PrimaryColor" },
436 { Source::Texture0, "Texture0" },
437 { Source::Constant, "Constant" },
438 { Source::Previous, "Previous" },
439 };
440
441 const std::map<ColorModifier, std::string> color_modifier_map = {
442 { ColorModifier::SourceColor, { "%source.rgb" } }
443 };
444 const std::map<AlphaModifier, std::string> alpha_modifier_map = {
445 { AlphaModifier::SourceAlpha, "%source.a" }
446 };
447
448 std::map<Operation, std::string> combiner_map = {
449 { Operation::Replace, "%source1" },
450 { Operation::Modulate, "(%source1 * %source2) / 255" },
451 };
452
453 auto ReplacePattern =
454 [](const std::string& input, const std::string& pattern, const std::string& replacement) -> std::string {
455 size_t start = input.find(pattern);
456 if (start == std::string::npos)
457 return input;
458
459 std::string ret = input;
460 ret.replace(start, pattern.length(), replacement);
461 return ret;
462 };
463 auto GetColorSourceStr =
464 [&source_map,&color_modifier_map,&ReplacePattern](const Source& src, const ColorModifier& modifier) {
465 auto src_it = source_map.find(src);
466 std::string src_str = "Unknown";
467 if (src_it != source_map.end())
468 src_str = src_it->second;
469
470 auto modifier_it = color_modifier_map.find(modifier);
471 std::string modifier_str = "%source.????";
472 if (modifier_it != color_modifier_map.end())
473 modifier_str = modifier_it->second;
474
475 return ReplacePattern(modifier_str, "%source", src_str);
476 };
477 auto GetColorCombinerStr =
478 [&](const Regs::TevStageConfig& tev_stage) {
479 auto op_it = combiner_map.find(tev_stage.color_op);
480 std::string op_str = "Unknown op (%source1, %source2, %source3)";
481 if (op_it != combiner_map.end())
482 op_str = op_it->second;
483
484 op_str = ReplacePattern(op_str, "%source1", GetColorSourceStr(tev_stage.color_source1, tev_stage.color_modifier1));
485 op_str = ReplacePattern(op_str, "%source2", GetColorSourceStr(tev_stage.color_source2, tev_stage.color_modifier2));
486 return ReplacePattern(op_str, "%source3", GetColorSourceStr(tev_stage.color_source3, tev_stage.color_modifier3));
487 };
488 auto GetAlphaSourceStr =
489 [&source_map,&alpha_modifier_map,&ReplacePattern](const Source& src, const AlphaModifier& modifier) {
490 auto src_it = source_map.find(src);
491 std::string src_str = "Unknown";
492 if (src_it != source_map.end())
493 src_str = src_it->second;
494
495 auto modifier_it = alpha_modifier_map.find(modifier);
496 std::string modifier_str = "%source.????";
497 if (modifier_it != alpha_modifier_map.end())
498 modifier_str = modifier_it->second;
499
500 return ReplacePattern(modifier_str, "%source", src_str);
501 };
502 auto GetAlphaCombinerStr =
503 [&](const Regs::TevStageConfig& tev_stage) {
504 auto op_it = combiner_map.find(tev_stage.alpha_op);
505 std::string op_str = "Unknown op (%source1, %source2, %source3)";
506 if (op_it != combiner_map.end())
507 op_str = op_it->second;
508
509 op_str = ReplacePattern(op_str, "%source1", GetAlphaSourceStr(tev_stage.alpha_source1, tev_stage.alpha_modifier1));
510 op_str = ReplacePattern(op_str, "%source2", GetAlphaSourceStr(tev_stage.alpha_source2, tev_stage.alpha_modifier2));
511 return ReplacePattern(op_str, "%source3", GetAlphaSourceStr(tev_stage.alpha_source3, tev_stage.alpha_modifier3));
512 };
513
514 stage_info += "Stage " + std::to_string(index) + ": " + GetColorCombinerStr(tev_stage) + " " + GetAlphaCombinerStr(tev_stage) + "\n";
515 }
516
517 DEBUG_LOG(GPU, "%s", stage_info.c_str());
518}
519
520} // namespace
521
522} // namespace
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
new file mode 100644
index 000000000..8b1499bf2
--- /dev/null
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -0,0 +1,66 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <memory>
9#include <vector>
10
11#include "video_core/pica.h"
12
13namespace Pica {
14
15namespace DebugUtils {
16
// Helper that accumulates triangles and dumps them to an OBJ file.
class GeometryDumper {
public:
    // A vertex as stored by the dumper: three position components.
    struct Vertex {
        std::array<float, 3> pos;
    };

    // Adds the triangle formed by v0, v1 and v2 to the collected geometry.
    void AddTriangle(Vertex& v0, Vertex& v1, Vertex& v2);

    // Dumps the collected geometry.
    void Dump();

private:
    // A triangle, expressed as three indices into `vertices`.
    struct Face {
        int index[3];
    };

    std::vector<Vertex> vertices;
    std::vector<Face> faces;
};
36
37void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size,
38 u32 main_offset, const Regs::VSOutputAttributes* output_attributes);
39
40
41// Utility class to log Pica commands.
42struct PicaTrace {
43 struct Write : public std::pair<u32,u32> {
44 Write(u32 id, u32 value) : std::pair<u32,u32>(id, value) {}
45
46 u32& Id() { return first; }
47 const u32& Id() const { return first; }
48
49 u32& Value() { return second; }
50 const u32& Value() const { return second; }
51 };
52 std::vector<Write> writes;
53};
54
55void StartPicaTracing();
56bool IsPicaTracing();
57void OnPicaRegWrite(u32 id, u32 value);
58std::unique_ptr<PicaTrace> FinishPicaTracing();
59
60void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data);
61
62void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages);
63
64} // namespace
65
66} // namespace
diff --git a/src/video_core/gpu_debugger.h b/src/video_core/gpu_debugger.h
index 2ba873457..5a81fcfcb 100644
--- a/src/video_core/gpu_debugger.h
+++ b/src/video_core/gpu_debugger.h
@@ -18,19 +18,6 @@
18class GraphicsDebugger 18class GraphicsDebugger
19{ 19{
20public: 20public:
21 // A few utility structs used to expose data
22 // A vector of commands represented by their raw byte sequence
23 struct PicaCommand : public std::vector<u32>
24 {
25 const Pica::CommandProcessor::CommandHeader& GetHeader() const
26 {
27 const u32& val = at(1);
28 return *(Pica::CommandProcessor::CommandHeader*)&val;
29 }
30 };
31
32 typedef std::vector<PicaCommand> PicaCommandList;
33
34 // Base class for all objects which need to be notified about GPU events 21 // Base class for all objects which need to be notified about GPU events
35 class DebuggerObserver 22 class DebuggerObserver
36 { 23 {
@@ -55,16 +42,6 @@ public:
55 ERROR_LOG(GSP, "Received command: id=%x", (int)cmd.id.Value()); 42 ERROR_LOG(GSP, "Received command: id=%x", (int)cmd.id.Value());
56 } 43 }
57 44
58 /**
59 * @param lst command list which triggered this call
60 * @param is_new true if the command list was called for the first time
61 * @todo figure out how to make sure called functions don't keep references around beyond their life time
62 */
63 virtual void OnCommandListCalled(const PicaCommandList& lst, bool is_new)
64 {
65 ERROR_LOG(GSP, "Command list called: %d", (int)is_new);
66 }
67
68 protected: 45 protected:
69 const GraphicsDebugger* GetDebugger() const 46 const GraphicsDebugger* GetDebugger() const
70 { 47 {
@@ -93,49 +70,12 @@ public:
93 } ); 70 } );
94 } 71 }
95 72
96 void CommandListCalled(u32 address, u32* command_list, u32 size_in_words)
97 {
98 if (observers.empty())
99 return;
100
101 PicaCommandList cmdlist;
102 for (u32* parse_pointer = command_list; parse_pointer < command_list + size_in_words;)
103 {
104 const Pica::CommandProcessor::CommandHeader& header = *(Pica::CommandProcessor::CommandHeader*)(&parse_pointer[1]);
105
106 cmdlist.push_back(PicaCommand());
107 auto& cmd = cmdlist.back();
108
109 size_t size = 2 + header.extra_data_length;
110 size = (size + 1) / 2 * 2; // align to 8 bytes
111 cmd.reserve(size);
112 std::copy(parse_pointer, parse_pointer + size, std::back_inserter(cmd));
113
114 parse_pointer += size;
115 }
116
117 auto obj = std::pair<u32,PicaCommandList>(address, cmdlist);
118 auto it = std::find(command_lists.begin(), command_lists.end(), obj);
119 bool is_new = (it == command_lists.end());
120 if (is_new)
121 command_lists.push_back(obj);
122
123 ForEachObserver([&](DebuggerObserver* observer) {
124 observer->OnCommandListCalled(obj.second, is_new);
125 } );
126 }
127
128 const GSP_GPU::Command& ReadGXCommandHistory(int index) const 73 const GSP_GPU::Command& ReadGXCommandHistory(int index) const
129 { 74 {
130 // TODO: Is this thread-safe? 75 // TODO: Is this thread-safe?
131 return gx_command_history[index]; 76 return gx_command_history[index];
132 } 77 }
133 78
134 const std::vector<std::pair<u32,PicaCommandList>>& GetCommandLists() const
135 {
136 return command_lists;
137 }
138
139 void RegisterObserver(DebuggerObserver* observer) 79 void RegisterObserver(DebuggerObserver* observer)
140 { 80 {
141 // TODO: Check for duplicates 81 // TODO: Check for duplicates
@@ -158,7 +98,4 @@ private:
158 std::vector<DebuggerObserver*> observers; 98 std::vector<DebuggerObserver*> observers;
159 99
160 std::vector<GSP_GPU::Command> gx_command_history; 100 std::vector<GSP_GPU::Command> gx_command_history;
161
162 // vector of pairs of command lists and their storage address
163 std::vector<std::pair<u32,PicaCommandList>> command_lists;
164}; 101};
diff --git a/src/video_core/math.h b/src/video_core/math.h
index 7030f2cfb..83ba81235 100644
--- a/src/video_core/math.h
+++ b/src/video_core/math.h
@@ -39,13 +39,19 @@ template<typename T> class Vec2;
39template<typename T> class Vec3; 39template<typename T> class Vec3;
40template<typename T> class Vec4; 40template<typename T> class Vec4;
41 41
42template<typename T>
43static inline Vec2<T> MakeVec(const T& x, const T& y);
44template<typename T>
45static inline Vec3<T> MakeVec(const T& x, const T& y, const T& z);
46template<typename T>
47static inline Vec4<T> MakeVec(const T& x, const T& y, const T& z, const T& w);
48
42 49
43template<typename T> 50template<typename T>
44class Vec2 { 51class Vec2 {
45public: 52public:
46 struct { 53 T x;
47 T x,y; 54 T y;
48 };
49 55
50 T* AsArray() { return &x; } 56 T* AsArray() { return &x; }
51 57
@@ -68,34 +74,34 @@ public:
68 a[0] = x; a[1] = y; 74 a[0] = x; a[1] = y;
69 } 75 }
70 76
71 Vec2 operator +(const Vec2& other) const 77 Vec2<decltype(T{}+T{})> operator +(const Vec2& other) const
72 { 78 {
73 return Vec2(x+other.x, y+other.y); 79 return MakeVec(x+other.x, y+other.y);
74 } 80 }
75 void operator += (const Vec2 &other) 81 void operator += (const Vec2 &other)
76 { 82 {
77 x+=other.x; y+=other.y; 83 x+=other.x; y+=other.y;
78 } 84 }
79 Vec2 operator -(const Vec2& other) const 85 Vec2<decltype(T{}-T{})> operator -(const Vec2& other) const
80 { 86 {
81 return Vec2(x-other.x, y-other.y); 87 return MakeVec(x-other.x, y-other.y);
82 } 88 }
83 void operator -= (const Vec2& other) 89 void operator -= (const Vec2& other)
84 { 90 {
85 x-=other.x; y-=other.y; 91 x-=other.x; y-=other.y;
86 } 92 }
87 Vec2 operator -() const 93 Vec2<decltype(-T{})> operator -() const
88 { 94 {
89 return Vec2(-x,-y); 95 return MakeVec(-x,-y);
90 } 96 }
91 Vec2 operator * (const Vec2& other) const 97 Vec2<decltype(T{}*T{})> operator * (const Vec2& other) const
92 { 98 {
93 return Vec2(x*other.x, y*other.y); 99 return MakeVec(x*other.x, y*other.y);
94 } 100 }
95 template<typename V> 101 template<typename V>
96 Vec2 operator * (const V& f) const 102 Vec2<decltype(T{}*V{})> operator * (const V& f) const
97 { 103 {
98 return Vec2(x*f,y*f); 104 return MakeVec(x*f,y*f);
99 } 105 }
100 template<typename V> 106 template<typename V>
101 void operator *= (const V& f) 107 void operator *= (const V& f)
@@ -103,9 +109,9 @@ public:
103 x*=f; y*=f; 109 x*=f; y*=f;
104 } 110 }
105 template<typename V> 111 template<typename V>
106 Vec2 operator / (const V& f) const 112 Vec2<decltype(T{}/V{})> operator / (const V& f) const
107 { 113 {
108 return Vec2(x/f,y/f); 114 return MakeVec(x/f,y/f);
109 } 115 }
110 template<typename V> 116 template<typename V>
111 void operator /= (const V& f) 117 void operator /= (const V& f)
@@ -152,20 +158,9 @@ public:
152 const T& t() const { return y; } 158 const T& t() const { return y; }
153 159
154 // swizzlers - create a subvector of specific components 160 // swizzlers - create a subvector of specific components
155 Vec2 yx() const { return Vec2(y, x); } 161 const Vec2 yx() const { return Vec2(y, x); }
156 Vec2 vu() const { return Vec2(y, x); } 162 const Vec2 vu() const { return Vec2(y, x); }
157 Vec2 ts() const { return Vec2(y, x); } 163 const Vec2 ts() const { return Vec2(y, x); }
158
159 // Inserters to add new elements to effectively create larger vectors containing this Vec2
160 Vec3<T> InsertBeforeX(const T& value) {
161 return Vec3<T>(value, x, y);
162 }
163 Vec3<T> InsertBeforeY(const T& value) {
164 return Vec3<T>(x, value, y);
165 }
166 Vec3<T> Append(const T& value) {
167 return Vec3<T>(x, y, value);
168 }
169}; 164};
170 165
171template<typename T, typename V> 166template<typename T, typename V>
@@ -180,10 +175,9 @@ template<typename T>
180class Vec3 175class Vec3
181{ 176{
182public: 177public:
183 struct 178 T x;
184 { 179 T y;
185 T x,y,z; 180 T z;
186 };
187 181
188 T* AsArray() { return &x; } 182 T* AsArray() { return &x; }
189 183
@@ -193,7 +187,7 @@ public:
193 187
194 template<typename T2> 188 template<typename T2>
195 Vec3<T2> Cast() const { 189 Vec3<T2> Cast() const {
196 return Vec3<T2>((T2)x, (T2)y, (T2)z); 190 return MakeVec<T2>((T2)x, (T2)y, (T2)z);
197 } 191 }
198 192
199 // Only implemented for T=int and T=float 193 // Only implemented for T=int and T=float
@@ -202,7 +196,7 @@ public:
202 196
203 static Vec3 AssignToAll(const T& f) 197 static Vec3 AssignToAll(const T& f)
204 { 198 {
205 return Vec3<T>(f, f, f); 199 return MakeVec(f, f, f);
206 } 200 }
207 201
208 void Write(T a[3]) 202 void Write(T a[3])
@@ -210,34 +204,34 @@ public:
210 a[0] = x; a[1] = y; a[2] = z; 204 a[0] = x; a[1] = y; a[2] = z;
211 } 205 }
212 206
213 Vec3 operator +(const Vec3 &other) const 207 Vec3<decltype(T{}+T{})> operator +(const Vec3 &other) const
214 { 208 {
215 return Vec3(x+other.x, y+other.y, z+other.z); 209 return MakeVec(x+other.x, y+other.y, z+other.z);
216 } 210 }
217 void operator += (const Vec3 &other) 211 void operator += (const Vec3 &other)
218 { 212 {
219 x+=other.x; y+=other.y; z+=other.z; 213 x+=other.x; y+=other.y; z+=other.z;
220 } 214 }
221 Vec3 operator -(const Vec3 &other) const 215 Vec3<decltype(T{}-T{})> operator -(const Vec3 &other) const
222 { 216 {
223 return Vec3(x-other.x, y-other.y, z-other.z); 217 return MakeVec(x-other.x, y-other.y, z-other.z);
224 } 218 }
225 void operator -= (const Vec3 &other) 219 void operator -= (const Vec3 &other)
226 { 220 {
227 x-=other.x; y-=other.y; z-=other.z; 221 x-=other.x; y-=other.y; z-=other.z;
228 } 222 }
229 Vec3 operator -() const 223 Vec3<decltype(-T{})> operator -() const
230 { 224 {
231 return Vec3(-x,-y,-z); 225 return MakeVec(-x,-y,-z);
232 } 226 }
233 Vec3 operator * (const Vec3 &other) const 227 Vec3<decltype(T{}*T{})> operator * (const Vec3 &other) const
234 { 228 {
235 return Vec3(x*other.x, y*other.y, z*other.z); 229 return MakeVec(x*other.x, y*other.y, z*other.z);
236 } 230 }
237 template<typename V> 231 template<typename V>
238 Vec3 operator * (const V& f) const 232 Vec3<decltype(T{}*V{})> operator * (const V& f) const
239 { 233 {
240 return Vec3(x*f,y*f,z*f); 234 return MakeVec(x*f,y*f,z*f);
241 } 235 }
242 template<typename V> 236 template<typename V>
243 void operator *= (const V& f) 237 void operator *= (const V& f)
@@ -245,9 +239,9 @@ public:
245 x*=f; y*=f; z*=f; 239 x*=f; y*=f; z*=f;
246 } 240 }
247 template<typename V> 241 template<typename V>
248 Vec3 operator / (const V& f) const 242 Vec3<decltype(T{}/V{})> operator / (const V& f) const
249 { 243 {
250 return Vec3(x/f,y/f,z/f); 244 return MakeVec(x/f,y/f,z/f);
251 } 245 }
252 template<typename V> 246 template<typename V>
253 void operator /= (const V& f) 247 void operator /= (const V& f)
@@ -310,7 +304,7 @@ public:
310 // swizzlers - create a subvector of specific components 304 // swizzlers - create a subvector of specific components
311 // e.g. Vec2 uv() { return Vec2(x,y); } 305 // e.g. Vec2 uv() { return Vec2(x,y); }
312 // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) 306 // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
313#define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); } 307#define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); }
314#define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \ 308#define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \
315 _DEFINE_SWIZZLER2(a, b, a##b); \ 309 _DEFINE_SWIZZLER2(a, b, a##b); \
316 _DEFINE_SWIZZLER2(a, b, a2##b2); \ 310 _DEFINE_SWIZZLER2(a, b, a2##b2); \
@@ -319,27 +313,13 @@ public:
319 _DEFINE_SWIZZLER2(b, a, b##a); \ 313 _DEFINE_SWIZZLER2(b, a, b##a); \
320 _DEFINE_SWIZZLER2(b, a, b2##a2); \ 314 _DEFINE_SWIZZLER2(b, a, b2##a2); \
321 _DEFINE_SWIZZLER2(b, a, b3##a3); \ 315 _DEFINE_SWIZZLER2(b, a, b3##a3); \
322 _DEFINE_SWIZZLER2(b, a, b4##a4); 316 _DEFINE_SWIZZLER2(b, a, b4##a4)
323 317
324 DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t); 318 DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t);
325 DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q); 319 DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q);
326 DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q); 320 DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q);
327#undef DEFINE_SWIZZLER2 321#undef DEFINE_SWIZZLER2
328#undef _DEFINE_SWIZZLER2 322#undef _DEFINE_SWIZZLER2
329
330 // Inserters to add new elements to effectively create larger vectors containing this Vec2
331 Vec4<T> InsertBeforeX(const T& value) {
332 return Vec4<T>(value, x, y, z);
333 }
334 Vec4<T> InsertBeforeY(const T& value) {
335 return Vec4<T>(x, value, y, z);
336 }
337 Vec4<T> InsertBeforeZ(const T& value) {
338 return Vec4<T>(x, y, value, z);
339 }
340 Vec4<T> Append(const T& value) {
341 return Vec4<T>(x, y, z, value);
342 }
343}; 323};
344 324
345template<typename T, typename V> 325template<typename T, typename V>
@@ -348,16 +328,27 @@ Vec3<T> operator * (const V& f, const Vec3<T>& vec)
348 return Vec3<T>(f*vec.x,f*vec.y,f*vec.z); 328 return Vec3<T>(f*vec.x,f*vec.y,f*vec.z);
349} 329}
350 330
331template<>
332inline float Vec3<float>::Length() const {
333 return std::sqrt(x * x + y * y + z * z);
334}
335
336template<>
337inline Vec3<float> Vec3<float>::Normalized() const {
338 return *this / Length();
339}
340
341
351typedef Vec3<float> Vec3f; 342typedef Vec3<float> Vec3f;
352 343
353template<typename T> 344template<typename T>
354class Vec4 345class Vec4
355{ 346{
356public: 347public:
357 struct 348 T x;
358 { 349 T y;
359 T x,y,z,w; 350 T z;
360 }; 351 T w;
361 352
362 T* AsArray() { return &x; } 353 T* AsArray() { return &x; }
363 354
@@ -383,34 +374,34 @@ public:
383 a[0] = x; a[1] = y; a[2] = z; a[3] = w; 374 a[0] = x; a[1] = y; a[2] = z; a[3] = w;
384 } 375 }
385 376
386 Vec4 operator +(const Vec4& other) const 377 Vec4<decltype(T{}+T{})> operator +(const Vec4& other) const
387 { 378 {
388 return Vec4(x+other.x, y+other.y, z+other.z, w+other.w); 379 return MakeVec(x+other.x, y+other.y, z+other.z, w+other.w);
389 } 380 }
390 void operator += (const Vec4& other) 381 void operator += (const Vec4& other)
391 { 382 {
392 x+=other.x; y+=other.y; z+=other.z; w+=other.w; 383 x+=other.x; y+=other.y; z+=other.z; w+=other.w;
393 } 384 }
394 Vec4 operator -(const Vec4 &other) const 385 Vec4<decltype(T{}-T{})> operator -(const Vec4 &other) const
395 { 386 {
396 return Vec4(x-other.x, y-other.y, z-other.z, w-other.w); 387 return MakeVec(x-other.x, y-other.y, z-other.z, w-other.w);
397 } 388 }
398 void operator -= (const Vec4 &other) 389 void operator -= (const Vec4 &other)
399 { 390 {
400 x-=other.x; y-=other.y; z-=other.z; w-=other.w; 391 x-=other.x; y-=other.y; z-=other.z; w-=other.w;
401 } 392 }
402 Vec4 operator -() const 393 Vec4<decltype(-T{})> operator -() const
403 { 394 {
404 return Vec4(-x,-y,-z,-w); 395 return MakeVec(-x,-y,-z,-w);
405 } 396 }
406 Vec4 operator * (const Vec4 &other) const 397 Vec4<decltype(T{}*T{})> operator * (const Vec4 &other) const
407 { 398 {
408 return Vec4(x*other.x, y*other.y, z*other.z, w*other.w); 399 return MakeVec(x*other.x, y*other.y, z*other.z, w*other.w);
409 } 400 }
410 template<typename V> 401 template<typename V>
411 Vec4 operator * (const V& f) const 402 Vec4<decltype(T{}*V{})> operator * (const V& f) const
412 { 403 {
413 return Vec4(x*f,y*f,z*f,w*f); 404 return MakeVec(x*f,y*f,z*f,w*f);
414 } 405 }
415 template<typename V> 406 template<typename V>
416 void operator *= (const V& f) 407 void operator *= (const V& f)
@@ -418,9 +409,9 @@ public:
418 x*=f; y*=f; z*=f; w*=f; 409 x*=f; y*=f; z*=f; w*=f;
419 } 410 }
420 template<typename V> 411 template<typename V>
421 Vec4 operator / (const V& f) const 412 Vec4<decltype(T{}/V{})> operator / (const V& f) const
422 { 413 {
423 return Vec4(x/f,y/f,z/f,w/f); 414 return MakeVec(x/f,y/f,z/f,w/f);
424 } 415 }
425 template<typename V> 416 template<typename V>
426 void operator /= (const V& f) 417 void operator /= (const V& f)
@@ -469,12 +460,12 @@ public:
469 // swizzlers - create a subvector of specific components 460 // swizzlers - create a subvector of specific components
470 // e.g. Vec2 uv() { return Vec2(x,y); } 461 // e.g. Vec2 uv() { return Vec2(x,y); }
471 // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) 462 // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
472#define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); } 463#define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); }
473#define DEFINE_SWIZZLER2(a, b, a2, b2) \ 464#define DEFINE_SWIZZLER2(a, b, a2, b2) \
474 _DEFINE_SWIZZLER2(a, b, a##b); \ 465 _DEFINE_SWIZZLER2(a, b, a##b); \
475 _DEFINE_SWIZZLER2(a, b, a2##b2); \ 466 _DEFINE_SWIZZLER2(a, b, a2##b2); \
476 _DEFINE_SWIZZLER2(b, a, b##a); \ 467 _DEFINE_SWIZZLER2(b, a, b##a); \
477 _DEFINE_SWIZZLER2(b, a, b2##a2); 468 _DEFINE_SWIZZLER2(b, a, b2##a2)
478 469
479 DEFINE_SWIZZLER2(x, y, r, g); 470 DEFINE_SWIZZLER2(x, y, r, g);
480 DEFINE_SWIZZLER2(x, z, r, b); 471 DEFINE_SWIZZLER2(x, z, r, b);
@@ -485,7 +476,7 @@ public:
485#undef DEFINE_SWIZZLER2 476#undef DEFINE_SWIZZLER2
486#undef _DEFINE_SWIZZLER2 477#undef _DEFINE_SWIZZLER2
487 478
488#define _DEFINE_SWIZZLER3(a, b, c, name) Vec3<T> name() const { return Vec3<T>(a, b, c); } 479#define _DEFINE_SWIZZLER3(a, b, c, name) const Vec3<T> name() const { return Vec3<T>(a, b, c); }
489#define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \ 480#define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \
490 _DEFINE_SWIZZLER3(a, b, c, a##b##c); \ 481 _DEFINE_SWIZZLER3(a, b, c, a##b##c); \
491 _DEFINE_SWIZZLER3(a, c, b, a##c##b); \ 482 _DEFINE_SWIZZLER3(a, c, b, a##c##b); \
@@ -498,7 +489,7 @@ public:
498 _DEFINE_SWIZZLER3(b, a, c, b2##a2##c2); \ 489 _DEFINE_SWIZZLER3(b, a, c, b2##a2##c2); \
499 _DEFINE_SWIZZLER3(b, c, a, b2##c2##a2); \ 490 _DEFINE_SWIZZLER3(b, c, a, b2##c2##a2); \
500 _DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \ 491 _DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \
501 _DEFINE_SWIZZLER3(c, b, a, c2##b2##a2); 492 _DEFINE_SWIZZLER3(c, b, a, c2##b2##a2)
502 493
503 DEFINE_SWIZZLER3(x, y, z, r, g, b); 494 DEFINE_SWIZZLER3(x, y, z, r, g, b);
504 DEFINE_SWIZZLER3(x, y, w, r, g, a); 495 DEFINE_SWIZZLER3(x, y, w, r, g, a);
@@ -510,69 +501,121 @@ public:
510 501
511 502
512template<typename T, typename V> 503template<typename T, typename V>
513Vec4<T> operator * (const V& f, const Vec4<T>& vec) 504Vec4<decltype(V{}*T{})> operator * (const V& f, const Vec4<T>& vec)
514{ 505{
515 return Vec4<T>(f*vec.x,f*vec.y,f*vec.z,f*vec.w); 506 return MakeVec(f*vec.x,f*vec.y,f*vec.z,f*vec.w);
516} 507}
517 508
518typedef Vec4<float> Vec4f; 509typedef Vec4<float> Vec4f;
519 510
520 511
521template<typename T> 512template<typename T>
522static inline T Dot(const Vec2<T>& a, const Vec2<T>& b) 513static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec2<T>& a, const Vec2<T>& b)
523{ 514{
524 return a.x*b.x + a.y*b.y; 515 return a.x*b.x + a.y*b.y;
525} 516}
526 517
527template<typename T> 518template<typename T>
528static inline T Dot(const Vec3<T>& a, const Vec3<T>& b) 519static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec3<T>& a, const Vec3<T>& b)
529{ 520{
530 return a.x*b.x + a.y*b.y + a.z*b.z; 521 return a.x*b.x + a.y*b.y + a.z*b.z;
531} 522}
532 523
533template<typename T> 524template<typename T>
534static inline T Dot(const Vec4<T>& a, const Vec4<T>& b) 525static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec4<T>& a, const Vec4<T>& b)
535{ 526{
536 return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; 527 return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
537} 528}
538 529
539template<typename T> 530template<typename T>
540static inline Vec3<T> Cross(const Vec3<T>& a, const Vec3<T>& b) 531static inline Vec3<decltype(T{}*T{}-T{}*T{})> Cross(const Vec3<T>& a, const Vec3<T>& b)
541{ 532{
542 return Vec3<T>(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); 533 return MakeVec(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
543} 534}
544 535
545// linear interpolation via float: 0.0=begin, 1.0=end 536// linear interpolation via float: 0.0=begin, 1.0=end
546template<typename X> 537template<typename X>
547static inline X Lerp(const X& begin, const X& end, const float t) 538static inline decltype(X{}*float{}+X{}*float{}) Lerp(const X& begin, const X& end, const float t)
548{ 539{
549 return begin*(1.f-t) + end*t; 540 return begin*(1.f-t) + end*t;
550} 541}
551 542
552// linear interpolation via int: 0=begin, base=end 543// linear interpolation via int: 0=begin, base=end
553template<typename X, int base> 544template<typename X, int base>
554static inline X LerpInt(const X& begin, const X& end, const int t) 545static inline decltype((X{}*int{}+X{}*int{}) / base) LerpInt(const X& begin, const X& end, const int t)
555{ 546{
556 return (begin*(base-t) + end*t) / base; 547 return (begin*(base-t) + end*t) / base;
557} 548}
558 549
559// Utility vector factories 550// Utility vector factories
560template<typename T> 551template<typename T>
561static inline Vec2<T> MakeVec2(const T& x, const T& y) 552static inline Vec2<T> MakeVec(const T& x, const T& y)
562{ 553{
563 return Vec2<T>{x, y}; 554 return Vec2<T>{x, y};
564} 555}
565 556
566template<typename T> 557template<typename T>
567static inline Vec3<T> MakeVec3(const T& x, const T& y, const T& z) 558static inline Vec3<T> MakeVec(const T& x, const T& y, const T& z)
568{ 559{
569 return Vec3<T>{x, y, z}; 560 return Vec3<T>{x, y, z};
570} 561}
571 562
572template<typename T> 563template<typename T>
573static inline Vec4<T> MakeVec4(const T& x, const T& y, const T& z, const T& w) 564static inline Vec4<T> MakeVec(const T& x, const T& y, const Vec2<T>& zw)
565{
566 return MakeVec(x, y, zw[0], zw[1]);
567}
568
569template<typename T>
570static inline Vec3<T> MakeVec(const Vec2<T>& xy, const T& z)
571{
572 return MakeVec(xy[0], xy[1], z);
573}
574
575template<typename T>
576static inline Vec3<T> MakeVec(const T& x, const Vec2<T>& yz)
577{
578 return MakeVec(x, yz[0], yz[1]);
579}
580
581template<typename T>
582static inline Vec4<T> MakeVec(const T& x, const T& y, const T& z, const T& w)
574{ 583{
575 return Vec4<T>{x, y, z, w}; 584 return Vec4<T>{x, y, z, w};
576} 585}
577 586
587template<typename T>
588static inline Vec4<T> MakeVec(const Vec2<T>& xy, const T& z, const T& w)
589{
590 return MakeVec(xy[0], xy[1], z, w);
591}
592
593template<typename T>
594static inline Vec4<T> MakeVec(const T& x, const Vec2<T>& yz, const T& w)
595{
596 return MakeVec(x, yz[0], yz[1], w);
597}
598
599// NOTE: This has priority over "Vec2<Vec2<T>> MakeVec(const Vec2<T>& x, const Vec2<T>& y)".
600// Even if someone wanted to use an odd object like Vec2<Vec2<T>>, the compiler would error
601// out soon enough due to misuse of the returned structure.
602template<typename T>
603static inline Vec4<T> MakeVec(const Vec2<T>& xy, const Vec2<T>& zw)
604{
605 return MakeVec(xy[0], xy[1], zw[0], zw[1]);
606}
607
608template<typename T>
609static inline Vec4<T> MakeVec(const Vec3<T>& xyz, const T& w)
610{
611 return MakeVec(xyz[0], xyz[1], xyz[2], w);
612}
613
614template<typename T>
615static inline Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) // builds (x, y, z, w) from a scalar x and a 3-vector yzw; Vec3 (not Vec2) because three components are read and Vec2 would clash with the Vec3-returning (x, yz) overload
616{
617 return MakeVec(x, yzw[0], yzw[1], yzw[2]);
618}
619
620
578} // namespace 621} // namespace
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 640830144..cfdc9b934 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <cstddef> 8#include <cstddef>
8#include <initializer_list> 9#include <initializer_list>
9#include <map> 10#include <map>
@@ -57,7 +58,7 @@ struct Regs {
57 58
58 INSERT_PADDING_WORDS(0x1); 59 INSERT_PADDING_WORDS(0x1);
59 60
60 union { 61 union VSOutputAttributes {
61 // Maps components of output vertex attributes to semantics 62 // Maps components of output vertex attributes to semantics
62 enum Semantic : u32 63 enum Semantic : u32
63 { 64 {
@@ -94,7 +95,137 @@ struct Regs {
94 BitField<16, 16, u32> y; 95 BitField<16, 16, u32> y;
95 } viewport_corner; 96 } viewport_corner;
96 97
97 INSERT_PADDING_WORDS(0xa7); 98 INSERT_PADDING_WORDS(0x17);
99
100 struct TextureConfig {
101 INSERT_PADDING_WORDS(0x1);
102
103 union {
104 BitField< 0, 16, u32> height;
105 BitField<16, 16, u32> width;
106 };
107
108 INSERT_PADDING_WORDS(0x2);
109
110 u32 address;
111
112 u32 GetPhysicalAddress() {
113 return DecodeAddressRegister(address) - Memory::FCRAM_PADDR + Memory::HEAP_GSP_VADDR;
114 }
115
116 // texture1 and texture2 store the texture format directly after the address
117 // whereas texture0 inserts some additional flags in between.
118 // Hence, we store the format separately so that all other parameters can be described
119 // in a single structure.
120 };
121
122 enum class TextureFormat : u32 {
123 RGBA8 = 0,
124 RGB8 = 1,
125 RGBA5551 = 2,
126 RGB565 = 3,
127 RGBA4 = 4,
128
129 // TODO: Support for the other formats is not implemented yet.
130 // Seems like they are luminance formats and compressed textures.
131 };
132
133 BitField<0, 1, u32> texturing_enable;
134 TextureConfig texture0;
135 INSERT_PADDING_WORDS(0x8);
136 BitField<0, 4, TextureFormat> texture0_format;
137
138 INSERT_PADDING_WORDS(0x31);
139
140 // 0xc0-0xff: Texture Combiner (akin to glTexEnv)
141 struct TevStageConfig {
142 enum class Source : u32 {
143 PrimaryColor = 0x0,
144 Texture0 = 0x3,
145 Texture1 = 0x4,
146 Texture2 = 0x5,
147 Texture3 = 0x6,
148 // 0x7-0xc = primary color??
149 Constant = 0xe,
150 Previous = 0xf,
151 };
152
153 enum class ColorModifier : u32 {
154 SourceColor = 0,
155 OneMinusSourceColor = 1,
156 SourceAlpha = 2,
157 OneMinusSourceAlpha = 3,
158
159 // Other values seem to be non-standard extensions
160 };
161
162 enum class AlphaModifier : u32 {
163 SourceAlpha = 0,
164 OneMinusSourceAlpha = 1,
165
166 // Other values seem to be non-standard extensions
167 };
168
169 enum class Operation : u32 {
170 Replace = 0,
171 Modulate = 1,
172 Add = 2,
173 AddSigned = 3,
174 Lerp = 4,
175 Subtract = 5,
176 };
177
178 union {
179 BitField< 0, 4, Source> color_source1;
180 BitField< 4, 4, Source> color_source2;
181 BitField< 8, 4, Source> color_source3;
182 BitField<16, 4, Source> alpha_source1;
183 BitField<20, 4, Source> alpha_source2;
184 BitField<24, 4, Source> alpha_source3;
185 };
186
187 union {
188 BitField< 0, 4, ColorModifier> color_modifier1;
189 BitField< 4, 4, ColorModifier> color_modifier2;
190 BitField< 8, 4, ColorModifier> color_modifier3;
191 BitField<12, 3, AlphaModifier> alpha_modifier1;
192 BitField<16, 3, AlphaModifier> alpha_modifier2;
193 BitField<20, 3, AlphaModifier> alpha_modifier3;
194 };
195
196 union {
197 BitField< 0, 4, Operation> color_op;
198 BitField<16, 4, Operation> alpha_op;
199 };
200
201 union {
202 BitField< 0, 8, u32> const_r;
203 BitField< 8, 8, u32> const_g;
204 BitField<16, 8, u32> const_b;
205 BitField<24, 8, u32> const_a;
206 };
207
208 INSERT_PADDING_WORDS(0x1);
209 };
210
211 TevStageConfig tev_stage0;
212 INSERT_PADDING_WORDS(0x3);
213 TevStageConfig tev_stage1;
214 INSERT_PADDING_WORDS(0x3);
215 TevStageConfig tev_stage2;
216 INSERT_PADDING_WORDS(0x3);
217 TevStageConfig tev_stage3;
218 INSERT_PADDING_WORDS(0x13);
219 TevStageConfig tev_stage4;
220 INSERT_PADDING_WORDS(0x3);
221 TevStageConfig tev_stage5;
222 INSERT_PADDING_WORDS(0x13);
223
224 const std::array<Regs::TevStageConfig,6> GetTevStages() const {
225 return { tev_stage0, tev_stage1,
226 tev_stage2, tev_stage3,
227 tev_stage4, tev_stage5 };
228 };
98 229
99 struct { 230 struct {
100 enum ColorFormat : u32 { 231 enum ColorFormat : u32 {
@@ -403,6 +534,15 @@ struct Regs {
403 ADD_FIELD(viewport_depth_range); 534 ADD_FIELD(viewport_depth_range);
404 ADD_FIELD(viewport_depth_far_plane); 535 ADD_FIELD(viewport_depth_far_plane);
405 ADD_FIELD(viewport_corner); 536 ADD_FIELD(viewport_corner);
537 ADD_FIELD(texturing_enable);
538 ADD_FIELD(texture0);
539 ADD_FIELD(texture0_format);
540 ADD_FIELD(tev_stage0);
541 ADD_FIELD(tev_stage1);
542 ADD_FIELD(tev_stage2);
543 ADD_FIELD(tev_stage3);
544 ADD_FIELD(tev_stage4);
545 ADD_FIELD(tev_stage5);
406 ADD_FIELD(framebuffer); 546 ADD_FIELD(framebuffer);
407 ADD_FIELD(vertex_attributes); 547 ADD_FIELD(vertex_attributes);
408 ADD_FIELD(index_array); 548 ADD_FIELD(index_array);
@@ -460,6 +600,15 @@ ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e);
460ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); 600ASSERT_REG_POSITION(vs_output_attributes[0], 0x50);
461ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); 601ASSERT_REG_POSITION(vs_output_attributes[1], 0x51);
462ASSERT_REG_POSITION(viewport_corner, 0x68); 602ASSERT_REG_POSITION(viewport_corner, 0x68);
603ASSERT_REG_POSITION(texturing_enable, 0x80);
604ASSERT_REG_POSITION(texture0, 0x81);
605ASSERT_REG_POSITION(texture0_format, 0x8e);
606ASSERT_REG_POSITION(tev_stage0, 0xc0);
607ASSERT_REG_POSITION(tev_stage1, 0xc8);
608ASSERT_REG_POSITION(tev_stage2, 0xd0);
609ASSERT_REG_POSITION(tev_stage3, 0xd8);
610ASSERT_REG_POSITION(tev_stage4, 0xf0);
611ASSERT_REG_POSITION(tev_stage5, 0xf8);
463ASSERT_REG_POSITION(framebuffer, 0x110); 612ASSERT_REG_POSITION(framebuffer, 0x110);
464ASSERT_REG_POSITION(vertex_attributes, 0x200); 613ASSERT_REG_POSITION(vertex_attributes, 0x200);
465ASSERT_REG_POSITION(index_array, 0x227); 614ASSERT_REG_POSITION(index_array, 0x227);
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp
index 2354ffb99..dabf2d1a3 100644
--- a/src/video_core/primitive_assembly.cpp
+++ b/src/video_core/primitive_assembly.cpp
@@ -2,21 +2,23 @@
2// Licensed under GPLv2 2// Licensed under GPLv2
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "clipper.h"
6#include "pica.h" 5#include "pica.h"
7#include "primitive_assembly.h" 6#include "primitive_assembly.h"
8#include "vertex_shader.h" 7#include "vertex_shader.h"
9 8
10namespace Pica { 9#include "video_core/debug_utils/debug_utils.h"
11 10
12namespace PrimitiveAssembly { 11namespace Pica {
13 12
14static OutputVertex buffer[2]; 13template<typename VertexType>
15static int buffer_index = 0; // TODO: reset this on emulation restart 14PrimitiveAssembler<VertexType>::PrimitiveAssembler(Regs::TriangleTopology topology)
15 : topology(topology), buffer_index(0) {
16}
16 17
17void SubmitVertex(OutputVertex& vtx) 18template<typename VertexType>
19void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler)
18{ 20{
19 switch (registers.triangle_topology) { 21 switch (topology) {
20 case Regs::TriangleTopology::List: 22 case Regs::TriangleTopology::List:
21 case Regs::TriangleTopology::ListIndexed: 23 case Regs::TriangleTopology::ListIndexed:
22 if (buffer_index < 2) { 24 if (buffer_index < 2) {
@@ -24,7 +26,7 @@ void SubmitVertex(OutputVertex& vtx)
24 } else { 26 } else {
25 buffer_index = 0; 27 buffer_index = 0;
26 28
27 Clipper::ProcessTriangle(buffer[0], buffer[1], vtx); 29 triangle_handler(buffer[0], buffer[1], vtx);
28 } 30 }
29 break; 31 break;
30 32
@@ -32,7 +34,7 @@ void SubmitVertex(OutputVertex& vtx)
32 if (buffer_index == 2) { 34 if (buffer_index == 2) {
33 buffer_index = 0; 35 buffer_index = 0;
34 36
35 Clipper::ProcessTriangle(buffer[0], buffer[1], vtx); 37 triangle_handler(buffer[0], buffer[1], vtx);
36 38
37 buffer[1] = vtx; 39 buffer[1] = vtx;
38 } else { 40 } else {
@@ -41,11 +43,15 @@ void SubmitVertex(OutputVertex& vtx)
41 break; 43 break;
42 44
43 default: 45 default:
44 ERROR_LOG(GPU, "Unknown triangle mode %x:", (int)registers.triangle_topology.Value()); 46 ERROR_LOG(GPU, "Unknown triangle topology %x:", (int)topology);
45 break; 47 break;
46 } 48 }
47} 49}
48 50
49} // namespace 51// explicitly instantiate use cases
52template
53struct PrimitiveAssembler<VertexShader::OutputVertex>;
54template
55struct PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex>;
50 56
51} // namespace 57} // namespace
diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h
index 2a2b0c170..ea2e2f61e 100644
--- a/src/video_core/primitive_assembly.h
+++ b/src/video_core/primitive_assembly.h
@@ -4,18 +4,40 @@
4 4
5#pragma once 5#pragma once
6 6
7namespace Pica { 7#include <functional>
8 8
9namespace VertexShader { 9#include "video_core/pica.h"
10 struct OutputVertex;
11}
12 10
13namespace PrimitiveAssembly { 11#include "video_core/vertex_shader.h"
14 12
15using VertexShader::OutputVertex; 13namespace Pica {
16 14
17void SubmitVertex(OutputVertex& vtx); 15/*
16 * Utility class to build triangles from a series of vertices,
17 * according to a given triangle topology.
18 */
19template<typename VertexType>
20struct PrimitiveAssembler {
21 using TriangleHandler = std::function<void(VertexType& v0,
22 VertexType& v1,
23 VertexType& v2)>;
24
25 PrimitiveAssembler(Regs::TriangleTopology topology);
26
27 /*
28 * Queues a vertex, builds primitives from the vertex queue according to the given
29 * triangle topology, and calls triangle_handler for each generated primitive.
30 * NOTE: We could specify the triangle handler in the constructor, but this way we can
31 * keep event and handler code next to each other.
32 */
33 void SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler);
34
35private:
36 Regs::TriangleTopology topology;
37
38 int buffer_index;
39 VertexType buffer[2];
40};
18 41
19} // namespace
20 42
21} // namespace 43} // namespace
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index a7c1bab3e..cdfdb6215 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -11,6 +11,8 @@
11#include "rasterizer.h" 11#include "rasterizer.h"
12#include "vertex_shader.h" 12#include "vertex_shader.h"
13 13
14#include "debug_utils/debug_utils.h"
15
14namespace Pica { 16namespace Pica {
15 17
16namespace Rasterizer { 18namespace Rasterizer {
@@ -78,10 +80,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
78 u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); 80 u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
79 u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); 81 u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
80 82
81 min_x = min_x & Fix12P4::IntMask(); 83 min_x &= Fix12P4::IntMask();
82 min_y = min_y & Fix12P4::IntMask(); 84 min_y &= Fix12P4::IntMask();
83 max_x = (max_x + Fix12P4::FracMask()) & Fix12P4::IntMask(); 85 max_x = ((max_x + Fix12P4::FracMask()) & Fix12P4::IntMask());
84 max_y = (max_y + Fix12P4::FracMask()) & Fix12P4::IntMask(); 86 max_y = ((max_y + Fix12P4::FracMask()) & Fix12P4::IntMask());
85 87
86 // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not 88 // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not
87 // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias 89 // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias
@@ -112,10 +114,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
112 auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1, 114 auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,
113 const Math::Vec2<Fix12P4>& vtx2, 115 const Math::Vec2<Fix12P4>& vtx2,
114 const Math::Vec2<Fix12P4>& vtx3) { 116 const Math::Vec2<Fix12P4>& vtx3) {
115 const auto vec1 = (vtx2.Cast<int>() - vtx1.Cast<int>()).Append(0); 117 const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
116 const auto vec2 = (vtx3.Cast<int>() - vtx1.Cast<int>()).Append(0); 118 const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
117 // TODO: There is a very small chance this will overflow for sizeof(int) == 4 119 // TODO: There is a very small chance this will overflow for sizeof(int) == 4
118 return Cross(vec1, vec2).z; 120 return Math::Cross(vec1, vec2).z;
119 }; 121 };
120 122
121 int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); 123 int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
@@ -143,15 +145,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
143 // 145 //
144 // The generalization to three vertices is straightforward in baricentric coordinates. 146 // The generalization to three vertices is straightforward in baricentric coordinates.
145 auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { 147 auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {
146 auto attr_over_w = Math::MakeVec3(attr0 / v0.pos.w, 148 auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w,
147 attr1 / v1.pos.w, 149 attr1 / v1.pos.w,
148 attr2 / v2.pos.w); 150 attr2 / v2.pos.w);
149 auto w_inverse = Math::MakeVec3(float24::FromFloat32(1.f) / v0.pos.w, 151 auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w,
150 float24::FromFloat32(1.f) / v1.pos.w, 152 float24::FromFloat32(1.f) / v1.pos.w,
151 float24::FromFloat32(1.f) / v2.pos.w); 153 float24::FromFloat32(1.f) / v2.pos.w);
152 auto baricentric_coordinates = Math::MakeVec3(float24::FromFloat32(w0), 154 auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(w0),
153 float24::FromFloat32(w1), 155 float24::FromFloat32(w1),
154 float24::FromFloat32(w2)); 156 float24::FromFloat32(w2));
155 157
156 float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); 158 float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates);
157 float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); 159 float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates);
@@ -165,12 +167,196 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
165 (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) 167 (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)
166 }; 168 };
167 169
170 Math::Vec4<u8> texture_color{};
171 float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
172 float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
173 if (registers.texturing_enable) {
174 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
175 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
176 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
177 // texels are laid out in a 2x2 subtile like this:
178 // 2 3
179 // 0 1
180 //
181 // The full 8x8 tile has the texels arranged like this:
182 //
183 // 42 43 46 47 58 59 62 63
184 // 40 41 44 45 56 57 60 61
185 // 34 35 38 39 50 51 54 55
186 // 32 33 36 37 48 49 52 53
187 // 10 11 14 15 26 27 30 31
188 // 08 09 12 13 24 25 28 29
189 // 02 03 06 07 18 19 22 23
190 // 00 01 04 05 16 17 20 21
191
192 // TODO: This is currently hardcoded for RGB8
193 u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress());
194
195 // TODO(neobrain): Not sure if this swizzling pattern is used for all textures.
196 // To be flexible in case different but similar patterns are used, we keep this
197 // somewhat inefficient code around for now.
198 int s = (int)(u * float24::FromFloat32(registers.texture0.width)).ToFloat32();
199 int t = (int)(v * float24::FromFloat32(registers.texture0.height)).ToFloat32();
200 int texel_index_within_tile = 0;
201 for (int block_size_index = 0; block_size_index < 3; ++block_size_index) {
202 int sub_tile_width = 1 << block_size_index;
203 int sub_tile_height = 1 << block_size_index;
204
205 int sub_tile_index = (s & sub_tile_width) << block_size_index;
206 sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index);
207 texel_index_within_tile += sub_tile_index;
208 }
209
210 const int block_width = 8;
211 const int block_height = 8;
212
213 int coarse_s = (s / block_width) * block_width;
214 int coarse_t = (t / block_height) * block_height;
215
216 const int row_stride = registers.texture0.width * 3;
217 u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3;
218 texture_color.r() = source_ptr[2];
219 texture_color.g() = source_ptr[1];
220 texture_color.b() = source_ptr[0];
221 texture_color.a() = 0xFF;
222
223 DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data);
224 }
225
226 // Texture environment - consists of 6 stages of color and alpha combining.
227 //
228 // Color combiners take three input color values from some source (e.g. interpolated
229 // vertex color, texture color, previous stage, etc), perform some very simple
230 // operations on each of them (e.g. inversion) and then calculate the output color
231 // with some basic arithmetic. Alpha combiners can be configured separately but work
232 // analogously.
233 Math::Vec4<u8> combiner_output;
234 for (auto tev_stage : registers.GetTevStages()) {
235 using Source = Regs::TevStageConfig::Source;
236 using ColorModifier = Regs::TevStageConfig::ColorModifier;
237 using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
238 using Operation = Regs::TevStageConfig::Operation;
239
240 auto GetColorSource = [&](Source source) -> Math::Vec3<u8> {
241 switch (source) {
242 case Source::PrimaryColor:
243 return primary_color.rgb();
244
245 case Source::Texture0:
246 return texture_color.rgb();
247
248 case Source::Constant:
249 return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b};
250
251 case Source::Previous:
252 return combiner_output.rgb();
253
254 default:
255 ERROR_LOG(GPU, "Unknown color combiner source %d\n", (int)source);
256 return {};
257 }
258 };
259
260 auto GetAlphaSource = [&](Source source) -> u8 {
261 switch (source) {
262 case Source::PrimaryColor:
263 return primary_color.a();
264
265 case Source::Texture0:
266 return texture_color.a();
267
268 case Source::Constant:
269 return tev_stage.const_a;
270
271 case Source::Previous:
272 return combiner_output.a();
273
274 default:
275 ERROR_LOG(GPU, "Unknown alpha combiner source %d\n", (int)source);
276 return 0;
277 }
278 };
279
280 auto GetColorModifier = [](ColorModifier factor, const Math::Vec3<u8>& values) -> Math::Vec3<u8> {
281 switch (factor)
282 {
283 case ColorModifier::SourceColor:
284 return values;
285 default:
286 ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);
287 return {};
288 }
289 };
290
291 auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 {
292 switch (factor) {
293 case AlphaModifier::SourceAlpha:
294 return value;
295 default:
296 ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);
297 return 0;
298 }
299 };
300
301 auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
302 switch (op) {
303 case Operation::Replace:
304 return input[0];
305
306 case Operation::Modulate:
307 return ((input[0] * input[1]) / 255).Cast<u8>();
308
309 default:
310 ERROR_LOG(GPU, "Unknown color combiner operation %d\n", (int)op);
311 return {};
312 }
313 };
314
315 auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 {
316 switch (op) {
317 case Operation::Replace:
318 return input[0];
319
320 case Operation::Modulate:
321 return input[0] * input[1] / 255;
322
323 default:
324 ERROR_LOG(GPU, "Unknown alpha combiner operation %d\n", (int)op);
325 return 0;
326 }
327 };
328
329 // color combiner
330 // NOTE: Not sure if the alpha combiner might use the color output of the previous
331 // stage as input. Hence, we currently don't directly write the result to
332 // combiner_output.rgb(), but instead store it in a temporary variable until
333 // alpha combining has been done.
334 Math::Vec3<u8> color_result[3] = {
335 GetColorModifier(tev_stage.color_modifier1, GetColorSource(tev_stage.color_source1)),
336 GetColorModifier(tev_stage.color_modifier2, GetColorSource(tev_stage.color_source2)),
337 GetColorModifier(tev_stage.color_modifier3, GetColorSource(tev_stage.color_source3))
338 };
339 auto color_output = ColorCombine(tev_stage.color_op, color_result);
340
341 // alpha combiner
342 std::array<u8,3> alpha_result = {
343 GetAlphaModifier(tev_stage.alpha_modifier1, GetAlphaSource(tev_stage.alpha_source1)),
344 GetAlphaModifier(tev_stage.alpha_modifier2, GetAlphaSource(tev_stage.alpha_source2)),
345 GetAlphaModifier(tev_stage.alpha_modifier3, GetAlphaSource(tev_stage.alpha_source3))
346 };
347 auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
348
349 combiner_output = Math::MakeVec(color_output, alpha_output);
350 }
351
352 // TODO: Not sure if the multiplication by 65535 has already been taken care
353 // of when transforming to screen coordinates or not.
168 u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + 354 u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 +
169 (float)v1.screenpos[2].ToFloat32() * w1 + 355 (float)v1.screenpos[2].ToFloat32() * w1 +
170 (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536? 356 (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
171 SetDepth(x >> 4, y >> 4, z); 357 SetDepth(x >> 4, y >> 4, z);
172 358
173 DrawPixel(x >> 4, y >> 4, primary_color); 359 DrawPixel(x >> 4, y >> 4, combiner_output);
174 } 360 }
175 } 361 }
176} 362}
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 93830a96a..db8244317 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -4,6 +4,7 @@
4 4
5#include "pica.h" 5#include "pica.h"
6#include "vertex_shader.h" 6#include "vertex_shader.h"
7#include "debug_utils/debug_utils.h"
7#include <core/mem_map.h> 8#include <core/mem_map.h>
8#include <common/file_util.h> 9#include <common/file_util.h>
9 10
@@ -50,6 +51,11 @@ struct VertexShaderState {
50 }; 51 };
51 u32 call_stack[8]; // TODO: What is the maximal call stack depth? 52 u32 call_stack[8]; // TODO: What is the maximal call stack depth?
52 u32* call_stack_pointer; 53 u32* call_stack_pointer;
54
55 struct {
56 u32 max_offset; // maximum program counter ever reached
57 u32 max_opdesc_id; // maximum swizzle pattern index ever used
58 } debug;
53}; 59};
54 60
55static void ProcessShaderCode(VertexShaderState& state) { 61static void ProcessShaderCode(VertexShaderState& state) {
@@ -57,27 +63,34 @@ static void ProcessShaderCode(VertexShaderState& state) {
57 bool increment_pc = true; 63 bool increment_pc = true;
58 bool exit_loop = false; 64 bool exit_loop = false;
59 const Instruction& instr = *(const Instruction*)state.program_counter; 65 const Instruction& instr = *(const Instruction*)state.program_counter;
66 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory));
60 67
61 const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1] 68 const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()]
62 : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x 69 : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x
63 : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x 70 : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x
64 : nullptr;
65 const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2]
66 : &state.temporary_registers[instr.common.src2-0x10].x;
67 // TODO: Unsure about the limit values
68 float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest]
69 : (instr.common.dest <= 0x3C) ? nullptr
70 : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4]
71 : nullptr; 71 : nullptr;
72 const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2.GetIndex()]
73 : &state.temporary_registers[instr.common.src2.GetIndex()].x;
74 float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()]
75 : (instr.common.dest < 0x10) ? nullptr
76 : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0]
77 : nullptr;
72 78
73 const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; 79 const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];
80 const bool negate_src1 = swizzle.negate;
74 81
75 const float24 src1[4] = { 82 float24 src1[4] = {
76 src1_[(int)swizzle.GetSelectorSrc1(0)], 83 src1_[(int)swizzle.GetSelectorSrc1(0)],
77 src1_[(int)swizzle.GetSelectorSrc1(1)], 84 src1_[(int)swizzle.GetSelectorSrc1(1)],
78 src1_[(int)swizzle.GetSelectorSrc1(2)], 85 src1_[(int)swizzle.GetSelectorSrc1(2)],
79 src1_[(int)swizzle.GetSelectorSrc1(3)], 86 src1_[(int)swizzle.GetSelectorSrc1(3)],
80 }; 87 };
88 if (negate_src1) {
89 src1[0] = src1[0] * float24::FromFloat32(-1);
90 src1[1] = src1[1] * float24::FromFloat32(-1);
91 src1[2] = src1[2] * float24::FromFloat32(-1);
92 src1[3] = src1[3] * float24::FromFloat32(-1);
93 }
81 const float24 src2[4] = { 94 const float24 src2[4] = {
82 src2_[(int)swizzle.GetSelectorSrc2(0)], 95 src2_[(int)swizzle.GetSelectorSrc2(0)],
83 src2_[(int)swizzle.GetSelectorSrc2(1)], 96 src2_[(int)swizzle.GetSelectorSrc2(1)],
@@ -88,6 +101,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
88 switch (instr.opcode) { 101 switch (instr.opcode) {
89 case Instruction::OpCode::ADD: 102 case Instruction::OpCode::ADD:
90 { 103 {
104 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
91 for (int i = 0; i < 4; ++i) { 105 for (int i = 0; i < 4; ++i) {
92 if (!swizzle.DestComponentEnabled(i)) 106 if (!swizzle.DestComponentEnabled(i))
93 continue; 107 continue;
@@ -100,6 +114,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
100 114
101 case Instruction::OpCode::MUL: 115 case Instruction::OpCode::MUL:
102 { 116 {
117 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
103 for (int i = 0; i < 4; ++i) { 118 for (int i = 0; i < 4; ++i) {
104 if (!swizzle.DestComponentEnabled(i)) 119 if (!swizzle.DestComponentEnabled(i))
105 continue; 120 continue;
@@ -113,6 +128,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
113 case Instruction::OpCode::DP3: 128 case Instruction::OpCode::DP3:
114 case Instruction::OpCode::DP4: 129 case Instruction::OpCode::DP4:
115 { 130 {
131 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
116 float24 dot = float24::FromFloat32(0.f); 132 float24 dot = float24::FromFloat32(0.f);
117 int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; 133 int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
118 for (int i = 0; i < num_components; ++i) 134 for (int i = 0; i < num_components; ++i)
@@ -130,6 +146,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
130 // Reciprocal 146 // Reciprocal
131 case Instruction::OpCode::RCP: 147 case Instruction::OpCode::RCP:
132 { 148 {
149 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
133 for (int i = 0; i < 4; ++i) { 150 for (int i = 0; i < 4; ++i) {
134 if (!swizzle.DestComponentEnabled(i)) 151 if (!swizzle.DestComponentEnabled(i))
135 continue; 152 continue;
@@ -145,6 +162,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
145 // Reciprocal Square Root 162 // Reciprocal Square Root
146 case Instruction::OpCode::RSQ: 163 case Instruction::OpCode::RSQ:
147 { 164 {
165 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
148 for (int i = 0; i < 4; ++i) { 166 for (int i = 0; i < 4; ++i) {
149 if (!swizzle.DestComponentEnabled(i)) 167 if (!swizzle.DestComponentEnabled(i))
150 continue; 168 continue;
@@ -159,6 +177,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
159 177
160 case Instruction::OpCode::MOV: 178 case Instruction::OpCode::MOV:
161 { 179 {
180 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
162 for (int i = 0; i < 4; ++i) { 181 for (int i = 0; i < 4; ++i) {
163 if (!swizzle.DestComponentEnabled(i)) 182 if (!swizzle.DestComponentEnabled(i))
164 continue; 183 continue;
@@ -172,8 +191,9 @@ static void ProcessShaderCode(VertexShaderState& state) {
172 if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { 191 if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) {
173 exit_loop = true; 192 exit_loop = true;
174 } else { 193 } else {
175 state.program_counter = &shader_memory[*state.call_stack_pointer--]; 194 // Jump back to call stack position, invalidate call stack entry, move up call stack pointer
176 *state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS; 195 state.program_counter = &shader_memory[*state.call_stack_pointer];
196 *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS;
177 } 197 }
178 198
179 break; 199 break;
@@ -212,6 +232,8 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes)
212 232
213 const u32* main = &shader_memory[registers.vs_main_offset]; 233 const u32* main = &shader_memory[registers.vs_main_offset];
214 state.program_counter = (u32*)main; 234 state.program_counter = (u32*)main;
235 state.debug.max_offset = 0;
236 state.debug.max_opdesc_id = 0;
215 237
216 // Setup input register table 238 // Setup input register table
217 const auto& attribute_register_map = registers.vs_input_register_map; 239 const auto& attribute_register_map = registers.vs_input_register_map;
@@ -255,6 +277,9 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes)
255 state.call_stack_pointer = &state.call_stack[0]; 277 state.call_stack_pointer = &state.call_stack[0];
256 278
257 ProcessShaderCode(state); 279 ProcessShaderCode(state);
280 DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data,
281 state.debug.max_opdesc_id, registers.vs_main_offset,
282 registers.vs_output_attributes);
258 283
259 DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", 284 DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
260 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), 285 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h
index 1b71e367b..847fdc450 100644
--- a/src/video_core/vertex_shader.h
+++ b/src/video_core/vertex_shader.h
@@ -27,7 +27,6 @@ struct OutputVertex {
27 Math::Vec4<float24> dummy; // quaternions (not implemented, yet) 27 Math::Vec4<float24> dummy; // quaternions (not implemented, yet)
28 Math::Vec4<float24> color; 28 Math::Vec4<float24> color;
29 Math::Vec2<float24> tc0; 29 Math::Vec2<float24> tc0;
30 float24 tc0_v;
31 30
32 // Padding for optimal alignment 31 // Padding for optimal alignment
33 float24 pad[14]; 32 float24 pad[14];
@@ -36,6 +35,7 @@ struct OutputVertex {
36 35
37 // position after perspective divide 36 // position after perspective divide
38 Math::Vec3<float24> screenpos; 37 Math::Vec3<float24> screenpos;
38 float24 pad2;
39 39
40 // Linear interpolation 40 // Linear interpolation
41 // factor: 0=this, 1=vtx 41 // factor: 0=this, 1=vtx
@@ -59,6 +59,7 @@ struct OutputVertex {
59 } 59 }
60}; 60};
61static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); 61static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
62static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
62 63
63union Instruction { 64union Instruction {
64 enum class OpCode : u32 { 65 enum class OpCode : u32 {
@@ -117,9 +118,78 @@ union Instruction {
117 // while "dest" addresses individual floats. 118 // while "dest" addresses individual floats.
118 union { 119 union {
119 BitField<0x00, 0x5, u32> operand_desc_id; 120 BitField<0x00, 0x5, u32> operand_desc_id;
120 BitField<0x07, 0x5, u32> src2; 121
121 BitField<0x0c, 0x7, u32> src1; 122 template<class BitFieldType>
122 BitField<0x13, 0x7, u32> dest; 123 struct SourceRegister : BitFieldType {
124 enum RegisterType {
125 Input,
126 Temporary,
127 FloatUniform
128 };
129
130 RegisterType GetRegisterType() const {
131 if (BitFieldType::Value() < 0x10)
132 return Input;
133 else if (BitFieldType::Value() < 0x20)
134 return Temporary;
135 else
136 return FloatUniform;
137 }
138
139 int GetIndex() const {
140 if (GetRegisterType() == Input)
141 return BitFieldType::Value();
142 else if (GetRegisterType() == Temporary)
143 return BitFieldType::Value() - 0x10;
144 else if (GetRegisterType() == FloatUniform)
145 return BitFieldType::Value() - 0x20;
146 }
147
148 std::string GetRegisterName() const {
149 std::map<RegisterType, std::string> type = {
150 { Input, "i" },
151 { Temporary, "t" },
152 { FloatUniform, "f" },
153 };
154 return type[GetRegisterType()] + std::to_string(GetIndex());
155 }
156 };
157
158 SourceRegister<BitField<0x07, 0x5, u32>> src2;
159 SourceRegister<BitField<0x0c, 0x7, u32>> src1;
160
161 struct : BitField<0x15, 0x5, u32>
162 {
163 enum RegisterType {
164 Output,
165 Temporary,
166 Unknown
167 };
168 RegisterType GetRegisterType() const {
169 if (Value() < 0x8)
170 return Output;
171 else if (Value() < 0x10)
172 return Unknown;
173 else
174 return Temporary;
175 }
176 int GetIndex() const {
177 if (GetRegisterType() == Output)
178 return Value();
179 else if (GetRegisterType() == Temporary)
180 return Value() - 0x10;
181 else
182 return Value();
183 }
184 std::string GetRegisterName() const {
185 std::map<RegisterType, std::string> type = {
186 { Output, "o" },
187 { Temporary, "t" },
188 { Unknown, "u" }
189 };
190 return type[GetRegisterType()] + std::to_string(GetIndex());
191 }
192 } dest;
123 } common; 193 } common;
124 194
125 // Format used for flow control instructions ("if") 195 // Format used for flow control instructions ("if")
@@ -128,6 +198,7 @@ union Instruction {
128 BitField<0x0a, 0xc, u32> offset_words; 198 BitField<0x0a, 0xc, u32> offset_words;
129 } flow_control; 199 } flow_control;
130}; 200};
201static_assert(std::is_standard_layout<Instruction>::value, "Structure is not using standard layout!");
131 202
132union SwizzlePattern { 203union SwizzlePattern {
133 u32 hex; 204 u32 hex;
@@ -185,6 +256,8 @@ union SwizzlePattern {
185 // Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x 256 // Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x
186 BitField< 0, 4, u32> dest_mask; 257 BitField< 0, 4, u32> dest_mask;
187 258
259 BitField< 4, 1, u32> negate; // negates src1
260
188 BitField< 5, 2, Selector> src1_selector_3; 261 BitField< 5, 2, Selector> src1_selector_3;
189 BitField< 7, 2, Selector> src1_selector_2; 262 BitField< 7, 2, Selector> src1_selector_2;
190 BitField< 9, 2, Selector> src1_selector_1; 263 BitField< 9, 2, Selector> src1_selector_1;
diff --git a/src/video_core/video_core.vcxproj b/src/video_core/video_core.vcxproj
index 48d77cdc4..4e129fbe7 100644
--- a/src/video_core/video_core.vcxproj
+++ b/src/video_core/video_core.vcxproj
@@ -19,6 +19,7 @@
19 </ProjectConfiguration> 19 </ProjectConfiguration>
20 </ItemGroup> 20 </ItemGroup>
21 <ItemGroup> 21 <ItemGroup>
22 <ClCompile Include="debug_utils\debug_utils.cpp" />
22 <ClCompile Include="renderer_opengl\renderer_opengl.cpp" /> 23 <ClCompile Include="renderer_opengl\renderer_opengl.cpp" />
23 <ClCompile Include="clipper.cpp" /> 24 <ClCompile Include="clipper.cpp" />
24 <ClCompile Include="command_processor.cpp" /> 25 <ClCompile Include="command_processor.cpp" />
@@ -40,6 +41,7 @@
40 <ClInclude Include="utils.h" /> 41 <ClInclude Include="utils.h" />
41 <ClInclude Include="vertex_shader.h" /> 42 <ClInclude Include="vertex_shader.h" />
42 <ClInclude Include="video_core.h" /> 43 <ClInclude Include="video_core.h" />
44 <ClInclude Include="debug_utils\debug_utils.h" />
43 <ClInclude Include="renderer_opengl\renderer_opengl.h" /> 45 <ClInclude Include="renderer_opengl\renderer_opengl.h" />
44 </ItemGroup> 46 </ItemGroup>
45 <ItemGroup> 47 <ItemGroup>
diff --git a/src/video_core/video_core.vcxproj.filters b/src/video_core/video_core.vcxproj.filters
index 31af4f1df..90541aca0 100644
--- a/src/video_core/video_core.vcxproj.filters
+++ b/src/video_core/video_core.vcxproj.filters
@@ -4,6 +4,9 @@
4 <Filter Include="renderer_opengl"> 4 <Filter Include="renderer_opengl">
5 <UniqueIdentifier>{e0245557-dbd4-423e-9399-513d5e99f1e4}</UniqueIdentifier> 5 <UniqueIdentifier>{e0245557-dbd4-423e-9399-513d5e99f1e4}</UniqueIdentifier>
6 </Filter> 6 </Filter>
7 <Filter Include="debug_utils">
8 <UniqueIdentifier>{0ac498e6-bbd8-46e3-9d5f-e816546ab90e}</UniqueIdentifier>
9 </Filter>
7 </ItemGroup> 10 </ItemGroup>
8 <ItemGroup> 11 <ItemGroup>
9 <ClCompile Include="renderer_opengl\renderer_opengl.cpp"> 12 <ClCompile Include="renderer_opengl\renderer_opengl.cpp">
@@ -16,11 +19,11 @@
16 <ClCompile Include="utils.cpp" /> 19 <ClCompile Include="utils.cpp" />
17 <ClCompile Include="vertex_shader.cpp" /> 20 <ClCompile Include="vertex_shader.cpp" />
18 <ClCompile Include="video_core.cpp" /> 21 <ClCompile Include="video_core.cpp" />
22 <ClCompile Include="debug_utils\debug_utils.cpp">
23 <Filter>debug_utils</Filter>
24 </ClCompile>
19 </ItemGroup> 25 </ItemGroup>
20 <ItemGroup> 26 <ItemGroup>
21 <ClInclude Include="renderer_opengl\renderer_opengl.h">
22 <Filter>renderer_opengl</Filter>
23 </ClInclude>
24 <ClInclude Include="clipper.h" /> 27 <ClInclude Include="clipper.h" />
25 <ClInclude Include="command_processor.h" /> 28 <ClInclude Include="command_processor.h" />
26 <ClInclude Include="gpu_debugger.h" /> 29 <ClInclude Include="gpu_debugger.h" />
@@ -32,8 +35,12 @@
32 <ClInclude Include="utils.h" /> 35 <ClInclude Include="utils.h" />
33 <ClInclude Include="vertex_shader.h" /> 36 <ClInclude Include="vertex_shader.h" />
34 <ClInclude Include="video_core.h" /> 37 <ClInclude Include="video_core.h" />
38 <ClInclude Include="renderer_opengl\renderer_opengl.h" />
39 <ClInclude Include="debug_utils\debug_utils.h">
40 <Filter>debug_utils</Filter>
41 </ClInclude>
35 </ItemGroup> 42 </ItemGroup>
36 <ItemGroup> 43 <ItemGroup>
37 <Text Include="CMakeLists.txt" /> 44 <Text Include="CMakeLists.txt" />
38 </ItemGroup> 45 </ItemGroup>
39</Project> 46</Project> \ No newline at end of file