diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 44 | ||||
| -rw-r--r-- | src/video_core/command_processor.h | 13 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 522 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.h | 66 | ||||
| -rw-r--r-- | src/video_core/gpu_debugger.h | 63 | ||||
| -rw-r--r-- | src/video_core/math.h | 233 | ||||
| -rw-r--r-- | src/video_core/pica.h | 153 | ||||
| -rw-r--r-- | src/video_core/primitive_assembly.cpp | 28 | ||||
| -rw-r--r-- | src/video_core/primitive_assembly.h | 38 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 222 | ||||
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 51 | ||||
| -rw-r--r-- | src/video_core/vertex_shader.h | 81 | ||||
| -rw-r--r-- | src/video_core/video_core.vcxproj | 2 | ||||
| -rw-r--r-- | src/video_core/video_core.vcxproj.filters | 15 |
15 files changed, 1308 insertions, 225 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 8e7b93acb..71a1b5ecc 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -5,6 +5,7 @@ set(SRCS clipper.cpp | |||
| 5 | utils.cpp | 5 | utils.cpp |
| 6 | vertex_shader.cpp | 6 | vertex_shader.cpp |
| 7 | video_core.cpp | 7 | video_core.cpp |
| 8 | debug_utils/debug_utils.cpp | ||
| 8 | renderer_opengl/renderer_opengl.cpp) | 9 | renderer_opengl/renderer_opengl.cpp) |
| 9 | 10 | ||
| 10 | set(HEADERS clipper.h | 11 | set(HEADERS clipper.h |
| @@ -17,6 +18,7 @@ set(HEADERS clipper.h | |||
| 17 | renderer_base.h | 18 | renderer_base.h |
| 18 | vertex_shader.h | 19 | vertex_shader.h |
| 19 | video_core.h | 20 | video_core.h |
| 21 | debug_utils/debug_utils.h | ||
| 20 | renderer_opengl/renderer_opengl.h) | 22 | renderer_opengl/renderer_opengl.h) |
| 21 | 23 | ||
| 22 | add_library(video_core STATIC ${SRCS} ${HEADERS}) | 24 | add_library(video_core STATIC ${SRCS} ${HEADERS}) |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 020a4da3f..9567a9849 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -2,12 +2,14 @@ | |||
| 2 | // Licensed under GPLv2 | 2 | // Licensed under GPLv2 |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "clipper.h" | ||
| 5 | #include "command_processor.h" | 6 | #include "command_processor.h" |
| 6 | #include "math.h" | 7 | #include "math.h" |
| 7 | #include "pica.h" | 8 | #include "pica.h" |
| 8 | #include "primitive_assembly.h" | 9 | #include "primitive_assembly.h" |
| 9 | #include "vertex_shader.h" | 10 | #include "vertex_shader.h" |
| 10 | 11 | ||
| 12 | #include "debug_utils/debug_utils.h" | ||
| 11 | 13 | ||
| 12 | namespace Pica { | 14 | namespace Pica { |
| 13 | 15 | ||
| @@ -23,15 +25,24 @@ static u32 uniform_write_buffer[4]; | |||
| 23 | static u32 vs_binary_write_offset = 0; | 25 | static u32 vs_binary_write_offset = 0; |
| 24 | static u32 vs_swizzle_write_offset = 0; | 26 | static u32 vs_swizzle_write_offset = 0; |
| 25 | 27 | ||
| 26 | static inline void WritePicaReg(u32 id, u32 value) { | 28 | static inline void WritePicaReg(u32 id, u32 value, u32 mask) { |
| 29 | |||
| 30 | if (id >= registers.NumIds()) | ||
| 31 | return; | ||
| 32 | |||
| 33 | // TODO: Figure out how register masking acts on e.g. vs_uniform_setup.set_value | ||
| 27 | u32 old_value = registers[id]; | 34 | u32 old_value = registers[id]; |
| 28 | registers[id] = value; | 35 | registers[id] = (old_value & ~mask) | (value & mask); |
| 36 | |||
| 37 | DebugUtils::OnPicaRegWrite(id, registers[id]); | ||
| 29 | 38 | ||
| 30 | switch(id) { | 39 | switch(id) { |
| 31 | // It seems like these trigger vertex rendering | 40 | // It seems like these trigger vertex rendering |
| 32 | case PICA_REG_INDEX(trigger_draw): | 41 | case PICA_REG_INDEX(trigger_draw): |
| 33 | case PICA_REG_INDEX(trigger_draw_indexed): | 42 | case PICA_REG_INDEX(trigger_draw_indexed): |
| 34 | { | 43 | { |
| 44 | DebugUtils::DumpTevStageConfig(registers.GetTevStages()); | ||
| 45 | |||
| 35 | const auto& attribute_config = registers.vertex_attributes; | 46 | const auto& attribute_config = registers.vertex_attributes; |
| 36 | const u8* const base_address = Memory::GetPointer(attribute_config.GetBaseAddress()); | 47 | const u8* const base_address = Memory::GetPointer(attribute_config.GetBaseAddress()); |
| 37 | 48 | ||
| @@ -68,6 +79,10 @@ static inline void WritePicaReg(u32 id, u32 value) { | |||
| 68 | const u16* index_address_16 = (u16*)index_address_8; | 79 | const u16* index_address_16 = (u16*)index_address_8; |
| 69 | bool index_u16 = (bool)index_info.format; | 80 | bool index_u16 = (bool)index_info.format; |
| 70 | 81 | ||
| 82 | DebugUtils::GeometryDumper geometry_dumper; | ||
| 83 | PrimitiveAssembler<VertexShader::OutputVertex> clipper_primitive_assembler(registers.triangle_topology.Value()); | ||
| 84 | PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex> dumping_primitive_assembler(registers.triangle_topology.Value()); | ||
| 85 | |||
| 71 | for (int index = 0; index < registers.num_vertices; ++index) | 86 | for (int index = 0; index < registers.num_vertices; ++index) |
| 72 | { | 87 | { |
| 73 | int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; | 88 | int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; |
| @@ -95,14 +110,28 @@ static inline void WritePicaReg(u32 id, u32 value) { | |||
| 95 | input.attr[i][comp].ToFloat32()); | 110 | input.attr[i][comp].ToFloat32()); |
| 96 | } | 111 | } |
| 97 | } | 112 | } |
| 113 | |||
| 114 | // NOTE: When dumping geometry, we simply assume that the first input attribute | ||
| 115 | // corresponds to the position for now. | ||
| 116 | DebugUtils::GeometryDumper::Vertex dumped_vertex = { | ||
| 117 | input.attr[0][0].ToFloat32(), input.attr[0][1].ToFloat32(), input.attr[0][2].ToFloat32() | ||
| 118 | }; | ||
| 119 | using namespace std::placeholders; | ||
| 120 | dumping_primitive_assembler.SubmitVertex(dumped_vertex, | ||
| 121 | std::bind(&DebugUtils::GeometryDumper::AddTriangle, | ||
| 122 | &geometry_dumper, _1, _2, _3)); | ||
| 123 | |||
| 124 | // Send to vertex shader | ||
| 98 | VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes()); | 125 | VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes()); |
| 99 | 126 | ||
| 100 | if (is_indexed) { | 127 | if (is_indexed) { |
| 101 | // TODO: Add processed vertex to vertex cache! | 128 | // TODO: Add processed vertex to vertex cache! |
| 102 | } | 129 | } |
| 103 | 130 | ||
| 104 | PrimitiveAssembly::SubmitVertex(output); | 131 | // Send to triangle clipper |
| 132 | clipper_primitive_assembler.SubmitVertex(output, Clipper::ProcessTriangle); | ||
| 105 | } | 133 | } |
| 134 | geometry_dumper.Dump(); | ||
| 106 | break; | 135 | break; |
| 107 | } | 136 | } |
| 108 | 137 | ||
| @@ -207,14 +236,17 @@ static std::ptrdiff_t ExecuteCommandBlock(const u32* first_command_word) { | |||
| 207 | 236 | ||
| 208 | u32* read_pointer = (u32*)first_command_word; | 237 | u32* read_pointer = (u32*)first_command_word; |
| 209 | 238 | ||
| 210 | // TODO: Take parameter mask into consideration! | 239 | const u32 write_mask = ((header.parameter_mask & 0x1) ? (0xFFu << 0) : 0u) | |
| 240 | ((header.parameter_mask & 0x2) ? (0xFFu << 8) : 0u) | | ||
| 241 | ((header.parameter_mask & 0x4) ? (0xFFu << 16) : 0u) | | ||
| 242 | ((header.parameter_mask & 0x8) ? (0xFFu << 24) : 0u); | ||
| 211 | 243 | ||
| 212 | WritePicaReg(header.cmd_id, *read_pointer); | 244 | WritePicaReg(header.cmd_id, *read_pointer, write_mask); |
| 213 | read_pointer += 2; | 245 | read_pointer += 2; |
| 214 | 246 | ||
| 215 | for (int i = 1; i < 1+header.extra_data_length; ++i) { | 247 | for (int i = 1; i < 1+header.extra_data_length; ++i) { |
| 216 | u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0); | 248 | u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0); |
| 217 | WritePicaReg(cmd, *read_pointer); | 249 | WritePicaReg(cmd, *read_pointer, write_mask); |
| 218 | ++read_pointer; | 250 | ++read_pointer; |
| 219 | } | 251 | } |
| 220 | 252 | ||
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h index 6b6241a25..955f9daec 100644 --- a/src/video_core/command_processor.h +++ b/src/video_core/command_processor.h | |||
| @@ -17,11 +17,22 @@ union CommandHeader { | |||
| 17 | u32 hex; | 17 | u32 hex; |
| 18 | 18 | ||
| 19 | BitField< 0, 16, u32> cmd_id; | 19 | BitField< 0, 16, u32> cmd_id; |
| 20 | |||
| 21 | // parameter_mask: | ||
| 22 | // Mask applied to the input value to make it possible to update | ||
| 23 | // parts of a register without overwriting its other fields. | ||
| 24 | // first bit: 0x000000FF | ||
| 25 | // second bit: 0x0000FF00 | ||
| 26 | // third bit: 0x00FF0000 | ||
| 27 | // fourth bit: 0xFF000000 | ||
| 20 | BitField<16, 4, u32> parameter_mask; | 28 | BitField<16, 4, u32> parameter_mask; |
| 29 | |||
| 21 | BitField<20, 11, u32> extra_data_length; | 30 | BitField<20, 11, u32> extra_data_length; |
| 31 | |||
| 22 | BitField<31, 1, u32> group_commands; | 32 | BitField<31, 1, u32> group_commands; |
| 23 | }; | 33 | }; |
| 24 | static_assert(std::is_standard_layout<CommandHeader>::value == true, "CommandHeader does not use standard layout"); | 34 | static_assert(std::is_standard_layout<CommandHeader>::value == true, |
| 35 | "CommandHeader does not use standard layout"); | ||
| 25 | static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); | 36 | static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); |
| 26 | 37 | ||
| 27 | void ProcessCommandList(const u32* list, u32 size); | 38 | void ProcessCommandList(const u32* list, u32 size); |
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp new file mode 100644 index 000000000..48e6dd182 --- /dev/null +++ b/src/video_core/debug_utils/debug_utils.cpp | |||
| @@ -0,0 +1,522 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
#include <algorithm>
#include <atomic>
#include <fstream>
#include <map>
#include <mutex>
#include <string>
| 10 | |||
| 11 | #ifdef HAVE_PNG | ||
| 12 | #include <png.h> | ||
| 13 | #endif | ||
| 14 | |||
| 15 | #include "common/file_util.h" | ||
| 16 | |||
| 17 | #include "video_core/pica.h" | ||
| 18 | |||
| 19 | #include "debug_utils.h" | ||
| 20 | |||
| 21 | namespace Pica { | ||
| 22 | |||
| 23 | namespace DebugUtils { | ||
| 24 | |||
| 25 | void GeometryDumper::AddTriangle(Vertex& v0, Vertex& v1, Vertex& v2) { | ||
| 26 | vertices.push_back(v0); | ||
| 27 | vertices.push_back(v1); | ||
| 28 | vertices.push_back(v2); | ||
| 29 | |||
| 30 | int num_vertices = vertices.size(); | ||
| 31 | faces.push_back({ num_vertices-3, num_vertices-2, num_vertices-1 }); | ||
| 32 | } | ||
| 33 | |||
| 34 | void GeometryDumper::Dump() { | ||
| 35 | // NOTE: Permanently enabling this just trashes the hard disk for no reason. | ||
| 36 | // Hence, this is currently disabled. | ||
| 37 | return; | ||
| 38 | |||
| 39 | static int index = 0; | ||
| 40 | std::string filename = std::string("geometry_dump") + std::to_string(++index) + ".obj"; | ||
| 41 | |||
| 42 | std::ofstream file(filename); | ||
| 43 | |||
| 44 | for (const auto& vertex : vertices) { | ||
| 45 | file << "v " << vertex.pos[0] | ||
| 46 | << " " << vertex.pos[1] | ||
| 47 | << " " << vertex.pos[2] << std::endl; | ||
| 48 | } | ||
| 49 | |||
| 50 | for (const Face& face : faces) { | ||
| 51 | file << "f " << 1+face.index[0] | ||
| 52 | << " " << 1+face.index[1] | ||
| 53 | << " " << 1+face.index[2] << std::endl; | ||
| 54 | } | ||
| 55 | } | ||
| 56 | |||
| 57 | #pragma pack(1) | ||
| 58 | struct DVLBHeader { | ||
| 59 | enum : u32 { | ||
| 60 | MAGIC_WORD = 0x424C5644, // "DVLB" | ||
| 61 | }; | ||
| 62 | |||
| 63 | u32 magic_word; | ||
| 64 | u32 num_programs; | ||
| 65 | // u32 dvle_offset_table[]; | ||
| 66 | }; | ||
| 67 | static_assert(sizeof(DVLBHeader) == 0x8, "Incorrect structure size"); | ||
| 68 | |||
| 69 | struct DVLPHeader { | ||
| 70 | enum : u32 { | ||
| 71 | MAGIC_WORD = 0x504C5644, // "DVLP" | ||
| 72 | }; | ||
| 73 | |||
| 74 | u32 magic_word; | ||
| 75 | u32 version; | ||
| 76 | u32 binary_offset; // relative to DVLP start | ||
| 77 | u32 binary_size_words; | ||
| 78 | u32 swizzle_patterns_offset; | ||
| 79 | u32 swizzle_patterns_num_entries; | ||
| 80 | u32 unk2; | ||
| 81 | }; | ||
| 82 | static_assert(sizeof(DVLPHeader) == 0x1C, "Incorrect structure size"); | ||
| 83 | |||
| 84 | struct DVLEHeader { | ||
| 85 | enum : u32 { | ||
| 86 | MAGIC_WORD = 0x454c5644, // "DVLE" | ||
| 87 | }; | ||
| 88 | |||
| 89 | enum class ShaderType : u8 { | ||
| 90 | VERTEX = 0, | ||
| 91 | GEOMETRY = 1, | ||
| 92 | }; | ||
| 93 | |||
| 94 | u32 magic_word; | ||
| 95 | u16 pad1; | ||
| 96 | ShaderType type; | ||
| 97 | u8 pad2; | ||
| 98 | u32 main_offset_words; // offset within binary blob | ||
| 99 | u32 endmain_offset_words; | ||
| 100 | u32 pad3; | ||
| 101 | u32 pad4; | ||
| 102 | u32 constant_table_offset; | ||
| 103 | u32 constant_table_size; // number of entries | ||
| 104 | u32 label_table_offset; | ||
| 105 | u32 label_table_size; | ||
| 106 | u32 output_register_table_offset; | ||
| 107 | u32 output_register_table_size; | ||
| 108 | u32 uniform_table_offset; | ||
| 109 | u32 uniform_table_size; | ||
| 110 | u32 symbol_table_offset; | ||
| 111 | u32 symbol_table_size; | ||
| 112 | |||
| 113 | }; | ||
| 114 | static_assert(sizeof(DVLEHeader) == 0x40, "Incorrect structure size"); | ||
| 115 | #pragma pack() | ||
| 116 | |||
| 117 | void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, | ||
| 118 | u32 main_offset, const Regs::VSOutputAttributes* output_attributes) | ||
| 119 | { | ||
| 120 | // NOTE: Permanently enabling this just trashes hard disks for no reason. | ||
| 121 | // Hence, this is currently disabled. | ||
| 122 | return; | ||
| 123 | |||
| 124 | struct StuffToWrite { | ||
| 125 | u8* pointer; | ||
| 126 | u32 size; | ||
| 127 | }; | ||
| 128 | std::vector<StuffToWrite> writing_queue; | ||
| 129 | u32 write_offset = 0; | ||
| 130 | |||
| 131 | auto QueueForWriting = [&writing_queue,&write_offset](u8* pointer, u32 size) { | ||
| 132 | writing_queue.push_back({pointer, size}); | ||
| 133 | u32 old_write_offset = write_offset; | ||
| 134 | write_offset += size; | ||
| 135 | return old_write_offset; | ||
| 136 | }; | ||
| 137 | |||
| 138 | // First off, try to translate Pica state (one enum for output attribute type and component) | ||
| 139 | // into shbin format (separate type and component mask). | ||
| 140 | union OutputRegisterInfo { | ||
| 141 | enum Type : u64 { | ||
| 142 | POSITION = 0, | ||
| 143 | COLOR = 2, | ||
| 144 | TEXCOORD0 = 3, | ||
| 145 | TEXCOORD1 = 5, | ||
| 146 | TEXCOORD2 = 6, | ||
| 147 | }; | ||
| 148 | |||
| 149 | BitField< 0, 64, u64> hex; | ||
| 150 | |||
| 151 | BitField< 0, 16, Type> type; | ||
| 152 | BitField<16, 16, u64> id; | ||
| 153 | BitField<32, 4, u64> component_mask; | ||
| 154 | }; | ||
| 155 | |||
| 156 | // This is put into a try-catch block to make sure we notice unknown configurations. | ||
| 157 | std::vector<OutputRegisterInfo> output_info_table; | ||
| 158 | for (int i = 0; i < 7; ++i) { | ||
| 159 | using OutputAttributes = Pica::Regs::VSOutputAttributes; | ||
| 160 | |||
| 161 | // TODO: It's still unclear how the attribute components map to the register! | ||
| 162 | // Once we know that, this code probably will not make much sense anymore. | ||
| 163 | std::map<OutputAttributes::Semantic, std::pair<OutputRegisterInfo::Type, u32> > map = { | ||
| 164 | { OutputAttributes::POSITION_X, { OutputRegisterInfo::POSITION, 1} }, | ||
| 165 | { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} }, | ||
| 166 | { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} }, | ||
| 167 | { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} }, | ||
| 168 | { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} }, | ||
| 169 | { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} }, | ||
| 170 | { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} }, | ||
| 171 | { OutputAttributes::COLOR_A, { OutputRegisterInfo::COLOR, 8} }, | ||
| 172 | { OutputAttributes::TEXCOORD0_U, { OutputRegisterInfo::TEXCOORD0, 1} }, | ||
| 173 | { OutputAttributes::TEXCOORD0_V, { OutputRegisterInfo::TEXCOORD0, 2} }, | ||
| 174 | { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} }, | ||
| 175 | { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} }, | ||
| 176 | { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} }, | ||
| 177 | { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} } | ||
| 178 | }; | ||
| 179 | |||
| 180 | for (const auto& semantic : std::vector<OutputAttributes::Semantic>{ | ||
| 181 | output_attributes[i].map_x, | ||
| 182 | output_attributes[i].map_y, | ||
| 183 | output_attributes[i].map_z, | ||
| 184 | output_attributes[i].map_w }) { | ||
| 185 | if (semantic == OutputAttributes::INVALID) | ||
| 186 | continue; | ||
| 187 | |||
| 188 | try { | ||
| 189 | OutputRegisterInfo::Type type = map.at(semantic).first; | ||
| 190 | u32 component_mask = map.at(semantic).second; | ||
| 191 | |||
| 192 | auto it = std::find_if(output_info_table.begin(), output_info_table.end(), | ||
| 193 | [&i, &type](const OutputRegisterInfo& info) { | ||
| 194 | return info.id == i && info.type == type; | ||
| 195 | } | ||
| 196 | ); | ||
| 197 | |||
| 198 | if (it == output_info_table.end()) { | ||
| 199 | output_info_table.push_back({}); | ||
| 200 | output_info_table.back().type = type; | ||
| 201 | output_info_table.back().component_mask = component_mask; | ||
| 202 | output_info_table.back().id = i; | ||
| 203 | } else { | ||
| 204 | it->component_mask = it->component_mask | component_mask; | ||
| 205 | } | ||
| 206 | } catch (const std::out_of_range& oor) { | ||
| 207 | _dbg_assert_msg_(GPU, 0, "Unknown output attribute mapping"); | ||
| 208 | ERROR_LOG(GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x", | ||
| 209 | (int)output_attributes[i].map_x.Value(), | ||
| 210 | (int)output_attributes[i].map_y.Value(), | ||
| 211 | (int)output_attributes[i].map_z.Value(), | ||
| 212 | (int)output_attributes[i].map_w.Value()); | ||
| 213 | } | ||
| 214 | } | ||
| 215 | } | ||
| 216 | |||
| 217 | |||
| 218 | struct { | ||
| 219 | DVLBHeader header; | ||
| 220 | u32 dvle_offset; | ||
| 221 | } dvlb{ {DVLBHeader::MAGIC_WORD, 1 } }; // 1 DVLE | ||
| 222 | |||
| 223 | DVLPHeader dvlp{ DVLPHeader::MAGIC_WORD }; | ||
| 224 | DVLEHeader dvle{ DVLEHeader::MAGIC_WORD }; | ||
| 225 | |||
| 226 | QueueForWriting((u8*)&dvlb, sizeof(dvlb)); | ||
| 227 | u32 dvlp_offset = QueueForWriting((u8*)&dvlp, sizeof(dvlp)); | ||
| 228 | dvlb.dvle_offset = QueueForWriting((u8*)&dvle, sizeof(dvle)); | ||
| 229 | |||
| 230 | // TODO: Reduce the amount of binary code written to relevant portions | ||
| 231 | dvlp.binary_offset = write_offset - dvlp_offset; | ||
| 232 | dvlp.binary_size_words = binary_size; | ||
| 233 | QueueForWriting((u8*)binary_data, binary_size * sizeof(u32)); | ||
| 234 | |||
| 235 | dvlp.swizzle_patterns_offset = write_offset - dvlp_offset; | ||
| 236 | dvlp.swizzle_patterns_num_entries = swizzle_size; | ||
| 237 | u32 dummy = 0; | ||
| 238 | for (int i = 0; i < swizzle_size; ++i) { | ||
| 239 | QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i])); | ||
| 240 | QueueForWriting((u8*)&dummy, sizeof(dummy)); | ||
| 241 | } | ||
| 242 | |||
| 243 | dvle.main_offset_words = main_offset; | ||
| 244 | dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; | ||
| 245 | dvle.output_register_table_size = output_info_table.size(); | ||
| 246 | QueueForWriting((u8*)output_info_table.data(), output_info_table.size() * sizeof(OutputRegisterInfo)); | ||
| 247 | |||
| 248 | // TODO: Create a label table for "main" | ||
| 249 | |||
| 250 | |||
| 251 | // Write data to file | ||
| 252 | static int dump_index = 0; | ||
| 253 | std::string filename = std::string("shader_dump") + std::to_string(++dump_index) + std::string(".shbin"); | ||
| 254 | std::ofstream file(filename, std::ios_base::out | std::ios_base::binary); | ||
| 255 | |||
| 256 | for (auto& chunk : writing_queue) { | ||
| 257 | file.write((char*)chunk.pointer, chunk.size); | ||
| 258 | } | ||
| 259 | } | ||
| 260 | |||
| 261 | static std::unique_ptr<PicaTrace> pica_trace; | ||
| 262 | static std::mutex pica_trace_mutex; | ||
| 263 | static int is_pica_tracing = false; | ||
| 264 | |||
| 265 | void StartPicaTracing() | ||
| 266 | { | ||
| 267 | if (is_pica_tracing) { | ||
| 268 | ERROR_LOG(GPU, "StartPicaTracing called even though tracing already running!"); | ||
| 269 | return; | ||
| 270 | } | ||
| 271 | |||
| 272 | pica_trace_mutex.lock(); | ||
| 273 | pica_trace = std::unique_ptr<PicaTrace>(new PicaTrace); | ||
| 274 | |||
| 275 | is_pica_tracing = true; | ||
| 276 | pica_trace_mutex.unlock(); | ||
| 277 | } | ||
| 278 | |||
| 279 | bool IsPicaTracing() | ||
| 280 | { | ||
| 281 | return is_pica_tracing; | ||
| 282 | } | ||
| 283 | |||
| 284 | void OnPicaRegWrite(u32 id, u32 value) | ||
| 285 | { | ||
| 286 | // Double check for is_pica_tracing to avoid pointless locking overhead | ||
| 287 | if (!is_pica_tracing) | ||
| 288 | return; | ||
| 289 | |||
| 290 | std::unique_lock<std::mutex> lock(pica_trace_mutex); | ||
| 291 | |||
| 292 | if (!is_pica_tracing) | ||
| 293 | return; | ||
| 294 | |||
| 295 | pica_trace->writes.push_back({id, value}); | ||
| 296 | } | ||
| 297 | |||
| 298 | std::unique_ptr<PicaTrace> FinishPicaTracing() | ||
| 299 | { | ||
| 300 | if (!is_pica_tracing) { | ||
| 301 | ERROR_LOG(GPU, "FinishPicaTracing called even though tracing already running!"); | ||
| 302 | return {}; | ||
| 303 | } | ||
| 304 | |||
| 305 | // signalize that no further tracing should be performed | ||
| 306 | is_pica_tracing = false; | ||
| 307 | |||
| 308 | // Wait until running tracing is finished | ||
| 309 | pica_trace_mutex.lock(); | ||
| 310 | std::unique_ptr<PicaTrace> ret(std::move(pica_trace)); | ||
| 311 | pica_trace_mutex.unlock(); | ||
| 312 | return std::move(ret); | ||
| 313 | } | ||
| 314 | |||
| 315 | void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { | ||
| 316 | // NOTE: Permanently enabling this just trashes hard disks for no reason. | ||
| 317 | // Hence, this is currently disabled. | ||
| 318 | return; | ||
| 319 | |||
| 320 | #ifndef HAVE_PNG | ||
| 321 | return; | ||
| 322 | #else | ||
| 323 | if (!data) | ||
| 324 | return; | ||
| 325 | |||
| 326 | // Write data to file | ||
| 327 | static int dump_index = 0; | ||
| 328 | std::string filename = std::string("texture_dump") + std::to_string(++dump_index) + std::string(".png"); | ||
| 329 | u32 row_stride = texture_config.width * 3; | ||
| 330 | |||
| 331 | u8* buf; | ||
| 332 | |||
| 333 | char title[] = "Citra texture dump"; | ||
| 334 | char title_key[] = "Title"; | ||
| 335 | png_structp png_ptr = nullptr; | ||
| 336 | png_infop info_ptr = nullptr; | ||
| 337 | |||
| 338 | // Open file for writing (binary mode) | ||
| 339 | File::IOFile fp(filename, "wb"); | ||
| 340 | |||
| 341 | // Initialize write structure | ||
| 342 | png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); | ||
| 343 | if (png_ptr == nullptr) { | ||
| 344 | ERROR_LOG(GPU, "Could not allocate write struct\n"); | ||
| 345 | goto finalise; | ||
| 346 | |||
| 347 | } | ||
| 348 | |||
| 349 | // Initialize info structure | ||
| 350 | info_ptr = png_create_info_struct(png_ptr); | ||
| 351 | if (info_ptr == nullptr) { | ||
| 352 | ERROR_LOG(GPU, "Could not allocate info struct\n"); | ||
| 353 | goto finalise; | ||
| 354 | } | ||
| 355 | |||
| 356 | // Setup Exception handling | ||
| 357 | if (setjmp(png_jmpbuf(png_ptr))) { | ||
| 358 | ERROR_LOG(GPU, "Error during png creation\n"); | ||
| 359 | goto finalise; | ||
| 360 | } | ||
| 361 | |||
| 362 | png_init_io(png_ptr, fp.GetHandle()); | ||
| 363 | |||
| 364 | // Write header (8 bit colour depth) | ||
| 365 | png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height, | ||
| 366 | 8, PNG_COLOR_TYPE_RGB /*_ALPHA*/, PNG_INTERLACE_NONE, | ||
| 367 | PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); | ||
| 368 | |||
| 369 | png_text title_text; | ||
| 370 | title_text.compression = PNG_TEXT_COMPRESSION_NONE; | ||
| 371 | title_text.key = title_key; | ||
| 372 | title_text.text = title; | ||
| 373 | png_set_text(png_ptr, info_ptr, &title_text, 1); | ||
| 374 | |||
| 375 | png_write_info(png_ptr, info_ptr); | ||
| 376 | |||
| 377 | buf = new u8[row_stride * texture_config.height]; | ||
| 378 | for (int y = 0; y < texture_config.height; ++y) { | ||
| 379 | for (int x = 0; x < texture_config.width; ++x) { | ||
| 380 | // Cf. rasterizer code for an explanation of this algorithm. | ||
| 381 | int texel_index_within_tile = 0; | ||
| 382 | for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { | ||
| 383 | int sub_tile_width = 1 << block_size_index; | ||
| 384 | int sub_tile_height = 1 << block_size_index; | ||
| 385 | |||
| 386 | int sub_tile_index = (x & sub_tile_width) << block_size_index; | ||
| 387 | sub_tile_index += 2 * ((y & sub_tile_height) << block_size_index); | ||
| 388 | texel_index_within_tile += sub_tile_index; | ||
| 389 | } | ||
| 390 | |||
| 391 | const int block_width = 8; | ||
| 392 | const int block_height = 8; | ||
| 393 | |||
| 394 | int coarse_x = (x / block_width) * block_width; | ||
| 395 | int coarse_y = (y / block_height) * block_height; | ||
| 396 | |||
| 397 | u8* source_ptr = (u8*)data + coarse_x * block_height * 3 + coarse_y * row_stride + texel_index_within_tile * 3; | ||
| 398 | buf[3 * x + y * row_stride ] = source_ptr[2]; | ||
| 399 | buf[3 * x + y * row_stride + 1] = source_ptr[1]; | ||
| 400 | buf[3 * x + y * row_stride + 2] = source_ptr[0]; | ||
| 401 | } | ||
| 402 | } | ||
| 403 | |||
| 404 | // Write image data | ||
| 405 | for (auto y = 0; y < texture_config.height; ++y) | ||
| 406 | { | ||
| 407 | u8* row_ptr = (u8*)buf + y * row_stride; | ||
| 408 | u8* ptr = row_ptr; | ||
| 409 | png_write_row(png_ptr, row_ptr); | ||
| 410 | } | ||
| 411 | |||
| 412 | delete[] buf; | ||
| 413 | |||
| 414 | // End write | ||
| 415 | png_write_end(png_ptr, nullptr); | ||
| 416 | |||
| 417 | finalise: | ||
| 418 | if (info_ptr != nullptr) png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1); | ||
| 419 | if (png_ptr != nullptr) png_destroy_write_struct(&png_ptr, (png_infopp)nullptr); | ||
| 420 | #endif | ||
| 421 | } | ||
| 422 | |||
| 423 | void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages) | ||
| 424 | { | ||
| 425 | using Source = Pica::Regs::TevStageConfig::Source; | ||
| 426 | using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier; | ||
| 427 | using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier; | ||
| 428 | using Operation = Pica::Regs::TevStageConfig::Operation; | ||
| 429 | |||
| 430 | std::string stage_info = "Tev setup:\n"; | ||
| 431 | for (int index = 0; index < stages.size(); ++index) { | ||
| 432 | const auto& tev_stage = stages[index]; | ||
| 433 | |||
| 434 | const std::map<Source, std::string> source_map = { | ||
| 435 | { Source::PrimaryColor, "PrimaryColor" }, | ||
| 436 | { Source::Texture0, "Texture0" }, | ||
| 437 | { Source::Constant, "Constant" }, | ||
| 438 | { Source::Previous, "Previous" }, | ||
| 439 | }; | ||
| 440 | |||
| 441 | const std::map<ColorModifier, std::string> color_modifier_map = { | ||
| 442 | { ColorModifier::SourceColor, { "%source.rgb" } } | ||
| 443 | }; | ||
| 444 | const std::map<AlphaModifier, std::string> alpha_modifier_map = { | ||
| 445 | { AlphaModifier::SourceAlpha, "%source.a" } | ||
| 446 | }; | ||
| 447 | |||
| 448 | std::map<Operation, std::string> combiner_map = { | ||
| 449 | { Operation::Replace, "%source1" }, | ||
| 450 | { Operation::Modulate, "(%source1 * %source2) / 255" }, | ||
| 451 | }; | ||
| 452 | |||
| 453 | auto ReplacePattern = | ||
| 454 | [](const std::string& input, const std::string& pattern, const std::string& replacement) -> std::string { | ||
| 455 | size_t start = input.find(pattern); | ||
| 456 | if (start == std::string::npos) | ||
| 457 | return input; | ||
| 458 | |||
| 459 | std::string ret = input; | ||
| 460 | ret.replace(start, pattern.length(), replacement); | ||
| 461 | return ret; | ||
| 462 | }; | ||
| 463 | auto GetColorSourceStr = | ||
| 464 | [&source_map,&color_modifier_map,&ReplacePattern](const Source& src, const ColorModifier& modifier) { | ||
| 465 | auto src_it = source_map.find(src); | ||
| 466 | std::string src_str = "Unknown"; | ||
| 467 | if (src_it != source_map.end()) | ||
| 468 | src_str = src_it->second; | ||
| 469 | |||
| 470 | auto modifier_it = color_modifier_map.find(modifier); | ||
| 471 | std::string modifier_str = "%source.????"; | ||
| 472 | if (modifier_it != color_modifier_map.end()) | ||
| 473 | modifier_str = modifier_it->second; | ||
| 474 | |||
| 475 | return ReplacePattern(modifier_str, "%source", src_str); | ||
| 476 | }; | ||
| 477 | auto GetColorCombinerStr = | ||
| 478 | [&](const Regs::TevStageConfig& tev_stage) { | ||
| 479 | auto op_it = combiner_map.find(tev_stage.color_op); | ||
| 480 | std::string op_str = "Unknown op (%source1, %source2, %source3)"; | ||
| 481 | if (op_it != combiner_map.end()) | ||
| 482 | op_str = op_it->second; | ||
| 483 | |||
| 484 | op_str = ReplacePattern(op_str, "%source1", GetColorSourceStr(tev_stage.color_source1, tev_stage.color_modifier1)); | ||
| 485 | op_str = ReplacePattern(op_str, "%source2", GetColorSourceStr(tev_stage.color_source2, tev_stage.color_modifier2)); | ||
| 486 | return ReplacePattern(op_str, "%source3", GetColorSourceStr(tev_stage.color_source3, tev_stage.color_modifier3)); | ||
| 487 | }; | ||
| 488 | auto GetAlphaSourceStr = | ||
| 489 | [&source_map,&alpha_modifier_map,&ReplacePattern](const Source& src, const AlphaModifier& modifier) { | ||
| 490 | auto src_it = source_map.find(src); | ||
| 491 | std::string src_str = "Unknown"; | ||
| 492 | if (src_it != source_map.end()) | ||
| 493 | src_str = src_it->second; | ||
| 494 | |||
| 495 | auto modifier_it = alpha_modifier_map.find(modifier); | ||
| 496 | std::string modifier_str = "%source.????"; | ||
| 497 | if (modifier_it != alpha_modifier_map.end()) | ||
| 498 | modifier_str = modifier_it->second; | ||
| 499 | |||
| 500 | return ReplacePattern(modifier_str, "%source", src_str); | ||
| 501 | }; | ||
| 502 | auto GetAlphaCombinerStr = | ||
| 503 | [&](const Regs::TevStageConfig& tev_stage) { | ||
| 504 | auto op_it = combiner_map.find(tev_stage.alpha_op); | ||
| 505 | std::string op_str = "Unknown op (%source1, %source2, %source3)"; | ||
| 506 | if (op_it != combiner_map.end()) | ||
| 507 | op_str = op_it->second; | ||
| 508 | |||
| 509 | op_str = ReplacePattern(op_str, "%source1", GetAlphaSourceStr(tev_stage.alpha_source1, tev_stage.alpha_modifier1)); | ||
| 510 | op_str = ReplacePattern(op_str, "%source2", GetAlphaSourceStr(tev_stage.alpha_source2, tev_stage.alpha_modifier2)); | ||
| 511 | return ReplacePattern(op_str, "%source3", GetAlphaSourceStr(tev_stage.alpha_source3, tev_stage.alpha_modifier3)); | ||
| 512 | }; | ||
| 513 | |||
| 514 | stage_info += "Stage " + std::to_string(index) + ": " + GetColorCombinerStr(tev_stage) + " " + GetAlphaCombinerStr(tev_stage) + "\n"; | ||
| 515 | } | ||
| 516 | |||
| 517 | DEBUG_LOG(GPU, "%s", stage_info.c_str()); | ||
| 518 | } | ||
| 519 | |||
| 520 | } // namespace | ||
| 521 | |||
| 522 | } // namespace | ||
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h new file mode 100644 index 000000000..8b1499bf2 --- /dev/null +++ b/src/video_core/debug_utils/debug_utils.h | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <memory> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "video_core/pica.h" | ||
| 12 | |||
| 13 | namespace Pica { | ||
| 14 | |||
| 15 | namespace DebugUtils { | ||
| 16 | |||
| 17 | // Simple utility class for dumping geometry data to an OBJ file | ||
| 18 | class GeometryDumper { | ||
| 19 | public: | ||
| 20 | struct Vertex { | ||
| 21 | std::array<float,3> pos; | ||
| 22 | }; | ||
| 23 | |||
| 24 | void AddTriangle(Vertex& v0, Vertex& v1, Vertex& v2); | ||
| 25 | |||
| 26 | void Dump(); | ||
| 27 | |||
| 28 | private: | ||
| 29 | struct Face { | ||
| 30 | int index[3]; | ||
| 31 | }; | ||
| 32 | |||
| 33 | std::vector<Vertex> vertices; | ||
| 34 | std::vector<Face> faces; | ||
| 35 | }; | ||
| 36 | |||
| 37 | void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, | ||
| 38 | u32 main_offset, const Regs::VSOutputAttributes* output_attributes); | ||
| 39 | |||
| 40 | |||
| 41 | // Utility struct to record Pica register writes as (id, value) pairs. | ||
| 42 | struct PicaTrace { | ||
| 43 | struct Write : public std::pair<u32,u32> { | ||
| 44 | Write(u32 id, u32 value) : std::pair<u32,u32>(id, value) {} | ||
| 45 | |||
| 46 | u32& Id() { return first; } | ||
| 47 | const u32& Id() const { return first; } | ||
| 48 | |||
| 49 | u32& Value() { return second; } | ||
| 50 | const u32& Value() const { return second; } | ||
| 51 | }; | ||
| 52 | std::vector<Write> writes; | ||
| 53 | }; | ||
| 54 | |||
| 55 | void StartPicaTracing(); | ||
| 56 | bool IsPicaTracing(); | ||
| 57 | void OnPicaRegWrite(u32 id, u32 value); | ||
| 58 | std::unique_ptr<PicaTrace> FinishPicaTracing(); | ||
| 59 | |||
| 60 | void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data); | ||
| 61 | |||
| 62 | void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages); | ||
| 63 | |||
| 64 | } // namespace | ||
| 65 | |||
| 66 | } // namespace | ||
diff --git a/src/video_core/gpu_debugger.h b/src/video_core/gpu_debugger.h index 2ba873457..5a81fcfcb 100644 --- a/src/video_core/gpu_debugger.h +++ b/src/video_core/gpu_debugger.h | |||
| @@ -18,19 +18,6 @@ | |||
| 18 | class GraphicsDebugger | 18 | class GraphicsDebugger |
| 19 | { | 19 | { |
| 20 | public: | 20 | public: |
| 21 | // A few utility structs used to expose data | ||
| 22 | // A vector of commands represented by their raw byte sequence | ||
| 23 | struct PicaCommand : public std::vector<u32> | ||
| 24 | { | ||
| 25 | const Pica::CommandProcessor::CommandHeader& GetHeader() const | ||
| 26 | { | ||
| 27 | const u32& val = at(1); | ||
| 28 | return *(Pica::CommandProcessor::CommandHeader*)&val; | ||
| 29 | } | ||
| 30 | }; | ||
| 31 | |||
| 32 | typedef std::vector<PicaCommand> PicaCommandList; | ||
| 33 | |||
| 34 | // Base class for all objects which need to be notified about GPU events | 21 | // Base class for all objects which need to be notified about GPU events |
| 35 | class DebuggerObserver | 22 | class DebuggerObserver |
| 36 | { | 23 | { |
| @@ -55,16 +42,6 @@ public: | |||
| 55 | ERROR_LOG(GSP, "Received command: id=%x", (int)cmd.id.Value()); | 42 | ERROR_LOG(GSP, "Received command: id=%x", (int)cmd.id.Value()); |
| 56 | } | 43 | } |
| 57 | 44 | ||
| 58 | /** | ||
| 59 | * @param lst command list which triggered this call | ||
| 60 | * @param is_new true if the command list was called for the first time | ||
| 61 | * @todo figure out how to make sure called functions don't keep references around beyond their life time | ||
| 62 | */ | ||
| 63 | virtual void OnCommandListCalled(const PicaCommandList& lst, bool is_new) | ||
| 64 | { | ||
| 65 | ERROR_LOG(GSP, "Command list called: %d", (int)is_new); | ||
| 66 | } | ||
| 67 | |||
| 68 | protected: | 45 | protected: |
| 69 | const GraphicsDebugger* GetDebugger() const | 46 | const GraphicsDebugger* GetDebugger() const |
| 70 | { | 47 | { |
| @@ -93,49 +70,12 @@ public: | |||
| 93 | } ); | 70 | } ); |
| 94 | } | 71 | } |
| 95 | 72 | ||
| 96 | void CommandListCalled(u32 address, u32* command_list, u32 size_in_words) | ||
| 97 | { | ||
| 98 | if (observers.empty()) | ||
| 99 | return; | ||
| 100 | |||
| 101 | PicaCommandList cmdlist; | ||
| 102 | for (u32* parse_pointer = command_list; parse_pointer < command_list + size_in_words;) | ||
| 103 | { | ||
| 104 | const Pica::CommandProcessor::CommandHeader& header = *(Pica::CommandProcessor::CommandHeader*)(&parse_pointer[1]); | ||
| 105 | |||
| 106 | cmdlist.push_back(PicaCommand()); | ||
| 107 | auto& cmd = cmdlist.back(); | ||
| 108 | |||
| 109 | size_t size = 2 + header.extra_data_length; | ||
| 110 | size = (size + 1) / 2 * 2; // align to 8 bytes | ||
| 111 | cmd.reserve(size); | ||
| 112 | std::copy(parse_pointer, parse_pointer + size, std::back_inserter(cmd)); | ||
| 113 | |||
| 114 | parse_pointer += size; | ||
| 115 | } | ||
| 116 | |||
| 117 | auto obj = std::pair<u32,PicaCommandList>(address, cmdlist); | ||
| 118 | auto it = std::find(command_lists.begin(), command_lists.end(), obj); | ||
| 119 | bool is_new = (it == command_lists.end()); | ||
| 120 | if (is_new) | ||
| 121 | command_lists.push_back(obj); | ||
| 122 | |||
| 123 | ForEachObserver([&](DebuggerObserver* observer) { | ||
| 124 | observer->OnCommandListCalled(obj.second, is_new); | ||
| 125 | } ); | ||
| 126 | } | ||
| 127 | |||
| 128 | const GSP_GPU::Command& ReadGXCommandHistory(int index) const | 73 | const GSP_GPU::Command& ReadGXCommandHistory(int index) const |
| 129 | { | 74 | { |
| 130 | // TODO: Is this thread-safe? | 75 | // TODO: Is this thread-safe? |
| 131 | return gx_command_history[index]; | 76 | return gx_command_history[index]; |
| 132 | } | 77 | } |
| 133 | 78 | ||
| 134 | const std::vector<std::pair<u32,PicaCommandList>>& GetCommandLists() const | ||
| 135 | { | ||
| 136 | return command_lists; | ||
| 137 | } | ||
| 138 | |||
| 139 | void RegisterObserver(DebuggerObserver* observer) | 79 | void RegisterObserver(DebuggerObserver* observer) |
| 140 | { | 80 | { |
| 141 | // TODO: Check for duplicates | 81 | // TODO: Check for duplicates |
| @@ -158,7 +98,4 @@ private: | |||
| 158 | std::vector<DebuggerObserver*> observers; | 98 | std::vector<DebuggerObserver*> observers; |
| 159 | 99 | ||
| 160 | std::vector<GSP_GPU::Command> gx_command_history; | 100 | std::vector<GSP_GPU::Command> gx_command_history; |
| 161 | |||
| 162 | // vector of pairs of command lists and their storage address | ||
| 163 | std::vector<std::pair<u32,PicaCommandList>> command_lists; | ||
| 164 | }; | 101 | }; |
diff --git a/src/video_core/math.h b/src/video_core/math.h index 7030f2cfb..83ba81235 100644 --- a/src/video_core/math.h +++ b/src/video_core/math.h | |||
| @@ -39,13 +39,19 @@ template<typename T> class Vec2; | |||
| 39 | template<typename T> class Vec3; | 39 | template<typename T> class Vec3; |
| 40 | template<typename T> class Vec4; | 40 | template<typename T> class Vec4; |
| 41 | 41 | ||
| 42 | template<typename T> | ||
| 43 | static inline Vec2<T> MakeVec(const T& x, const T& y); | ||
| 44 | template<typename T> | ||
| 45 | static inline Vec3<T> MakeVec(const T& x, const T& y, const T& z); | ||
| 46 | template<typename T> | ||
| 47 | static inline Vec4<T> MakeVec(const T& x, const T& y, const T& z, const T& w); | ||
| 48 | |||
| 42 | 49 | ||
| 43 | template<typename T> | 50 | template<typename T> |
| 44 | class Vec2 { | 51 | class Vec2 { |
| 45 | public: | 52 | public: |
| 46 | struct { | 53 | T x; |
| 47 | T x,y; | 54 | T y; |
| 48 | }; | ||
| 49 | 55 | ||
| 50 | T* AsArray() { return &x; } | 56 | T* AsArray() { return &x; } |
| 51 | 57 | ||
| @@ -68,34 +74,34 @@ public: | |||
| 68 | a[0] = x; a[1] = y; | 74 | a[0] = x; a[1] = y; |
| 69 | } | 75 | } |
| 70 | 76 | ||
| 71 | Vec2 operator +(const Vec2& other) const | 77 | Vec2<decltype(T{}+T{})> operator +(const Vec2& other) const |
| 72 | { | 78 | { |
| 73 | return Vec2(x+other.x, y+other.y); | 79 | return MakeVec(x+other.x, y+other.y); |
| 74 | } | 80 | } |
| 75 | void operator += (const Vec2 &other) | 81 | void operator += (const Vec2 &other) |
| 76 | { | 82 | { |
| 77 | x+=other.x; y+=other.y; | 83 | x+=other.x; y+=other.y; |
| 78 | } | 84 | } |
| 79 | Vec2 operator -(const Vec2& other) const | 85 | Vec2<decltype(T{}-T{})> operator -(const Vec2& other) const |
| 80 | { | 86 | { |
| 81 | return Vec2(x-other.x, y-other.y); | 87 | return MakeVec(x-other.x, y-other.y); |
| 82 | } | 88 | } |
| 83 | void operator -= (const Vec2& other) | 89 | void operator -= (const Vec2& other) |
| 84 | { | 90 | { |
| 85 | x-=other.x; y-=other.y; | 91 | x-=other.x; y-=other.y; |
| 86 | } | 92 | } |
| 87 | Vec2 operator -() const | 93 | Vec2<decltype(-T{})> operator -() const |
| 88 | { | 94 | { |
| 89 | return Vec2(-x,-y); | 95 | return MakeVec(-x,-y); |
| 90 | } | 96 | } |
| 91 | Vec2 operator * (const Vec2& other) const | 97 | Vec2<decltype(T{}*T{})> operator * (const Vec2& other) const |
| 92 | { | 98 | { |
| 93 | return Vec2(x*other.x, y*other.y); | 99 | return MakeVec(x*other.x, y*other.y); |
| 94 | } | 100 | } |
| 95 | template<typename V> | 101 | template<typename V> |
| 96 | Vec2 operator * (const V& f) const | 102 | Vec2<decltype(T{}*V{})> operator * (const V& f) const |
| 97 | { | 103 | { |
| 98 | return Vec2(x*f,y*f); | 104 | return MakeVec(x*f,y*f); |
| 99 | } | 105 | } |
| 100 | template<typename V> | 106 | template<typename V> |
| 101 | void operator *= (const V& f) | 107 | void operator *= (const V& f) |
| @@ -103,9 +109,9 @@ public: | |||
| 103 | x*=f; y*=f; | 109 | x*=f; y*=f; |
| 104 | } | 110 | } |
| 105 | template<typename V> | 111 | template<typename V> |
| 106 | Vec2 operator / (const V& f) const | 112 | Vec2<decltype(T{}/V{})> operator / (const V& f) const |
| 107 | { | 113 | { |
| 108 | return Vec2(x/f,y/f); | 114 | return MakeVec(x/f,y/f); |
| 109 | } | 115 | } |
| 110 | template<typename V> | 116 | template<typename V> |
| 111 | void operator /= (const V& f) | 117 | void operator /= (const V& f) |
| @@ -152,20 +158,9 @@ public: | |||
| 152 | const T& t() const { return y; } | 158 | const T& t() const { return y; } |
| 153 | 159 | ||
| 154 | // swizzlers - create a subvector of specific components | 160 | // swizzlers - create a subvector of specific components |
| 155 | Vec2 yx() const { return Vec2(y, x); } | 161 | const Vec2 yx() const { return Vec2(y, x); } |
| 156 | Vec2 vu() const { return Vec2(y, x); } | 162 | const Vec2 vu() const { return Vec2(y, x); } |
| 157 | Vec2 ts() const { return Vec2(y, x); } | 163 | const Vec2 ts() const { return Vec2(y, x); } |
| 158 | |||
| 159 | // Inserters to add new elements to effectively create larger vectors containing this Vec2 | ||
| 160 | Vec3<T> InsertBeforeX(const T& value) { | ||
| 161 | return Vec3<T>(value, x, y); | ||
| 162 | } | ||
| 163 | Vec3<T> InsertBeforeY(const T& value) { | ||
| 164 | return Vec3<T>(x, value, y); | ||
| 165 | } | ||
| 166 | Vec3<T> Append(const T& value) { | ||
| 167 | return Vec3<T>(x, y, value); | ||
| 168 | } | ||
| 169 | }; | 164 | }; |
| 170 | 165 | ||
| 171 | template<typename T, typename V> | 166 | template<typename T, typename V> |
| @@ -180,10 +175,9 @@ template<typename T> | |||
| 180 | class Vec3 | 175 | class Vec3 |
| 181 | { | 176 | { |
| 182 | public: | 177 | public: |
| 183 | struct | 178 | T x; |
| 184 | { | 179 | T y; |
| 185 | T x,y,z; | 180 | T z; |
| 186 | }; | ||
| 187 | 181 | ||
| 188 | T* AsArray() { return &x; } | 182 | T* AsArray() { return &x; } |
| 189 | 183 | ||
| @@ -193,7 +187,7 @@ public: | |||
| 193 | 187 | ||
| 194 | template<typename T2> | 188 | template<typename T2> |
| 195 | Vec3<T2> Cast() const { | 189 | Vec3<T2> Cast() const { |
| 196 | return Vec3<T2>((T2)x, (T2)y, (T2)z); | 190 | return MakeVec<T2>((T2)x, (T2)y, (T2)z); |
| 197 | } | 191 | } |
| 198 | 192 | ||
| 199 | // Only implemented for T=int and T=float | 193 | // Only implemented for T=int and T=float |
| @@ -202,7 +196,7 @@ public: | |||
| 202 | 196 | ||
| 203 | static Vec3 AssignToAll(const T& f) | 197 | static Vec3 AssignToAll(const T& f) |
| 204 | { | 198 | { |
| 205 | return Vec3<T>(f, f, f); | 199 | return MakeVec(f, f, f); |
| 206 | } | 200 | } |
| 207 | 201 | ||
| 208 | void Write(T a[3]) | 202 | void Write(T a[3]) |
| @@ -210,34 +204,34 @@ public: | |||
| 210 | a[0] = x; a[1] = y; a[2] = z; | 204 | a[0] = x; a[1] = y; a[2] = z; |
| 211 | } | 205 | } |
| 212 | 206 | ||
| 213 | Vec3 operator +(const Vec3 &other) const | 207 | Vec3<decltype(T{}+T{})> operator +(const Vec3 &other) const |
| 214 | { | 208 | { |
| 215 | return Vec3(x+other.x, y+other.y, z+other.z); | 209 | return MakeVec(x+other.x, y+other.y, z+other.z); |
| 216 | } | 210 | } |
| 217 | void operator += (const Vec3 &other) | 211 | void operator += (const Vec3 &other) |
| 218 | { | 212 | { |
| 219 | x+=other.x; y+=other.y; z+=other.z; | 213 | x+=other.x; y+=other.y; z+=other.z; |
| 220 | } | 214 | } |
| 221 | Vec3 operator -(const Vec3 &other) const | 215 | Vec3<decltype(T{}-T{})> operator -(const Vec3 &other) const |
| 222 | { | 216 | { |
| 223 | return Vec3(x-other.x, y-other.y, z-other.z); | 217 | return MakeVec(x-other.x, y-other.y, z-other.z); |
| 224 | } | 218 | } |
| 225 | void operator -= (const Vec3 &other) | 219 | void operator -= (const Vec3 &other) |
| 226 | { | 220 | { |
| 227 | x-=other.x; y-=other.y; z-=other.z; | 221 | x-=other.x; y-=other.y; z-=other.z; |
| 228 | } | 222 | } |
| 229 | Vec3 operator -() const | 223 | Vec3<decltype(-T{})> operator -() const |
| 230 | { | 224 | { |
| 231 | return Vec3(-x,-y,-z); | 225 | return MakeVec(-x,-y,-z); |
| 232 | } | 226 | } |
| 233 | Vec3 operator * (const Vec3 &other) const | 227 | Vec3<decltype(T{}*T{})> operator * (const Vec3 &other) const |
| 234 | { | 228 | { |
| 235 | return Vec3(x*other.x, y*other.y, z*other.z); | 229 | return MakeVec(x*other.x, y*other.y, z*other.z); |
| 236 | } | 230 | } |
| 237 | template<typename V> | 231 | template<typename V> |
| 238 | Vec3 operator * (const V& f) const | 232 | Vec3<decltype(T{}*V{})> operator * (const V& f) const |
| 239 | { | 233 | { |
| 240 | return Vec3(x*f,y*f,z*f); | 234 | return MakeVec(x*f,y*f,z*f); |
| 241 | } | 235 | } |
| 242 | template<typename V> | 236 | template<typename V> |
| 243 | void operator *= (const V& f) | 237 | void operator *= (const V& f) |
| @@ -245,9 +239,9 @@ public: | |||
| 245 | x*=f; y*=f; z*=f; | 239 | x*=f; y*=f; z*=f; |
| 246 | } | 240 | } |
| 247 | template<typename V> | 241 | template<typename V> |
| 248 | Vec3 operator / (const V& f) const | 242 | Vec3<decltype(T{}/V{})> operator / (const V& f) const |
| 249 | { | 243 | { |
| 250 | return Vec3(x/f,y/f,z/f); | 244 | return MakeVec(x/f,y/f,z/f); |
| 251 | } | 245 | } |
| 252 | template<typename V> | 246 | template<typename V> |
| 253 | void operator /= (const V& f) | 247 | void operator /= (const V& f) |
| @@ -310,7 +304,7 @@ public: | |||
| 310 | // swizzlers - create a subvector of specific components | 304 | // swizzlers - create a subvector of specific components |
| 311 | // e.g. Vec2 uv() { return Vec2(x,y); } | 305 | // e.g. Vec2 uv() { return Vec2(x,y); } |
| 312 | // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) | 306 | // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) |
| 313 | #define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); } | 307 | #define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); } |
| 314 | #define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \ | 308 | #define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \ |
| 315 | _DEFINE_SWIZZLER2(a, b, a##b); \ | 309 | _DEFINE_SWIZZLER2(a, b, a##b); \ |
| 316 | _DEFINE_SWIZZLER2(a, b, a2##b2); \ | 310 | _DEFINE_SWIZZLER2(a, b, a2##b2); \ |
| @@ -319,27 +313,13 @@ public: | |||
| 319 | _DEFINE_SWIZZLER2(b, a, b##a); \ | 313 | _DEFINE_SWIZZLER2(b, a, b##a); \ |
| 320 | _DEFINE_SWIZZLER2(b, a, b2##a2); \ | 314 | _DEFINE_SWIZZLER2(b, a, b2##a2); \ |
| 321 | _DEFINE_SWIZZLER2(b, a, b3##a3); \ | 315 | _DEFINE_SWIZZLER2(b, a, b3##a3); \ |
| 322 | _DEFINE_SWIZZLER2(b, a, b4##a4); | 316 | _DEFINE_SWIZZLER2(b, a, b4##a4) |
| 323 | 317 | ||
| 324 | DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t); | 318 | DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t); |
| 325 | DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q); | 319 | DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q); |
| 326 | DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q); | 320 | DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q); |
| 327 | #undef DEFINE_SWIZZLER2 | 321 | #undef DEFINE_SWIZZLER2 |
| 328 | #undef _DEFINE_SWIZZLER2 | 322 | #undef _DEFINE_SWIZZLER2 |
| 329 | |||
| 330 | // Inserters to add new elements to effectively create larger vectors containing this Vec2 | ||
| 331 | Vec4<T> InsertBeforeX(const T& value) { | ||
| 332 | return Vec4<T>(value, x, y, z); | ||
| 333 | } | ||
| 334 | Vec4<T> InsertBeforeY(const T& value) { | ||
| 335 | return Vec4<T>(x, value, y, z); | ||
| 336 | } | ||
| 337 | Vec4<T> InsertBeforeZ(const T& value) { | ||
| 338 | return Vec4<T>(x, y, value, z); | ||
| 339 | } | ||
| 340 | Vec4<T> Append(const T& value) { | ||
| 341 | return Vec4<T>(x, y, z, value); | ||
| 342 | } | ||
| 343 | }; | 323 | }; |
| 344 | 324 | ||
| 345 | template<typename T, typename V> | 325 | template<typename T, typename V> |
| @@ -348,16 +328,27 @@ Vec3<T> operator * (const V& f, const Vec3<T>& vec) | |||
| 348 | return Vec3<T>(f*vec.x,f*vec.y,f*vec.z); | 328 | return Vec3<T>(f*vec.x,f*vec.y,f*vec.z); |
| 349 | } | 329 | } |
| 350 | 330 | ||
| 331 | template<> | ||
| 332 | inline float Vec3<float>::Length() const { | ||
| 333 | return std::sqrt(x * x + y * y + z * z); | ||
| 334 | } | ||
| 335 | |||
| 336 | template<> | ||
| 337 | inline Vec3<float> Vec3<float>::Normalized() const { | ||
| 338 | return *this / Length(); | ||
| 339 | } | ||
| 340 | |||
| 341 | |||
| 351 | typedef Vec3<float> Vec3f; | 342 | typedef Vec3<float> Vec3f; |
| 352 | 343 | ||
| 353 | template<typename T> | 344 | template<typename T> |
| 354 | class Vec4 | 345 | class Vec4 |
| 355 | { | 346 | { |
| 356 | public: | 347 | public: |
| 357 | struct | 348 | T x; |
| 358 | { | 349 | T y; |
| 359 | T x,y,z,w; | 350 | T z; |
| 360 | }; | 351 | T w; |
| 361 | 352 | ||
| 362 | T* AsArray() { return &x; } | 353 | T* AsArray() { return &x; } |
| 363 | 354 | ||
| @@ -383,34 +374,34 @@ public: | |||
| 383 | a[0] = x; a[1] = y; a[2] = z; a[3] = w; | 374 | a[0] = x; a[1] = y; a[2] = z; a[3] = w; |
| 384 | } | 375 | } |
| 385 | 376 | ||
| 386 | Vec4 operator +(const Vec4& other) const | 377 | Vec4<decltype(T{}+T{})> operator +(const Vec4& other) const |
| 387 | { | 378 | { |
| 388 | return Vec4(x+other.x, y+other.y, z+other.z, w+other.w); | 379 | return MakeVec(x+other.x, y+other.y, z+other.z, w+other.w); |
| 389 | } | 380 | } |
| 390 | void operator += (const Vec4& other) | 381 | void operator += (const Vec4& other) |
| 391 | { | 382 | { |
| 392 | x+=other.x; y+=other.y; z+=other.z; w+=other.w; | 383 | x+=other.x; y+=other.y; z+=other.z; w+=other.w; |
| 393 | } | 384 | } |
| 394 | Vec4 operator -(const Vec4 &other) const | 385 | Vec4<decltype(T{}-T{})> operator -(const Vec4 &other) const |
| 395 | { | 386 | { |
| 396 | return Vec4(x-other.x, y-other.y, z-other.z, w-other.w); | 387 | return MakeVec(x-other.x, y-other.y, z-other.z, w-other.w); |
| 397 | } | 388 | } |
| 398 | void operator -= (const Vec4 &other) | 389 | void operator -= (const Vec4 &other) |
| 399 | { | 390 | { |
| 400 | x-=other.x; y-=other.y; z-=other.z; w-=other.w; | 391 | x-=other.x; y-=other.y; z-=other.z; w-=other.w; |
| 401 | } | 392 | } |
| 402 | Vec4 operator -() const | 393 | Vec4<decltype(-T{})> operator -() const |
| 403 | { | 394 | { |
| 404 | return Vec4(-x,-y,-z,-w); | 395 | return MakeVec(-x,-y,-z,-w); |
| 405 | } | 396 | } |
| 406 | Vec4 operator * (const Vec4 &other) const | 397 | Vec4<decltype(T{}*T{})> operator * (const Vec4 &other) const |
| 407 | { | 398 | { |
| 408 | return Vec4(x*other.x, y*other.y, z*other.z, w*other.w); | 399 | return MakeVec(x*other.x, y*other.y, z*other.z, w*other.w); |
| 409 | } | 400 | } |
| 410 | template<typename V> | 401 | template<typename V> |
| 411 | Vec4 operator * (const V& f) const | 402 | Vec4<decltype(T{}*V{})> operator * (const V& f) const |
| 412 | { | 403 | { |
| 413 | return Vec4(x*f,y*f,z*f,w*f); | 404 | return MakeVec(x*f,y*f,z*f,w*f); |
| 414 | } | 405 | } |
| 415 | template<typename V> | 406 | template<typename V> |
| 416 | void operator *= (const V& f) | 407 | void operator *= (const V& f) |
| @@ -418,9 +409,9 @@ public: | |||
| 418 | x*=f; y*=f; z*=f; w*=f; | 409 | x*=f; y*=f; z*=f; w*=f; |
| 419 | } | 410 | } |
| 420 | template<typename V> | 411 | template<typename V> |
| 421 | Vec4 operator / (const V& f) const | 412 | Vec4<decltype(T{}/V{})> operator / (const V& f) const |
| 422 | { | 413 | { |
| 423 | return Vec4(x/f,y/f,z/f,w/f); | 414 | return MakeVec(x/f,y/f,z/f,w/f); |
| 424 | } | 415 | } |
| 425 | template<typename V> | 416 | template<typename V> |
| 426 | void operator /= (const V& f) | 417 | void operator /= (const V& f) |
| @@ -469,12 +460,12 @@ public: | |||
| 469 | // swizzlers - create a subvector of specific components | 460 | // swizzlers - create a subvector of specific components |
| 470 | // e.g. Vec2 uv() { return Vec2(x,y); } | 461 | // e.g. Vec2 uv() { return Vec2(x,y); } |
| 471 | // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) | 462 | // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) |
| 472 | #define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); } | 463 | #define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); } |
| 473 | #define DEFINE_SWIZZLER2(a, b, a2, b2) \ | 464 | #define DEFINE_SWIZZLER2(a, b, a2, b2) \ |
| 474 | _DEFINE_SWIZZLER2(a, b, a##b); \ | 465 | _DEFINE_SWIZZLER2(a, b, a##b); \ |
| 475 | _DEFINE_SWIZZLER2(a, b, a2##b2); \ | 466 | _DEFINE_SWIZZLER2(a, b, a2##b2); \ |
| 476 | _DEFINE_SWIZZLER2(b, a, b##a); \ | 467 | _DEFINE_SWIZZLER2(b, a, b##a); \ |
| 477 | _DEFINE_SWIZZLER2(b, a, b2##a2); | 468 | _DEFINE_SWIZZLER2(b, a, b2##a2) |
| 478 | 469 | ||
| 479 | DEFINE_SWIZZLER2(x, y, r, g); | 470 | DEFINE_SWIZZLER2(x, y, r, g); |
| 480 | DEFINE_SWIZZLER2(x, z, r, b); | 471 | DEFINE_SWIZZLER2(x, z, r, b); |
| @@ -485,7 +476,7 @@ public: | |||
| 485 | #undef DEFINE_SWIZZLER2 | 476 | #undef DEFINE_SWIZZLER2 |
| 486 | #undef _DEFINE_SWIZZLER2 | 477 | #undef _DEFINE_SWIZZLER2 |
| 487 | 478 | ||
| 488 | #define _DEFINE_SWIZZLER3(a, b, c, name) Vec3<T> name() const { return Vec3<T>(a, b, c); } | 479 | #define _DEFINE_SWIZZLER3(a, b, c, name) const Vec3<T> name() const { return Vec3<T>(a, b, c); } |
| 489 | #define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \ | 480 | #define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \ |
| 490 | _DEFINE_SWIZZLER3(a, b, c, a##b##c); \ | 481 | _DEFINE_SWIZZLER3(a, b, c, a##b##c); \ |
| 491 | _DEFINE_SWIZZLER3(a, c, b, a##c##b); \ | 482 | _DEFINE_SWIZZLER3(a, c, b, a##c##b); \ |
| @@ -498,7 +489,7 @@ public: | |||
| 498 | _DEFINE_SWIZZLER3(b, a, c, b2##a2##c2); \ | 489 | _DEFINE_SWIZZLER3(b, a, c, b2##a2##c2); \ |
| 499 | _DEFINE_SWIZZLER3(b, c, a, b2##c2##a2); \ | 490 | _DEFINE_SWIZZLER3(b, c, a, b2##c2##a2); \ |
| 500 | _DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \ | 491 | _DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \ |
| 501 | _DEFINE_SWIZZLER3(c, b, a, c2##b2##a2); | 492 | _DEFINE_SWIZZLER3(c, b, a, c2##b2##a2) |
| 502 | 493 | ||
| 503 | DEFINE_SWIZZLER3(x, y, z, r, g, b); | 494 | DEFINE_SWIZZLER3(x, y, z, r, g, b); |
| 504 | DEFINE_SWIZZLER3(x, y, w, r, g, a); | 495 | DEFINE_SWIZZLER3(x, y, w, r, g, a); |
| @@ -510,69 +501,121 @@ public: | |||
| 510 | 501 | ||
| 511 | 502 | ||
| 512 | template<typename T, typename V> | 503 | template<typename T, typename V> |
| 513 | Vec4<T> operator * (const V& f, const Vec4<T>& vec) | 504 | Vec4<decltype(V{}*T{})> operator * (const V& f, const Vec4<T>& vec) |
| 514 | { | 505 | { |
| 515 | return Vec4<T>(f*vec.x,f*vec.y,f*vec.z,f*vec.w); | 506 | return MakeVec(f*vec.x,f*vec.y,f*vec.z,f*vec.w); |
| 516 | } | 507 | } |
| 517 | 508 | ||
| 518 | typedef Vec4<float> Vec4f; | 509 | typedef Vec4<float> Vec4f; |
| 519 | 510 | ||
| 520 | 511 | ||
| 521 | template<typename T> | 512 | template<typename T> |
| 522 | static inline T Dot(const Vec2<T>& a, const Vec2<T>& b) | 513 | static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec2<T>& a, const Vec2<T>& b) |
| 523 | { | 514 | { |
| 524 | return a.x*b.x + a.y*b.y; | 515 | return a.x*b.x + a.y*b.y; |
| 525 | } | 516 | } |
| 526 | 517 | ||
| 527 | template<typename T> | 518 | template<typename T> |
| 528 | static inline T Dot(const Vec3<T>& a, const Vec3<T>& b) | 519 | static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec3<T>& a, const Vec3<T>& b) |
| 529 | { | 520 | { |
| 530 | return a.x*b.x + a.y*b.y + a.z*b.z; | 521 | return a.x*b.x + a.y*b.y + a.z*b.z; |
| 531 | } | 522 | } |
| 532 | 523 | ||
| 533 | template<typename T> | 524 | template<typename T> |
| 534 | static inline T Dot(const Vec4<T>& a, const Vec4<T>& b) | 525 | static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec4<T>& a, const Vec4<T>& b) |
| 535 | { | 526 | { |
| 536 | return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; | 527 | return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; |
| 537 | } | 528 | } |
| 538 | 529 | ||
| 539 | template<typename T> | 530 | template<typename T> |
| 540 | static inline Vec3<T> Cross(const Vec3<T>& a, const Vec3<T>& b) | 531 | static inline Vec3<decltype(T{}*T{}-T{}*T{})> Cross(const Vec3<T>& a, const Vec3<T>& b) |
| 541 | { | 532 | { |
| 542 | return Vec3<T>(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); | 533 | return MakeVec(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); |
| 543 | } | 534 | } |
| 544 | 535 | ||
| 545 | // linear interpolation via float: 0.0=begin, 1.0=end | 536 | // linear interpolation via float: 0.0=begin, 1.0=end |
| 546 | template<typename X> | 537 | template<typename X> |
| 547 | static inline X Lerp(const X& begin, const X& end, const float t) | 538 | static inline decltype(X{}*float{}+X{}*float{}) Lerp(const X& begin, const X& end, const float t) |
| 548 | { | 539 | { |
| 549 | return begin*(1.f-t) + end*t; | 540 | return begin*(1.f-t) + end*t; |
| 550 | } | 541 | } |
| 551 | 542 | ||
| 552 | // linear interpolation via int: 0=begin, base=end | 543 | // linear interpolation via int: 0=begin, base=end |
| 553 | template<typename X, int base> | 544 | template<typename X, int base> |
| 554 | static inline X LerpInt(const X& begin, const X& end, const int t) | 545 | static inline decltype((X{}*int{}+X{}*int{}) / base) LerpInt(const X& begin, const X& end, const int t) |
| 555 | { | 546 | { |
| 556 | return (begin*(base-t) + end*t) / base; | 547 | return (begin*(base-t) + end*t) / base; |
| 557 | } | 548 | } |
| 558 | 549 | ||
| 559 | // Utility vector factories | 550 | // Utility vector factories |
| 560 | template<typename T> | 551 | template<typename T> |
| 561 | static inline Vec2<T> MakeVec2(const T& x, const T& y) | 552 | static inline Vec2<T> MakeVec(const T& x, const T& y) |
| 562 | { | 553 | { |
| 563 | return Vec2<T>{x, y}; | 554 | return Vec2<T>{x, y}; |
| 564 | } | 555 | } |
| 565 | 556 | ||
| 566 | template<typename T> | 557 | template<typename T> |
| 567 | static inline Vec3<T> MakeVec3(const T& x, const T& y, const T& z) | 558 | static inline Vec3<T> MakeVec(const T& x, const T& y, const T& z) |
| 568 | { | 559 | { |
| 569 | return Vec3<T>{x, y, z}; | 560 | return Vec3<T>{x, y, z}; |
| 570 | } | 561 | } |
| 571 | 562 | ||
| 572 | template<typename T> | 563 | template<typename T> |
| 573 | static inline Vec4<T> MakeVec4(const T& x, const T& y, const T& z, const T& w) | 564 | static inline Vec4<T> MakeVec(const T& x, const T& y, const Vec2<T>& zw) |
| 565 | { | ||
| 566 | return MakeVec(x, y, zw[0], zw[1]); | ||
| 567 | } | ||
| 568 | |||
| 569 | template<typename T> | ||
| 570 | static inline Vec3<T> MakeVec(const Vec2<T>& xy, const T& z) | ||
| 571 | { | ||
| 572 | return MakeVec(xy[0], xy[1], z); | ||
| 573 | } | ||
| 574 | |||
| 575 | template<typename T> | ||
| 576 | static inline Vec3<T> MakeVec(const T& x, const Vec2<T>& yz) | ||
| 577 | { | ||
| 578 | return MakeVec(x, yz[0], yz[1]); | ||
| 579 | } | ||
| 580 | |||
| 581 | template<typename T> | ||
| 582 | static inline Vec4<T> MakeVec(const T& x, const T& y, const T& z, const T& w) | ||
| 574 | { | 583 | { |
| 575 | return Vec4<T>{x, y, z, w}; | 584 | return Vec4<T>{x, y, z, w}; |
| 576 | } | 585 | } |
| 577 | 586 | ||
| 587 | template<typename T> | ||
| 588 | static inline Vec4<T> MakeVec(const Vec2<T>& xy, const T& z, const T& w) | ||
| 589 | { | ||
| 590 | return MakeVec(xy[0], xy[1], z, w); | ||
| 591 | } | ||
| 592 | |||
| 593 | template<typename T> | ||
| 594 | static inline Vec4<T> MakeVec(const T& x, const Vec2<T>& yz, const T& w) | ||
| 595 | { | ||
| 596 | return MakeVec(x, yz[0], yz[1], w); | ||
| 597 | } | ||
| 598 | |||
| 599 | // NOTE: This has priority over "Vec2<Vec2<T>> MakeVec(const Vec2<T>& x, const Vec2<T>& y)". | ||
| 600 | // Even if someone wanted to use an odd object like Vec2<Vec2<T>>, the compiler would error | ||
| 601 | // out soon enough due to misuse of the returned structure. | ||
| 602 | template<typename T> | ||
| 603 | static inline Vec4<T> MakeVec(const Vec2<T>& xy, const Vec2<T>& zw) | ||
| 604 | { | ||
| 605 | return MakeVec(xy[0], xy[1], zw[0], zw[1]); | ||
| 606 | } | ||
| 607 | |||
| 608 | template<typename T> | ||
| 609 | static inline Vec4<T> MakeVec(const Vec3<T>& xyz, const T& w) | ||
| 610 | { | ||
| 611 | return MakeVec(xyz[0], xyz[1], xyz[2], w); | ||
| 612 | } | ||
| 613 | |||
| 614 | // Builds a Vec4 from a scalar x followed by the three components of yzw (a Vec3: it is indexed up to [2], and a Vec2 parameter would clash with the Vec3-returning MakeVec(x, yz) overload). | ||
| 615 | template<typename T> | ||
| 616 | static inline Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) | ||
| 617 | { | ||
| 618 | return MakeVec(x, yzw[0], yzw[1], yzw[2]); | ||
| 619 | } | ||
| 620 | |||
| 578 | } // namespace | 621 | } // namespace |
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 640830144..cfdc9b934 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 7 | #include <cstddef> | 8 | #include <cstddef> |
| 8 | #include <initializer_list> | 9 | #include <initializer_list> |
| 9 | #include <map> | 10 | #include <map> |
| @@ -57,7 +58,7 @@ struct Regs { | |||
| 57 | 58 | ||
| 58 | INSERT_PADDING_WORDS(0x1); | 59 | INSERT_PADDING_WORDS(0x1); |
| 59 | 60 | ||
| 60 | union { | 61 | union VSOutputAttributes { |
| 61 | // Maps components of output vertex attributes to semantics | 62 | // Maps components of output vertex attributes to semantics |
| 62 | enum Semantic : u32 | 63 | enum Semantic : u32 |
| 63 | { | 64 | { |
| @@ -94,7 +95,137 @@ struct Regs { | |||
| 94 | BitField<16, 16, u32> y; | 95 | BitField<16, 16, u32> y; |
| 95 | } viewport_corner; | 96 | } viewport_corner; |
| 96 | 97 | ||
| 97 | INSERT_PADDING_WORDS(0xa7); | 98 | INSERT_PADDING_WORDS(0x17); |
| 99 | |||
| 100 | /* Register block describing one texture unit (size and address); the texture format is kept in a separate register, see the note at the end of the struct. */ struct TextureConfig { | ||
| 101 | INSERT_PADDING_WORDS(0x1); | ||
| 102 | |||
| 103 | union { | ||
| 104 | BitField< 0, 16, u32> height; | ||
| 105 | BitField<16, 16, u32> width; | ||
| 106 | }; | ||
| 107 | |||
| 108 | INSERT_PADDING_WORDS(0x2); | ||
| 109 | |||
| 110 | u32 address; | ||
| 111 | |||
| 112 | /* Decodes the address register and rebases it from Memory::FCRAM_PADDR onto Memory::HEAP_GSP_VADDR. NOTE(review): despite the name this looks like an emulated virtual address rather than a physical one - confirm against callers. */ u32 GetPhysicalAddress() { | ||
| 113 | return DecodeAddressRegister(address) - Memory::FCRAM_PADDR + Memory::HEAP_GSP_VADDR; | ||
| 114 | } | ||
| 115 | |||
| 116 | // texture1 and texture2 store the texture format directly after the address | ||
| 117 | // whereas texture0 inserts some additional flags in between. | ||
| 118 | // Hence, we store the format separately so that all other parameters can be described | ||
| 119 | // in a single structure. | ||
| 120 | }; | ||
| 121 | |||
| 122 | /* Texel formats selectable through the texture0_format register field. */ enum class TextureFormat : u32 { | ||
| 123 | RGBA8 = 0, | ||
| 124 | RGB8 = 1, | ||
| 125 | RGBA5551 = 2, | ||
| 126 | RGB565 = 3, | ||
| 127 | RGBA4 = 4, | ||
| 128 | |||
| 129 | // TODO: Support for the other formats is not implemented, yet. | ||
| 130 | // Seems like they are luminance formats and compressed textures. | ||
| 131 | }; | ||
| 132 | |||
| 133 | BitField<0, 1, u32> texturing_enable; | ||
| 134 | TextureConfig texture0; | ||
| 135 | INSERT_PADDING_WORDS(0x8); | ||
| 136 | BitField<0, 4, TextureFormat> texture0_format; | ||
| 137 | |||
| 138 | INSERT_PADDING_WORDS(0x31); | ||
| 139 | |||
| 140 | // 0xc0-0xff: Texture Combiner (akin to glTexEnv) | ||
| 141 | /* Configuration of one texture combiner stage: four register words (sources, modifiers, operations, constant color) followed by one padding word. */ struct TevStageConfig { | ||
| 142 | /* Selects where a combiner input is read from. */ enum class Source : u32 { | ||
| 143 | PrimaryColor = 0x0, | ||
| 144 | Texture0 = 0x3, | ||
| 145 | Texture1 = 0x4, | ||
| 146 | Texture2 = 0x5, | ||
| 147 | Texture3 = 0x6, | ||
| 148 | // 0x7-0xc = primary color?? | ||
| 149 | Constant = 0xe, | ||
| 150 | Previous = 0xf, | ||
| 151 | }; | ||
| 152 | |||
| 153 | /* Modifier applied to a color input before it is combined. */ enum class ColorModifier : u32 { | ||
| 154 | SourceColor = 0, | ||
| 155 | OneMinusSourceColor = 1, | ||
| 156 | SourceAlpha = 2, | ||
| 157 | OneMinusSourceAlpha = 3, | ||
| 158 | |||
| 159 | // Other values seem to be non-standard extensions | ||
| 160 | }; | ||
| 161 | |||
| 162 | /* Modifier applied to an alpha input before it is combined. */ enum class AlphaModifier : u32 { | ||
| 163 | SourceAlpha = 0, | ||
| 164 | OneMinusSourceAlpha = 1, | ||
| 165 | |||
| 166 | // Other values seem to be non-standard extensions | ||
| 167 | }; | ||
| 168 | |||
| 169 | /* Arithmetic used to combine the (modified) inputs of a stage. */ enum class Operation : u32 { | ||
| 170 | Replace = 0, | ||
| 171 | Modulate = 1, | ||
| 172 | Add = 2, | ||
| 173 | AddSigned = 3, | ||
| 174 | Lerp = 4, | ||
| 175 | Subtract = 5, | ||
| 176 | }; | ||
| 177 | |||
| 178 | /* First word: source selection for the three color inputs and the three alpha inputs. */ union { | ||
| 179 | BitField< 0, 4, Source> color_source1; | ||
| 180 | BitField< 4, 4, Source> color_source2; | ||
| 181 | BitField< 8, 4, Source> color_source3; | ||
| 182 | BitField<16, 4, Source> alpha_source1; | ||
| 183 | BitField<20, 4, Source> alpha_source2; | ||
| 184 | BitField<24, 4, Source> alpha_source3; | ||
| 185 | }; | ||
| 186 | |||
| 187 | /* Second word: modifier for each of the inputs selected above. */ union { | ||
| 188 | BitField< 0, 4, ColorModifier> color_modifier1; | ||
| 189 | BitField< 4, 4, ColorModifier> color_modifier2; | ||
| 190 | BitField< 8, 4, ColorModifier> color_modifier3; | ||
| 191 | BitField<12, 3, AlphaModifier> alpha_modifier1; | ||
| 192 | BitField<16, 3, AlphaModifier> alpha_modifier2; | ||
| 193 | BitField<20, 3, AlphaModifier> alpha_modifier3; | ||
| 194 | }; | ||
| 195 | |||
| 196 | /* Third word: combine operation, chosen separately for color and alpha. */ union { | ||
| 197 | BitField< 0, 4, Operation> color_op; | ||
| 198 | BitField<16, 4, Operation> alpha_op; | ||
| 199 | }; | ||
| 200 | |||
| 201 | /* Fourth word: constant color read by Source::Constant, 8 bits per channel. */ union { | ||
| 202 | BitField< 0, 8, u32> const_r; | ||
| 203 | BitField< 8, 8, u32> const_g; | ||
| 204 | BitField<16, 8, u32> const_b; | ||
| 205 | BitField<24, 8, u32> const_a; | ||
| 206 | }; | ||
| 207 | |||
| 208 | INSERT_PADDING_WORDS(0x1); | ||
| 209 | }; | ||
| 210 | |||
| 211 | TevStageConfig tev_stage0; | ||
| 212 | INSERT_PADDING_WORDS(0x3); | ||
| 213 | TevStageConfig tev_stage1; | ||
| 214 | INSERT_PADDING_WORDS(0x3); | ||
| 215 | TevStageConfig tev_stage2; | ||
| 216 | INSERT_PADDING_WORDS(0x3); | ||
| 217 | TevStageConfig tev_stage3; | ||
| 218 | INSERT_PADDING_WORDS(0x13); | ||
| 219 | TevStageConfig tev_stage4; | ||
| 220 | INSERT_PADDING_WORDS(0x3); | ||
| 221 | TevStageConfig tev_stage5; | ||
| 222 | INSERT_PADDING_WORDS(0x13); | ||
| 223 | |||
| 224 | /* Returns copies of the six TEV stage configurations in stage order (0..5) so callers can iterate over them. Returned by plain value: a top-level const on a by-value return adds nothing for callers. */ std::array<Regs::TevStageConfig,6> GetTevStages() const { | ||
| 225 | return { tev_stage0, tev_stage1, | ||
| 226 | tev_stage2, tev_stage3, | ||
| 227 | tev_stage4, tev_stage5 }; | ||
| 228 | } | ||
| 98 | 229 | ||
| 99 | struct { | 230 | struct { |
| 100 | enum ColorFormat : u32 { | 231 | enum ColorFormat : u32 { |
| @@ -403,6 +534,15 @@ struct Regs { | |||
| 403 | ADD_FIELD(viewport_depth_range); | 534 | ADD_FIELD(viewport_depth_range); |
| 404 | ADD_FIELD(viewport_depth_far_plane); | 535 | ADD_FIELD(viewport_depth_far_plane); |
| 405 | ADD_FIELD(viewport_corner); | 536 | ADD_FIELD(viewport_corner); |
| 537 | ADD_FIELD(texturing_enable); | ||
| 538 | ADD_FIELD(texture0); | ||
| 539 | ADD_FIELD(texture0_format); | ||
| 540 | ADD_FIELD(tev_stage0); | ||
| 541 | ADD_FIELD(tev_stage1); | ||
| 542 | ADD_FIELD(tev_stage2); | ||
| 543 | ADD_FIELD(tev_stage3); | ||
| 544 | ADD_FIELD(tev_stage4); | ||
| 545 | ADD_FIELD(tev_stage5); | ||
| 406 | ADD_FIELD(framebuffer); | 546 | ADD_FIELD(framebuffer); |
| 407 | ADD_FIELD(vertex_attributes); | 547 | ADD_FIELD(vertex_attributes); |
| 408 | ADD_FIELD(index_array); | 548 | ADD_FIELD(index_array); |
| @@ -460,6 +600,15 @@ ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e); | |||
| 460 | ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); | 600 | ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); |
| 461 | ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); | 601 | ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); |
| 462 | ASSERT_REG_POSITION(viewport_corner, 0x68); | 602 | ASSERT_REG_POSITION(viewport_corner, 0x68); |
| 603 | ASSERT_REG_POSITION(texturing_enable, 0x80); | ||
| 604 | ASSERT_REG_POSITION(texture0, 0x81); | ||
| 605 | ASSERT_REG_POSITION(texture0_format, 0x8e); | ||
| 606 | ASSERT_REG_POSITION(tev_stage0, 0xc0); | ||
| 607 | ASSERT_REG_POSITION(tev_stage1, 0xc8); | ||
| 608 | ASSERT_REG_POSITION(tev_stage2, 0xd0); | ||
| 609 | ASSERT_REG_POSITION(tev_stage3, 0xd8); | ||
| 610 | ASSERT_REG_POSITION(tev_stage4, 0xf0); | ||
| 611 | ASSERT_REG_POSITION(tev_stage5, 0xf8); | ||
| 463 | ASSERT_REG_POSITION(framebuffer, 0x110); | 612 | ASSERT_REG_POSITION(framebuffer, 0x110); |
| 464 | ASSERT_REG_POSITION(vertex_attributes, 0x200); | 613 | ASSERT_REG_POSITION(vertex_attributes, 0x200); |
| 465 | ASSERT_REG_POSITION(index_array, 0x227); | 614 | ASSERT_REG_POSITION(index_array, 0x227); |
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index 2354ffb99..dabf2d1a3 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp | |||
| @@ -2,21 +2,23 @@ | |||
| 2 | // Licensed under GPLv2 | 2 | // Licensed under GPLv2 |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "clipper.h" | ||
| 6 | #include "pica.h" | 5 | #include "pica.h" |
| 7 | #include "primitive_assembly.h" | 6 | #include "primitive_assembly.h" |
| 8 | #include "vertex_shader.h" | 7 | #include "vertex_shader.h" |
| 9 | 8 | ||
| 10 | namespace Pica { | 9 | #include "video_core/debug_utils/debug_utils.h" |
| 11 | 10 | ||
| 12 | namespace PrimitiveAssembly { | 11 | namespace Pica { |
| 13 | 12 | ||
| 14 | static OutputVertex buffer[2]; | 13 | template<typename VertexType> |
| 15 | static int buffer_index = 0; // TODO: reset this on emulation restart | 14 | PrimitiveAssembler<VertexType>::PrimitiveAssembler(Regs::TriangleTopology topology) |
| 15 | : topology(topology), buffer_index(0) { | ||
| 16 | } | ||
| 16 | 17 | ||
| 17 | void SubmitVertex(OutputVertex& vtx) | 18 | template<typename VertexType> |
| 19 | void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler) | ||
| 18 | { | 20 | { |
| 19 | switch (registers.triangle_topology) { | 21 | switch (topology) { |
| 20 | case Regs::TriangleTopology::List: | 22 | case Regs::TriangleTopology::List: |
| 21 | case Regs::TriangleTopology::ListIndexed: | 23 | case Regs::TriangleTopology::ListIndexed: |
| 22 | if (buffer_index < 2) { | 24 | if (buffer_index < 2) { |
| @@ -24,7 +26,7 @@ void SubmitVertex(OutputVertex& vtx) | |||
| 24 | } else { | 26 | } else { |
| 25 | buffer_index = 0; | 27 | buffer_index = 0; |
| 26 | 28 | ||
| 27 | Clipper::ProcessTriangle(buffer[0], buffer[1], vtx); | 29 | triangle_handler(buffer[0], buffer[1], vtx); |
| 28 | } | 30 | } |
| 29 | break; | 31 | break; |
| 30 | 32 | ||
| @@ -32,7 +34,7 @@ void SubmitVertex(OutputVertex& vtx) | |||
| 32 | if (buffer_index == 2) { | 34 | if (buffer_index == 2) { |
| 33 | buffer_index = 0; | 35 | buffer_index = 0; |
| 34 | 36 | ||
| 35 | Clipper::ProcessTriangle(buffer[0], buffer[1], vtx); | 37 | triangle_handler(buffer[0], buffer[1], vtx); |
| 36 | 38 | ||
| 37 | buffer[1] = vtx; | 39 | buffer[1] = vtx; |
| 38 | } else { | 40 | } else { |
| @@ -41,11 +43,15 @@ void SubmitVertex(OutputVertex& vtx) | |||
| 41 | break; | 43 | break; |
| 42 | 44 | ||
| 43 | default: | 45 | default: |
| 44 | ERROR_LOG(GPU, "Unknown triangle mode %x:", (int)registers.triangle_topology.Value()); | 46 | ERROR_LOG(GPU, "Unknown triangle topology %x:", (int)topology); |
| 45 | break; | 47 | break; |
| 46 | } | 48 | } |
| 47 | } | 49 | } |
| 48 | 50 | ||
| 49 | } // namespace | 51 | // explicitly instantiate use cases |
| 52 | template | ||
| 53 | struct PrimitiveAssembler<VertexShader::OutputVertex>; | ||
| 54 | template | ||
| 55 | struct PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex>; | ||
| 50 | 56 | ||
| 51 | } // namespace | 57 | } // namespace |
diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h index 2a2b0c170..ea2e2f61e 100644 --- a/src/video_core/primitive_assembly.h +++ b/src/video_core/primitive_assembly.h | |||
| @@ -4,18 +4,40 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | namespace Pica { | 7 | #include <functional> |
| 8 | 8 | ||
| 9 | namespace VertexShader { | 9 | #include "video_core/pica.h" |
| 10 | struct OutputVertex; | ||
| 11 | } | ||
| 12 | 10 | ||
| 13 | namespace PrimitiveAssembly { | 11 | #include "video_core/vertex_shader.h" |
| 14 | 12 | ||
| 15 | using VertexShader::OutputVertex; | 13 | namespace Pica { |
| 16 | 14 | ||
| 17 | void SubmitVertex(OutputVertex& vtx); | 15 | /* |
| 16 | * Utility class to build triangles from a series of vertices, | ||
| 17 | * according to a given triangle topology. | ||
| 18 | */ | ||
| 19 | template<typename VertexType> | ||
| 20 | struct PrimitiveAssembler { | ||
| 21 | /* Callback that receives the three vertices of each assembled triangle. */ using TriangleHandler = std::function<void(VertexType& v0, | ||
| 22 | VertexType& v1, | ||
| 23 | VertexType& v2)>; | ||
| 24 | |||
| 25 | /* Remembers the topology used by SubmitVertex; the vertex buffer index starts at 0. */ PrimitiveAssembler(Regs::TriangleTopology topology); | ||
| 26 | |||
| 27 | /* | ||
| 28 | * Queues a vertex, builds primitives from the vertex queue according to the given | ||
| 29 | * triangle topology, and calls triangle_handler for each generated primitive. | ||
| 30 | * NOTE: We could specify the triangle handler in the constructor, but this way we can | ||
| 31 | * keep event and handler code next to each other. | ||
| 32 | */ | ||
| 33 | void SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler); | ||
| 34 | |||
| 35 | private: | ||
| 36 | /* Topology fixed at construction time; selects the assembly rule in SubmitVertex. */ Regs::TriangleTopology topology; | ||
| 37 | |||
| 38 | /* Presumably the number of vertices currently held in buffer - confirm against SubmitVertex. */ int buffer_index; | ||
| 39 | /* Up to two earlier vertices; they are passed to the handler together with the newly submitted one. */ VertexType buffer[2]; | ||
| 40 | }; | ||
| 18 | 41 | ||
| 19 | } // namespace | ||
| 20 | 42 | ||
| 21 | } // namespace | 43 | } // namespace |
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index a7c1bab3e..cdfdb6215 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -11,6 +11,8 @@ | |||
| 11 | #include "rasterizer.h" | 11 | #include "rasterizer.h" |
| 12 | #include "vertex_shader.h" | 12 | #include "vertex_shader.h" |
| 13 | 13 | ||
| 14 | #include "debug_utils/debug_utils.h" | ||
| 15 | |||
| 14 | namespace Pica { | 16 | namespace Pica { |
| 15 | 17 | ||
| 16 | namespace Rasterizer { | 18 | namespace Rasterizer { |
| @@ -78,10 +80,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 78 | u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); | 80 | u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); |
| 79 | u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); | 81 | u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); |
| 80 | 82 | ||
| 81 | min_x = min_x & Fix12P4::IntMask(); | 83 | min_x &= Fix12P4::IntMask(); |
| 82 | min_y = min_y & Fix12P4::IntMask(); | 84 | min_y &= Fix12P4::IntMask(); |
| 83 | max_x = (max_x + Fix12P4::FracMask()) & Fix12P4::IntMask(); | 85 | max_x = ((max_x + Fix12P4::FracMask()) & Fix12P4::IntMask()); |
| 84 | max_y = (max_y + Fix12P4::FracMask()) & Fix12P4::IntMask(); | 86 | max_y = ((max_y + Fix12P4::FracMask()) & Fix12P4::IntMask()); |
| 85 | 87 | ||
| 86 | // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not | 88 | // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not |
| 87 | // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias | 89 | // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias |
| @@ -112,10 +114,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 112 | auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1, | 114 | auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1, |
| 113 | const Math::Vec2<Fix12P4>& vtx2, | 115 | const Math::Vec2<Fix12P4>& vtx2, |
| 114 | const Math::Vec2<Fix12P4>& vtx3) { | 116 | const Math::Vec2<Fix12P4>& vtx3) { |
| 115 | const auto vec1 = (vtx2.Cast<int>() - vtx1.Cast<int>()).Append(0); | 117 | const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); |
| 116 | const auto vec2 = (vtx3.Cast<int>() - vtx1.Cast<int>()).Append(0); | 118 | const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); |
| 117 | // TODO: There is a very small chance this will overflow for sizeof(int) == 4 | 119 | // TODO: There is a very small chance this will overflow for sizeof(int) == 4 |
| 118 | return Cross(vec1, vec2).z; | 120 | return Math::Cross(vec1, vec2).z; |
| 119 | }; | 121 | }; |
| 120 | 122 | ||
| 121 | int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); | 123 | int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); |
| @@ -143,15 +145,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 143 | // | 145 | // |
| 144 | // The generalization to three vertices is straightforward in baricentric coordinates. | 146 | // The generalization to three vertices is straightforward in baricentric coordinates. |
| 145 | auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { | 147 | auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { |
| 146 | auto attr_over_w = Math::MakeVec3(attr0 / v0.pos.w, | 148 | auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w, |
| 147 | attr1 / v1.pos.w, | 149 | attr1 / v1.pos.w, |
| 148 | attr2 / v2.pos.w); | 150 | attr2 / v2.pos.w); |
| 149 | auto w_inverse = Math::MakeVec3(float24::FromFloat32(1.f) / v0.pos.w, | 151 | auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w, |
| 150 | float24::FromFloat32(1.f) / v1.pos.w, | 152 | float24::FromFloat32(1.f) / v1.pos.w, |
| 151 | float24::FromFloat32(1.f) / v2.pos.w); | 153 | float24::FromFloat32(1.f) / v2.pos.w); |
| 152 | auto baricentric_coordinates = Math::MakeVec3(float24::FromFloat32(w0), | 154 | auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(w0), |
| 153 | float24::FromFloat32(w1), | 155 | float24::FromFloat32(w1), |
| 154 | float24::FromFloat32(w2)); | 156 | float24::FromFloat32(w2)); |
| 155 | 157 | ||
| 156 | float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); | 158 | float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); |
| 157 | float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); | 159 | float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); |
| @@ -165,12 +167,196 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 165 | (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) | 167 | (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) |
| 166 | }; | 168 | }; |
| 167 | 169 | ||
| 170 | Math::Vec4<u8> texture_color{}; | ||
| 171 | float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); | ||
| 172 | float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); | ||
| 173 | if (registers.texturing_enable) { | ||
| 174 | // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each | ||
| 175 | // of which is composed of four 2x2 subtiles each of which is composed of four texels. | ||
| 176 | // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. | ||
| 177 | // texels are laid out in a 2x2 subtile like this: | ||
| 178 | // 2 3 | ||
| 179 | // 0 1 | ||
| 180 | // | ||
| 181 | // The full 8x8 tile has the texels arranged like this: | ||
| 182 | // | ||
| 183 | // 42 43 46 47 58 59 62 63 | ||
| 184 | // 40 41 44 45 56 57 60 61 | ||
| 185 | // 34 35 38 39 50 51 54 55 | ||
| 186 | // 32 33 36 37 48 49 52 53 | ||
| 187 | // 10 11 14 15 26 27 30 31 | ||
| 188 | // 08 09 12 13 24 25 28 29 | ||
| 189 | // 02 03 06 07 18 19 22 23 | ||
| 190 | // 00 01 04 05 16 17 20 21 | ||
| 191 | |||
| 192 | // TODO: This is currently hardcoded for RGB8 | ||
| 193 | u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress()); | ||
| 194 | |||
| 195 | // TODO(neobrain): Not sure if this swizzling pattern is used for all textures. | ||
| 196 | // To be flexible in case different but similar patterns are used, we keep this | ||
| 197 | // somewhat inefficient code around for now. | ||
| 198 | int s = (int)(u * float24::FromFloat32(registers.texture0.width)).ToFloat32(); | ||
| 199 | int t = (int)(v * float24::FromFloat32(registers.texture0.height)).ToFloat32(); | ||
| 200 | int texel_index_within_tile = 0; | ||
| 201 | for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { | ||
| 202 | int sub_tile_width = 1 << block_size_index; | ||
| 203 | int sub_tile_height = 1 << block_size_index; | ||
| 204 | |||
| 205 | int sub_tile_index = (s & sub_tile_width) << block_size_index; | ||
| 206 | sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index); | ||
| 207 | texel_index_within_tile += sub_tile_index; | ||
| 208 | } | ||
| 209 | |||
| 210 | const int block_width = 8; | ||
| 211 | const int block_height = 8; | ||
| 212 | |||
| 213 | int coarse_s = (s / block_width) * block_width; | ||
| 214 | int coarse_t = (t / block_height) * block_height; | ||
| 215 | |||
| 216 | const int row_stride = registers.texture0.width * 3; | ||
| 217 | u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3; | ||
| 218 | texture_color.r() = source_ptr[2]; | ||
| 219 | texture_color.g() = source_ptr[1]; | ||
| 220 | texture_color.b() = source_ptr[0]; | ||
| 221 | texture_color.a() = 0xFF; | ||
| 222 | |||
| 223 | DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data); | ||
| 224 | } | ||
| 225 | |||
| 226 | // Texture environment - consists of 6 stages of color and alpha combining. | ||
| 227 | // | ||
| 228 | // Color combiners take three input color values from some source (e.g. interpolated | ||
| 229 | // vertex color, texture color, previous stage, etc), perform some very simple | ||
| 230 | // operations on each of them (e.g. inversion) and then calculate the output color | ||
| 231 | // with some basic arithmetic. Alpha combiners can be configured separately but work | ||
| 232 | // analogously. | ||
| 233 | Math::Vec4<u8> combiner_output; | ||
| 234 | for (auto tev_stage : registers.GetTevStages()) { | ||
| 235 | using Source = Regs::TevStageConfig::Source; | ||
| 236 | using ColorModifier = Regs::TevStageConfig::ColorModifier; | ||
| 237 | using AlphaModifier = Regs::TevStageConfig::AlphaModifier; | ||
| 238 | using Operation = Regs::TevStageConfig::Operation; | ||
| 239 | |||
| 240 | auto GetColorSource = [&](Source source) -> Math::Vec3<u8> { | ||
| 241 | switch (source) { | ||
| 242 | case Source::PrimaryColor: | ||
| 243 | return primary_color.rgb(); | ||
| 244 | |||
| 245 | case Source::Texture0: | ||
| 246 | return texture_color.rgb(); | ||
| 247 | |||
| 248 | case Source::Constant: | ||
| 249 | return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b}; | ||
| 250 | |||
| 251 | case Source::Previous: | ||
| 252 | return combiner_output.rgb(); | ||
| 253 | |||
| 254 | default: | ||
| 255 | ERROR_LOG(GPU, "Unknown color combiner source %d\n", (int)source); | ||
| 256 | return {}; | ||
| 257 | } | ||
| 258 | }; | ||
| 259 | |||
| 260 | auto GetAlphaSource = [&](Source source) -> u8 { | ||
| 261 | switch (source) { | ||
| 262 | case Source::PrimaryColor: | ||
| 263 | return primary_color.a(); | ||
| 264 | |||
| 265 | case Source::Texture0: | ||
| 266 | return texture_color.a(); | ||
| 267 | |||
| 268 | case Source::Constant: | ||
| 269 | return tev_stage.const_a; | ||
| 270 | |||
| 271 | case Source::Previous: | ||
| 272 | return combiner_output.a(); | ||
| 273 | |||
| 274 | default: | ||
| 275 | ERROR_LOG(GPU, "Unknown alpha combiner source %d\n", (int)source); | ||
| 276 | return 0; | ||
| 277 | } | ||
| 278 | }; | ||
| 279 | |||
| 280 | auto GetColorModifier = [](ColorModifier factor, const Math::Vec3<u8>& values) -> Math::Vec3<u8> { | ||
| 281 | switch (factor) | ||
| 282 | { | ||
| 283 | case ColorModifier::SourceColor: | ||
| 284 | return values; | ||
| 285 | default: | ||
| 286 | ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor); | ||
| 287 | return {}; | ||
| 288 | } | ||
| 289 | }; | ||
| 290 | |||
| 291 | /* Applies the selected alpha modifier to a single alpha input. Only SourceAlpha (pass-through) is handled; any other modifier is logged as an unknown alpha factor and yields 0. */ auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 { | ||
| 292 | switch (factor) { | ||
| 293 | case AlphaModifier::SourceAlpha: | ||
| 294 | return value; | ||
| 295 | default: | ||
| 296 | ERROR_LOG(GPU, "Unknown alpha factor %d\n", (int)factor); | ||
| 297 | return 0; | ||
| 298 | } | ||
| 299 | }; | ||
| 300 | |||
| 301 | auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { | ||
| 302 | switch (op) { | ||
| 303 | case Operation::Replace: | ||
| 304 | return input[0]; | ||
| 305 | |||
| 306 | case Operation::Modulate: | ||
| 307 | return ((input[0] * input[1]) / 255).Cast<u8>(); | ||
| 308 | |||
| 309 | default: | ||
| 310 | ERROR_LOG(GPU, "Unknown color combiner operation %d\n", (int)op); | ||
| 311 | return {}; | ||
| 312 | } | ||
| 313 | }; | ||
| 314 | |||
| 315 | auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { | ||
| 316 | switch (op) { | ||
| 317 | case Operation::Replace: | ||
| 318 | return input[0]; | ||
| 319 | |||
| 320 | case Operation::Modulate: | ||
| 321 | return input[0] * input[1] / 255; | ||
| 322 | |||
| 323 | default: | ||
| 324 | ERROR_LOG(GPU, "Unknown alpha combiner operation %d\n", (int)op); | ||
| 325 | return 0; | ||
| 326 | } | ||
| 327 | }; | ||
| 328 | |||
| 329 | // color combiner | ||
| 330 | // NOTE: Not sure if the alpha combiner might use the color output of the previous | ||
| 331 | // stage as input. Hence, we currently don't directly write the result to | ||
| 332 | // combiner_output.rgb(), but instead store it in a temporary variable until | ||
| 333 | // alpha combining has been done. | ||
| 334 | Math::Vec3<u8> color_result[3] = { | ||
| 335 | GetColorModifier(tev_stage.color_modifier1, GetColorSource(tev_stage.color_source1)), | ||
| 336 | GetColorModifier(tev_stage.color_modifier2, GetColorSource(tev_stage.color_source2)), | ||
| 337 | GetColorModifier(tev_stage.color_modifier3, GetColorSource(tev_stage.color_source3)) | ||
| 338 | }; | ||
| 339 | auto color_output = ColorCombine(tev_stage.color_op, color_result); | ||
| 340 | |||
| 341 | // alpha combiner | ||
| 342 | std::array<u8,3> alpha_result = { | ||
| 343 | GetAlphaModifier(tev_stage.alpha_modifier1, GetAlphaSource(tev_stage.alpha_source1)), | ||
| 344 | GetAlphaModifier(tev_stage.alpha_modifier2, GetAlphaSource(tev_stage.alpha_source2)), | ||
| 345 | GetAlphaModifier(tev_stage.alpha_modifier3, GetAlphaSource(tev_stage.alpha_source3)) | ||
| 346 | }; | ||
| 347 | auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); | ||
| 348 | |||
| 349 | combiner_output = Math::MakeVec(color_output, alpha_output); | ||
| 350 | } | ||
| 351 | |||
| 352 | // TODO: Not sure if the multiplication by 65535 has already been taken care | ||
| 353 | // of when transforming to screen coordinates or not. | ||
| 168 | u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + | 354 | u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + |
| 169 | (float)v1.screenpos[2].ToFloat32() * w1 + | 355 | (float)v1.screenpos[2].ToFloat32() * w1 + |
| 170 | (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536? | 356 | (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); |
| 171 | SetDepth(x >> 4, y >> 4, z); | 357 | SetDepth(x >> 4, y >> 4, z); |
| 172 | 358 | ||
| 173 | DrawPixel(x >> 4, y >> 4, primary_color); | 359 | DrawPixel(x >> 4, y >> 4, combiner_output); |
| 174 | } | 360 | } |
| 175 | } | 361 | } |
| 176 | } | 362 | } |
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 93830a96a..db8244317 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include "pica.h" | 5 | #include "pica.h" |
| 6 | #include "vertex_shader.h" | 6 | #include "vertex_shader.h" |
| 7 | #include "debug_utils/debug_utils.h" | ||
| 7 | #include <core/mem_map.h> | 8 | #include <core/mem_map.h> |
| 8 | #include <common/file_util.h> | 9 | #include <common/file_util.h> |
| 9 | 10 | ||
| @@ -50,6 +51,11 @@ struct VertexShaderState { | |||
| 50 | }; | 51 | }; |
| 51 | u32 call_stack[8]; // TODO: What is the maximal call stack depth? | 52 | u32 call_stack[8]; // TODO: What is the maximal call stack depth? |
| 52 | u32* call_stack_pointer; | 53 | u32* call_stack_pointer; |
| 54 | |||
| 55 | struct { | ||
| 56 | u32 max_offset; // maximum program counter ever reached | ||
| 57 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||
| 58 | } debug; | ||
| 53 | }; | 59 | }; |
| 54 | 60 | ||
| 55 | static void ProcessShaderCode(VertexShaderState& state) { | 61 | static void ProcessShaderCode(VertexShaderState& state) { |
| @@ -57,27 +63,34 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 57 | bool increment_pc = true; | 63 | bool increment_pc = true; |
| 58 | bool exit_loop = false; | 64 | bool exit_loop = false; |
| 59 | const Instruction& instr = *(const Instruction*)state.program_counter; | 65 | const Instruction& instr = *(const Instruction*)state.program_counter; |
| 66 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); | ||
| 60 | 67 | ||
| 61 | const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1] | 68 | const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()] |
| 62 | : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x | 69 | : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x |
| 63 | : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x | 70 | : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x |
| 64 | : nullptr; | ||
| 65 | const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2] | ||
| 66 | : &state.temporary_registers[instr.common.src2-0x10].x; | ||
| 67 | // TODO: Unsure about the limit values | ||
| 68 | float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest] | ||
| 69 | : (instr.common.dest <= 0x3C) ? nullptr | ||
| 70 | : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4] | ||
| 71 | : nullptr; | 71 | : nullptr; |
| 72 | const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2.GetIndex()] | ||
| 73 | : &state.temporary_registers[instr.common.src2.GetIndex()].x; | ||
| 74 | float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] | ||
| 75 | : (instr.common.dest < 0x10) ? nullptr | ||
| 76 | : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] | ||
| 77 | : nullptr; | ||
| 72 | 78 | ||
| 73 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; | 79 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; |
| 80 | const bool negate_src1 = swizzle.negate; | ||
| 74 | 81 | ||
| 75 | const float24 src1[4] = { | 82 | float24 src1[4] = { |
| 76 | src1_[(int)swizzle.GetSelectorSrc1(0)], | 83 | src1_[(int)swizzle.GetSelectorSrc1(0)], |
| 77 | src1_[(int)swizzle.GetSelectorSrc1(1)], | 84 | src1_[(int)swizzle.GetSelectorSrc1(1)], |
| 78 | src1_[(int)swizzle.GetSelectorSrc1(2)], | 85 | src1_[(int)swizzle.GetSelectorSrc1(2)], |
| 79 | src1_[(int)swizzle.GetSelectorSrc1(3)], | 86 | src1_[(int)swizzle.GetSelectorSrc1(3)], |
| 80 | }; | 87 | }; |
| 88 | if (negate_src1) { | ||
| 89 | src1[0] = src1[0] * float24::FromFloat32(-1); | ||
| 90 | src1[1] = src1[1] * float24::FromFloat32(-1); | ||
| 91 | src1[2] = src1[2] * float24::FromFloat32(-1); | ||
| 92 | src1[3] = src1[3] * float24::FromFloat32(-1); | ||
| 93 | } | ||
| 81 | const float24 src2[4] = { | 94 | const float24 src2[4] = { |
| 82 | src2_[(int)swizzle.GetSelectorSrc2(0)], | 95 | src2_[(int)swizzle.GetSelectorSrc2(0)], |
| 83 | src2_[(int)swizzle.GetSelectorSrc2(1)], | 96 | src2_[(int)swizzle.GetSelectorSrc2(1)], |
| @@ -88,6 +101,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 88 | switch (instr.opcode) { | 101 | switch (instr.opcode) { |
| 89 | case Instruction::OpCode::ADD: | 102 | case Instruction::OpCode::ADD: |
| 90 | { | 103 | { |
| 104 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 91 | for (int i = 0; i < 4; ++i) { | 105 | for (int i = 0; i < 4; ++i) { |
| 92 | if (!swizzle.DestComponentEnabled(i)) | 106 | if (!swizzle.DestComponentEnabled(i)) |
| 93 | continue; | 107 | continue; |
| @@ -100,6 +114,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 100 | 114 | ||
| 101 | case Instruction::OpCode::MUL: | 115 | case Instruction::OpCode::MUL: |
| 102 | { | 116 | { |
| 117 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 103 | for (int i = 0; i < 4; ++i) { | 118 | for (int i = 0; i < 4; ++i) { |
| 104 | if (!swizzle.DestComponentEnabled(i)) | 119 | if (!swizzle.DestComponentEnabled(i)) |
| 105 | continue; | 120 | continue; |
| @@ -113,6 +128,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 113 | case Instruction::OpCode::DP3: | 128 | case Instruction::OpCode::DP3: |
| 114 | case Instruction::OpCode::DP4: | 129 | case Instruction::OpCode::DP4: |
| 115 | { | 130 | { |
| 131 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 116 | float24 dot = float24::FromFloat32(0.f); | 132 | float24 dot = float24::FromFloat32(0.f); |
| 117 | int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; | 133 | int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; |
| 118 | for (int i = 0; i < num_components; ++i) | 134 | for (int i = 0; i < num_components; ++i) |
| @@ -130,6 +146,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 130 | // Reciprocal | 146 | // Reciprocal |
| 131 | case Instruction::OpCode::RCP: | 147 | case Instruction::OpCode::RCP: |
| 132 | { | 148 | { |
| 149 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 133 | for (int i = 0; i < 4; ++i) { | 150 | for (int i = 0; i < 4; ++i) { |
| 134 | if (!swizzle.DestComponentEnabled(i)) | 151 | if (!swizzle.DestComponentEnabled(i)) |
| 135 | continue; | 152 | continue; |
| @@ -145,6 +162,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 145 | // Reciprocal Square Root | 162 | // Reciprocal Square Root |
| 146 | case Instruction::OpCode::RSQ: | 163 | case Instruction::OpCode::RSQ: |
| 147 | { | 164 | { |
| 165 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 148 | for (int i = 0; i < 4; ++i) { | 166 | for (int i = 0; i < 4; ++i) { |
| 149 | if (!swizzle.DestComponentEnabled(i)) | 167 | if (!swizzle.DestComponentEnabled(i)) |
| 150 | continue; | 168 | continue; |
| @@ -159,6 +177,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 159 | 177 | ||
| 160 | case Instruction::OpCode::MOV: | 178 | case Instruction::OpCode::MOV: |
| 161 | { | 179 | { |
| 180 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 162 | for (int i = 0; i < 4; ++i) { | 181 | for (int i = 0; i < 4; ++i) { |
| 163 | if (!swizzle.DestComponentEnabled(i)) | 182 | if (!swizzle.DestComponentEnabled(i)) |
| 164 | continue; | 183 | continue; |
| @@ -172,8 +191,9 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 172 | if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { | 191 | if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { |
| 173 | exit_loop = true; | 192 | exit_loop = true; |
| 174 | } else { | 193 | } else { |
| 175 | state.program_counter = &shader_memory[*state.call_stack_pointer--]; | 194 | // Jump back to call stack position, invalidate call stack entry, move up call stack pointer |
| 176 | *state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS; | 195 | state.program_counter = &shader_memory[*state.call_stack_pointer]; |
| 196 | *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS; | ||
| 177 | } | 197 | } |
| 178 | 198 | ||
| 179 | break; | 199 | break; |
| @@ -212,6 +232,8 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) | |||
| 212 | 232 | ||
| 213 | const u32* main = &shader_memory[registers.vs_main_offset]; | 233 | const u32* main = &shader_memory[registers.vs_main_offset]; |
| 214 | state.program_counter = (u32*)main; | 234 | state.program_counter = (u32*)main; |
| 235 | state.debug.max_offset = 0; | ||
| 236 | state.debug.max_opdesc_id = 0; | ||
| 215 | 237 | ||
| 216 | // Setup input register table | 238 | // Setup input register table |
| 217 | const auto& attribute_register_map = registers.vs_input_register_map; | 239 | const auto& attribute_register_map = registers.vs_input_register_map; |
| @@ -255,6 +277,9 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) | |||
| 255 | state.call_stack_pointer = &state.call_stack[0]; | 277 | state.call_stack_pointer = &state.call_stack[0]; |
| 256 | 278 | ||
| 257 | ProcessShaderCode(state); | 279 | ProcessShaderCode(state); |
| 280 | DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data, | ||
| 281 | state.debug.max_opdesc_id, registers.vs_main_offset, | ||
| 282 | registers.vs_output_attributes); | ||
| 258 | 283 | ||
| 259 | DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", | 284 | DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", |
| 260 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | 285 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), |
diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index 1b71e367b..847fdc450 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h | |||
| @@ -27,7 +27,6 @@ struct OutputVertex { | |||
| 27 | Math::Vec4<float24> dummy; // quaternions (not implemented, yet) | 27 | Math::Vec4<float24> dummy; // quaternions (not implemented, yet) |
| 28 | Math::Vec4<float24> color; | 28 | Math::Vec4<float24> color; |
| 29 | Math::Vec2<float24> tc0; | 29 | Math::Vec2<float24> tc0; |
| 30 | float24 tc0_v; | ||
| 31 | 30 | ||
| 32 | // Padding for optimal alignment | 31 | // Padding for optimal alignment |
| 33 | float24 pad[14]; | 32 | float24 pad[14]; |
| @@ -36,6 +35,7 @@ struct OutputVertex { | |||
| 36 | 35 | ||
| 37 | // position after perspective divide | 36 | // position after perspective divide |
| 38 | Math::Vec3<float24> screenpos; | 37 | Math::Vec3<float24> screenpos; |
| 38 | float24 pad2; | ||
| 39 | 39 | ||
| 40 | // Linear interpolation | 40 | // Linear interpolation |
| 41 | // factor: 0=this, 1=vtx | 41 | // factor: 0=this, 1=vtx |
| @@ -59,6 +59,7 @@ struct OutputVertex { | |||
| 59 | } | 59 | } |
| 60 | }; | 60 | }; |
| 61 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | 61 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
| 62 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | ||
| 62 | 63 | ||
| 63 | union Instruction { | 64 | union Instruction { |
| 64 | enum class OpCode : u32 { | 65 | enum class OpCode : u32 { |
| @@ -117,9 +118,78 @@ union Instruction { | |||
| 117 | // while "dest" addresses individual floats. | 118 | // while "dest" addresses individual floats. |
| 118 | union { | 119 | union { |
| 119 | BitField<0x00, 0x5, u32> operand_desc_id; | 120 | BitField<0x00, 0x5, u32> operand_desc_id; |
| 120 | BitField<0x07, 0x5, u32> src2; | 121 | |
| 121 | BitField<0x0c, 0x7, u32> src1; | 122 | template<class BitFieldType> |
| 122 | BitField<0x13, 0x7, u32> dest; | 123 | struct SourceRegister : BitFieldType { |
| 124 | enum RegisterType { | ||
| 125 | Input, | ||
| 126 | Temporary, | ||
| 127 | FloatUniform | ||
| 128 | }; | ||
| 129 | |||
| 130 | RegisterType GetRegisterType() const { | ||
| 131 | if (BitFieldType::Value() < 0x10) | ||
| 132 | return Input; | ||
| 133 | else if (BitFieldType::Value() < 0x20) | ||
| 134 | return Temporary; | ||
| 135 | else | ||
| 136 | return FloatUniform; | ||
| 137 | } | ||
| 138 | |||
| 139 | int GetIndex() const { | ||
| 140 | if (GetRegisterType() == Input) | ||
| 141 | return BitFieldType::Value(); | ||
| 142 | else if (GetRegisterType() == Temporary) | ||
| 143 | return BitFieldType::Value() - 0x10; | ||
| 144 | else if (GetRegisterType() == FloatUniform) | ||
| 145 | return BitFieldType::Value() - 0x20; | ||
| 146 | } | ||
| 147 | |||
| 148 | std::string GetRegisterName() const { | ||
| 149 | std::map<RegisterType, std::string> type = { | ||
| 150 | { Input, "i" }, | ||
| 151 | { Temporary, "t" }, | ||
| 152 | { FloatUniform, "f" }, | ||
| 153 | }; | ||
| 154 | return type[GetRegisterType()] + std::to_string(GetIndex()); | ||
| 155 | } | ||
| 156 | }; | ||
| 157 | |||
| 158 | SourceRegister<BitField<0x07, 0x5, u32>> src2; | ||
| 159 | SourceRegister<BitField<0x0c, 0x7, u32>> src1; | ||
| 160 | |||
| 161 | struct : BitField<0x15, 0x5, u32> | ||
| 162 | { | ||
| 163 | enum RegisterType { | ||
| 164 | Output, | ||
| 165 | Temporary, | ||
| 166 | Unknown | ||
| 167 | }; | ||
| 168 | RegisterType GetRegisterType() const { | ||
| 169 | if (Value() < 0x8) | ||
| 170 | return Output; | ||
| 171 | else if (Value() < 0x10) | ||
| 172 | return Unknown; | ||
| 173 | else | ||
| 174 | return Temporary; | ||
| 175 | } | ||
| 176 | int GetIndex() const { | ||
| 177 | if (GetRegisterType() == Output) | ||
| 178 | return Value(); | ||
| 179 | else if (GetRegisterType() == Temporary) | ||
| 180 | return Value() - 0x10; | ||
| 181 | else | ||
| 182 | return Value(); | ||
| 183 | } | ||
| 184 | std::string GetRegisterName() const { | ||
| 185 | std::map<RegisterType, std::string> type = { | ||
| 186 | { Output, "o" }, | ||
| 187 | { Temporary, "t" }, | ||
| 188 | { Unknown, "u" } | ||
| 189 | }; | ||
| 190 | return type[GetRegisterType()] + std::to_string(GetIndex()); | ||
| 191 | } | ||
| 192 | } dest; | ||
| 123 | } common; | 193 | } common; |
| 124 | 194 | ||
| 125 | // Format used for flow control instructions ("if") | 195 | // Format used for flow control instructions ("if") |
| @@ -128,6 +198,7 @@ union Instruction { | |||
| 128 | BitField<0x0a, 0xc, u32> offset_words; | 198 | BitField<0x0a, 0xc, u32> offset_words; |
| 129 | } flow_control; | 199 | } flow_control; |
| 130 | }; | 200 | }; |
| 201 | static_assert(std::is_standard_layout<Instruction>::value, "Structure is not using standard layout!"); | ||
| 131 | 202 | ||
| 132 | union SwizzlePattern { | 203 | union SwizzlePattern { |
| 133 | u32 hex; | 204 | u32 hex; |
| @@ -185,6 +256,8 @@ union SwizzlePattern { | |||
| 185 | // Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x | 256 | // Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x |
| 186 | BitField< 0, 4, u32> dest_mask; | 257 | BitField< 0, 4, u32> dest_mask; |
| 187 | 258 | ||
| 259 | BitField< 4, 1, u32> negate; // negates src1 | ||
| 260 | |||
| 188 | BitField< 5, 2, Selector> src1_selector_3; | 261 | BitField< 5, 2, Selector> src1_selector_3; |
| 189 | BitField< 7, 2, Selector> src1_selector_2; | 262 | BitField< 7, 2, Selector> src1_selector_2; |
| 190 | BitField< 9, 2, Selector> src1_selector_1; | 263 | BitField< 9, 2, Selector> src1_selector_1; |
diff --git a/src/video_core/video_core.vcxproj b/src/video_core/video_core.vcxproj index 48d77cdc4..4e129fbe7 100644 --- a/src/video_core/video_core.vcxproj +++ b/src/video_core/video_core.vcxproj | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | </ProjectConfiguration> | 19 | </ProjectConfiguration> |
| 20 | </ItemGroup> | 20 | </ItemGroup> |
| 21 | <ItemGroup> | 21 | <ItemGroup> |
| 22 | <ClCompile Include="debug_utils\debug_utils.cpp" /> | ||
| 22 | <ClCompile Include="renderer_opengl\renderer_opengl.cpp" /> | 23 | <ClCompile Include="renderer_opengl\renderer_opengl.cpp" /> |
| 23 | <ClCompile Include="clipper.cpp" /> | 24 | <ClCompile Include="clipper.cpp" /> |
| 24 | <ClCompile Include="command_processor.cpp" /> | 25 | <ClCompile Include="command_processor.cpp" /> |
| @@ -40,6 +41,7 @@ | |||
| 40 | <ClInclude Include="utils.h" /> | 41 | <ClInclude Include="utils.h" /> |
| 41 | <ClInclude Include="vertex_shader.h" /> | 42 | <ClInclude Include="vertex_shader.h" /> |
| 42 | <ClInclude Include="video_core.h" /> | 43 | <ClInclude Include="video_core.h" /> |
| 44 | <ClInclude Include="debug_utils\debug_utils.h" /> | ||
| 43 | <ClInclude Include="renderer_opengl\renderer_opengl.h" /> | 45 | <ClInclude Include="renderer_opengl\renderer_opengl.h" /> |
| 44 | </ItemGroup> | 46 | </ItemGroup> |
| 45 | <ItemGroup> | 47 | <ItemGroup> |
diff --git a/src/video_core/video_core.vcxproj.filters b/src/video_core/video_core.vcxproj.filters index 31af4f1df..90541aca0 100644 --- a/src/video_core/video_core.vcxproj.filters +++ b/src/video_core/video_core.vcxproj.filters | |||
| @@ -4,6 +4,9 @@ | |||
| 4 | <Filter Include="renderer_opengl"> | 4 | <Filter Include="renderer_opengl"> |
| 5 | <UniqueIdentifier>{e0245557-dbd4-423e-9399-513d5e99f1e4}</UniqueIdentifier> | 5 | <UniqueIdentifier>{e0245557-dbd4-423e-9399-513d5e99f1e4}</UniqueIdentifier> |
| 6 | </Filter> | 6 | </Filter> |
| 7 | <Filter Include="debug_utils"> | ||
| 8 | <UniqueIdentifier>{0ac498e6-bbd8-46e3-9d5f-e816546ab90e}</UniqueIdentifier> | ||
| 9 | </Filter> | ||
| 7 | </ItemGroup> | 10 | </ItemGroup> |
| 8 | <ItemGroup> | 11 | <ItemGroup> |
| 9 | <ClCompile Include="renderer_opengl\renderer_opengl.cpp"> | 12 | <ClCompile Include="renderer_opengl\renderer_opengl.cpp"> |
| @@ -16,11 +19,11 @@ | |||
| 16 | <ClCompile Include="utils.cpp" /> | 19 | <ClCompile Include="utils.cpp" /> |
| 17 | <ClCompile Include="vertex_shader.cpp" /> | 20 | <ClCompile Include="vertex_shader.cpp" /> |
| 18 | <ClCompile Include="video_core.cpp" /> | 21 | <ClCompile Include="video_core.cpp" /> |
| 22 | <ClCompile Include="debug_utils\debug_utils.cpp"> | ||
| 23 | <Filter>debug_utils</Filter> | ||
| 24 | </ClCompile> | ||
| 19 | </ItemGroup> | 25 | </ItemGroup> |
| 20 | <ItemGroup> | 26 | <ItemGroup> |
| 21 | <ClInclude Include="renderer_opengl\renderer_opengl.h"> | ||
| 22 | <Filter>renderer_opengl</Filter> | ||
| 23 | </ClInclude> | ||
| 24 | <ClInclude Include="clipper.h" /> | 27 | <ClInclude Include="clipper.h" /> |
| 25 | <ClInclude Include="command_processor.h" /> | 28 | <ClInclude Include="command_processor.h" /> |
| 26 | <ClInclude Include="gpu_debugger.h" /> | 29 | <ClInclude Include="gpu_debugger.h" /> |
| @@ -32,8 +35,12 @@ | |||
| 32 | <ClInclude Include="utils.h" /> | 35 | <ClInclude Include="utils.h" /> |
| 33 | <ClInclude Include="vertex_shader.h" /> | 36 | <ClInclude Include="vertex_shader.h" /> |
| 34 | <ClInclude Include="video_core.h" /> | 37 | <ClInclude Include="video_core.h" /> |
| 38 | <ClInclude Include="renderer_opengl\renderer_opengl.h" /> | ||
| 39 | <ClInclude Include="debug_utils\debug_utils.h"> | ||
| 40 | <Filter>debug_utils</Filter> | ||
| 41 | </ClInclude> | ||
| 35 | </ItemGroup> | 42 | </ItemGroup> |
| 36 | <ItemGroup> | 43 | <ItemGroup> |
| 37 | <Text Include="CMakeLists.txt" /> | 44 | <Text Include="CMakeLists.txt" /> |
| 38 | </ItemGroup> | 45 | </ItemGroup> |
| 39 | </Project> | 46 | </Project> \ No newline at end of file |