diff options
| author | 2014-08-25 16:12:10 -0400 | |
|---|---|---|
| committer | 2014-08-25 16:12:10 -0400 | |
| commit | 97fd8fc38d4f9c288779cddb06538860124c6263 (patch) | |
| tree | bc99e0fceaae732f9c8d4831fcdb8f661b49ccb8 /src/video_core/vertex_shader.cpp | |
| parent | Merge pull request #75 from xsacha/qt5 (diff) | |
| parent | Pica/Rasterizer: Clarify a TODO. (diff) | |
| download | yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.tar.gz yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.tar.xz yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.zip | |
Merge pull request #50 from neobrain/pica
Further work on Pica emulation
Diffstat (limited to 'src/video_core/vertex_shader.cpp')
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 51 |
1 file changed, 38 insertions, 13 deletions
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 93830a96a..db8244317 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include "pica.h" | 5 | #include "pica.h" |
| 6 | #include "vertex_shader.h" | 6 | #include "vertex_shader.h" |
| 7 | #include "debug_utils/debug_utils.h" | ||
| 7 | #include <core/mem_map.h> | 8 | #include <core/mem_map.h> |
| 8 | #include <common/file_util.h> | 9 | #include <common/file_util.h> |
| 9 | 10 | ||
| @@ -50,6 +51,11 @@ struct VertexShaderState { | |||
| 50 | }; | 51 | }; |
| 51 | u32 call_stack[8]; // TODO: What is the maximal call stack depth? | 52 | u32 call_stack[8]; // TODO: What is the maximal call stack depth? |
| 52 | u32* call_stack_pointer; | 53 | u32* call_stack_pointer; |
| 54 | |||
| 55 | struct { | ||
| 56 | u32 max_offset; // maximum program counter ever reached | ||
| 57 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||
| 58 | } debug; | ||
| 53 | }; | 59 | }; |
| 54 | 60 | ||
| 55 | static void ProcessShaderCode(VertexShaderState& state) { | 61 | static void ProcessShaderCode(VertexShaderState& state) { |
| @@ -57,27 +63,34 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 57 | bool increment_pc = true; | 63 | bool increment_pc = true; |
| 58 | bool exit_loop = false; | 64 | bool exit_loop = false; |
| 59 | const Instruction& instr = *(const Instruction*)state.program_counter; | 65 | const Instruction& instr = *(const Instruction*)state.program_counter; |
| 66 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); | ||
| 60 | 67 | ||
| 61 | const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1] | 68 | const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()] |
| 62 | : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x | 69 | : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x |
| 63 | : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x | 70 | : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x |
| 64 | : nullptr; | ||
| 65 | const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2] | ||
| 66 | : &state.temporary_registers[instr.common.src2-0x10].x; | ||
| 67 | // TODO: Unsure about the limit values | ||
| 68 | float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest] | ||
| 69 | : (instr.common.dest <= 0x3C) ? nullptr | ||
| 70 | : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4] | ||
| 71 | : nullptr; | 71 | : nullptr; |
| 72 | const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2.GetIndex()] | ||
| 73 | : &state.temporary_registers[instr.common.src2.GetIndex()].x; | ||
| 74 | float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] | ||
| 75 | : (instr.common.dest < 0x10) ? nullptr | ||
| 76 | : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] | ||
| 77 | : nullptr; | ||
| 72 | 78 | ||
| 73 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; | 79 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; |
| 80 | const bool negate_src1 = swizzle.negate; | ||
| 74 | 81 | ||
| 75 | const float24 src1[4] = { | 82 | float24 src1[4] = { |
| 76 | src1_[(int)swizzle.GetSelectorSrc1(0)], | 83 | src1_[(int)swizzle.GetSelectorSrc1(0)], |
| 77 | src1_[(int)swizzle.GetSelectorSrc1(1)], | 84 | src1_[(int)swizzle.GetSelectorSrc1(1)], |
| 78 | src1_[(int)swizzle.GetSelectorSrc1(2)], | 85 | src1_[(int)swizzle.GetSelectorSrc1(2)], |
| 79 | src1_[(int)swizzle.GetSelectorSrc1(3)], | 86 | src1_[(int)swizzle.GetSelectorSrc1(3)], |
| 80 | }; | 87 | }; |
| 88 | if (negate_src1) { | ||
| 89 | src1[0] = src1[0] * float24::FromFloat32(-1); | ||
| 90 | src1[1] = src1[1] * float24::FromFloat32(-1); | ||
| 91 | src1[2] = src1[2] * float24::FromFloat32(-1); | ||
| 92 | src1[3] = src1[3] * float24::FromFloat32(-1); | ||
| 93 | } | ||
| 81 | const float24 src2[4] = { | 94 | const float24 src2[4] = { |
| 82 | src2_[(int)swizzle.GetSelectorSrc2(0)], | 95 | src2_[(int)swizzle.GetSelectorSrc2(0)], |
| 83 | src2_[(int)swizzle.GetSelectorSrc2(1)], | 96 | src2_[(int)swizzle.GetSelectorSrc2(1)], |
| @@ -88,6 +101,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 88 | switch (instr.opcode) { | 101 | switch (instr.opcode) { |
| 89 | case Instruction::OpCode::ADD: | 102 | case Instruction::OpCode::ADD: |
| 90 | { | 103 | { |
| 104 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 91 | for (int i = 0; i < 4; ++i) { | 105 | for (int i = 0; i < 4; ++i) { |
| 92 | if (!swizzle.DestComponentEnabled(i)) | 106 | if (!swizzle.DestComponentEnabled(i)) |
| 93 | continue; | 107 | continue; |
| @@ -100,6 +114,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 100 | 114 | ||
| 101 | case Instruction::OpCode::MUL: | 115 | case Instruction::OpCode::MUL: |
| 102 | { | 116 | { |
| 117 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 103 | for (int i = 0; i < 4; ++i) { | 118 | for (int i = 0; i < 4; ++i) { |
| 104 | if (!swizzle.DestComponentEnabled(i)) | 119 | if (!swizzle.DestComponentEnabled(i)) |
| 105 | continue; | 120 | continue; |
| @@ -113,6 +128,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 113 | case Instruction::OpCode::DP3: | 128 | case Instruction::OpCode::DP3: |
| 114 | case Instruction::OpCode::DP4: | 129 | case Instruction::OpCode::DP4: |
| 115 | { | 130 | { |
| 131 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 116 | float24 dot = float24::FromFloat32(0.f); | 132 | float24 dot = float24::FromFloat32(0.f); |
| 117 | int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; | 133 | int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; |
| 118 | for (int i = 0; i < num_components; ++i) | 134 | for (int i = 0; i < num_components; ++i) |
| @@ -130,6 +146,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 130 | // Reciprocal | 146 | // Reciprocal |
| 131 | case Instruction::OpCode::RCP: | 147 | case Instruction::OpCode::RCP: |
| 132 | { | 148 | { |
| 149 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 133 | for (int i = 0; i < 4; ++i) { | 150 | for (int i = 0; i < 4; ++i) { |
| 134 | if (!swizzle.DestComponentEnabled(i)) | 151 | if (!swizzle.DestComponentEnabled(i)) |
| 135 | continue; | 152 | continue; |
| @@ -145,6 +162,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 145 | // Reciprocal Square Root | 162 | // Reciprocal Square Root |
| 146 | case Instruction::OpCode::RSQ: | 163 | case Instruction::OpCode::RSQ: |
| 147 | { | 164 | { |
| 165 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 148 | for (int i = 0; i < 4; ++i) { | 166 | for (int i = 0; i < 4; ++i) { |
| 149 | if (!swizzle.DestComponentEnabled(i)) | 167 | if (!swizzle.DestComponentEnabled(i)) |
| 150 | continue; | 168 | continue; |
| @@ -159,6 +177,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 159 | 177 | ||
| 160 | case Instruction::OpCode::MOV: | 178 | case Instruction::OpCode::MOV: |
| 161 | { | 179 | { |
| 180 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 162 | for (int i = 0; i < 4; ++i) { | 181 | for (int i = 0; i < 4; ++i) { |
| 163 | if (!swizzle.DestComponentEnabled(i)) | 182 | if (!swizzle.DestComponentEnabled(i)) |
| 164 | continue; | 183 | continue; |
| @@ -172,8 +191,9 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 172 | if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { | 191 | if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { |
| 173 | exit_loop = true; | 192 | exit_loop = true; |
| 174 | } else { | 193 | } else { |
| 175 | state.program_counter = &shader_memory[*state.call_stack_pointer--]; | 194 | // Jump back to call stack position, invalidate call stack entry, move up call stack pointer |
| 176 | *state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS; | 195 | state.program_counter = &shader_memory[*state.call_stack_pointer]; |
| 196 | *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS; | ||
| 177 | } | 197 | } |
| 178 | 198 | ||
| 179 | break; | 199 | break; |
| @@ -212,6 +232,8 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) | |||
| 212 | 232 | ||
| 213 | const u32* main = &shader_memory[registers.vs_main_offset]; | 233 | const u32* main = &shader_memory[registers.vs_main_offset]; |
| 214 | state.program_counter = (u32*)main; | 234 | state.program_counter = (u32*)main; |
| 235 | state.debug.max_offset = 0; | ||
| 236 | state.debug.max_opdesc_id = 0; | ||
| 215 | 237 | ||
| 216 | // Setup input register table | 238 | // Setup input register table |
| 217 | const auto& attribute_register_map = registers.vs_input_register_map; | 239 | const auto& attribute_register_map = registers.vs_input_register_map; |
| @@ -255,6 +277,9 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) | |||
| 255 | state.call_stack_pointer = &state.call_stack[0]; | 277 | state.call_stack_pointer = &state.call_stack[0]; |
| 256 | 278 | ||
| 257 | ProcessShaderCode(state); | 279 | ProcessShaderCode(state); |
| 280 | DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data, | ||
| 281 | state.debug.max_opdesc_id, registers.vs_main_offset, | ||
| 282 | registers.vs_output_attributes); | ||
| 258 | 283 | ||
| 259 | DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", | 284 | DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", |
| 260 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | 285 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), |