summary refs log tree commit diff
path: root/src/video_core/vertex_shader.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/vertex_shader.cpp')
-rw-r--r-- src/video_core/vertex_shader.cpp 51
1 files changed, 38 insertions, 13 deletions
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 93830a96a..db8244317 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -4,6 +4,7 @@
4 4
5#include "pica.h" 5#include "pica.h"
6#include "vertex_shader.h" 6#include "vertex_shader.h"
7#include "debug_utils/debug_utils.h"
7#include <core/mem_map.h> 8#include <core/mem_map.h>
8#include <common/file_util.h> 9#include <common/file_util.h>
9 10
@@ -50,6 +51,11 @@ struct VertexShaderState {
50 }; 51 };
51 u32 call_stack[8]; // TODO: What is the maximal call stack depth? 52 u32 call_stack[8]; // TODO: What is the maximal call stack depth?
52 u32* call_stack_pointer; 53 u32* call_stack_pointer;
54
55 struct {
56 u32 max_offset; // maximum program counter ever reached
57 u32 max_opdesc_id; // maximum swizzle pattern index ever used
58 } debug;
53}; 59};
54 60
55static void ProcessShaderCode(VertexShaderState& state) { 61static void ProcessShaderCode(VertexShaderState& state) {
@@ -57,27 +63,34 @@ static void ProcessShaderCode(VertexShaderState& state) {
57 bool increment_pc = true; 63 bool increment_pc = true;
58 bool exit_loop = false; 64 bool exit_loop = false;
59 const Instruction& instr = *(const Instruction*)state.program_counter; 65 const Instruction& instr = *(const Instruction*)state.program_counter;
66 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory));
60 67
61 const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1] 68 const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()]
62 : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x 69 : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x
63 : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x 70 : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x
64 : nullptr;
65 const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2]
66 : &state.temporary_registers[instr.common.src2-0x10].x;
67 // TODO: Unsure about the limit values
68 float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest]
69 : (instr.common.dest <= 0x3C) ? nullptr
70 : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4]
71 : nullptr; 71 : nullptr;
72 const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2.GetIndex()]
73 : &state.temporary_registers[instr.common.src2.GetIndex()].x;
74 float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()]
75 : (instr.common.dest < 0x10) ? nullptr
76 : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0]
77 : nullptr;
72 78
73 const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; 79 const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];
80 const bool negate_src1 = swizzle.negate;
74 81
75 const float24 src1[4] = { 82 float24 src1[4] = {
76 src1_[(int)swizzle.GetSelectorSrc1(0)], 83 src1_[(int)swizzle.GetSelectorSrc1(0)],
77 src1_[(int)swizzle.GetSelectorSrc1(1)], 84 src1_[(int)swizzle.GetSelectorSrc1(1)],
78 src1_[(int)swizzle.GetSelectorSrc1(2)], 85 src1_[(int)swizzle.GetSelectorSrc1(2)],
79 src1_[(int)swizzle.GetSelectorSrc1(3)], 86 src1_[(int)swizzle.GetSelectorSrc1(3)],
80 }; 87 };
88 if (negate_src1) {
89 src1[0] = src1[0] * float24::FromFloat32(-1);
90 src1[1] = src1[1] * float24::FromFloat32(-1);
91 src1[2] = src1[2] * float24::FromFloat32(-1);
92 src1[3] = src1[3] * float24::FromFloat32(-1);
93 }
81 const float24 src2[4] = { 94 const float24 src2[4] = {
82 src2_[(int)swizzle.GetSelectorSrc2(0)], 95 src2_[(int)swizzle.GetSelectorSrc2(0)],
83 src2_[(int)swizzle.GetSelectorSrc2(1)], 96 src2_[(int)swizzle.GetSelectorSrc2(1)],
@@ -88,6 +101,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
88 switch (instr.opcode) { 101 switch (instr.opcode) {
89 case Instruction::OpCode::ADD: 102 case Instruction::OpCode::ADD:
90 { 103 {
104 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
91 for (int i = 0; i < 4; ++i) { 105 for (int i = 0; i < 4; ++i) {
92 if (!swizzle.DestComponentEnabled(i)) 106 if (!swizzle.DestComponentEnabled(i))
93 continue; 107 continue;
@@ -100,6 +114,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
100 114
101 case Instruction::OpCode::MUL: 115 case Instruction::OpCode::MUL:
102 { 116 {
117 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
103 for (int i = 0; i < 4; ++i) { 118 for (int i = 0; i < 4; ++i) {
104 if (!swizzle.DestComponentEnabled(i)) 119 if (!swizzle.DestComponentEnabled(i))
105 continue; 120 continue;
@@ -113,6 +128,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
113 case Instruction::OpCode::DP3: 128 case Instruction::OpCode::DP3:
114 case Instruction::OpCode::DP4: 129 case Instruction::OpCode::DP4:
115 { 130 {
131 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
116 float24 dot = float24::FromFloat32(0.f); 132 float24 dot = float24::FromFloat32(0.f);
117 int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; 133 int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
118 for (int i = 0; i < num_components; ++i) 134 for (int i = 0; i < num_components; ++i)
@@ -130,6 +146,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
130 // Reciprocal 146 // Reciprocal
131 case Instruction::OpCode::RCP: 147 case Instruction::OpCode::RCP:
132 { 148 {
149 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
133 for (int i = 0; i < 4; ++i) { 150 for (int i = 0; i < 4; ++i) {
134 if (!swizzle.DestComponentEnabled(i)) 151 if (!swizzle.DestComponentEnabled(i))
135 continue; 152 continue;
@@ -145,6 +162,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
145 // Reciprocal Square Root 162 // Reciprocal Square Root
146 case Instruction::OpCode::RSQ: 163 case Instruction::OpCode::RSQ:
147 { 164 {
165 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
148 for (int i = 0; i < 4; ++i) { 166 for (int i = 0; i < 4; ++i) {
149 if (!swizzle.DestComponentEnabled(i)) 167 if (!swizzle.DestComponentEnabled(i))
150 continue; 168 continue;
@@ -159,6 +177,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
159 177
160 case Instruction::OpCode::MOV: 178 case Instruction::OpCode::MOV:
161 { 179 {
180 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
162 for (int i = 0; i < 4; ++i) { 181 for (int i = 0; i < 4; ++i) {
163 if (!swizzle.DestComponentEnabled(i)) 182 if (!swizzle.DestComponentEnabled(i))
164 continue; 183 continue;
@@ -172,8 +191,9 @@ static void ProcessShaderCode(VertexShaderState& state) {
172 if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { 191 if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) {
173 exit_loop = true; 192 exit_loop = true;
174 } else { 193 } else {
175 state.program_counter = &shader_memory[*state.call_stack_pointer--]; 194 // Jump back to call stack position, invalidate call stack entry, move up call stack pointer
176 *state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS; 195 state.program_counter = &shader_memory[*state.call_stack_pointer];
196 *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS;
177 } 197 }
178 198
179 break; 199 break;
@@ -212,6 +232,8 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes)
212 232
213 const u32* main = &shader_memory[registers.vs_main_offset]; 233 const u32* main = &shader_memory[registers.vs_main_offset];
214 state.program_counter = (u32*)main; 234 state.program_counter = (u32*)main;
235 state.debug.max_offset = 0;
236 state.debug.max_opdesc_id = 0;
215 237
216 // Setup input register table 238 // Setup input register table
217 const auto& attribute_register_map = registers.vs_input_register_map; 239 const auto& attribute_register_map = registers.vs_input_register_map;
@@ -255,6 +277,9 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes)
255 state.call_stack_pointer = &state.call_stack[0]; 277 state.call_stack_pointer = &state.call_stack[0];
256 278
257 ProcessShaderCode(state); 279 ProcessShaderCode(state);
280 DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data,
281 state.debug.max_opdesc_id, registers.vs_main_offset,
282 registers.vs_output_attributes);
258 283
259 DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", 284 DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
260 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), 285 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),