diff options
| author | 2014-12-20 12:40:06 -0500 | |
|---|---|---|
| committer | 2014-12-20 12:40:06 -0500 | |
| commit | acabd7be82b4174a3adab0771df6320bdbc5a85b (patch) | |
| tree | c47c2f68f50005bfb5c4003831f3977aec0be3ca /src/video_core/vertex_shader.cpp | |
| parent | Merge pull request #315 from chinhodado/master (diff) | |
| parent | Pica/VertexShader: Promote a log message to critical status. (diff) | |
| download | yuzu-acabd7be82b4174a3adab0771df6320bdbc5a85b.tar.gz yuzu-acabd7be82b4174a3adab0771df6320bdbc5a85b.tar.xz yuzu-acabd7be82b4174a3adab0771df6320bdbc5a85b.zip | |
Merge pull request #284 from neobrain/pica_progress
Pica progress: Texturing, shaders, cleanups & more
Diffstat (limited to 'src/video_core/vertex_shader.cpp')
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 338 |
1 files changed, 271 insertions, 67 deletions
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 477e78cfe..4ba69fa51 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp | |||
| @@ -2,16 +2,25 @@ | |||
| 2 | // Licensed under GPLv2 | 2 | // Licensed under GPLv2 |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <stack> | ||
| 6 | |||
| 5 | #include <boost/range/algorithm.hpp> | 7 | #include <boost/range/algorithm.hpp> |
| 6 | 8 | ||
| 7 | #include <common/file_util.h> | 9 | #include <common/file_util.h> |
| 8 | 10 | ||
| 9 | #include <core/mem_map.h> | 11 | #include <core/mem_map.h> |
| 10 | 12 | ||
| 11 | #include "debug_utils/debug_utils.h" | 13 | #include <nihstro/shader_bytecode.h> |
| 14 | |||
| 12 | 15 | ||
| 13 | #include "pica.h" | 16 | #include "pica.h" |
| 14 | #include "vertex_shader.h" | 17 | #include "vertex_shader.h" |
| 18 | #include "debug_utils/debug_utils.h" | ||
| 19 | |||
| 20 | using nihstro::Instruction; | ||
| 21 | using nihstro::RegisterType; | ||
| 22 | using nihstro::SourceRegister; | ||
| 23 | using nihstro::SwizzlePattern; | ||
| 15 | 24 | ||
| 16 | namespace Pica { | 25 | namespace Pica { |
| 17 | 26 | ||
| @@ -19,13 +28,14 @@ namespace VertexShader { | |||
| 19 | 28 | ||
| 20 | static struct { | 29 | static struct { |
| 21 | Math::Vec4<float24> f[96]; | 30 | Math::Vec4<float24> f[96]; |
| 22 | } shader_uniforms; | ||
| 23 | 31 | ||
| 32 | std::array<bool,16> b; | ||
| 33 | } shader_uniforms; | ||
| 24 | 34 | ||
| 25 | // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! | 35 | // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! |
| 26 | // For now, we just keep these local arrays around. | 36 | // For now, we just keep these local arrays around. |
| 27 | static u32 shader_memory[1024]; | 37 | static std::array<u32, 1024> shader_memory; |
| 28 | static u32 swizzle_data[1024]; | 38 | static std::array<u32, 1024> swizzle_data; |
| 29 | 39 | ||
| 30 | void SubmitShaderMemoryChange(u32 addr, u32 value) | 40 | void SubmitShaderMemoryChange(u32 addr, u32 value) |
| 31 | { | 41 | { |
| @@ -42,6 +52,21 @@ Math::Vec4<float24>& GetFloatUniform(u32 index) | |||
| 42 | return shader_uniforms.f[index]; | 52 | return shader_uniforms.f[index]; |
| 43 | } | 53 | } |
| 44 | 54 | ||
| 55 | bool& GetBoolUniform(u32 index) | ||
| 56 | { | ||
| 57 | return shader_uniforms.b[index]; | ||
| 58 | } | ||
| 59 | |||
| 60 | const std::array<u32, 1024>& GetShaderBinary() | ||
| 61 | { | ||
| 62 | return shader_memory; | ||
| 63 | } | ||
| 64 | |||
| 65 | const std::array<u32, 1024>& GetSwizzlePatterns() | ||
| 66 | { | ||
| 67 | return swizzle_data; | ||
| 68 | } | ||
| 69 | |||
| 45 | struct VertexShaderState { | 70 | struct VertexShaderState { |
| 46 | u32* program_counter; | 71 | u32* program_counter; |
| 47 | 72 | ||
| @@ -49,13 +74,23 @@ struct VertexShaderState { | |||
| 49 | float24* output_register_table[7*4]; | 74 | float24* output_register_table[7*4]; |
| 50 | 75 | ||
| 51 | Math::Vec4<float24> temporary_registers[16]; | 76 | Math::Vec4<float24> temporary_registers[16]; |
| 52 | bool status_registers[2]; | 77 | bool conditional_code[2]; |
| 78 | |||
| 79 | // Two Address registers and one loop counter | ||
| 80 | // TODO: How many bits do these actually have? | ||
| 81 | s32 address_registers[3]; | ||
| 53 | 82 | ||
| 54 | enum { | 83 | enum { |
| 55 | INVALID_ADDRESS = 0xFFFFFFFF | 84 | INVALID_ADDRESS = 0xFFFFFFFF |
| 56 | }; | 85 | }; |
| 57 | u32 call_stack[8]; // TODO: What is the maximal call stack depth? | 86 | |
| 58 | u32* call_stack_pointer; | 87 | struct CallStackElement { |
| 88 | u32 final_address; | ||
| 89 | u32 return_address; | ||
| 90 | }; | ||
| 91 | |||
| 92 | // TODO: Is there a maximal size for this? | ||
| 93 | std::stack<CallStackElement> call_stack; | ||
| 59 | 94 | ||
| 60 | struct { | 95 | struct { |
| 61 | u32 max_offset; // maximum program counter ever reached | 96 | u32 max_offset; // maximum program counter ever reached |
| @@ -64,49 +99,105 @@ struct VertexShaderState { | |||
| 64 | }; | 99 | }; |
| 65 | 100 | ||
| 66 | static void ProcessShaderCode(VertexShaderState& state) { | 101 | static void ProcessShaderCode(VertexShaderState& state) { |
| 102 | |||
| 103 | // Placeholder for invalid inputs | ||
| 104 | static float24 dummy_vec4_float24[4]; | ||
| 105 | |||
| 67 | while (true) { | 106 | while (true) { |
| 68 | bool increment_pc = true; | 107 | if (!state.call_stack.empty()) { |
| 108 | if (state.program_counter - shader_memory.data() == state.call_stack.top().final_address) { | ||
| 109 | state.program_counter = &shader_memory[state.call_stack.top().return_address]; | ||
| 110 | state.call_stack.pop(); | ||
| 111 | |||
| 112 | // TODO: Is "trying again" accurate to hardware? | ||
| 113 | continue; | ||
| 114 | } | ||
| 115 | } | ||
| 116 | |||
| 69 | bool exit_loop = false; | 117 | bool exit_loop = false; |
| 70 | const Instruction& instr = *(const Instruction*)state.program_counter; | 118 | const Instruction& instr = *(const Instruction*)state.program_counter; |
| 71 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); | ||
| 72 | |||
| 73 | const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()] | ||
| 74 | : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x | ||
| 75 | : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x | ||
| 76 | : nullptr; | ||
| 77 | const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2.GetIndex()] | ||
| 78 | : &state.temporary_registers[instr.common.src2.GetIndex()].x; | ||
| 79 | float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] | ||
| 80 | : (instr.common.dest < 0x10) ? nullptr | ||
| 81 | : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] | ||
| 82 | : nullptr; | ||
| 83 | |||
| 84 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; | 119 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; |
| 85 | const bool negate_src1 = (swizzle.negate != 0); | ||
| 86 | 120 | ||
| 87 | float24 src1[4] = { | 121 | auto call = [&](VertexShaderState& state, u32 offset, u32 num_instructions, u32 return_offset) { |
| 88 | src1_[(int)swizzle.GetSelectorSrc1(0)], | 122 | state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset |
| 89 | src1_[(int)swizzle.GetSelectorSrc1(1)], | 123 | state.call_stack.push({ offset + num_instructions, return_offset }); |
| 90 | src1_[(int)swizzle.GetSelectorSrc1(2)], | ||
| 91 | src1_[(int)swizzle.GetSelectorSrc1(3)], | ||
| 92 | }; | 124 | }; |
| 93 | if (negate_src1) { | 125 | u32 binary_offset = state.program_counter - shader_memory.data(); |
| 94 | src1[0] = src1[0] * float24::FromFloat32(-1); | 126 | |
| 95 | src1[1] = src1[1] * float24::FromFloat32(-1); | 127 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + binary_offset); |
| 96 | src1[2] = src1[2] * float24::FromFloat32(-1); | 128 | |
| 97 | src1[3] = src1[3] * float24::FromFloat32(-1); | 129 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { |
| 98 | } | 130 | switch (source_reg.GetRegisterType()) { |
| 99 | const float24 src2[4] = { | 131 | case RegisterType::Input: |
| 100 | src2_[(int)swizzle.GetSelectorSrc2(0)], | 132 | return state.input_register_table[source_reg.GetIndex()]; |
| 101 | src2_[(int)swizzle.GetSelectorSrc2(1)], | 133 | |
| 102 | src2_[(int)swizzle.GetSelectorSrc2(2)], | 134 | case RegisterType::Temporary: |
| 103 | src2_[(int)swizzle.GetSelectorSrc2(3)], | 135 | return &state.temporary_registers[source_reg.GetIndex()].x; |
| 136 | |||
| 137 | case RegisterType::FloatUniform: | ||
| 138 | return &shader_uniforms.f[source_reg.GetIndex()].x; | ||
| 139 | |||
| 140 | default: | ||
| 141 | return dummy_vec4_float24; | ||
| 142 | } | ||
| 104 | }; | 143 | }; |
| 105 | 144 | ||
| 106 | switch (instr.opcode) { | 145 | switch (instr.opcode.GetInfo().type) { |
| 146 | case Instruction::OpCodeType::Arithmetic: | ||
| 147 | { | ||
| 148 | bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); | ||
| 149 | if (is_inverted) { | ||
| 150 | // TODO: We don't really support this properly: For instance, the address register | ||
| 151 | // offset needs to be applied to SRC2 instead, etc. | ||
| 152 | // For now, we just abort in this situation. | ||
| 153 | LOG_CRITICAL(HW_GPU, "Bad condition..."); | ||
| 154 | exit(0); | ||
| 155 | } | ||
| 156 | |||
| 157 | const int address_offset = (instr.common.address_register_index == 0) | ||
| 158 | ? 0 : state.address_registers[instr.common.address_register_index - 1]; | ||
| 159 | |||
| 160 | const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset); | ||
| 161 | const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); | ||
| 162 | |||
| 163 | const bool negate_src1 = (swizzle.negate_src1 != false); | ||
| 164 | const bool negate_src2 = (swizzle.negate_src2 != false); | ||
| 165 | |||
| 166 | float24 src1[4] = { | ||
| 167 | src1_[(int)swizzle.GetSelectorSrc1(0)], | ||
| 168 | src1_[(int)swizzle.GetSelectorSrc1(1)], | ||
| 169 | src1_[(int)swizzle.GetSelectorSrc1(2)], | ||
| 170 | src1_[(int)swizzle.GetSelectorSrc1(3)], | ||
| 171 | }; | ||
| 172 | if (negate_src1) { | ||
| 173 | src1[0] = src1[0] * float24::FromFloat32(-1); | ||
| 174 | src1[1] = src1[1] * float24::FromFloat32(-1); | ||
| 175 | src1[2] = src1[2] * float24::FromFloat32(-1); | ||
| 176 | src1[3] = src1[3] * float24::FromFloat32(-1); | ||
| 177 | } | ||
| 178 | float24 src2[4] = { | ||
| 179 | src2_[(int)swizzle.GetSelectorSrc2(0)], | ||
| 180 | src2_[(int)swizzle.GetSelectorSrc2(1)], | ||
| 181 | src2_[(int)swizzle.GetSelectorSrc2(2)], | ||
| 182 | src2_[(int)swizzle.GetSelectorSrc2(3)], | ||
| 183 | }; | ||
| 184 | if (negate_src2) { | ||
| 185 | src2[0] = src2[0] * float24::FromFloat32(-1); | ||
| 186 | src2[1] = src2[1] * float24::FromFloat32(-1); | ||
| 187 | src2[2] = src2[2] * float24::FromFloat32(-1); | ||
| 188 | src2[3] = src2[3] * float24::FromFloat32(-1); | ||
| 189 | } | ||
| 190 | |||
| 191 | float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] | ||
| 192 | : (instr.common.dest < 0x10) ? dummy_vec4_float24 | ||
| 193 | : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] | ||
| 194 | : dummy_vec4_float24; | ||
| 195 | |||
| 196 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 197 | |||
| 198 | switch (instr.opcode.EffectiveOpCode()) { | ||
| 107 | case Instruction::OpCode::ADD: | 199 | case Instruction::OpCode::ADD: |
| 108 | { | 200 | { |
| 109 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 110 | for (int i = 0; i < 4; ++i) { | 201 | for (int i = 0; i < 4; ++i) { |
| 111 | if (!swizzle.DestComponentEnabled(i)) | 202 | if (!swizzle.DestComponentEnabled(i)) |
| 112 | continue; | 203 | continue; |
| @@ -119,7 +210,6 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 119 | 210 | ||
| 120 | case Instruction::OpCode::MUL: | 211 | case Instruction::OpCode::MUL: |
| 121 | { | 212 | { |
| 122 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 123 | for (int i = 0; i < 4; ++i) { | 213 | for (int i = 0; i < 4; ++i) { |
| 124 | if (!swizzle.DestComponentEnabled(i)) | 214 | if (!swizzle.DestComponentEnabled(i)) |
| 125 | continue; | 215 | continue; |
| @@ -130,10 +220,18 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 130 | break; | 220 | break; |
| 131 | } | 221 | } |
| 132 | 222 | ||
| 223 | case Instruction::OpCode::MAX: | ||
| 224 | for (int i = 0; i < 4; ++i) { | ||
| 225 | if (!swizzle.DestComponentEnabled(i)) | ||
| 226 | continue; | ||
| 227 | |||
| 228 | dest[i] = std::max(src1[i], src2[i]); | ||
| 229 | } | ||
| 230 | break; | ||
| 231 | |||
| 133 | case Instruction::OpCode::DP3: | 232 | case Instruction::OpCode::DP3: |
| 134 | case Instruction::OpCode::DP4: | 233 | case Instruction::OpCode::DP4: |
| 135 | { | 234 | { |
| 136 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 137 | float24 dot = float24::FromFloat32(0.f); | 235 | float24 dot = float24::FromFloat32(0.f); |
| 138 | int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; | 236 | int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; |
| 139 | for (int i = 0; i < num_components; ++i) | 237 | for (int i = 0; i < num_components; ++i) |
| @@ -151,7 +249,6 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 151 | // Reciprocal | 249 | // Reciprocal |
| 152 | case Instruction::OpCode::RCP: | 250 | case Instruction::OpCode::RCP: |
| 153 | { | 251 | { |
| 154 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 155 | for (int i = 0; i < 4; ++i) { | 252 | for (int i = 0; i < 4; ++i) { |
| 156 | if (!swizzle.DestComponentEnabled(i)) | 253 | if (!swizzle.DestComponentEnabled(i)) |
| 157 | continue; | 254 | continue; |
| @@ -167,7 +264,6 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 167 | // Reciprocal Square Root | 264 | // Reciprocal Square Root |
| 168 | case Instruction::OpCode::RSQ: | 265 | case Instruction::OpCode::RSQ: |
| 169 | { | 266 | { |
| 170 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 171 | for (int i = 0; i < 4; ++i) { | 267 | for (int i = 0; i < 4; ++i) { |
| 172 | if (!swizzle.DestComponentEnabled(i)) | 268 | if (!swizzle.DestComponentEnabled(i)) |
| 173 | continue; | 269 | continue; |
| @@ -180,9 +276,21 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 180 | break; | 276 | break; |
| 181 | } | 277 | } |
| 182 | 278 | ||
| 279 | case Instruction::OpCode::MOVA: | ||
| 280 | { | ||
| 281 | for (int i = 0; i < 2; ++i) { | ||
| 282 | if (!swizzle.DestComponentEnabled(i)) | ||
| 283 | continue; | ||
| 284 | |||
| 285 | // TODO: Figure out how the rounding is done on hardware | ||
| 286 | state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); | ||
| 287 | } | ||
| 288 | |||
| 289 | break; | ||
| 290 | } | ||
| 291 | |||
| 183 | case Instruction::OpCode::MOV: | 292 | case Instruction::OpCode::MOV: |
| 184 | { | 293 | { |
| 185 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 186 | for (int i = 0; i < 4; ++i) { | 294 | for (int i = 0; i < 4; ++i) { |
| 187 | if (!swizzle.DestComponentEnabled(i)) | 295 | if (!swizzle.DestComponentEnabled(i)) |
| 188 | continue; | 296 | continue; |
| @@ -192,39 +300,137 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 192 | break; | 300 | break; |
| 193 | } | 301 | } |
| 194 | 302 | ||
| 195 | case Instruction::OpCode::RET: | 303 | case Instruction::OpCode::CMP: |
| 196 | if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { | 304 | for (int i = 0; i < 2; ++i) { |
| 197 | exit_loop = true; | 305 | // TODO: Can you restrict to one compare via dest masking? |
| 198 | } else { | 306 | |
| 199 | // Jump back to call stack position, invalidate call stack entry, move up call stack pointer | 307 | auto compare_op = instr.common.compare_op; |
| 200 | state.program_counter = &shader_memory[*state.call_stack_pointer]; | 308 | auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); |
| 201 | *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS; | 309 | |
| 310 | switch (op) { | ||
| 311 | case compare_op.Equal: | ||
| 312 | state.conditional_code[i] = (src1[i] == src2[i]); | ||
| 313 | break; | ||
| 314 | |||
| 315 | case compare_op.NotEqual: | ||
| 316 | state.conditional_code[i] = (src1[i] != src2[i]); | ||
| 317 | break; | ||
| 318 | |||
| 319 | case compare_op.LessThan: | ||
| 320 | state.conditional_code[i] = (src1[i] < src2[i]); | ||
| 321 | break; | ||
| 322 | |||
| 323 | case compare_op.LessEqual: | ||
| 324 | state.conditional_code[i] = (src1[i] <= src2[i]); | ||
| 325 | break; | ||
| 326 | |||
| 327 | case compare_op.GreaterThan: | ||
| 328 | state.conditional_code[i] = (src1[i] > src2[i]); | ||
| 329 | break; | ||
| 330 | |||
| 331 | case compare_op.GreaterEqual: | ||
| 332 | state.conditional_code[i] = (src1[i] >= src2[i]); | ||
| 333 | break; | ||
| 334 | |||
| 335 | default: | ||
| 336 | LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op)); | ||
| 337 | break; | ||
| 338 | } | ||
| 202 | } | 339 | } |
| 340 | break; | ||
| 203 | 341 | ||
| 342 | default: | ||
| 343 | LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", | ||
| 344 | (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); | ||
| 345 | _dbg_assert_(HW_GPU, 0); | ||
| 346 | break; | ||
| 347 | } | ||
| 348 | |||
| 349 | break; | ||
| 350 | } | ||
| 351 | default: | ||
| 352 | // Handle each instruction on its own | ||
| 353 | switch (instr.opcode) { | ||
| 354 | case Instruction::OpCode::END: | ||
| 355 | exit_loop = true; | ||
| 204 | break; | 356 | break; |
| 205 | 357 | ||
| 206 | case Instruction::OpCode::CALL: | 358 | case Instruction::OpCode::CALL: |
| 207 | increment_pc = false; | 359 | call(state, |
| 360 | instr.flow_control.dest_offset, | ||
| 361 | instr.flow_control.num_instructions, | ||
| 362 | binary_offset + 1); | ||
| 363 | break; | ||
| 364 | |||
| 365 | case Instruction::OpCode::NOP: | ||
| 366 | break; | ||
| 208 | 367 | ||
| 209 | _dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); | 368 | case Instruction::OpCode::IFU: |
| 369 | if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { | ||
| 370 | call(state, | ||
| 371 | binary_offset + 1, | ||
| 372 | instr.flow_control.dest_offset - binary_offset - 1, | ||
| 373 | instr.flow_control.dest_offset + instr.flow_control.num_instructions); | ||
| 374 | } else { | ||
| 375 | call(state, | ||
| 376 | instr.flow_control.dest_offset, | ||
| 377 | instr.flow_control.num_instructions, | ||
| 378 | instr.flow_control.dest_offset + instr.flow_control.num_instructions); | ||
| 379 | } | ||
| 210 | 380 | ||
| 211 | *++state.call_stack_pointer = state.program_counter - shader_memory; | ||
| 212 | // TODO: Does this offset refer to the beginning of shader memory? | ||
| 213 | state.program_counter = &shader_memory[instr.flow_control.offset_words]; | ||
| 214 | break; | 381 | break; |
| 215 | 382 | ||
| 216 | case Instruction::OpCode::FLS: | 383 | case Instruction::OpCode::IFC: |
| 217 | // TODO: Do whatever needs to be done here? | 384 | { |
| 385 | // TODO: Do we need to consider swizzlers here? | ||
| 386 | |||
| 387 | auto flow_control = instr.flow_control; | ||
| 388 | bool results[3] = { flow_control.refx == state.conditional_code[0], | ||
| 389 | flow_control.refy == state.conditional_code[1] }; | ||
| 390 | |||
| 391 | switch (flow_control.op) { | ||
| 392 | case flow_control.Or: | ||
| 393 | results[2] = results[0] || results[1]; | ||
| 394 | break; | ||
| 395 | |||
| 396 | case flow_control.And: | ||
| 397 | results[2] = results[0] && results[1]; | ||
| 398 | break; | ||
| 399 | |||
| 400 | case flow_control.JustX: | ||
| 401 | results[2] = results[0]; | ||
| 402 | break; | ||
| 403 | |||
| 404 | case flow_control.JustY: | ||
| 405 | results[2] = results[1]; | ||
| 406 | break; | ||
| 407 | } | ||
| 408 | |||
| 409 | if (results[2]) { | ||
| 410 | call(state, | ||
| 411 | binary_offset + 1, | ||
| 412 | instr.flow_control.dest_offset - binary_offset - 1, | ||
| 413 | instr.flow_control.dest_offset + instr.flow_control.num_instructions); | ||
| 414 | } else { | ||
| 415 | call(state, | ||
| 416 | instr.flow_control.dest_offset, | ||
| 417 | instr.flow_control.num_instructions, | ||
| 418 | instr.flow_control.dest_offset + instr.flow_control.num_instructions); | ||
| 419 | } | ||
| 420 | |||
| 218 | break; | 421 | break; |
| 422 | } | ||
| 219 | 423 | ||
| 220 | default: | 424 | default: |
| 221 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | 425 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", |
| 222 | (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex); | 426 | (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); |
| 223 | break; | 427 | break; |
| 428 | } | ||
| 429 | |||
| 430 | break; | ||
| 224 | } | 431 | } |
| 225 | 432 | ||
| 226 | if (increment_pc) | 433 | ++state.program_counter; |
| 227 | ++state.program_counter; | ||
| 228 | 434 | ||
| 229 | if (exit_loop) | 435 | if (exit_loop) |
| 230 | break; | 436 | break; |
| @@ -275,13 +481,11 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) | |||
| 275 | state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; | 481 | state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; |
| 276 | } | 482 | } |
| 277 | 483 | ||
| 278 | state.status_registers[0] = false; | 484 | state.conditional_code[0] = false; |
| 279 | state.status_registers[1] = false; | 485 | state.conditional_code[1] = false; |
| 280 | boost::fill(state.call_stack, VertexShaderState::INVALID_ADDRESS); | ||
| 281 | state.call_stack_pointer = &state.call_stack[0]; | ||
| 282 | 486 | ||
| 283 | ProcessShaderCode(state); | 487 | ProcessShaderCode(state); |
| 284 | DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data, | 488 | DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), |
| 285 | state.debug.max_opdesc_id, registers.vs_main_offset, | 489 | state.debug.max_opdesc_id, registers.vs_main_offset, |
| 286 | registers.vs_output_attributes); | 490 | registers.vs_output_attributes); |
| 287 | 491 | ||