From 8ce1d324602001e1102648319a9281ee08a1af95 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 16 Dec 2014 00:32:49 +0100 Subject: Pica/VertexShader: Remove (now) duplicated shader bytecode definitions in favor of nihstro's ones. --- src/video_core/vertex_shader.cpp | 43 ++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 13 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 477e78cfe..064a703eb 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -8,11 +8,18 @@ #include +#include + #include "debug_utils/debug_utils.h" #include "pica.h" #include "vertex_shader.h" +using nihstro::Instruction; +using nihstro::RegisterType; +using nihstro::SourceRegister; +using nihstro::SwizzlePattern; + namespace Pica { namespace VertexShader { @@ -70,19 +77,28 @@ static void ProcessShaderCode(VertexShaderState& state) { const Instruction& instr = *(const Instruction*)state.program_counter; state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); - const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()] - : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x - : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x - : nullptr; - const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2.GetIndex()] - : &state.temporary_registers[instr.common.src2.GetIndex()].x; + auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { + switch (source_reg.GetRegisterType()) { + case RegisterType::Input: + return state.input_register_table[source_reg.GetIndex()]; + + case RegisterType::Temporary: + return &state.temporary_registers[source_reg.GetIndex()].x; + + case RegisterType::FloatUniform: + return &shader_uniforms.f[source_reg.GetIndex()].x; + } + }; + bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); + const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted)); + const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] : (instr.common.dest < 0x10) ? nullptr : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] : nullptr; const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; - const bool negate_src1 = (swizzle.negate != 0); + const bool negate_src1 = (swizzle.negate_src1 != 0); float24 src1[4] = { src1_[(int)swizzle.GetSelectorSrc1(0)], @@ -192,7 +208,9 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCode::RET: + // NOP is currently used as a heuristic for leaving from a function. + // TODO: This is completely incorrect. + case Instruction::OpCode::NOP: if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { exit_loop = true; } else { @@ -209,17 +227,16 @@ static void ProcessShaderCode(VertexShaderState& state) { _dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); *++state.call_stack_pointer = state.program_counter - shader_memory; - // TODO: Does this offset refer to the beginning of shader memory? - state.program_counter = &shader_memory[instr.flow_control.offset_words]; + state.program_counter = &shader_memory[instr.flow_control.dest_offset]; break; - case Instruction::OpCode::FLS: - // TODO: Do whatever needs to be done here? + case Instruction::OpCode::END: + // TODO break; default: LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex); + (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); break; } -- cgit v1.2.3 From ce36ad454ecd4707a77916fdb79954c8924b50ee Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 12 Dec 2014 17:55:43 +0100 Subject: Pica/VertexShader: Support negating src2. --- src/video_core/vertex_shader.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 064a703eb..c5c5261fe 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -10,10 +10,10 @@ #include -#include "debug_utils/debug_utils.h" #include "pica.h" #include "vertex_shader.h" +#include "debug_utils/debug_utils.h" using nihstro::Instruction; using nihstro::RegisterType; @@ -99,6 +99,7 @@ static void ProcessShaderCode(VertexShaderState& state) { const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; const bool negate_src1 = (swizzle.negate_src1 != 0); + const bool negate_src2 = (swizzle.negate_src2 != 0); float24 src1[4] = { src1_[(int)swizzle.GetSelectorSrc1(0)], @@ -112,12 +113,18 @@ static void ProcessShaderCode(VertexShaderState& state) { src1[2] = src1[2] * float24::FromFloat32(-1); src1[3] = src1[3] * float24::FromFloat32(-1); } - const float24 src2[4] = { + float24 src2[4] = { src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)], src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)], }; + if (negate_src2) { + src2[0] = src2[0] * float24::FromFloat32(-1); + src2[1] = src2[1] * float24::FromFloat32(-1); + src2[2] = src2[2] * float24::FromFloat32(-1); + src2[3] = src2[3] * float24::FromFloat32(-1); + } switch (instr.opcode) { case Instruction::OpCode::ADD: -- cgit v1.2.3 From b85524c760989f3d053d05df6b244b28252b2f4e Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 16 Dec 2014 01:20:29 +0100 Subject: Pica/VertexShader: Some cleanups using std::array. --- src/video_core/vertex_shader.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index c5c5261fe..c98c625c2 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -31,8 +31,8 @@ static struct { // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! // For now, we just keep these local arrays around. -static u32 shader_memory[1024]; -static u32 swizzle_data[1024]; +static std::array shader_memory; +static std::array swizzle_data; void SubmitShaderMemoryChange(u32 addr, u32 value) { @@ -49,6 +49,17 @@ Math::Vec4& GetFloatUniform(u32 index) return shader_uniforms.f[index]; } +const std::array& GetShaderBinary() +{ + return shader_memory; +} + +const std::array& GetSwizzlePatterns() +{ + return swizzle_data; +} + + struct VertexShaderState { u32* program_counter; @@ -75,7 +86,7 @@ static void ProcessShaderCode(VertexShaderState& state) { bool increment_pc = true; bool exit_loop = false; const Instruction& instr = *(const Instruction*)state.program_counter; - state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); + state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory.data())); auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { switch (source_reg.GetRegisterType()) { @@ -233,7 +244,7 @@ static void ProcessShaderCode(VertexShaderState& state) { _dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); - *++state.call_stack_pointer = state.program_counter - shader_memory; + *++state.call_stack_pointer = state.program_counter - shader_memory.data(); state.program_counter = &shader_memory[instr.flow_control.dest_offset]; break; @@ -305,7 +316,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) state.call_stack_pointer = &state.call_stack[0]; ProcessShaderCode(state); - DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data, + DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), state.debug.max_opdesc_id, registers.vs_main_offset, registers.vs_output_attributes); -- cgit v1.2.3 From cb1804e0aba48826d671afb0500ae5eaeebd5c5a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 12 Dec 2014 18:31:37 +0100 Subject: Pica/VertexShader: Move code around a bit. --- src/video_core/vertex_shader.cpp | 100 +++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 42 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index c98c625c2..33a862b74 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -86,6 +86,8 @@ static void ProcessShaderCode(VertexShaderState& state) { bool increment_pc = true; bool exit_loop = false; const Instruction& instr = *(const Instruction*)state.program_counter; + const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; + state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory.data())); auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { @@ -100,47 +102,52 @@ static void ProcessShaderCode(VertexShaderState& state) { return &shader_uniforms.f[source_reg.GetIndex()].x; } }; - bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); - const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted)); - const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); - float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] - : (instr.common.dest < 0x10) ? nullptr - : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] - : nullptr; - const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; - const bool negate_src1 = (swizzle.negate_src1 != 0); - const bool negate_src2 = (swizzle.negate_src2 != 0); - - float24 src1[4] = { - src1_[(int)swizzle.GetSelectorSrc1(0)], - src1_[(int)swizzle.GetSelectorSrc1(1)], - src1_[(int)swizzle.GetSelectorSrc1(2)], - src1_[(int)swizzle.GetSelectorSrc1(3)], - }; - if (negate_src1) { - src1[0] = src1[0] * float24::FromFloat32(-1); - src1[1] = src1[1] * float24::FromFloat32(-1); - src1[2] = src1[2] * float24::FromFloat32(-1); - src1[3] = src1[3] * float24::FromFloat32(-1); - } - float24 src2[4] = { - src2_[(int)swizzle.GetSelectorSrc2(0)], - src2_[(int)swizzle.GetSelectorSrc2(1)], - src2_[(int)swizzle.GetSelectorSrc2(2)], - src2_[(int)swizzle.GetSelectorSrc2(3)], - }; - if (negate_src2) { - src2[0] = src2[0] * float24::FromFloat32(-1); - src2[1] = src2[1] * float24::FromFloat32(-1); - src2[2] = src2[2] * float24::FromFloat32(-1); - src2[3] = src2[3] * float24::FromFloat32(-1); - } + switch (instr.opcode.GetInfo().type) { + case Instruction::OpCodeType::Arithmetic: + { + bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); + const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted)); + const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); + + const bool negate_src1 = (swizzle.negate_src1 != 0); + const bool negate_src2 = (swizzle.negate_src2 != 0); + + float24 src1[4] = { + src1_[(int)swizzle.GetSelectorSrc1(0)], + src1_[(int)swizzle.GetSelectorSrc1(1)], + src1_[(int)swizzle.GetSelectorSrc1(2)], + src1_[(int)swizzle.GetSelectorSrc1(3)], + }; + if (negate_src1) { + src1[0] = src1[0] * float24::FromFloat32(-1); + src1[1] = src1[1] * float24::FromFloat32(-1); + src1[2] = src1[2] * float24::FromFloat32(-1); + src1[3] = src1[3] * float24::FromFloat32(-1); + } + float24 src2[4] = { + src2_[(int)swizzle.GetSelectorSrc2(0)], + src2_[(int)swizzle.GetSelectorSrc2(1)], + src2_[(int)swizzle.GetSelectorSrc2(2)], + src2_[(int)swizzle.GetSelectorSrc2(3)], + }; + if (negate_src2) { + src2[0] = src2[0] * float24::FromFloat32(-1); + src2[1] = src2[1] * float24::FromFloat32(-1); + src2[2] = src2[2] * float24::FromFloat32(-1); + src2[3] = src2[3] * float24::FromFloat32(-1); + } + + float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] + : (instr.common.dest < 0x10) ? nullptr + : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] + : nullptr; + + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); - switch (instr.opcode) { + switch (instr.opcode) { case Instruction::OpCode::ADD: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -153,7 +160,6 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::MUL: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -167,7 +173,6 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::DP3: case Instruction::OpCode::DP4: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); float24 dot = float24::FromFloat32(0.f); int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; for (int i = 0; i < num_components; ++i) @@ -185,7 +190,6 @@ static void ProcessShaderCode(VertexShaderState& state) { // Reciprocal case Instruction::OpCode::RCP: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -201,7 +205,6 @@ static void ProcessShaderCode(VertexShaderState& state) { // Reciprocal Square Root case Instruction::OpCode::RSQ: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -216,7 +219,6 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::MOV: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -226,6 +228,17 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } + default: + LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", + (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); + break; + } + + break; + } + default: + // Process instruction explicitly + switch (instr.opcode) { // NOP is currently used as a heuristic for leaving from a function. // TODO: This is completely incorrect. case Instruction::OpCode::NOP: @@ -256,6 +269,9 @@ static void ProcessShaderCode(VertexShaderState& state) { LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); break; + } + + break; } if (increment_pc) -- cgit v1.2.3 From 67618a2c55e0b6860bbb083962cdd28a543bf82a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 12 Dec 2014 22:50:09 +0100 Subject: Pica/VertexShader: Add support for MOVA, CMP and IFC. --- src/video_core/vertex_shader.cpp | 137 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 130 insertions(+), 7 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 33a862b74..5d9203c86 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -59,6 +59,8 @@ const std::array& GetSwizzlePatterns() return swizzle_data; } +// TODO: Is there actually a limit on hardware? +const int if_stack_size = 8; struct VertexShaderState { u32* program_counter; @@ -67,7 +69,11 @@ struct VertexShaderState { float24* output_register_table[7*4]; Math::Vec4 temporary_registers[16]; - bool status_registers[2]; + bool conditional_code[2]; + + // Two Address registers and one loop counter + // TODO: How many bits do these actually have? + s32 address_registers[3]; enum { INVALID_ADDRESS = 0xFFFFFFFF @@ -75,6 +81,12 @@ struct VertexShaderState { u32 call_stack[8]; // TODO: What is the maximal call stack depth? u32* call_stack_pointer; + struct IfStackElement { + u32 else_addr; + u32 else_instructions; + } if_stack[if_stack_size]; + IfStackElement* if_stack_pointer; + struct { u32 max_offset; // maximum program counter ever reached u32 max_opdesc_id; // maximum swizzle pattern index ever used @@ -107,11 +119,20 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCodeType::Arithmetic: { bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); - const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted)); + if (is_inverted) { + // We don't really support this properly and/or reliably + LOG_ERROR(HW_GPU, "Bad condition..."); + exit(0); + } + + const int address_offset = (instr.common.address_register_index == 0) + ? 0 : state.address_registers[instr.common.address_register_index - 1]; + + const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset); const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); - const bool negate_src1 = (swizzle.negate_src1 != 0); - const bool negate_src2 = (swizzle.negate_src2 != 0); + const bool negate_src1 = (swizzle.negate_src1 != false); + const bool negate_src2 = (swizzle.negate_src2 != false); float24 src1[4] = { src1_[(int)swizzle.GetSelectorSrc1(0)], @@ -217,6 +238,19 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } + case Instruction::OpCode::MOVA: + { + for (int i = 0; i < 2; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + // TODO: Figure out how the rounding is done on hardware + state.address_registers[i] = static_cast(src1[i].ToFloat32()); + } + + break; + } + case Instruction::OpCode::MOV: { for (int i = 0; i < 4; ++i) { @@ -228,16 +262,56 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } + case Instruction::OpCode::CMP: + for (int i = 0; i < 2; ++i) { + // TODO: Can you restrict to one compare via dest masking? + + auto compare_op = instr.common.compare_op; + auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); + + switch (op) { + case compare_op.Equal: + state.conditional_code[i] = (src1[i] == src2[i]); + break; + + case compare_op.NotEqual: + state.conditional_code[i] = (src1[i] != src2[i]); + break; + + case compare_op.LessThan: + state.conditional_code[i] = (src1[i] < src2[i]); + break; + + case compare_op.LessEqual: + state.conditional_code[i] = (src1[i] <= src2[i]); + break; + + case compare_op.GreaterThan: + state.conditional_code[i] = (src1[i] > src2[i]); + break; + + case compare_op.GreaterEqual: + state.conditional_code[i] = (src1[i] >= src2[i]); + break; + + default: + LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast(op)); + break; + } + } + break; + default: LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); + _dbg_assert_(HW_GPU, 0); break; } break; } default: - // Process instruction explicitly + // Handle each instruction on its own switch (instr.opcode) { // NOP is currently used as a heuristic for leaving from a function. // TODO: This is completely incorrect. @@ -265,6 +339,44 @@ static void ProcessShaderCode(VertexShaderState& state) { // TODO break; + case Instruction::OpCode::IFC: + { + // TODO: Do we need to consider swizzlers here? + + auto flow_control = instr.flow_control; + bool results[3] = { flow_control.refx == state.conditional_code[0], + flow_control.refy == state.conditional_code[1] }; + + switch (flow_control.op) { + case flow_control.Or: + results[2] = results[0] || results[1]; + break; + + case flow_control.And: + results[2] = results[0] && results[1]; + break; + + case flow_control.JustX: + results[2] = results[0]; + break; + + case flow_control.JustY: + results[2] = results[1]; + break; + } + + if (results[2]) { + ++state.if_stack_pointer; + + state.if_stack_pointer->else_addr = instr.flow_control.dest_offset; + state.if_stack_pointer->else_instructions = instr.flow_control.num_instructions; + } else { + state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; + } + + break; + } + default: LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); @@ -277,6 +389,13 @@ static void ProcessShaderCode(VertexShaderState& state) { if (increment_pc) ++state.program_counter; + if (state.if_stack_pointer >= &state.if_stack[0]) { + if (state.program_counter - shader_memory.data() == state.if_stack_pointer->else_addr) { + state.program_counter += state.if_stack_pointer->else_instructions; + state.if_stack_pointer--; + } + } + if (exit_loop) break; } @@ -326,11 +445,15 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; } - state.status_registers[0] = false; - state.status_registers[1] = false; + state.conditional_code[0] = false; + state.conditional_code[1] = false; boost::fill(state.call_stack, VertexShaderState::INVALID_ADDRESS); state.call_stack_pointer = &state.call_stack[0]; + std::fill(state.if_stack, state.if_stack + sizeof(state.if_stack) / sizeof(state.if_stack[0]), + VertexShaderState::IfStackElement{VertexShaderState::INVALID_ADDRESS, VertexShaderState::INVALID_ADDRESS}); + state.if_stack_pointer = state.if_stack - 1; // Meh. TODO: Make this less ugly + ProcessShaderCode(state); DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), state.debug.max_opdesc_id, registers.vs_main_offset, -- cgit v1.2.3 From aff808b2fdfd9605179a13eb55b72d68a7cdd8c2 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 13 Dec 2014 21:20:47 +0100 Subject: Pica: Add support for boolean uniforms. --- src/video_core/vertex_shader.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 5d9203c86..fbec1bcc8 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -26,8 +26,9 @@ namespace VertexShader { static struct { Math::Vec4 f[96]; -} shader_uniforms; + std::array b; +} shader_uniforms; // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! // For now, we just keep these local arrays around. @@ -49,6 +50,11 @@ Math::Vec4& GetFloatUniform(u32 index) return shader_uniforms.f[index]; } +bool& GetBoolUniform(u32 index) +{ + return shader_uniforms.b[index]; +} + const std::array& GetShaderBinary() { return shader_memory; -- cgit v1.2.3 From cd163fb59ae2922d33aa931f51ef5d116c0adc3f Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 13 Dec 2014 21:22:55 +0100 Subject: Pica/VertexShader: Implement MAX instructions. --- src/video_core/vertex_shader.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index fbec1bcc8..742e5a9f2 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -197,6 +197,15 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } + case Instruction::OpCode::MAX: + for (int i = 0; i < 4; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + dest[i] = std::max(src1[i], src2[i]); + } + break; + case Instruction::OpCode::DP3: case Instruction::OpCode::DP4: { -- cgit v1.2.3 From 22afb9d8309f56494d95f6132561a413b8e7895c Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 13 Dec 2014 21:23:41 +0100 Subject: Pica/VertexShader: Run instruction handlers according to the effective opcode. This allows for proper emulation of the different CMP/LRP/MAD instructions. --- src/video_core/vertex_shader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 742e5a9f2..dd406f9ca 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -172,7 +172,7 @@ static void ProcessShaderCode(VertexShaderState& state) { state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); - switch (instr.opcode) { + switch (instr.opcode.EffectiveOpCode()) { case Instruction::OpCode::ADD: { for (int i = 0; i < 4; ++i) { -- cgit v1.2.3 From 6bd41de276a97fee1d4f07789a33ff49d494a20d Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 13 Dec 2014 21:30:13 +0100 Subject: Pica/VertexShader: Cleanup flow control logic and implement CMP/IFU instructions. --- src/video_core/vertex_shader.cpp | 106 +++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 50 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index dd406f9ca..af9332975 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 // Refer to the license.txt file included. +#include + #include #include @@ -65,9 +67,6 @@ const std::array& GetSwizzlePatterns() return swizzle_data; } -// TODO: Is there actually a limit on hardware? -const int if_stack_size = 8; - struct VertexShaderState { u32* program_counter; @@ -84,14 +83,14 @@ struct VertexShaderState { enum { INVALID_ADDRESS = 0xFFFFFFFF }; - u32 call_stack[8]; // TODO: What is the maximal call stack depth? - u32* call_stack_pointer; - struct IfStackElement { - u32 else_addr; - u32 else_instructions; - } if_stack[if_stack_size]; - IfStackElement* if_stack_pointer; + struct CallStackElement { + u32 final_address; + u32 return_address; + }; + + // TODO: Is there a maximal size for this? + std::stack call_stack; struct { u32 max_offset; // maximum program counter ever reached @@ -101,12 +100,27 @@ struct VertexShaderState { static void ProcessShaderCode(VertexShaderState& state) { while (true) { - bool increment_pc = true; + if (!state.call_stack.empty()) { + if (state.program_counter - shader_memory.data() == state.call_stack.top().final_address) { + state.program_counter = &shader_memory[state.call_stack.top().return_address]; + state.call_stack.pop(); + + // TODO: Is "trying again" accurate to hardware? + continue; + } + } + bool exit_loop = false; const Instruction& instr = *(const Instruction*)state.program_counter; const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; - state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory.data())); + auto call = [&](std::stack& stack, u32 offset, u32 num_instructions, u32 return_offset) { + state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset + stack.push({ offset + num_instructions, return_offset }); + }; + u32 binary_offset = state.program_counter - shader_memory.data(); + + state.debug.max_offset = std::max(state.debug.max_offset, 1 + binary_offset); auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { switch (source_reg.GetRegisterType()) { @@ -328,30 +342,33 @@ static void ProcessShaderCode(VertexShaderState& state) { default: // Handle each instruction on its own switch (instr.opcode) { - // NOP is currently used as a heuristic for leaving from a function. - // TODO: This is completely incorrect. - case Instruction::OpCode::NOP: - if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { - exit_loop = true; - } else { - // Jump back to call stack position, invalidate call stack entry, move up call stack pointer - state.program_counter = &shader_memory[*state.call_stack_pointer]; - *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS; - } - + case Instruction::OpCode::END: + exit_loop = true; break; case Instruction::OpCode::CALL: - increment_pc = false; - - _dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); + call(state.call_stack, + instr.flow_control.dest_offset, + instr.flow_control.num_instructions, + binary_offset + 1); + break; - *++state.call_stack_pointer = state.program_counter - shader_memory.data(); - state.program_counter = &shader_memory[instr.flow_control.dest_offset]; + case Instruction::OpCode::NOP: break; - case Instruction::OpCode::END: - // TODO + case Instruction::OpCode::IFU: + if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { + call(state.call_stack, + binary_offset + 1, + instr.flow_control.dest_offset - binary_offset - 1, + instr.flow_control.dest_offset + instr.flow_control.num_instructions); + } else { + call(state.call_stack, + instr.flow_control.dest_offset, + instr.flow_control.num_instructions, + instr.flow_control.dest_offset + instr.flow_control.num_instructions); + } + break; case Instruction::OpCode::IFC: @@ -381,12 +398,15 @@ static void ProcessShaderCode(VertexShaderState& state) { } if (results[2]) { - ++state.if_stack_pointer; - - state.if_stack_pointer->else_addr = instr.flow_control.dest_offset; - state.if_stack_pointer->else_instructions = instr.flow_control.num_instructions; + call(state.call_stack, + binary_offset + 1, + instr.flow_control.dest_offset - binary_offset - 1, + instr.flow_control.dest_offset + instr.flow_control.num_instructions); } else { - state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; + call(state.call_stack, + instr.flow_control.dest_offset, + instr.flow_control.num_instructions, + instr.flow_control.dest_offset + instr.flow_control.num_instructions); } break; @@ -401,15 +421,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - if (increment_pc) - ++state.program_counter; - - if (state.if_stack_pointer >= &state.if_stack[0]) { - if (state.program_counter - shader_memory.data() == state.if_stack_pointer->else_addr) { - state.program_counter += state.if_stack_pointer->else_instructions; - state.if_stack_pointer--; - } - } + ++state.program_counter; if (exit_loop) break; @@ -462,12 +474,6 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) state.conditional_code[0] = false; state.conditional_code[1] = false; - boost::fill(state.call_stack, VertexShaderState::INVALID_ADDRESS); - state.call_stack_pointer = &state.call_stack[0]; - - std::fill(state.if_stack, state.if_stack + sizeof(state.if_stack) / sizeof(state.if_stack[0]), - VertexShaderState::IfStackElement{VertexShaderState::INVALID_ADDRESS, VertexShaderState::INVALID_ADDRESS}); - state.if_stack_pointer = state.if_stack - 1; // Meh. TODO: Make this less ugly ProcessShaderCode(state); DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), -- cgit v1.2.3 From ad5db467d7e9a598e7f8e998066bc5ffe99f1436 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Dec 2014 19:49:17 +0100 Subject: Pica/VertexShader: Clarify a comment. --- src/video_core/vertex_shader.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index af9332975..5ca30ba53 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -140,7 +140,9 @@ static void ProcessShaderCode(VertexShaderState& state) { { bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); if (is_inverted) { - // We don't really support this properly and/or reliably + // TODO: We don't really support this properly: For instance, the address register + // offset needs to be applied to SRC2 instead, etc. + // For now, we just abort in this situation. LOG_ERROR(HW_GPU, "Bad condition..."); exit(0); } -- cgit v1.2.3 From a664574ecbddb643dd12fb9815f4c4526f59f9ff Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Dec 2014 19:58:21 +0100 Subject: Pica/VertexShader: Be robust against invalid inputs. More specifically, this also fixes crashes by Citra trying to load a src2 register even if the current instruction does not use that. --- src/video_core/vertex_shader.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 5ca30ba53..345f3c3fe 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -99,6 +99,10 @@ struct VertexShaderState { }; static void ProcessShaderCode(VertexShaderState& state) { + + // Placeholder for invalid inputs + static float24 dummy_vec4_float24[4]; + while (true) { if (!state.call_stack.empty()) { if (state.program_counter - shader_memory.data() == state.call_stack.top().final_address) { @@ -132,6 +136,9 @@ static void ProcessShaderCode(VertexShaderState& state) { case RegisterType::FloatUniform: return &shader_uniforms.f[source_reg.GetIndex()].x; + + default: + return dummy_vec4_float24; } }; @@ -182,9 +189,9 @@ static void ProcessShaderCode(VertexShaderState& state) { } float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] - : (instr.common.dest < 0x10) ? nullptr + : (instr.common.dest < 0x10) ? dummy_vec4_float24 : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] - : nullptr; + : dummy_vec4_float24; state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); -- cgit v1.2.3 From 17f31de364df294337963cabad106a5f0a9d302b Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 20 Dec 2014 15:19:36 +0100 Subject: Pica/VertexShader: Small optimization. --- src/video_core/vertex_shader.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 345f3c3fe..de963f5e9 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -118,9 +118,9 @@ static void ProcessShaderCode(VertexShaderState& state) { const Instruction& instr = *(const Instruction*)state.program_counter; const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; - auto call = [&](std::stack& stack, u32 offset, u32 num_instructions, u32 return_offset) { + auto call = [&](VertexShaderState& state, u32 offset, u32 num_instructions, u32 return_offset) { state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset - stack.push({ offset + num_instructions, return_offset }); + state.call_stack.push({ offset + num_instructions, return_offset }); }; u32 binary_offset = state.program_counter - shader_memory.data(); @@ -356,7 +356,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; case Instruction::OpCode::CALL: - call(state.call_stack, + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, binary_offset + 1); @@ -367,12 +367,12 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::IFU: if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { - call(state.call_stack, + call(state, binary_offset + 1, instr.flow_control.dest_offset - binary_offset - 1, instr.flow_control.dest_offset + instr.flow_control.num_instructions); } else { - call(state.call_stack, + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, instr.flow_control.dest_offset + instr.flow_control.num_instructions); @@ -407,12 +407,12 @@ static void ProcessShaderCode(VertexShaderState& state) { } if (results[2]) { - call(state.call_stack, + call(state, binary_offset + 1, instr.flow_control.dest_offset - binary_offset - 1, instr.flow_control.dest_offset + instr.flow_control.num_instructions); } else { - call(state.call_stack, + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, instr.flow_control.dest_offset + instr.flow_control.num_instructions); -- cgit v1.2.3 From 08f42c2b8c30d55f5c931f2260a0900ff902735c Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 20 Dec 2014 15:31:17 +0100 Subject: Pica/VertexShader: Promote a log message to critical status. --- src/video_core/vertex_shader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index de963f5e9..4ba69fa51 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -150,7 +150,7 @@ static void ProcessShaderCode(VertexShaderState& state) { // TODO: We don't really support this properly: For instance, the address register // offset needs to be applied to SRC2 instead, etc. // For now, we just abort in this situation. - LOG_ERROR(HW_GPU, "Bad condition..."); + LOG_CRITICAL(HW_GPU, "Bad condition..."); exit(0); } -- cgit v1.2.3