diff options
| author | 2014-12-16 00:32:49 +0100 | |
|---|---|---|
| committer | 2014-12-20 18:06:55 +0100 | |
| commit | 8ce1d324602001e1102648319a9281ee08a1af95 (patch) | |
| tree | acabdc7c8614235b283e6e48e13dc17a0a1a85dd | |
| parent | Add nihstro (a 3DS shader tools suite) as a submodule. (diff) | |
| download | yuzu-8ce1d324602001e1102648319a9281ee08a1af95.tar.gz yuzu-8ce1d324602001e1102648319a9281ee08a1af95.tar.xz yuzu-8ce1d324602001e1102648319a9281ee08a1af95.zip | |
Pica/VertexShader: Remove (now) duplicated shader bytecode definitions in favor of those provided by nihstro.
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 43 | ||||
| -rw-r--r-- | src/video_core/vertex_shader.h | 209 |
2 files changed, 30 insertions, 222 deletions
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 477e78cfe..064a703eb 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp | |||
| @@ -8,11 +8,18 @@ | |||
| 8 | 8 | ||
| 9 | #include <core/mem_map.h> | 9 | #include <core/mem_map.h> |
| 10 | 10 | ||
| 11 | #include <nihstro/shader_bytecode.h> | ||
| 12 | |||
| 11 | #include "debug_utils/debug_utils.h" | 13 | #include "debug_utils/debug_utils.h" |
| 12 | 14 | ||
| 13 | #include "pica.h" | 15 | #include "pica.h" |
| 14 | #include "vertex_shader.h" | 16 | #include "vertex_shader.h" |
| 15 | 17 | ||
| 18 | using nihstro::Instruction; | ||
| 19 | using nihstro::RegisterType; | ||
| 20 | using nihstro::SourceRegister; | ||
| 21 | using nihstro::SwizzlePattern; | ||
| 22 | |||
| 16 | namespace Pica { | 23 | namespace Pica { |
| 17 | 24 | ||
| 18 | namespace VertexShader { | 25 | namespace VertexShader { |
| @@ -70,19 +77,28 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 70 | const Instruction& instr = *(const Instruction*)state.program_counter; | 77 | const Instruction& instr = *(const Instruction*)state.program_counter; |
| 71 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); | 78 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); |
| 72 | 79 | ||
| 73 | const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()] | 80 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { |
| 74 | : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x | 81 | switch (source_reg.GetRegisterType()) { |
| 75 | : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x | 82 | case RegisterType::Input: |
| 76 | : nullptr; | 83 | return state.input_register_table[source_reg.GetIndex()]; |
| 77 | const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2.GetIndex()] | 84 | |
| 78 | : &state.temporary_registers[instr.common.src2.GetIndex()].x; | 85 | case RegisterType::Temporary: |
| 86 | return &state.temporary_registers[source_reg.GetIndex()].x; | ||
| 87 | |||
| 88 | case RegisterType::FloatUniform: | ||
| 89 | return &shader_uniforms.f[source_reg.GetIndex()].x; | ||
| 90 | } | ||
| 91 | }; | ||
| 92 | bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); | ||
| 93 | const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted)); | ||
| 94 | const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); | ||
| 79 | float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] | 95 | float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] |
| 80 | : (instr.common.dest < 0x10) ? nullptr | 96 | : (instr.common.dest < 0x10) ? nullptr |
| 81 | : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] | 97 | : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] |
| 82 | : nullptr; | 98 | : nullptr; |
| 83 | 99 | ||
| 84 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; | 100 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; |
| 85 | const bool negate_src1 = (swizzle.negate != 0); | 101 | const bool negate_src1 = (swizzle.negate_src1 != 0); |
| 86 | 102 | ||
| 87 | float24 src1[4] = { | 103 | float24 src1[4] = { |
| 88 | src1_[(int)swizzle.GetSelectorSrc1(0)], | 104 | src1_[(int)swizzle.GetSelectorSrc1(0)], |
| @@ -192,7 +208,9 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 192 | break; | 208 | break; |
| 193 | } | 209 | } |
| 194 | 210 | ||
| 195 | case Instruction::OpCode::RET: | 211 | // NOP is currently used as a heuristic for leaving from a function. |
| 212 | // TODO: This is completely incorrect. | ||
| 213 | case Instruction::OpCode::NOP: | ||
| 196 | if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { | 214 | if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { |
| 197 | exit_loop = true; | 215 | exit_loop = true; |
| 198 | } else { | 216 | } else { |
| @@ -209,17 +227,16 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 209 | _dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); | 227 | _dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); |
| 210 | 228 | ||
| 211 | *++state.call_stack_pointer = state.program_counter - shader_memory; | 229 | *++state.call_stack_pointer = state.program_counter - shader_memory; |
| 212 | // TODO: Does this offset refer to the beginning of shader memory? | 230 | state.program_counter = &shader_memory[instr.flow_control.dest_offset]; |
| 213 | state.program_counter = &shader_memory[instr.flow_control.offset_words]; | ||
| 214 | break; | 231 | break; |
| 215 | 232 | ||
| 216 | case Instruction::OpCode::FLS: | 233 | case Instruction::OpCode::END: |
| 217 | // TODO: Do whatever needs to be done here? | 234 | // TODO |
| 218 | break; | 235 | break; |
| 219 | 236 | ||
| 220 | default: | 237 | default: |
| 221 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | 238 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", |
| 222 | (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex); | 239 | (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); |
| 223 | break; | 240 | break; |
| 224 | } | 241 | } |
| 225 | 242 | ||
diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index c1292fc2d..131769808 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h | |||
| @@ -66,215 +66,6 @@ struct OutputVertex { | |||
| 66 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | 66 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
| 67 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | 67 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); |
| 68 | 68 | ||
| 69 | union Instruction { | ||
| 70 | enum class OpCode : u32 { | ||
| 71 | ADD = 0x0, | ||
| 72 | DP3 = 0x1, | ||
| 73 | DP4 = 0x2, | ||
| 74 | |||
| 75 | MUL = 0x8, | ||
| 76 | |||
| 77 | MAX = 0xC, | ||
| 78 | MIN = 0xD, | ||
| 79 | RCP = 0xE, | ||
| 80 | RSQ = 0xF, | ||
| 81 | |||
| 82 | MOV = 0x13, | ||
| 83 | |||
| 84 | RET = 0x21, | ||
| 85 | FLS = 0x22, // Flush | ||
| 86 | CALL = 0x24, | ||
| 87 | }; | ||
| 88 | |||
| 89 | std::string GetOpCodeName() const { | ||
| 90 | std::map<OpCode, std::string> map = { | ||
| 91 | { OpCode::ADD, "ADD" }, | ||
| 92 | { OpCode::DP3, "DP3" }, | ||
| 93 | { OpCode::DP4, "DP4" }, | ||
| 94 | { OpCode::MUL, "MUL" }, | ||
| 95 | { OpCode::MAX, "MAX" }, | ||
| 96 | { OpCode::MIN, "MIN" }, | ||
| 97 | { OpCode::RCP, "RCP" }, | ||
| 98 | { OpCode::RSQ, "RSQ" }, | ||
| 99 | { OpCode::MOV, "MOV" }, | ||
| 100 | { OpCode::RET, "RET" }, | ||
| 101 | { OpCode::FLS, "FLS" }, | ||
| 102 | }; | ||
| 103 | auto it = map.find(opcode); | ||
| 104 | if (it == map.end()) | ||
| 105 | return "UNK"; | ||
| 106 | else | ||
| 107 | return it->second; | ||
| 108 | } | ||
| 109 | |||
| 110 | u32 hex; | ||
| 111 | |||
| 112 | BitField<0x1a, 0x6, OpCode> opcode; | ||
| 113 | |||
| 114 | // General notes: | ||
| 115 | // | ||
| 116 | // When two input registers are used, one of them uses a 5-bit index while the other | ||
| 117 | // one uses a 7-bit index. This is because at most one floating point uniform may be used | ||
| 118 | // as an input. | ||
| 119 | |||
| 120 | |||
| 121 | // Format used e.g. by arithmetic instructions and comparisons | ||
| 122 | // "src1" and "src2" specify register indices (i.e. indices referring to groups of 4 floats), | ||
| 123 | // while "dest" addresses individual floats. | ||
| 124 | union { | ||
| 125 | BitField<0x00, 0x5, u32> operand_desc_id; | ||
| 126 | |||
| 127 | template<class BitFieldType> | ||
| 128 | struct SourceRegister : BitFieldType { | ||
| 129 | enum RegisterType { | ||
| 130 | Input, | ||
| 131 | Temporary, | ||
| 132 | FloatUniform | ||
| 133 | }; | ||
| 134 | |||
| 135 | RegisterType GetRegisterType() const { | ||
| 136 | if (BitFieldType::Value() < 0x10) | ||
| 137 | return Input; | ||
| 138 | else if (BitFieldType::Value() < 0x20) | ||
| 139 | return Temporary; | ||
| 140 | else | ||
| 141 | return FloatUniform; | ||
| 142 | } | ||
| 143 | |||
| 144 | int GetIndex() const { | ||
| 145 | if (GetRegisterType() == Input) | ||
| 146 | return BitFieldType::Value(); | ||
| 147 | else if (GetRegisterType() == Temporary) | ||
| 148 | return BitFieldType::Value() - 0x10; | ||
| 149 | else // if (GetRegisterType() == FloatUniform) | ||
| 150 | return BitFieldType::Value() - 0x20; | ||
| 151 | } | ||
| 152 | |||
| 153 | std::string GetRegisterName() const { | ||
| 154 | std::map<RegisterType, std::string> type = { | ||
| 155 | { Input, "i" }, | ||
| 156 | { Temporary, "t" }, | ||
| 157 | { FloatUniform, "f" }, | ||
| 158 | }; | ||
| 159 | return type[GetRegisterType()] + std::to_string(GetIndex()); | ||
| 160 | } | ||
| 161 | }; | ||
| 162 | |||
| 163 | SourceRegister<BitField<0x07, 0x5, u32>> src2; | ||
| 164 | SourceRegister<BitField<0x0c, 0x7, u32>> src1; | ||
| 165 | |||
| 166 | struct : BitField<0x15, 0x5, u32> | ||
| 167 | { | ||
| 168 | enum RegisterType { | ||
| 169 | Output, | ||
| 170 | Temporary, | ||
| 171 | Unknown | ||
| 172 | }; | ||
| 173 | RegisterType GetRegisterType() const { | ||
| 174 | if (Value() < 0x8) | ||
| 175 | return Output; | ||
| 176 | else if (Value() < 0x10) | ||
| 177 | return Unknown; | ||
| 178 | else | ||
| 179 | return Temporary; | ||
| 180 | } | ||
| 181 | int GetIndex() const { | ||
| 182 | if (GetRegisterType() == Output) | ||
| 183 | return Value(); | ||
| 184 | else if (GetRegisterType() == Temporary) | ||
| 185 | return Value() - 0x10; | ||
| 186 | else | ||
| 187 | return Value(); | ||
| 188 | } | ||
| 189 | std::string GetRegisterName() const { | ||
| 190 | std::map<RegisterType, std::string> type = { | ||
| 191 | { Output, "o" }, | ||
| 192 | { Temporary, "t" }, | ||
| 193 | { Unknown, "u" } | ||
| 194 | }; | ||
| 195 | return type[GetRegisterType()] + std::to_string(GetIndex()); | ||
| 196 | } | ||
| 197 | } dest; | ||
| 198 | } common; | ||
| 199 | |||
| 200 | // Format used for flow control instructions ("if") | ||
| 201 | union { | ||
| 202 | BitField<0x00, 0x8, u32> num_instructions; | ||
| 203 | BitField<0x0a, 0xc, u32> offset_words; | ||
| 204 | } flow_control; | ||
| 205 | }; | ||
| 206 | static_assert(std::is_standard_layout<Instruction>::value, "Structure is not using standard layout!"); | ||
| 207 | |||
| 208 | union SwizzlePattern { | ||
| 209 | u32 hex; | ||
| 210 | |||
| 211 | enum class Selector : u32 { | ||
| 212 | x = 0, | ||
| 213 | y = 1, | ||
| 214 | z = 2, | ||
| 215 | w = 3 | ||
| 216 | }; | ||
| 217 | |||
| 218 | Selector GetSelectorSrc1(int comp) const { | ||
| 219 | Selector selectors[] = { | ||
| 220 | src1_selector_0, src1_selector_1, src1_selector_2, src1_selector_3 | ||
| 221 | }; | ||
| 222 | return selectors[comp]; | ||
| 223 | } | ||
| 224 | |||
| 225 | Selector GetSelectorSrc2(int comp) const { | ||
| 226 | Selector selectors[] = { | ||
| 227 | src2_selector_0, src2_selector_1, src2_selector_2, src2_selector_3 | ||
| 228 | }; | ||
| 229 | return selectors[comp]; | ||
| 230 | } | ||
| 231 | |||
| 232 | bool DestComponentEnabled(int i) const { | ||
| 233 | return (dest_mask & (0x8 >> i)) != 0; | ||
| 234 | } | ||
| 235 | |||
| 236 | std::string SelectorToString(bool src2) const { | ||
| 237 | std::map<Selector, std::string> map = { | ||
| 238 | { Selector::x, "x" }, | ||
| 239 | { Selector::y, "y" }, | ||
| 240 | { Selector::z, "z" }, | ||
| 241 | { Selector::w, "w" } | ||
| 242 | }; | ||
| 243 | std::string ret; | ||
| 244 | for (int i = 0; i < 4; ++i) { | ||
| 245 | ret += map.at(src2 ? GetSelectorSrc2(i) : GetSelectorSrc1(i)); | ||
| 246 | } | ||
| 247 | return ret; | ||
| 248 | } | ||
| 249 | |||
| 250 | std::string DestMaskToString() const { | ||
| 251 | std::string ret; | ||
| 252 | for (int i = 0; i < 4; ++i) { | ||
| 253 | if (!DestComponentEnabled(i)) | ||
| 254 | ret += "_"; | ||
| 255 | else | ||
| 256 | ret += "xyzw"[i]; | ||
| 257 | } | ||
| 258 | return ret; | ||
| 259 | } | ||
| 260 | |||
| 261 | // Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x | ||
| 262 | BitField< 0, 4, u32> dest_mask; | ||
| 263 | |||
| 264 | BitField< 4, 1, u32> negate; // negates src1 | ||
| 265 | |||
| 266 | BitField< 5, 2, Selector> src1_selector_3; | ||
| 267 | BitField< 7, 2, Selector> src1_selector_2; | ||
| 268 | BitField< 9, 2, Selector> src1_selector_1; | ||
| 269 | BitField<11, 2, Selector> src1_selector_0; | ||
| 270 | |||
| 271 | BitField<14, 2, Selector> src2_selector_3; | ||
| 272 | BitField<16, 2, Selector> src2_selector_2; | ||
| 273 | BitField<18, 2, Selector> src2_selector_1; | ||
| 274 | BitField<20, 2, Selector> src2_selector_0; | ||
| 275 | |||
| 276 | BitField<31, 1, u32> flag; // not sure what this means, maybe it's the sign? | ||
| 277 | }; | ||
| 278 | 69 | ||
| 279 | void SubmitShaderMemoryChange(u32 addr, u32 value); | 70 | void SubmitShaderMemoryChange(u32 addr, u32 value); |
| 280 | void SubmitSwizzleDataChange(u32 addr, u32 value); | 71 | void SubmitSwizzleDataChange(u32 addr, u32 value); |