diff options
| author | 2016-05-13 08:49:20 +0200 | |
|---|---|---|
| committer | 2016-05-16 18:55:51 +0200 | |
| commit | ff0fa86b17e8133263bb54c1338ade8ecd97e5d9 (patch) | |
| tree | aedf8d5ac4ecc967ab7bacff7ea011104e95f99f /src/video_core/shader | |
| parent | Merge pull request #1787 from JayFoxRox/refactor-jit (diff) | |
| download | yuzu-ff0fa86b17e8133263bb54c1338ade8ecd97e5d9.tar.gz yuzu-ff0fa86b17e8133263bb54c1338ade8ecd97e5d9.tar.xz yuzu-ff0fa86b17e8133263bb54c1338ade8ecd97e5d9.zip | |
Retrieve shader result from new OutputRegisters-type
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/shader.cpp | 103 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 17 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 4 |
3 files changed, 68 insertions, 56 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 161097610..f565e2c91 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp | |||
| @@ -30,6 +30,58 @@ namespace Pica { | |||
| 30 | 30 | ||
| 31 | namespace Shader { | 31 | namespace Shader { |
| 32 | 32 | ||
| 33 | OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) { | ||
| 34 | // Setup output data | ||
| 35 | OutputVertex ret; | ||
| 36 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to | ||
| 37 | // figure out what those circumstances are and enable the remaining outputs then. | ||
| 38 | unsigned index = 0; | ||
| 39 | for (unsigned i = 0; i < 7; ++i) { | ||
| 40 | |||
| 41 | if (index >= g_state.regs.vs_output_total) | ||
| 42 | break; | ||
| 43 | |||
| 44 | if ((config.output_mask & (1 << i)) == 0) | ||
| 45 | continue; | ||
| 46 | |||
| 47 | const auto& output_register_map = g_state.regs.vs_output_attributes[index]; | ||
| 48 | |||
| 49 | u32 semantics[4] = { | ||
| 50 | output_register_map.map_x, output_register_map.map_y, | ||
| 51 | output_register_map.map_z, output_register_map.map_w | ||
| 52 | }; | ||
| 53 | |||
| 54 | for (unsigned comp = 0; comp < 4; ++comp) { | ||
| 55 | float24* out = ((float24*)&ret) + semantics[comp]; | ||
| 56 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { | ||
| 57 | *out = value[i][comp]; | ||
| 58 | } else { | ||
| 59 | // Zero output so that attributes which aren't output won't have denormals in them, | ||
| 60 | // which would slow us down later. | ||
| 61 | memset(out, 0, sizeof(*out)); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | index++; | ||
| 66 | } | ||
| 67 | |||
| 68 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation | ||
| 69 | for (unsigned i = 0; i < 4; ++i) { | ||
| 70 | ret.color[i] = float24::FromFloat32( | ||
| 71 | std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); | ||
| 72 | } | ||
| 73 | |||
| 74 | LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " | ||
| 75 | "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", | ||
| 76 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | ||
| 77 | ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), | ||
| 78 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | ||
| 79 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), | ||
| 80 | ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); | ||
| 81 | |||
| 82 | return ret; | ||
| 83 | } | ||
| 84 | |||
| 33 | #ifdef ARCHITECTURE_x86_64 | 85 | #ifdef ARCHITECTURE_x86_64 |
| 34 | static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map; | 86 | static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map; |
| 35 | static const JitShader* jit_shader; | 87 | static const JitShader* jit_shader; |
| @@ -62,7 +114,7 @@ void ShaderSetup::Setup() { | |||
| 62 | 114 | ||
| 63 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | 115 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); |
| 64 | 116 | ||
| 65 | OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { | 117 | void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { |
| 66 | auto& config = g_state.regs.vs; | 118 | auto& config = g_state.regs.vs; |
| 67 | auto& setup = g_state.vs; | 119 | auto& setup = g_state.vs; |
| 68 | 120 | ||
| @@ -89,55 +141,6 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, | |||
| 89 | RunInterpreter(setup, state, config.main_offset); | 141 | RunInterpreter(setup, state, config.main_offset); |
| 90 | #endif // ARCHITECTURE_x86_64 | 142 | #endif // ARCHITECTURE_x86_64 |
| 91 | 143 | ||
| 92 | // Setup output data | ||
| 93 | OutputVertex ret; | ||
| 94 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to | ||
| 95 | // figure out what those circumstances are and enable the remaining outputs then. | ||
| 96 | unsigned index = 0; | ||
| 97 | for (unsigned i = 0; i < 7; ++i) { | ||
| 98 | |||
| 99 | if (index >= g_state.regs.vs_output_total) | ||
| 100 | break; | ||
| 101 | |||
| 102 | if ((g_state.regs.vs.output_mask & (1 << i)) == 0) | ||
| 103 | continue; | ||
| 104 | |||
| 105 | const auto& output_register_map = g_state.regs.vs_output_attributes[index]; // TODO: Don't hardcode VS here | ||
| 106 | |||
| 107 | u32 semantics[4] = { | ||
| 108 | output_register_map.map_x, output_register_map.map_y, | ||
| 109 | output_register_map.map_z, output_register_map.map_w | ||
| 110 | }; | ||
| 111 | |||
| 112 | for (unsigned comp = 0; comp < 4; ++comp) { | ||
| 113 | float24* out = ((float24*)&ret) + semantics[comp]; | ||
| 114 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { | ||
| 115 | *out = state.registers.output[i][comp]; | ||
| 116 | } else { | ||
| 117 | // Zero output so that attributes which aren't output won't have denormals in them, | ||
| 118 | // which would slow us down later. | ||
| 119 | memset(out, 0, sizeof(*out)); | ||
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | index++; | ||
| 124 | } | ||
| 125 | |||
| 126 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation | ||
| 127 | for (unsigned i = 0; i < 4; ++i) { | ||
| 128 | ret.color[i] = float24::FromFloat32( | ||
| 129 | std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); | ||
| 130 | } | ||
| 131 | |||
| 132 | LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " | ||
| 133 | "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", | ||
| 134 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | ||
| 135 | ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), | ||
| 136 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | ||
| 137 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), | ||
| 138 | ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); | ||
| 139 | |||
| 140 | return ret; | ||
| 141 | } | 144 | } |
| 142 | 145 | ||
| 143 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { | 146 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 84898f21c..fee16df62 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h | |||
| @@ -84,6 +84,15 @@ struct OutputVertex { | |||
| 84 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | 84 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
| 85 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | 85 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); |
| 86 | 86 | ||
| 87 | struct OutputRegisters { | ||
| 88 | OutputRegisters() = default; | ||
| 89 | |||
| 90 | alignas(16) Math::Vec4<float24> value[16]; | ||
| 91 | |||
| 92 | OutputVertex ToVertex(const Regs::ShaderConfig& config); | ||
| 93 | }; | ||
| 94 | static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD"); | ||
| 95 | |||
| 87 | // Helper structure used to keep track of data useful for inspection of shader emulation | 96 | // Helper structure used to keep track of data useful for inspection of shader emulation |
| 88 | template<bool full_debugging> | 97 | template<bool full_debugging> |
| 89 | struct DebugData; | 98 | struct DebugData; |
| @@ -267,11 +276,12 @@ struct UnitState { | |||
| 267 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore | 276 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore |
| 268 | // required to be 16-byte aligned. | 277 | // required to be 16-byte aligned. |
| 269 | alignas(16) Math::Vec4<float24> input[16]; | 278 | alignas(16) Math::Vec4<float24> input[16]; |
| 270 | alignas(16) Math::Vec4<float24> output[16]; | ||
| 271 | alignas(16) Math::Vec4<float24> temporary[16]; | 279 | alignas(16) Math::Vec4<float24> temporary[16]; |
| 272 | } registers; | 280 | } registers; |
| 273 | static_assert(std::is_pod<Registers>::value, "Structure is not POD"); | 281 | static_assert(std::is_pod<Registers>::value, "Structure is not POD"); |
| 274 | 282 | ||
| 283 | OutputRegisters output_registers; | ||
| 284 | |||
| 275 | bool conditional_code[2]; | 285 | bool conditional_code[2]; |
| 276 | 286 | ||
| 277 | // Two Address registers and one loop counter | 287 | // Two Address registers and one loop counter |
| @@ -297,7 +307,7 @@ struct UnitState { | |||
| 297 | static size_t OutputOffset(const DestRegister& reg) { | 307 | static size_t OutputOffset(const DestRegister& reg) { |
| 298 | switch (reg.GetRegisterType()) { | 308 | switch (reg.GetRegisterType()) { |
| 299 | case RegisterType::Output: | 309 | case RegisterType::Output: |
| 300 | return offsetof(UnitState, registers.output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 310 | return offsetof(UnitState, output_registers.value) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
| 301 | 311 | ||
| 302 | case RegisterType::Temporary: | 312 | case RegisterType::Temporary: |
| 303 | return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 313 | return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
| @@ -354,9 +364,8 @@ struct ShaderSetup { | |||
| 354 | * @param state Shader unit state, must be setup per shader and per shader unit | 364 | * @param state Shader unit state, must be setup per shader and per shader unit |
| 355 | * @param input Input vertex into the shader | 365 | * @param input Input vertex into the shader |
| 356 | * @param num_attributes The number of vertex shader attributes | 366 | * @param num_attributes The number of vertex shader attributes |
| 357 | * @return The output vertex, after having been processed by the vertex shader | ||
| 358 | */ | 367 | */ |
| 359 | OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes); | 368 | void Run(UnitState<false>& state, const InputVertex& input, int num_attributes); |
| 360 | 369 | ||
| 361 | /** | 370 | /** |
| 362 | * Produce debug information based on the given shader and input vertex | 371 | * Produce debug information based on the given shader and input vertex |
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 714e8bfd5..b1eadc071 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -144,7 +144,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 144 | src2[3] = src2[3] * float24::FromFloat32(-1); | 144 | src2[3] = src2[3] * float24::FromFloat32(-1); |
| 145 | } | 145 | } |
| 146 | 146 | ||
| 147 | float24* dest = (instr.common.dest.Value() < 0x10) ? &state.registers.output[instr.common.dest.Value().GetIndex()][0] | 147 | float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] |
| 148 | : (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] | 148 | : (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] |
| 149 | : dummy_vec4_float24; | 149 | : dummy_vec4_float24; |
| 150 | 150 | ||
| @@ -483,7 +483,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 483 | src3[3] = src3[3] * float24::FromFloat32(-1); | 483 | src3[3] = src3[3] * float24::FromFloat32(-1); |
| 484 | } | 484 | } |
| 485 | 485 | ||
| 486 | float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0] | 486 | float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] |
| 487 | : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] | 487 | : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] |
| 488 | : dummy_vec4_float24; | 488 | : dummy_vec4_float24; |
| 489 | 489 | ||