diff options
| author | 2016-05-13 08:49:20 +0200 | |
|---|---|---|
| committer | 2016-05-16 18:55:51 +0200 | |
| commit | ff0fa86b17e8133263bb54c1338ade8ecd97e5d9 (patch) | |
| tree | aedf8d5ac4ecc967ab7bacff7ea011104e95f99f /src/video_core/shader/shader.cpp | |
| parent | Merge pull request #1787 from JayFoxRox/refactor-jit (diff) | |
| download | yuzu-ff0fa86b17e8133263bb54c1338ade8ecd97e5d9.tar.gz yuzu-ff0fa86b17e8133263bb54c1338ade8ecd97e5d9.tar.xz yuzu-ff0fa86b17e8133263bb54c1338ade8ecd97e5d9.zip | |
Retrieve shader result from new OutputRegisters-type
Diffstat (limited to 'src/video_core/shader/shader.cpp')
| -rw-r--r-- | src/video_core/shader/shader.cpp | 103 |
1 files changed, 53 insertions, 50 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 161097610..f565e2c91 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -30,6 +30,58 @@ namespace Pica { | |||
| 30 | 30 | ||
| 31 | namespace Shader { | 31 | namespace Shader { |
| 32 | 32 | ||
| 33 | OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) { | ||
| 34 | // Setup output data: copy each enabled output register (bit i of config.output_mask) into ret at the float24 offsets given by its map_x/map_y/map_z/map_w entries. NOTE(review): ret is not explicitly initialized here - confirm untouched fields are safe to read. | ||
| 35 | OutputVertex ret; | ||
| 36 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to figure out what those circumstances are and enable the remaining outputs then. Only registers 0..6 are considered for now. | ||
| 37 | // `i` walks the output registers; `index` counts the enabled ones seen so far and selects the matching g_state.regs.vs_output_attributes entry. | ||
| 38 | unsigned index = 0; | ||
| 39 | for (unsigned i = 0; i < 7; ++i) { | ||
| 40 | |||
| 41 | if (index >= g_state.regs.vs_output_total) | ||
| 42 | break; | ||
| 43 | |||
| 44 | if ((config.output_mask & (1 << i)) == 0) | ||
| 45 | continue; | ||
| 46 | |||
| 47 | const auto& output_register_map = g_state.regs.vs_output_attributes[index]; | ||
| 48 | |||
| 49 | u32 semantics[4] = { | ||
| 50 | output_register_map.map_x, output_register_map.map_y, | ||
| 51 | output_register_map.map_z, output_register_map.map_w | ||
| 52 | }; | ||
| 53 | |||
| 54 | for (unsigned comp = 0; comp < 4; ++comp) { | ||
| 55 | float24* out = ((float24*)&ret) + semantics[comp]; | ||
| 56 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { | ||
| 57 | *out = value[i][comp]; | ||
| 58 | } else { | ||
| 59 | // Zero output so that attributes which aren't output won't have denormals in them, which would slow us down later. | ||
| 60 | // NOTE(review): `out` was formed from semantics[comp] before the INVALID check, so this memset writes at float24 offset INVALID from &ret - confirm OutputVertex has room for that slot. | ||
| 61 | memset(out, 0, sizeof(*out)); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | index++; | ||
| 66 | } | ||
| 67 | |||
| 68 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation (each of the four color components becomes min(|c|, 1.0)). | ||
| 69 | for (unsigned i = 0; i < 4; ++i) { | ||
| 70 | ret.color[i] = float24::FromFloat32( | ||
| 71 | std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); | ||
| 72 | } | ||
| 73 | |||
| 74 | LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " | ||
| 75 | "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", | ||
| 76 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | ||
| 77 | ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), | ||
| 78 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | ||
| 79 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), | ||
| 80 | ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); | ||
| 81 | |||
| 82 | return ret; | ||
| 83 | } | ||
| 84 | |||
| 33 | #ifdef ARCHITECTURE_x86_64 | 85 | #ifdef ARCHITECTURE_x86_64 |
| 34 | static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map; | 86 | static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map; |
| 35 | static const JitShader* jit_shader; | 87 | static const JitShader* jit_shader; |
| @@ -62,7 +114,7 @@ void ShaderSetup::Setup() { | |||
| 62 | 114 | ||
| 63 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | 115 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); |
| 64 | 116 | ||
| 65 | OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { | 117 | void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { |
| 66 | auto& config = g_state.regs.vs; | 118 | auto& config = g_state.regs.vs; |
| 67 | auto& setup = g_state.vs; | 119 | auto& setup = g_state.vs; |
| 68 | 120 | ||
| @@ -89,55 +141,6 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, | |||
| 89 | RunInterpreter(setup, state, config.main_offset); | 141 | RunInterpreter(setup, state, config.main_offset); |
| 90 | #endif // ARCHITECTURE_x86_64 | 142 | #endif // ARCHITECTURE_x86_64 |
| 91 | 143 | ||
| 92 | // Setup output data | ||
| 93 | OutputVertex ret; | ||
| 94 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to | ||
| 95 | // figure out what those circumstances are and enable the remaining outputs then. | ||
| 96 | unsigned index = 0; | ||
| 97 | for (unsigned i = 0; i < 7; ++i) { | ||
| 98 | |||
| 99 | if (index >= g_state.regs.vs_output_total) | ||
| 100 | break; | ||
| 101 | |||
| 102 | if ((g_state.regs.vs.output_mask & (1 << i)) == 0) | ||
| 103 | continue; | ||
| 104 | |||
| 105 | const auto& output_register_map = g_state.regs.vs_output_attributes[index]; // TODO: Don't hardcode VS here | ||
| 106 | |||
| 107 | u32 semantics[4] = { | ||
| 108 | output_register_map.map_x, output_register_map.map_y, | ||
| 109 | output_register_map.map_z, output_register_map.map_w | ||
| 110 | }; | ||
| 111 | |||
| 112 | for (unsigned comp = 0; comp < 4; ++comp) { | ||
| 113 | float24* out = ((float24*)&ret) + semantics[comp]; | ||
| 114 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { | ||
| 115 | *out = state.registers.output[i][comp]; | ||
| 116 | } else { | ||
| 117 | // Zero output so that attributes which aren't output won't have denormals in them, | ||
| 118 | // which would slow us down later. | ||
| 119 | memset(out, 0, sizeof(*out)); | ||
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | index++; | ||
| 124 | } | ||
| 125 | |||
| 126 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation | ||
| 127 | for (unsigned i = 0; i < 4; ++i) { | ||
| 128 | ret.color[i] = float24::FromFloat32( | ||
| 129 | std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); | ||
| 130 | } | ||
| 131 | |||
| 132 | LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " | ||
| 133 | "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", | ||
| 134 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | ||
| 135 | ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), | ||
| 136 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | ||
| 137 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), | ||
| 138 | ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); | ||
| 139 | |||
| 140 | return ret; | ||
| 141 | } | 144 | } |
| 142 | 145 | ||
| 143 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { | 146 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { |