diff options
| author | 2017-01-27 14:29:10 -0300 | |
|---|---|---|
| committer | 2017-01-27 14:29:10 -0300 | |
| commit | bf14f4be2263b4769e97800b35951717192c2d1c (patch) | |
| tree | 9c1c47f5a05e9907257f620d8426a0cebaf0cf78 /src/video_core/shader/shader.cpp | |
| parent | SDL: Select audio device (#2403) (diff) | |
| parent | VideoCore/Shader: Move entry_point to SetupBatch (diff) | |
| download | yuzu-bf14f4be2263b4769e97800b35951717192c2d1c.tar.gz yuzu-bf14f4be2263b4769e97800b35951717192c2d1c.tar.xz yuzu-bf14f4be2263b4769e97800b35951717192c2d1c.zip | |
Merge pull request #2346 from yuriks/shader-refactor2
More shader refactoring
Diffstat (limited to 'src/video_core/shader/shader.cpp')
| -rw-r--r-- | src/video_core/shader/shader.cpp | 102 |
1 file changed, 26 insertions, 76 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 7ae57e619..2da50bd62 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -2,14 +2,8 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <atomic> | ||
| 6 | #include <cmath> | 5 | #include <cmath> |
| 7 | #include <cstring> | 6 | #include <cstring> |
| 8 | #include <unordered_map> | ||
| 9 | #include <utility> | ||
| 10 | #include <boost/range/algorithm/fill.hpp> | ||
| 11 | #include "common/bit_field.h" | ||
| 12 | #include "common/hash.h" | ||
| 13 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 14 | #include "common/microprofile.h" | 8 | #include "common/microprofile.h" |
| 15 | #include "video_core/pica.h" | 9 | #include "video_core/pica.h" |
| @@ -25,7 +19,8 @@ namespace Pica { | |||
| 25 | 19 | ||
| 26 | namespace Shader { | 20 | namespace Shader { |
| 27 | 21 | ||
| 28 | OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const { | 22 | OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, |
| 23 | u32 output_mask) { | ||
| 29 | // Setup output data | 24 | // Setup output data |
| 30 | OutputVertex ret; | 25 | OutputVertex ret; |
| 31 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to | 26 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to |
| @@ -33,13 +28,13 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const { | |||
| 33 | unsigned index = 0; | 28 | unsigned index = 0; |
| 34 | for (unsigned i = 0; i < 7; ++i) { | 29 | for (unsigned i = 0; i < 7; ++i) { |
| 35 | 30 | ||
| 36 | if (index >= g_state.regs.vs_output_total) | 31 | if (index >= regs.vs_output_total) |
| 37 | break; | 32 | break; |
| 38 | 33 | ||
| 39 | if ((config.output_mask & (1 << i)) == 0) | 34 | if ((output_mask & (1 << i)) == 0) |
| 40 | continue; | 35 | continue; |
| 41 | 36 | ||
| 42 | const auto& output_register_map = g_state.regs.vs_output_attributes[index]; | 37 | const auto& output_register_map = regs.vs_output_attributes[index]; |
| 43 | 38 | ||
| 44 | u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, | 39 | u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, |
| 45 | output_register_map.map_z, output_register_map.map_w}; | 40 | output_register_map.map_z, output_register_map.map_w}; |
| @@ -47,7 +42,7 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const { | |||
| 47 | for (unsigned comp = 0; comp < 4; ++comp) { | 42 | for (unsigned comp = 0; comp < 4; ++comp) { |
| 48 | float24* out = ((float24*)&ret) + semantics[comp]; | 43 | float24* out = ((float24*)&ret) + semantics[comp]; |
| 49 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { | 44 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { |
| 50 | *out = value[i][comp]; | 45 | *out = output_regs[i][comp]; |
| 51 | } else { | 46 | } else { |
| 52 | // Zero output so that attributes which aren't output won't have denormals in them, | 47 | // Zero output so that attributes which aren't output won't have denormals in them, |
| 53 | // which would slow us down later. | 48 | // which would slow us down later. |
| @@ -76,86 +71,41 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const { | |||
| 76 | return ret; | 71 | return ret; |
| 77 | } | 72 | } |
| 78 | 73 | ||
| 79 | #ifdef ARCHITECTURE_x86_64 | 74 | void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) { |
| 80 | static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map; | 75 | // Setup input register table |
| 81 | static const JitShader* jit_shader; | 76 | const auto& attribute_register_map = g_state.regs.vs.input_register_map; |
| 82 | #endif // ARCHITECTURE_x86_64 | 77 | |
| 78 | for (int i = 0; i < num_attributes; i++) | ||
| 79 | registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; | ||
| 80 | } | ||
| 81 | |||
| 82 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | ||
| 83 | 83 | ||
| 84 | void ClearCache() { | ||
| 85 | #ifdef ARCHITECTURE_x86_64 | 84 | #ifdef ARCHITECTURE_x86_64 |
| 86 | shader_map.clear(); | 85 | static std::unique_ptr<JitX64Engine> jit_engine; |
| 87 | #endif // ARCHITECTURE_x86_64 | 86 | #endif // ARCHITECTURE_x86_64 |
| 88 | } | 87 | static InterpreterEngine interpreter_engine; |
| 89 | 88 | ||
| 90 | void ShaderSetup::Setup() { | 89 | ShaderEngine* GetEngine() { |
| 91 | #ifdef ARCHITECTURE_x86_64 | 90 | #ifdef ARCHITECTURE_x86_64 |
| 91 | // TODO(yuriks): Re-initialize on each change rather than being persistent | ||
| 92 | if (VideoCore::g_shader_jit_enabled) { | 92 | if (VideoCore::g_shader_jit_enabled) { |
| 93 | u64 cache_key = | 93 | if (jit_engine == nullptr) { |
| 94 | Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ | 94 | jit_engine = std::make_unique<JitX64Engine>(); |
| 95 | Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)); | ||
| 96 | |||
| 97 | auto iter = shader_map.find(cache_key); | ||
| 98 | if (iter != shader_map.end()) { | ||
| 99 | jit_shader = iter->second.get(); | ||
| 100 | } else { | ||
| 101 | auto shader = std::make_unique<JitShader>(); | ||
| 102 | shader->Compile(); | ||
| 103 | jit_shader = shader.get(); | ||
| 104 | shader_map[cache_key] = std::move(shader); | ||
| 105 | } | 95 | } |
| 96 | return jit_engine.get(); | ||
| 106 | } | 97 | } |
| 107 | #endif // ARCHITECTURE_x86_64 | 98 | #endif // ARCHITECTURE_x86_64 |
| 108 | } | ||
| 109 | |||
| 110 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | ||
| 111 | |||
| 112 | void ShaderSetup::Run(UnitState& state, const InputVertex& input, int num_attributes) { | ||
| 113 | auto& config = g_state.regs.vs; | ||
| 114 | auto& setup = g_state.vs; | ||
| 115 | |||
| 116 | MICROPROFILE_SCOPE(GPU_Shader); | ||
| 117 | 99 | ||
| 118 | // Setup input register table | 100 | return &interpreter_engine; |
| 119 | const auto& attribute_register_map = config.input_register_map; | 101 | } |
| 120 | |||
| 121 | for (int i = 0; i < num_attributes; i++) | ||
| 122 | state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; | ||
| 123 | |||
| 124 | state.conditional_code[0] = false; | ||
| 125 | state.conditional_code[1] = false; | ||
| 126 | 102 | ||
| 103 | void Shutdown() { | ||
| 127 | #ifdef ARCHITECTURE_x86_64 | 104 | #ifdef ARCHITECTURE_x86_64 |
| 128 | if (VideoCore::g_shader_jit_enabled) { | 105 | jit_engine = nullptr; |
| 129 | jit_shader->Run(setup, state, config.main_offset); | ||
| 130 | } else { | ||
| 131 | DebugData<false> dummy_debug_data; | ||
| 132 | RunInterpreter(setup, state, dummy_debug_data, config.main_offset); | ||
| 133 | } | ||
| 134 | #else | ||
| 135 | DebugData<false> dummy_debug_data; | ||
| 136 | RunInterpreter(setup, state, dummy_debug_data, config.main_offset); | ||
| 137 | #endif // ARCHITECTURE_x86_64 | 106 | #endif // ARCHITECTURE_x86_64 |
| 138 | } | 107 | } |
| 139 | 108 | ||
| 140 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, | ||
| 141 | const Regs::ShaderConfig& config, | ||
| 142 | const ShaderSetup& setup) { | ||
| 143 | UnitState state; | ||
| 144 | DebugData<true> debug_data; | ||
| 145 | |||
| 146 | // Setup input register table | ||
| 147 | boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero())); | ||
| 148 | const auto& attribute_register_map = config.input_register_map; | ||
| 149 | for (int i = 0; i < num_attributes; i++) | ||
| 150 | state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; | ||
| 151 | |||
| 152 | state.conditional_code[0] = false; | ||
| 153 | state.conditional_code[1] = false; | ||
| 154 | |||
| 155 | RunInterpreter(setup, state, debug_data, config.main_offset); | ||
| 156 | return debug_data; | ||
| 157 | } | ||
| 158 | |||
| 159 | } // namespace Shader | 109 | } // namespace Shader |
| 160 | 110 | ||
| 161 | } // namespace Pica | 111 | } // namespace Pica |