diff options
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/shader.cpp | 39 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 92 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 290 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.h | 5 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 212 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 8 |
6 files changed, 335 insertions, 311 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index f565e2c91..852c5a9a0 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -46,10 +46,8 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) { | |||
| 46 | 46 | ||
| 47 | const auto& output_register_map = g_state.regs.vs_output_attributes[index]; | 47 | const auto& output_register_map = g_state.regs.vs_output_attributes[index]; |
| 48 | 48 | ||
| 49 | u32 semantics[4] = { | 49 | u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, |
| 50 | output_register_map.map_x, output_register_map.map_y, | 50 | output_register_map.map_z, output_register_map.map_w}; |
| 51 | output_register_map.map_z, output_register_map.map_w | ||
| 52 | }; | ||
| 53 | 51 | ||
| 54 | for (unsigned comp = 0; comp < 4; ++comp) { | 52 | for (unsigned comp = 0; comp < 4; ++comp) { |
| 55 | float24* out = ((float24*)&ret) + semantics[comp]; | 53 | float24* out = ((float24*)&ret) + semantics[comp]; |
| @@ -65,19 +63,20 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) { | |||
| 65 | index++; | 63 | index++; |
| 66 | } | 64 | } |
| 67 | 65 | ||
| 68 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation | 66 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing |
| 67 | // interpolation | ||
| 69 | for (unsigned i = 0; i < 4; ++i) { | 68 | for (unsigned i = 0; i < 4; ++i) { |
| 70 | ret.color[i] = float24::FromFloat32( | 69 | ret.color[i] = float24::FromFloat32(std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); |
| 71 | std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); | ||
| 72 | } | 70 | } |
| 73 | 71 | ||
| 74 | LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " | 72 | LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " |
| 75 | "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", | 73 | "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", |
| 76 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | 74 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), |
| 77 | ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), | 75 | ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), |
| 78 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | 76 | ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), ret.color.x.ToFloat32(), |
| 79 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), | 77 | ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), |
| 80 | ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); | 78 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), ret.view.x.ToFloat32(), |
| 79 | ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); | ||
| 81 | 80 | ||
| 82 | return ret; | 81 | return ret; |
| 83 | } | 82 | } |
| @@ -96,8 +95,9 @@ void ClearCache() { | |||
| 96 | void ShaderSetup::Setup() { | 95 | void ShaderSetup::Setup() { |
| 97 | #ifdef ARCHITECTURE_x86_64 | 96 | #ifdef ARCHITECTURE_x86_64 |
| 98 | if (VideoCore::g_shader_jit_enabled) { | 97 | if (VideoCore::g_shader_jit_enabled) { |
| 99 | u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ | 98 | u64 cache_key = |
| 100 | Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data))); | 99 | (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ |
| 100 | Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data))); | ||
| 101 | 101 | ||
| 102 | auto iter = shader_map.find(cache_key); | 102 | auto iter = shader_map.find(cache_key); |
| 103 | if (iter != shader_map.end()) { | 103 | if (iter != shader_map.end()) { |
| @@ -127,7 +127,7 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num | |||
| 127 | const auto& attribute_register_map = config.input_register_map; | 127 | const auto& attribute_register_map = config.input_register_map; |
| 128 | 128 | ||
| 129 | for (unsigned i = 0; i < num_attributes; i++) | 129 | for (unsigned i = 0; i < num_attributes; i++) |
| 130 | state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; | 130 | state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; |
| 131 | 131 | ||
| 132 | state.conditional_code[0] = false; | 132 | state.conditional_code[0] = false; |
| 133 | state.conditional_code[1] = false; | 133 | state.conditional_code[1] = false; |
| @@ -140,10 +140,11 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num | |||
| 140 | #else | 140 | #else |
| 141 | RunInterpreter(setup, state, config.main_offset); | 141 | RunInterpreter(setup, state, config.main_offset); |
| 142 | #endif // ARCHITECTURE_x86_64 | 142 | #endif // ARCHITECTURE_x86_64 |
| 143 | |||
| 144 | } | 143 | } |
| 145 | 144 | ||
| 146 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { | 145 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, |
| 146 | const Regs::ShaderConfig& config, | ||
| 147 | const ShaderSetup& setup) { | ||
| 147 | UnitState<true> state; | 148 | UnitState<true> state; |
| 148 | 149 | ||
| 149 | state.debug.max_offset = 0; | 150 | state.debug.max_offset = 0; |
| @@ -155,7 +156,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_ | |||
| 155 | boost::fill(state.registers.input, &dummy_register); | 156 | boost::fill(state.registers.input, &dummy_register); |
| 156 | 157 | ||
| 157 | for (unsigned i = 0; i < num_attributes; i++) | 158 | for (unsigned i = 0; i < num_attributes; i++) |
| 158 | state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; | 159 | state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; |
| 159 | 160 | ||
| 160 | state.conditional_code[0] = false; | 161 | state.conditional_code[0] = false; |
| 161 | state.conditional_code[1] = false; | 162 | state.conditional_code[1] = false; |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index fee16df62..830d933a8 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -94,46 +94,46 @@ struct OutputRegisters { | |||
| 94 | static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD"); | 94 | static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD"); |
| 95 | 95 | ||
| 96 | // Helper structure used to keep track of data useful for inspection of shader emulation | 96 | // Helper structure used to keep track of data useful for inspection of shader emulation |
| 97 | template<bool full_debugging> | 97 | template <bool full_debugging> |
| 98 | struct DebugData; | 98 | struct DebugData; |
| 99 | 99 | ||
| 100 | template<> | 100 | template <> |
| 101 | struct DebugData<false> { | 101 | struct DebugData<false> { |
| 102 | // TODO: Hide these behind and interface and move them to DebugData<true> | 102 | // TODO: Hide these behind and interface and move them to DebugData<true> |
| 103 | u32 max_offset; // maximum program counter ever reached | 103 | u32 max_offset; // maximum program counter ever reached |
| 104 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | 104 | u32 max_opdesc_id; // maximum swizzle pattern index ever used |
| 105 | }; | 105 | }; |
| 106 | 106 | ||
| 107 | template<> | 107 | template <> |
| 108 | struct DebugData<true> { | 108 | struct DebugData<true> { |
| 109 | // Records store the input and output operands of a particular instruction. | 109 | // Records store the input and output operands of a particular instruction. |
| 110 | struct Record { | 110 | struct Record { |
| 111 | enum Type { | 111 | enum Type { |
| 112 | // Floating point arithmetic operands | 112 | // Floating point arithmetic operands |
| 113 | SRC1 = 0x1, | 113 | SRC1 = 0x1, |
| 114 | SRC2 = 0x2, | 114 | SRC2 = 0x2, |
| 115 | SRC3 = 0x4, | 115 | SRC3 = 0x4, |
| 116 | 116 | ||
| 117 | // Initial and final output operand value | 117 | // Initial and final output operand value |
| 118 | DEST_IN = 0x8, | 118 | DEST_IN = 0x8, |
| 119 | DEST_OUT = 0x10, | 119 | DEST_OUT = 0x10, |
| 120 | 120 | ||
| 121 | // Current and next instruction offset (in words) | 121 | // Current and next instruction offset (in words) |
| 122 | CUR_INSTR = 0x20, | 122 | CUR_INSTR = 0x20, |
| 123 | NEXT_INSTR = 0x40, | 123 | NEXT_INSTR = 0x40, |
| 124 | 124 | ||
| 125 | // Output address register value | 125 | // Output address register value |
| 126 | ADDR_REG_OUT = 0x80, | 126 | ADDR_REG_OUT = 0x80, |
| 127 | 127 | ||
| 128 | // Result of a comparison instruction | 128 | // Result of a comparison instruction |
| 129 | CMP_RESULT = 0x100, | 129 | CMP_RESULT = 0x100, |
| 130 | 130 | ||
| 131 | // Input values for conditional flow control instructions | 131 | // Input values for conditional flow control instructions |
| 132 | COND_BOOL_IN = 0x200, | 132 | COND_BOOL_IN = 0x200, |
| 133 | COND_CMP_IN = 0x400, | 133 | COND_CMP_IN = 0x400, |
| 134 | 134 | ||
| 135 | // Input values for a loop | 135 | // Input values for a loop |
| 136 | LOOP_INT_IN = 0x800, | 136 | LOOP_INT_IN = 0x800, |
| 137 | }; | 137 | }; |
| 138 | 138 | ||
| 139 | Math::Vec4<float24> src1; | 139 | Math::Vec4<float24> src1; |
| @@ -156,7 +156,7 @@ struct DebugData<true> { | |||
| 156 | unsigned mask = 0; | 156 | unsigned mask = 0; |
| 157 | }; | 157 | }; |
| 158 | 158 | ||
| 159 | u32 max_offset; // maximum program counter ever reached | 159 | u32 max_offset; // maximum program counter ever reached |
| 160 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | 160 | u32 max_opdesc_id; // maximum swizzle pattern index ever used |
| 161 | 161 | ||
| 162 | // List of records for each executed shader instruction | 162 | // List of records for each executed shader instruction |
| @@ -167,10 +167,10 @@ struct DebugData<true> { | |||
| 167 | using DebugDataRecord = DebugData<true>::Record; | 167 | using DebugDataRecord = DebugData<true>::Record; |
| 168 | 168 | ||
| 169 | // Helper function to set a DebugData<true>::Record field based on the template enum parameter. | 169 | // Helper function to set a DebugData<true>::Record field based on the template enum parameter. |
| 170 | template<DebugDataRecord::Type type, typename ValueType> | 170 | template <DebugDataRecord::Type type, typename ValueType> |
| 171 | inline void SetField(DebugDataRecord& record, ValueType value); | 171 | inline void SetField(DebugDataRecord& record, ValueType value); |
| 172 | 172 | ||
| 173 | template<> | 173 | template <> |
| 174 | inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { | 174 | inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { |
| 175 | record.src1.x = value[0]; | 175 | record.src1.x = value[0]; |
| 176 | record.src1.y = value[1]; | 176 | record.src1.y = value[1]; |
| @@ -178,7 +178,7 @@ inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* va | |||
| 178 | record.src1.w = value[3]; | 178 | record.src1.w = value[3]; |
| 179 | } | 179 | } |
| 180 | 180 | ||
| 181 | template<> | 181 | template <> |
| 182 | inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { | 182 | inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { |
| 183 | record.src2.x = value[0]; | 183 | record.src2.x = value[0]; |
| 184 | record.src2.y = value[1]; | 184 | record.src2.y = value[1]; |
| @@ -186,7 +186,7 @@ inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* va | |||
| 186 | record.src2.w = value[3]; | 186 | record.src2.w = value[3]; |
| 187 | } | 187 | } |
| 188 | 188 | ||
| 189 | template<> | 189 | template <> |
| 190 | inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { | 190 | inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { |
| 191 | record.src3.x = value[0]; | 191 | record.src3.x = value[0]; |
| 192 | record.src3.y = value[1]; | 192 | record.src3.y = value[1]; |
| @@ -194,7 +194,7 @@ inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* va | |||
| 194 | record.src3.w = value[3]; | 194 | record.src3.w = value[3]; |
| 195 | } | 195 | } |
| 196 | 196 | ||
| 197 | template<> | 197 | template <> |
| 198 | inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { | 198 | inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { |
| 199 | record.dest_in.x = value[0]; | 199 | record.dest_in.x = value[0]; |
| 200 | record.dest_in.y = value[1]; | 200 | record.dest_in.y = value[1]; |
| @@ -202,7 +202,7 @@ inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* | |||
| 202 | record.dest_in.w = value[3]; | 202 | record.dest_in.w = value[3]; |
| 203 | } | 203 | } |
| 204 | 204 | ||
| 205 | template<> | 205 | template <> |
| 206 | inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { | 206 | inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { |
| 207 | record.dest_out.x = value[0]; | 207 | record.dest_out.x = value[0]; |
| 208 | record.dest_out.y = value[1]; | 208 | record.dest_out.y = value[1]; |
| @@ -210,67 +210,66 @@ inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24 | |||
| 210 | record.dest_out.w = value[3]; | 210 | record.dest_out.w = value[3]; |
| 211 | } | 211 | } |
| 212 | 212 | ||
| 213 | template<> | 213 | template <> |
| 214 | inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { | 214 | inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { |
| 215 | record.address_registers[0] = value[0]; | 215 | record.address_registers[0] = value[0]; |
| 216 | record.address_registers[1] = value[1]; | 216 | record.address_registers[1] = value[1]; |
| 217 | } | 217 | } |
| 218 | 218 | ||
| 219 | template<> | 219 | template <> |
| 220 | inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { | 220 | inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { |
| 221 | record.conditional_code[0] = value[0]; | 221 | record.conditional_code[0] = value[0]; |
| 222 | record.conditional_code[1] = value[1]; | 222 | record.conditional_code[1] = value[1]; |
| 223 | } | 223 | } |
| 224 | 224 | ||
| 225 | template<> | 225 | template <> |
| 226 | inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { | 226 | inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { |
| 227 | record.cond_bool = value; | 227 | record.cond_bool = value; |
| 228 | } | 228 | } |
| 229 | 229 | ||
| 230 | template<> | 230 | template <> |
| 231 | inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { | 231 | inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { |
| 232 | record.cond_cmp[0] = value[0]; | 232 | record.cond_cmp[0] = value[0]; |
| 233 | record.cond_cmp[1] = value[1]; | 233 | record.cond_cmp[1] = value[1]; |
| 234 | } | 234 | } |
| 235 | 235 | ||
| 236 | template<> | 236 | template <> |
| 237 | inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { | 237 | inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { |
| 238 | record.loop_int = value; | 238 | record.loop_int = value; |
| 239 | } | 239 | } |
| 240 | 240 | ||
| 241 | template<> | 241 | template <> |
| 242 | inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { | 242 | inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { |
| 243 | record.instruction_offset = value; | 243 | record.instruction_offset = value; |
| 244 | } | 244 | } |
| 245 | 245 | ||
| 246 | template<> | 246 | template <> |
| 247 | inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { | 247 | inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { |
| 248 | record.next_instruction = value; | 248 | record.next_instruction = value; |
| 249 | } | 249 | } |
| 250 | 250 | ||
| 251 | // Helper function to set debug information on the current shader iteration. | 251 | // Helper function to set debug information on the current shader iteration. |
| 252 | template<DebugDataRecord::Type type, typename ValueType> | 252 | template <DebugDataRecord::Type type, typename ValueType> |
| 253 | inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { | 253 | inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { |
| 254 | // Debugging disabled => nothing to do | 254 | // Debugging disabled => nothing to do |
| 255 | } | 255 | } |
| 256 | 256 | ||
| 257 | template<DebugDataRecord::Type type, typename ValueType> | 257 | template <DebugDataRecord::Type type, typename ValueType> |
| 258 | inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { | 258 | inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { |
| 259 | if (offset >= debug_data.records.size()) | 259 | if (offset >= debug_data.records.size()) |
| 260 | debug_data.records.resize(offset + 1); | 260 | debug_data.records.resize(offset + 1); |
| 261 | 261 | ||
| 262 | SetField<type, ValueType>(debug_data.records[offset], value); | 262 | SetField<type, ValueType>(debug_data.records[offset], value); |
| 263 | debug_data.records[offset].mask |= type; | 263 | debug_data.records[offset].mask |= type; |
| 264 | } | 264 | } |
| 265 | 265 | ||
| 266 | |||
| 267 | /** | 266 | /** |
| 268 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS | 267 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS |
| 269 | * has four shader units that process shaders in parallel. At the present, Citra only implements a | 268 | * has four shader units that process shaders in parallel. At the present, Citra only implements a |
| 270 | * single shader unit that processes all shaders serially. Putting the state information in a struct | 269 | * single shader unit that processes all shaders serially. Putting the state information in a struct |
| 271 | * here will make it easier for us to parallelize the shader processing later. | 270 | * here will make it easier for us to parallelize the shader processing later. |
| 272 | */ | 271 | */ |
| 273 | template<bool Debug> | 272 | template <bool Debug> |
| 274 | struct UnitState { | 273 | struct UnitState { |
| 275 | struct Registers { | 274 | struct Registers { |
| 276 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore | 275 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore |
| @@ -293,10 +292,12 @@ struct UnitState { | |||
| 293 | static size_t InputOffset(const SourceRegister& reg) { | 292 | static size_t InputOffset(const SourceRegister& reg) { |
| 294 | switch (reg.GetRegisterType()) { | 293 | switch (reg.GetRegisterType()) { |
| 295 | case RegisterType::Input: | 294 | case RegisterType::Input: |
| 296 | return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 295 | return offsetof(UnitState, registers.input) + |
| 296 | reg.GetIndex() * sizeof(Math::Vec4<float24>); | ||
| 297 | 297 | ||
| 298 | case RegisterType::Temporary: | 298 | case RegisterType::Temporary: |
| 299 | return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 299 | return offsetof(UnitState, registers.temporary) + |
| 300 | reg.GetIndex() * sizeof(Math::Vec4<float24>); | ||
| 300 | 301 | ||
| 301 | default: | 302 | default: |
| 302 | UNREACHABLE(); | 303 | UNREACHABLE(); |
| @@ -307,10 +308,12 @@ struct UnitState { | |||
| 307 | static size_t OutputOffset(const DestRegister& reg) { | 308 | static size_t OutputOffset(const DestRegister& reg) { |
| 308 | switch (reg.GetRegisterType()) { | 309 | switch (reg.GetRegisterType()) { |
| 309 | case RegisterType::Output: | 310 | case RegisterType::Output: |
| 310 | return offsetof(UnitState, output_registers.value) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 311 | return offsetof(UnitState, output_registers.value) + |
| 312 | reg.GetIndex() * sizeof(Math::Vec4<float24>); | ||
| 311 | 313 | ||
| 312 | case RegisterType::Temporary: | 314 | case RegisterType::Temporary: |
| 313 | return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 315 | return offsetof(UnitState, registers.temporary) + |
| 316 | reg.GetIndex() * sizeof(Math::Vec4<float24>); | ||
| 314 | 317 | ||
| 315 | default: | 318 | default: |
| 316 | UNREACHABLE(); | 319 | UNREACHABLE(); |
| @@ -336,13 +339,13 @@ struct ShaderSetup { | |||
| 336 | static size_t UniformOffset(RegisterType type, unsigned index) { | 339 | static size_t UniformOffset(RegisterType type, unsigned index) { |
| 337 | switch (type) { | 340 | switch (type) { |
| 338 | case RegisterType::FloatUniform: | 341 | case RegisterType::FloatUniform: |
| 339 | return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>); | 342 | return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>); |
| 340 | 343 | ||
| 341 | case RegisterType::BoolUniform: | 344 | case RegisterType::BoolUniform: |
| 342 | return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool); | 345 | return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool); |
| 343 | 346 | ||
| 344 | case RegisterType::IntUniform: | 347 | case RegisterType::IntUniform: |
| 345 | return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>); | 348 | return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>); |
| 346 | 349 | ||
| 347 | default: | 350 | default: |
| 348 | UNREACHABLE(); | 351 | UNREACHABLE(); |
| @@ -354,7 +357,8 @@ struct ShaderSetup { | |||
| 354 | std::array<u32, 1024> swizzle_data; | 357 | std::array<u32, 1024> swizzle_data; |
| 355 | 358 | ||
| 356 | /** | 359 | /** |
| 357 | * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per | 360 | * Performs any shader unit setup that only needs to happen once per shader (as opposed to once |
| 361 | * per | ||
| 358 | * vertex, which would happen within the `Run` function). | 362 | * vertex, which would happen within the `Run` function). |
| 359 | */ | 363 | */ |
| 360 | void Setup(); | 364 | void Setup(); |
| @@ -375,8 +379,8 @@ struct ShaderSetup { | |||
| 375 | * @param setup Setup object for the shader pipeline | 379 | * @param setup Setup object for the shader pipeline |
| 376 | * @return Debug information for this shader with regards to the given vertex | 380 | * @return Debug information for this shader with regards to the given vertex |
| 377 | */ | 381 | */ |
| 378 | DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); | 382 | DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, |
| 379 | 383 | const Regs::ShaderConfig& config, const ShaderSetup& setup); | |
| 380 | }; | 384 | }; |
| 381 | 385 | ||
| 382 | } // namespace Shader | 386 | } // namespace Shader |
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index f6c86a759..681ff9728 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -40,7 +40,7 @@ struct CallStackElement { | |||
| 40 | u32 loop_address; // The address where we'll return to after each loop iteration | 40 | u32 loop_address; // The address where we'll return to after each loop iteration |
| 41 | }; | 41 | }; |
| 42 | 42 | ||
| 43 | template<bool Debug> | 43 | template <bool Debug> |
| 44 | void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) { | 44 | void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) { |
| 45 | // TODO: Is there a maximal size for this? | 45 | // TODO: Is there a maximal size for this? |
| 46 | boost::container::static_vector<CallStackElement, 16> call_stack; | 46 | boost::container::static_vector<CallStackElement, 16> call_stack; |
| @@ -74,14 +74,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 74 | } | 74 | } |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | const Instruction instr = { program_code[program_counter] }; | 77 | const Instruction instr = {program_code[program_counter]}; |
| 78 | const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; | 78 | const SwizzlePattern swizzle = {swizzle_data[instr.common.operand_desc_id]}; |
| 79 | 79 | ||
| 80 | auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions, | 80 | auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, |
| 81 | u32 return_offset, u8 repeat_count, u8 loop_increment) { | 81 | u32 num_instructions, u32 return_offset, |
| 82 | program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset | 82 | u8 repeat_count, u8 loop_increment) { |
| 83 | program_counter = | ||
| 84 | offset - | ||
| 85 | 1; // -1 to make sure when incrementing the PC we end up at the correct offset | ||
| 83 | ASSERT(call_stack.size() < call_stack.capacity()); | 86 | ASSERT(call_stack.size() < call_stack.capacity()); |
| 84 | call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); | 87 | call_stack.push_back( |
| 88 | {offset + num_instructions, return_offset, repeat_count, loop_increment, offset}); | ||
| 85 | }; | 89 | }; |
| 86 | Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter); | 90 | Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter); |
| 87 | if (iteration > 0) | 91 | if (iteration > 0) |
| @@ -106,24 +110,26 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 106 | }; | 110 | }; |
| 107 | 111 | ||
| 108 | switch (instr.opcode.Value().GetInfo().type) { | 112 | switch (instr.opcode.Value().GetInfo().type) { |
| 109 | case OpCode::Type::Arithmetic: | 113 | case OpCode::Type::Arithmetic: { |
| 110 | { | 114 | const bool is_inverted = |
| 111 | const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); | 115 | (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); |
| 112 | 116 | ||
| 113 | const int address_offset = (instr.common.address_register_index == 0) | 117 | const int address_offset = |
| 114 | ? 0 : state.address_registers[instr.common.address_register_index - 1]; | 118 | (instr.common.address_register_index == 0) |
| 119 | ? 0 | ||
| 120 | : state.address_registers[instr.common.address_register_index - 1]; | ||
| 115 | 121 | ||
| 116 | const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset)); | 122 | const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + |
| 117 | const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset)); | 123 | (!is_inverted * address_offset)); |
| 124 | const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + | ||
| 125 | (is_inverted * address_offset)); | ||
| 118 | 126 | ||
| 119 | const bool negate_src1 = ((bool)swizzle.negate_src1 != false); | 127 | const bool negate_src1 = ((bool)swizzle.negate_src1 != false); |
| 120 | const bool negate_src2 = ((bool)swizzle.negate_src2 != false); | 128 | const bool negate_src2 = ((bool)swizzle.negate_src2 != false); |
| 121 | 129 | ||
| 122 | float24 src1[4] = { | 130 | float24 src1[4] = { |
| 123 | src1_[(int)swizzle.GetSelectorSrc1(0)], | 131 | src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)], |
| 124 | src1_[(int)swizzle.GetSelectorSrc1(1)], | 132 | src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)], |
| 125 | src1_[(int)swizzle.GetSelectorSrc1(2)], | ||
| 126 | src1_[(int)swizzle.GetSelectorSrc1(3)], | ||
| 127 | }; | 133 | }; |
| 128 | if (negate_src1) { | 134 | if (negate_src1) { |
| 129 | src1[0] = src1[0] * float24::FromFloat32(-1); | 135 | src1[0] = src1[0] * float24::FromFloat32(-1); |
| @@ -132,10 +138,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 132 | src1[3] = src1[3] * float24::FromFloat32(-1); | 138 | src1[3] = src1[3] * float24::FromFloat32(-1); |
| 133 | } | 139 | } |
| 134 | float24 src2[4] = { | 140 | float24 src2[4] = { |
| 135 | src2_[(int)swizzle.GetSelectorSrc2(0)], | 141 | src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)], |
| 136 | src2_[(int)swizzle.GetSelectorSrc2(1)], | 142 | src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)], |
| 137 | src2_[(int)swizzle.GetSelectorSrc2(2)], | ||
| 138 | src2_[(int)swizzle.GetSelectorSrc2(3)], | ||
| 139 | }; | 143 | }; |
| 140 | if (negate_src2) { | 144 | if (negate_src2) { |
| 141 | src2[0] = src2[0] * float24::FromFloat32(-1); | 145 | src2[0] = src2[0] * float24::FromFloat32(-1); |
| @@ -144,15 +148,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 144 | src2[3] = src2[3] * float24::FromFloat32(-1); | 148 | src2[3] = src2[3] * float24::FromFloat32(-1); |
| 145 | } | 149 | } |
| 146 | 150 | ||
| 147 | float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] | 151 | float24* dest = |
| 148 | : (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] | 152 | (instr.common.dest.Value() < 0x10) |
| 149 | : dummy_vec4_float24; | 153 | ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] |
| 154 | : (instr.common.dest.Value() < 0x20) | ||
| 155 | ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] | ||
| 156 | : dummy_vec4_float24; | ||
| 150 | 157 | ||
| 151 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | 158 | state.debug.max_opdesc_id = |
| 159 | std::max<u32>(state.debug.max_opdesc_id, 1 + instr.common.operand_desc_id); | ||
| 152 | 160 | ||
| 153 | switch (instr.opcode.Value().EffectiveOpCode()) { | 161 | switch (instr.opcode.Value().EffectiveOpCode()) { |
| 154 | case OpCode::Id::ADD: | 162 | case OpCode::Id::ADD: { |
| 155 | { | ||
| 156 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 163 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 157 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 164 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); |
| 158 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 165 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| @@ -166,8 +173,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 166 | break; | 173 | break; |
| 167 | } | 174 | } |
| 168 | 175 | ||
| 169 | case OpCode::Id::MUL: | 176 | case OpCode::Id::MUL: { |
| 170 | { | ||
| 171 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 177 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 172 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 178 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); |
| 173 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 179 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| @@ -228,8 +234,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 228 | case OpCode::Id::DP3: | 234 | case OpCode::Id::DP3: |
| 229 | case OpCode::Id::DP4: | 235 | case OpCode::Id::DP4: |
| 230 | case OpCode::Id::DPH: | 236 | case OpCode::Id::DPH: |
| 231 | case OpCode::Id::DPHI: | 237 | case OpCode::Id::DPHI: { |
| 232 | { | ||
| 233 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 238 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 234 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 239 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); |
| 235 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 240 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| @@ -239,7 +244,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 239 | src1[3] = float24::FromFloat32(1.0f); | 244 | src1[3] = float24::FromFloat32(1.0f); |
| 240 | 245 | ||
| 241 | int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4; | 246 | int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4; |
| 242 | float24 dot = std::inner_product(src1, src1 + num_components, src2, float24::FromFloat32(0.f)); | 247 | float24 dot = std::inner_product(src1, src1 + num_components, src2, |
| 248 | float24::FromFloat32(0.f)); | ||
| 243 | 249 | ||
| 244 | for (int i = 0; i < 4; ++i) { | 250 | for (int i = 0; i < 4; ++i) { |
| 245 | if (!swizzle.DestComponentEnabled(i)) | 251 | if (!swizzle.DestComponentEnabled(i)) |
| @@ -252,8 +258,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 252 | } | 258 | } |
| 253 | 259 | ||
| 254 | // Reciprocal | 260 | // Reciprocal |
| 255 | case OpCode::Id::RCP: | 261 | case OpCode::Id::RCP: { |
| 256 | { | ||
| 257 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 262 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 258 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 263 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| 259 | float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32()); | 264 | float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32()); |
| @@ -268,8 +273,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 268 | } | 273 | } |
| 269 | 274 | ||
| 270 | // Reciprocal Square Root | 275 | // Reciprocal Square Root |
| 271 | case OpCode::Id::RSQ: | 276 | case OpCode::Id::RSQ: { |
| 272 | { | ||
| 273 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 277 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 274 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 278 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| 275 | float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32())); | 279 | float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32())); |
| @@ -283,8 +287,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 283 | break; | 287 | break; |
| 284 | } | 288 | } |
| 285 | 289 | ||
| 286 | case OpCode::Id::MOVA: | 290 | case OpCode::Id::MOVA: { |
| 287 | { | ||
| 288 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 291 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 289 | for (int i = 0; i < 2; ++i) { | 292 | for (int i = 0; i < 2; ++i) { |
| 290 | if (!swizzle.DestComponentEnabled(i)) | 293 | if (!swizzle.DestComponentEnabled(i)) |
| @@ -293,12 +296,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 293 | // TODO: Figure out how the rounding is done on hardware | 296 | // TODO: Figure out how the rounding is done on hardware |
| 294 | state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); | 297 | state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); |
| 295 | } | 298 | } |
| 296 | Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, state.address_registers); | 299 | Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, |
| 300 | state.address_registers); | ||
| 297 | break; | 301 | break; |
| 298 | } | 302 | } |
| 299 | 303 | ||
| 300 | case OpCode::Id::MOV: | 304 | case OpCode::Id::MOV: { |
| 301 | { | ||
| 302 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 305 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 303 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 306 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| 304 | for (int i = 0; i < 4; ++i) { | 307 | for (int i = 0; i < 4; ++i) { |
| @@ -320,7 +323,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 320 | if (!swizzle.DestComponentEnabled(i)) | 323 | if (!swizzle.DestComponentEnabled(i)) |
| 321 | continue; | 324 | continue; |
| 322 | 325 | ||
| 323 | dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); | 326 | dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) |
| 327 | : float24::FromFloat32(0.0f); | ||
| 324 | } | 328 | } |
| 325 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 329 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); |
| 326 | break; | 330 | break; |
| @@ -334,7 +338,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 334 | if (!swizzle.DestComponentEnabled(i)) | 338 | if (!swizzle.DestComponentEnabled(i)) |
| 335 | continue; | 339 | continue; |
| 336 | 340 | ||
| 337 | dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); | 341 | dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) |
| 342 | : float24::FromFloat32(0.0f); | ||
| 338 | } | 343 | } |
| 339 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 344 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); |
| 340 | break; | 345 | break; |
| @@ -349,40 +354,39 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 349 | auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); | 354 | auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); |
| 350 | 355 | ||
| 351 | switch (op) { | 356 | switch (op) { |
| 352 | case Instruction::Common::CompareOpType::Equal: | 357 | case Instruction::Common::CompareOpType::Equal: |
| 353 | state.conditional_code[i] = (src1[i] == src2[i]); | 358 | state.conditional_code[i] = (src1[i] == src2[i]); |
| 354 | break; | 359 | break; |
| 355 | 360 | ||
| 356 | case Instruction::Common::CompareOpType::NotEqual: | 361 | case Instruction::Common::CompareOpType::NotEqual: |
| 357 | state.conditional_code[i] = (src1[i] != src2[i]); | 362 | state.conditional_code[i] = (src1[i] != src2[i]); |
| 358 | break; | 363 | break; |
| 359 | 364 | ||
| 360 | case Instruction::Common::CompareOpType::LessThan: | 365 | case Instruction::Common::CompareOpType::LessThan: |
| 361 | state.conditional_code[i] = (src1[i] < src2[i]); | 366 | state.conditional_code[i] = (src1[i] < src2[i]); |
| 362 | break; | 367 | break; |
| 363 | 368 | ||
| 364 | case Instruction::Common::CompareOpType::LessEqual: | 369 | case Instruction::Common::CompareOpType::LessEqual: |
| 365 | state.conditional_code[i] = (src1[i] <= src2[i]); | 370 | state.conditional_code[i] = (src1[i] <= src2[i]); |
| 366 | break; | 371 | break; |
| 367 | 372 | ||
| 368 | case Instruction::Common::CompareOpType::GreaterThan: | 373 | case Instruction::Common::CompareOpType::GreaterThan: |
| 369 | state.conditional_code[i] = (src1[i] > src2[i]); | 374 | state.conditional_code[i] = (src1[i] > src2[i]); |
| 370 | break; | 375 | break; |
| 371 | 376 | ||
| 372 | case Instruction::Common::CompareOpType::GreaterEqual: | 377 | case Instruction::Common::CompareOpType::GreaterEqual: |
| 373 | state.conditional_code[i] = (src1[i] >= src2[i]); | 378 | state.conditional_code[i] = (src1[i] >= src2[i]); |
| 374 | break; | 379 | break; |
| 375 | 380 | ||
| 376 | default: | 381 | default: |
| 377 | LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op)); | 382 | LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op)); |
| 378 | break; | 383 | break; |
| 379 | } | 384 | } |
| 380 | } | 385 | } |
| 381 | Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); | 386 | Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); |
| 382 | break; | 387 | break; |
| 383 | 388 | ||
| 384 | case OpCode::Id::EX2: | 389 | case OpCode::Id::EX2: { |
| 385 | { | ||
| 386 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 390 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 387 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 391 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| 388 | 392 | ||
| @@ -399,8 +403,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 399 | break; | 403 | break; |
| 400 | } | 404 | } |
| 401 | 405 | ||
| 402 | case OpCode::Id::LG2: | 406 | case OpCode::Id::LG2: { |
| 403 | { | ||
| 404 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 407 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 405 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 408 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| 406 | 409 | ||
| @@ -419,7 +422,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 419 | 422 | ||
| 420 | default: | 423 | default: |
| 421 | LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", | 424 | LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", |
| 422 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); | 425 | (int)instr.opcode.Value().EffectiveOpCode(), |
| 426 | instr.opcode.Value().GetInfo().name, instr.hex); | ||
| 423 | DEBUG_ASSERT(false); | 427 | DEBUG_ASSERT(false); |
| 424 | break; | 428 | break; |
| 425 | } | 429 | } |
| @@ -427,30 +431,32 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 427 | break; | 431 | break; |
| 428 | } | 432 | } |
| 429 | 433 | ||
| 430 | case OpCode::Type::MultiplyAdd: | 434 | case OpCode::Type::MultiplyAdd: { |
| 431 | { | ||
| 432 | if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) || | 435 | if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) || |
| 433 | (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) { | 436 | (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) { |
| 434 | const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(&swizzle_data[instr.mad.operand_desc_id]); | 437 | const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>( |
| 438 | &swizzle_data[instr.mad.operand_desc_id]); | ||
| 435 | 439 | ||
| 436 | bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI); | 440 | bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI); |
| 437 | 441 | ||
| 438 | const int address_offset = (instr.mad.address_register_index == 0) | 442 | const int address_offset = |
| 439 | ? 0 : state.address_registers[instr.mad.address_register_index - 1]; | 443 | (instr.mad.address_register_index == 0) |
| 444 | ? 0 | ||
| 445 | : state.address_registers[instr.mad.address_register_index - 1]; | ||
| 440 | 446 | ||
| 441 | const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); | 447 | const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); |
| 442 | const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + (!is_inverted * address_offset)); | 448 | const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + |
| 443 | const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + ( is_inverted * address_offset)); | 449 | (!is_inverted * address_offset)); |
| 450 | const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + | ||
| 451 | (is_inverted * address_offset)); | ||
| 444 | 452 | ||
| 445 | const bool negate_src1 = ((bool)swizzle.negate_src1 != false); | 453 | const bool negate_src1 = ((bool)swizzle.negate_src1 != false); |
| 446 | const bool negate_src2 = ((bool)swizzle.negate_src2 != false); | 454 | const bool negate_src2 = ((bool)swizzle.negate_src2 != false); |
| 447 | const bool negate_src3 = ((bool)swizzle.negate_src3 != false); | 455 | const bool negate_src3 = ((bool)swizzle.negate_src3 != false); |
| 448 | 456 | ||
| 449 | float24 src1[4] = { | 457 | float24 src1[4] = { |
| 450 | src1_[(int)swizzle.GetSelectorSrc1(0)], | 458 | src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)], |
| 451 | src1_[(int)swizzle.GetSelectorSrc1(1)], | 459 | src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)], |
| 452 | src1_[(int)swizzle.GetSelectorSrc1(2)], | ||
| 453 | src1_[(int)swizzle.GetSelectorSrc1(3)], | ||
| 454 | }; | 460 | }; |
| 455 | if (negate_src1) { | 461 | if (negate_src1) { |
| 456 | src1[0] = src1[0] * float24::FromFloat32(-1); | 462 | src1[0] = src1[0] * float24::FromFloat32(-1); |
| @@ -459,10 +465,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 459 | src1[3] = src1[3] * float24::FromFloat32(-1); | 465 | src1[3] = src1[3] * float24::FromFloat32(-1); |
| 460 | } | 466 | } |
| 461 | float24 src2[4] = { | 467 | float24 src2[4] = { |
| 462 | src2_[(int)swizzle.GetSelectorSrc2(0)], | 468 | src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)], |
| 463 | src2_[(int)swizzle.GetSelectorSrc2(1)], | 469 | src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)], |
| 464 | src2_[(int)swizzle.GetSelectorSrc2(2)], | ||
| 465 | src2_[(int)swizzle.GetSelectorSrc2(3)], | ||
| 466 | }; | 470 | }; |
| 467 | if (negate_src2) { | 471 | if (negate_src2) { |
| 468 | src2[0] = src2[0] * float24::FromFloat32(-1); | 472 | src2[0] = src2[0] * float24::FromFloat32(-1); |
| @@ -471,10 +475,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 471 | src2[3] = src2[3] * float24::FromFloat32(-1); | 475 | src2[3] = src2[3] * float24::FromFloat32(-1); |
| 472 | } | 476 | } |
| 473 | float24 src3[4] = { | 477 | float24 src3[4] = { |
| 474 | src3_[(int)swizzle.GetSelectorSrc3(0)], | 478 | src3_[(int)swizzle.GetSelectorSrc3(0)], src3_[(int)swizzle.GetSelectorSrc3(1)], |
| 475 | src3_[(int)swizzle.GetSelectorSrc3(1)], | 479 | src3_[(int)swizzle.GetSelectorSrc3(2)], src3_[(int)swizzle.GetSelectorSrc3(3)], |
| 476 | src3_[(int)swizzle.GetSelectorSrc3(2)], | ||
| 477 | src3_[(int)swizzle.GetSelectorSrc3(3)], | ||
| 478 | }; | 480 | }; |
| 479 | if (negate_src3) { | 481 | if (negate_src3) { |
| 480 | src3[0] = src3[0] * float24::FromFloat32(-1); | 482 | src3[0] = src3[0] * float24::FromFloat32(-1); |
| @@ -483,9 +485,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 483 | src3[3] = src3[3] * float24::FromFloat32(-1); | 485 | src3[3] = src3[3] * float24::FromFloat32(-1); |
| 484 | } | 486 | } |
| 485 | 487 | ||
| 486 | float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] | 488 | float24* dest = |
| 487 | : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] | 489 | (instr.mad.dest.Value() < 0x10) |
| 488 | : dummy_vec4_float24; | 490 | ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] |
| 491 | : (instr.mad.dest.Value() < 0x20) | ||
| 492 | ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] | ||
| 493 | : dummy_vec4_float24; | ||
| 489 | 494 | ||
| 490 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 495 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 491 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 496 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); |
| @@ -500,16 +505,17 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 500 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 505 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); |
| 501 | } else { | 506 | } else { |
| 502 | LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", | 507 | LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", |
| 503 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); | 508 | (int)instr.opcode.Value().EffectiveOpCode(), |
| 509 | instr.opcode.Value().GetInfo().name, instr.hex); | ||
| 504 | } | 510 | } |
| 505 | break; | 511 | break; |
| 506 | } | 512 | } |
| 507 | 513 | ||
| 508 | default: | 514 | default: { |
| 509 | { | 515 | static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, |
| 510 | static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { | 516 | Instruction::FlowControlType flow_control) { |
| 511 | bool results[2] = { refx == state.conditional_code[0], | 517 | bool results[2] = {refx == state.conditional_code[0], |
| 512 | refy == state.conditional_code[1] }; | 518 | refy == state.conditional_code[1]}; |
| 513 | 519 | ||
| 514 | switch (flow_control.op) { | 520 | switch (flow_control.op) { |
| 515 | case flow_control.Or: | 521 | case flow_control.Or: |
| @@ -533,44 +539,45 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 533 | break; | 539 | break; |
| 534 | 540 | ||
| 535 | case OpCode::Id::JMPC: | 541 | case OpCode::Id::JMPC: |
| 536 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | 542 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, |
| 537 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 543 | state.conditional_code); |
| 544 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, | ||
| 545 | instr.flow_control)) { | ||
| 538 | program_counter = instr.flow_control.dest_offset - 1; | 546 | program_counter = instr.flow_control.dest_offset - 1; |
| 539 | } | 547 | } |
| 540 | break; | 548 | break; |
| 541 | 549 | ||
| 542 | case OpCode::Id::JMPU: | 550 | case OpCode::Id::JMPU: |
| 543 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | 551 | Record<DebugDataRecord::COND_BOOL_IN>( |
| 552 | state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||
| 544 | 553 | ||
| 545 | if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { | 554 | if (uniforms.b[instr.flow_control.bool_uniform_id] == |
| 555 | !(instr.flow_control.num_instructions & 1)) { | ||
| 546 | program_counter = instr.flow_control.dest_offset - 1; | 556 | program_counter = instr.flow_control.dest_offset - 1; |
| 547 | } | 557 | } |
| 548 | break; | 558 | break; |
| 549 | 559 | ||
| 550 | case OpCode::Id::CALL: | 560 | case OpCode::Id::CALL: |
| 551 | call(state, | 561 | call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, |
| 552 | instr.flow_control.dest_offset, | ||
| 553 | instr.flow_control.num_instructions, | ||
| 554 | program_counter + 1, 0, 0); | 562 | program_counter + 1, 0, 0); |
| 555 | break; | 563 | break; |
| 556 | 564 | ||
| 557 | case OpCode::Id::CALLU: | 565 | case OpCode::Id::CALLU: |
| 558 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | 566 | Record<DebugDataRecord::COND_BOOL_IN>( |
| 567 | state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||
| 559 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 568 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 560 | call(state, | 569 | call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, |
| 561 | instr.flow_control.dest_offset, | 570 | program_counter + 1, 0, 0); |
| 562 | instr.flow_control.num_instructions, | ||
| 563 | program_counter + 1, 0, 0); | ||
| 564 | } | 571 | } |
| 565 | break; | 572 | break; |
| 566 | 573 | ||
| 567 | case OpCode::Id::CALLC: | 574 | case OpCode::Id::CALLC: |
| 568 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | 575 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, |
| 569 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 576 | state.conditional_code); |
| 570 | call(state, | 577 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, |
| 571 | instr.flow_control.dest_offset, | 578 | instr.flow_control)) { |
| 572 | instr.flow_control.num_instructions, | 579 | call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, |
| 573 | program_counter + 1, 0, 0); | 580 | program_counter + 1, 0, 0); |
| 574 | } | 581 | } |
| 575 | break; | 582 | break; |
| 576 | 583 | ||
| @@ -578,43 +585,42 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 578 | break; | 585 | break; |
| 579 | 586 | ||
| 580 | case OpCode::Id::IFU: | 587 | case OpCode::Id::IFU: |
| 581 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | 588 | Record<DebugDataRecord::COND_BOOL_IN>( |
| 589 | state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||
| 582 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 590 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 583 | call(state, | 591 | call(state, program_counter + 1, |
| 584 | program_counter + 1, | ||
| 585 | instr.flow_control.dest_offset - program_counter - 1, | 592 | instr.flow_control.dest_offset - program_counter - 1, |
| 586 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | 593 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, |
| 594 | 0); | ||
| 587 | } else { | 595 | } else { |
| 588 | call(state, | 596 | call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, |
| 589 | instr.flow_control.dest_offset, | 597 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, |
| 590 | instr.flow_control.num_instructions, | 598 | 0); |
| 591 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | ||
| 592 | } | 599 | } |
| 593 | 600 | ||
| 594 | break; | 601 | break; |
| 595 | 602 | ||
| 596 | case OpCode::Id::IFC: | 603 | case OpCode::Id::IFC: { |
| 597 | { | ||
| 598 | // TODO: Do we need to consider swizzlers here? | 604 | // TODO: Do we need to consider swizzlers here? |
| 599 | 605 | ||
| 600 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | 606 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, |
| 601 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 607 | state.conditional_code); |
| 602 | call(state, | 608 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, |
| 603 | program_counter + 1, | 609 | instr.flow_control)) { |
| 610 | call(state, program_counter + 1, | ||
| 604 | instr.flow_control.dest_offset - program_counter - 1, | 611 | instr.flow_control.dest_offset - program_counter - 1, |
| 605 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | 612 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, |
| 613 | 0); | ||
| 606 | } else { | 614 | } else { |
| 607 | call(state, | 615 | call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, |
| 608 | instr.flow_control.dest_offset, | 616 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, |
| 609 | instr.flow_control.num_instructions, | 617 | 0); |
| 610 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | ||
| 611 | } | 618 | } |
| 612 | 619 | ||
| 613 | break; | 620 | break; |
| 614 | } | 621 | } |
| 615 | 622 | ||
| 616 | case OpCode::Id::LOOP: | 623 | case OpCode::Id::LOOP: { |
| 617 | { | ||
| 618 | Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x, | 624 | Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x, |
| 619 | uniforms.i[instr.flow_control.int_uniform_id].y, | 625 | uniforms.i[instr.flow_control.int_uniform_id].y, |
| 620 | uniforms.i[instr.flow_control.int_uniform_id].z, | 626 | uniforms.i[instr.flow_control.int_uniform_id].z, |
| @@ -622,18 +628,16 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 622 | state.address_registers[2] = loop_param.y; | 628 | state.address_registers[2] = loop_param.y; |
| 623 | 629 | ||
| 624 | Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); | 630 | Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); |
| 625 | call(state, | 631 | call(state, program_counter + 1, |
| 626 | program_counter + 1, | ||
| 627 | instr.flow_control.dest_offset - program_counter + 1, | 632 | instr.flow_control.dest_offset - program_counter + 1, |
| 628 | instr.flow_control.dest_offset + 1, | 633 | instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z); |
| 629 | loop_param.x, | ||
| 630 | loop_param.z); | ||
| 631 | break; | 634 | break; |
| 632 | } | 635 | } |
| 633 | 636 | ||
| 634 | default: | 637 | default: |
| 635 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | 638 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", |
| 636 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); | 639 | (int)instr.opcode.Value().EffectiveOpCode(), |
| 640 | instr.opcode.Value().GetInfo().name, instr.hex); | ||
| 637 | break; | 641 | break; |
| 638 | } | 642 | } |
| 639 | 643 | ||
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index bb3ce1c6e..48ede0a2e 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h | |||
| @@ -8,9 +8,10 @@ namespace Pica { | |||
| 8 | 8 | ||
| 9 | namespace Shader { | 9 | namespace Shader { |
| 10 | 10 | ||
| 11 | template <bool Debug> struct UnitState; | 11 | template <bool Debug> |
| 12 | struct UnitState; | ||
| 12 | 13 | ||
| 13 | template<bool Debug> | 14 | template <bool Debug> |
| 14 | void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset); | 15 | void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset); |
| 15 | 16 | ||
| 16 | } // namespace | 17 | } // namespace |
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 43e7e6b4c..04e04ba1a 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -31,70 +31,70 @@ using namespace Gen; | |||
| 31 | typedef void (JitShader::*JitFunction)(Instruction instr); | 31 | typedef void (JitShader::*JitFunction)(Instruction instr); |
| 32 | 32 | ||
| 33 | const JitFunction instr_table[64] = { | 33 | const JitFunction instr_table[64] = { |
| 34 | &JitShader::Compile_ADD, // add | 34 | &JitShader::Compile_ADD, // add |
| 35 | &JitShader::Compile_DP3, // dp3 | 35 | &JitShader::Compile_DP3, // dp3 |
| 36 | &JitShader::Compile_DP4, // dp4 | 36 | &JitShader::Compile_DP4, // dp4 |
| 37 | &JitShader::Compile_DPH, // dph | 37 | &JitShader::Compile_DPH, // dph |
| 38 | nullptr, // unknown | 38 | nullptr, // unknown |
| 39 | &JitShader::Compile_EX2, // ex2 | 39 | &JitShader::Compile_EX2, // ex2 |
| 40 | &JitShader::Compile_LG2, // lg2 | 40 | &JitShader::Compile_LG2, // lg2 |
| 41 | nullptr, // unknown | 41 | nullptr, // unknown |
| 42 | &JitShader::Compile_MUL, // mul | 42 | &JitShader::Compile_MUL, // mul |
| 43 | &JitShader::Compile_SGE, // sge | 43 | &JitShader::Compile_SGE, // sge |
| 44 | &JitShader::Compile_SLT, // slt | 44 | &JitShader::Compile_SLT, // slt |
| 45 | &JitShader::Compile_FLR, // flr | 45 | &JitShader::Compile_FLR, // flr |
| 46 | &JitShader::Compile_MAX, // max | 46 | &JitShader::Compile_MAX, // max |
| 47 | &JitShader::Compile_MIN, // min | 47 | &JitShader::Compile_MIN, // min |
| 48 | &JitShader::Compile_RCP, // rcp | 48 | &JitShader::Compile_RCP, // rcp |
| 49 | &JitShader::Compile_RSQ, // rsq | 49 | &JitShader::Compile_RSQ, // rsq |
| 50 | nullptr, // unknown | 50 | nullptr, // unknown |
| 51 | nullptr, // unknown | 51 | nullptr, // unknown |
| 52 | &JitShader::Compile_MOVA, // mova | 52 | &JitShader::Compile_MOVA, // mova |
| 53 | &JitShader::Compile_MOV, // mov | 53 | &JitShader::Compile_MOV, // mov |
| 54 | nullptr, // unknown | 54 | nullptr, // unknown |
| 55 | nullptr, // unknown | 55 | nullptr, // unknown |
| 56 | nullptr, // unknown | 56 | nullptr, // unknown |
| 57 | nullptr, // unknown | 57 | nullptr, // unknown |
| 58 | &JitShader::Compile_DPH, // dphi | 58 | &JitShader::Compile_DPH, // dphi |
| 59 | nullptr, // unknown | 59 | nullptr, // unknown |
| 60 | &JitShader::Compile_SGE, // sgei | 60 | &JitShader::Compile_SGE, // sgei |
| 61 | &JitShader::Compile_SLT, // slti | 61 | &JitShader::Compile_SLT, // slti |
| 62 | nullptr, // unknown | 62 | nullptr, // unknown |
| 63 | nullptr, // unknown | 63 | nullptr, // unknown |
| 64 | nullptr, // unknown | 64 | nullptr, // unknown |
| 65 | nullptr, // unknown | 65 | nullptr, // unknown |
| 66 | nullptr, // unknown | 66 | nullptr, // unknown |
| 67 | &JitShader::Compile_NOP, // nop | 67 | &JitShader::Compile_NOP, // nop |
| 68 | &JitShader::Compile_END, // end | 68 | &JitShader::Compile_END, // end |
| 69 | nullptr, // break | 69 | nullptr, // break |
| 70 | &JitShader::Compile_CALL, // call | 70 | &JitShader::Compile_CALL, // call |
| 71 | &JitShader::Compile_CALLC, // callc | 71 | &JitShader::Compile_CALLC, // callc |
| 72 | &JitShader::Compile_CALLU, // callu | 72 | &JitShader::Compile_CALLU, // callu |
| 73 | &JitShader::Compile_IF, // ifu | 73 | &JitShader::Compile_IF, // ifu |
| 74 | &JitShader::Compile_IF, // ifc | 74 | &JitShader::Compile_IF, // ifc |
| 75 | &JitShader::Compile_LOOP, // loop | 75 | &JitShader::Compile_LOOP, // loop |
| 76 | nullptr, // emit | 76 | nullptr, // emit |
| 77 | nullptr, // sete | 77 | nullptr, // sete |
| 78 | &JitShader::Compile_JMP, // jmpc | 78 | &JitShader::Compile_JMP, // jmpc |
| 79 | &JitShader::Compile_JMP, // jmpu | 79 | &JitShader::Compile_JMP, // jmpu |
| 80 | &JitShader::Compile_CMP, // cmp | 80 | &JitShader::Compile_CMP, // cmp |
| 81 | &JitShader::Compile_CMP, // cmp | 81 | &JitShader::Compile_CMP, // cmp |
| 82 | &JitShader::Compile_MAD, // madi | 82 | &JitShader::Compile_MAD, // madi |
| 83 | &JitShader::Compile_MAD, // madi | 83 | &JitShader::Compile_MAD, // madi |
| 84 | &JitShader::Compile_MAD, // madi | 84 | &JitShader::Compile_MAD, // madi |
| 85 | &JitShader::Compile_MAD, // madi | 85 | &JitShader::Compile_MAD, // madi |
| 86 | &JitShader::Compile_MAD, // madi | 86 | &JitShader::Compile_MAD, // madi |
| 87 | &JitShader::Compile_MAD, // madi | 87 | &JitShader::Compile_MAD, // madi |
| 88 | &JitShader::Compile_MAD, // madi | 88 | &JitShader::Compile_MAD, // madi |
| 89 | &JitShader::Compile_MAD, // madi | 89 | &JitShader::Compile_MAD, // madi |
| 90 | &JitShader::Compile_MAD, // mad | 90 | &JitShader::Compile_MAD, // mad |
| 91 | &JitShader::Compile_MAD, // mad | 91 | &JitShader::Compile_MAD, // mad |
| 92 | &JitShader::Compile_MAD, // mad | 92 | &JitShader::Compile_MAD, // mad |
| 93 | &JitShader::Compile_MAD, // mad | 93 | &JitShader::Compile_MAD, // mad |
| 94 | &JitShader::Compile_MAD, // mad | 94 | &JitShader::Compile_MAD, // mad |
| 95 | &JitShader::Compile_MAD, // mad | 95 | &JitShader::Compile_MAD, // mad |
| 96 | &JitShader::Compile_MAD, // mad | 96 | &JitShader::Compile_MAD, // mad |
| 97 | &JitShader::Compile_MAD, // mad | 97 | &JitShader::Compile_MAD, // mad |
| 98 | }; | 98 | }; |
| 99 | 99 | ||
| 100 | // The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can | 100 | // The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can |
| @@ -136,9 +136,9 @@ static const X64Reg NEGBIT = XMM15; | |||
| 136 | // State registers that must not be modified by external function calls | 136 | // State registers that must not be modified by external function calls |
| 137 | // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed | 137 | // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed |
| 138 | static const BitSet32 persistent_regs = { | 138 | static const BitSet32 persistent_regs = { |
| 139 | SETUP, STATE, // Pointers to register blocks | 139 | SETUP, STATE, // Pointers to register blocks |
| 140 | ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers | 140 | ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers |
| 141 | ONE+16, NEGBIT+16, // Constants | 141 | ONE + 16, NEGBIT + 16, // Constants |
| 142 | }; | 142 | }; |
| 143 | 143 | ||
| 144 | /// Raw constant for the source register selector that indicates no swizzling is performed | 144 | /// Raw constant for the source register selector that indicates no swizzling is performed |
| @@ -152,7 +152,7 @@ static const u8 NO_DEST_REG_MASK = 0xf; | |||
| 152 | * @return Instruction at the specified offset | 152 | * @return Instruction at the specified offset |
| 153 | */ | 153 | */ |
| 154 | static Instruction GetVertexShaderInstruction(size_t offset) { | 154 | static Instruction GetVertexShaderInstruction(size_t offset) { |
| 155 | return { g_state.vs.program_code[offset] }; | 155 | return {g_state.vs.program_code[offset]}; |
| 156 | } | 156 | } |
| 157 | 157 | ||
| 158 | static void LogCritical(const char* msg) { | 158 | static void LogCritical(const char* msg) { |
| @@ -172,7 +172,8 @@ void JitShader::Compile_Assert(bool condition, const char* msg) { | |||
| 172 | * @param src_reg SourceRegister object corresponding to the source register to load | 172 | * @param src_reg SourceRegister object corresponding to the source register to load |
| 173 | * @param dest Destination XMM register to store the loaded, swizzled source register | 173 | * @param dest Destination XMM register to store the loaded, swizzled source register |
| 174 | */ | 174 | */ |
| 175 | void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { | 175 | void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, |
| 176 | X64Reg dest) { | ||
| 176 | X64Reg src_ptr; | 177 | X64Reg src_ptr; |
| 177 | size_t src_offset; | 178 | size_t src_offset; |
| 178 | 179 | ||
| @@ -189,7 +190,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe | |||
| 189 | 190 | ||
| 190 | unsigned operand_desc_id; | 191 | unsigned operand_desc_id; |
| 191 | 192 | ||
| 192 | const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); | 193 | const bool is_inverted = |
| 194 | (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); | ||
| 193 | 195 | ||
| 194 | unsigned address_register_index; | 196 | unsigned address_register_index; |
| 195 | unsigned offset_src; | 197 | unsigned offset_src; |
| @@ -225,7 +227,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe | |||
| 225 | MOVAPS(dest, MDisp(src_ptr, src_offset_disp)); | 227 | MOVAPS(dest, MDisp(src_ptr, src_offset_disp)); |
| 226 | } | 228 | } |
| 227 | 229 | ||
| 228 | SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] }; | 230 | SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]}; |
| 229 | 231 | ||
| 230 | // Generate instructions for source register swizzling as needed | 232 | // Generate instructions for source register swizzling as needed |
| 231 | u8 sel = swiz.GetRawSelector(src_num); | 233 | u8 sel = swiz.GetRawSelector(src_num); |
| @@ -238,13 +240,13 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe | |||
| 238 | } | 240 | } |
| 239 | 241 | ||
| 240 | // If the source register should be negated, flip the negative bit using XOR | 242 | // If the source register should be negated, flip the negative bit using XOR |
| 241 | const bool negate[] = { swiz.negate_src1, swiz.negate_src2, swiz.negate_src3 }; | 243 | const bool negate[] = {swiz.negate_src1, swiz.negate_src2, swiz.negate_src3}; |
| 242 | if (negate[src_num - 1]) { | 244 | if (negate[src_num - 1]) { |
| 243 | XORPS(dest, R(NEGBIT)); | 245 | XORPS(dest, R(NEGBIT)); |
| 244 | } | 246 | } |
| 245 | } | 247 | } |
| 246 | 248 | ||
| 247 | void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { | 249 | void JitShader::Compile_DestEnable(Instruction instr, X64Reg src) { |
| 248 | DestRegister dest; | 250 | DestRegister dest; |
| 249 | unsigned operand_desc_id; | 251 | unsigned operand_desc_id; |
| 250 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || | 252 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || |
| @@ -256,10 +258,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 256 | dest = instr.common.dest.Value(); | 258 | dest = instr.common.dest.Value(); |
| 257 | } | 259 | } |
| 258 | 260 | ||
| 259 | SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] }; | 261 | SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]}; |
| 260 | 262 | ||
| 261 | int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest); | 263 | int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest); |
| 262 | ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), "Destinaton offset too large for int type"); | 264 | ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), |
| 265 | "Destinaton offset too large for int type"); | ||
| 263 | 266 | ||
| 264 | // If all components are enabled, write the result to the destination register | 267 | // If all components are enabled, write the result to the destination register |
| 265 | if (swiz.dest_mask == NO_DEST_REG_MASK) { | 268 | if (swiz.dest_mask == NO_DEST_REG_MASK) { |
| @@ -267,18 +270,21 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 267 | MOVAPS(MDisp(STATE, dest_offset_disp), src); | 270 | MOVAPS(MDisp(STATE, dest_offset_disp), src); |
| 268 | 271 | ||
| 269 | } else { | 272 | } else { |
| 270 | // Not all components are enabled, so mask the result when storing to the destination register... | 273 | // Not all components are enabled, so mask the result when storing to the destination |
| 274 | // register... | ||
| 271 | MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp)); | 275 | MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp)); |
| 272 | 276 | ||
| 273 | if (Common::GetCPUCaps().sse4_1) { | 277 | if (Common::GetCPUCaps().sse4_1) { |
| 274 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | 278 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | |
| 279 | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | ||
| 275 | BLENDPS(SCRATCH, R(src), mask); | 280 | BLENDPS(SCRATCH, R(src), mask); |
| 276 | } else { | 281 | } else { |
| 277 | MOVAPS(SCRATCH2, R(src)); | 282 | MOVAPS(SCRATCH2, R(src)); |
| 278 | UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination | 283 | UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination |
| 279 | UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination | 284 | UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination |
| 280 | 285 | ||
| 281 | // Compute selector to selectively copy source components to destination for SHUFPS instruction | 286 | // Compute selector to selectively copy source components to destination for SHUFPS |
| 287 | // instruction | ||
| 282 | u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) | | 288 | u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) | |
| 283 | ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) | | 289 | ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) | |
| 284 | ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) | | 290 | ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) | |
| @@ -336,7 +342,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { | |||
| 336 | } | 342 | } |
| 337 | 343 | ||
| 338 | void JitShader::Compile_UniformCondition(Instruction instr) { | 344 | void JitShader::Compile_UniformCondition(Instruction instr) { |
| 339 | int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); | 345 | int offset = |
| 346 | ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); | ||
| 340 | CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0)); | 347 | CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0)); |
| 341 | } | 348 | } |
| 342 | 349 | ||
| @@ -512,7 +519,7 @@ void JitShader::Compile_MIN(Instruction instr) { | |||
| 512 | } | 519 | } |
| 513 | 520 | ||
| 514 | void JitShader::Compile_MOVA(Instruction instr) { | 521 | void JitShader::Compile_MOVA(Instruction instr) { |
| 515 | SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] }; | 522 | SwizzlePattern swiz = {g_state.vs.swizzle_data[instr.common.operand_desc_id]}; |
| 516 | 523 | ||
| 517 | if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { | 524 | if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { |
| 518 | return; // NoOp | 525 | return; // NoOp |
| @@ -597,7 +604,7 @@ void JitShader::Compile_CALL(Instruction instr) { | |||
| 597 | 604 | ||
| 598 | // Call the subroutine | 605 | // Call the subroutine |
| 599 | FixupBranch b = CALL(); | 606 | FixupBranch b = CALL(); |
| 600 | fixup_branches.push_back({ b, instr.flow_control.dest_offset }); | 607 | fixup_branches.push_back({b, instr.flow_control.dest_offset}); |
| 601 | 608 | ||
| 602 | // Skip over the return offset that's on the stack | 609 | // Skip over the return offset that's on the stack |
| 603 | ADD(64, R(RSP), Imm32(8)); | 610 | ADD(64, R(RSP), Imm32(8)); |
| @@ -628,7 +635,7 @@ void JitShader::Compile_CMP(Instruction instr) { | |||
| 628 | // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to | 635 | // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to |
| 629 | // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here | 636 | // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here |
| 630 | // because they don't match when used with NaNs. | 637 | // because they don't match when used with NaNs. |
| 631 | static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE }; | 638 | static const u8 cmp[] = {CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE}; |
| 632 | 639 | ||
| 633 | bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual); | 640 | bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual); |
| 634 | Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1; | 641 | Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1; |
| @@ -678,7 +685,8 @@ void JitShader::Compile_MAD(Instruction instr) { | |||
| 678 | } | 685 | } |
| 679 | 686 | ||
| 680 | void JitShader::Compile_IF(Instruction instr) { | 687 | void JitShader::Compile_IF(Instruction instr) { |
| 681 | Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported"); | 688 | Compile_Assert(instr.flow_control.dest_offset >= program_counter, |
| 689 | "Backwards if-statements not supported"); | ||
| 682 | 690 | ||
| 683 | // Evaluate the "IF" condition | 691 | // Evaluate the "IF" condition |
| 684 | if (instr.opcode.Value() == OpCode::Id::IFU) { | 692 | if (instr.opcode.Value() == OpCode::Id::IFU) { |
| @@ -709,29 +717,31 @@ void JitShader::Compile_IF(Instruction instr) { | |||
| 709 | } | 717 | } |
| 710 | 718 | ||
| 711 | void JitShader::Compile_LOOP(Instruction instr) { | 719 | void JitShader::Compile_LOOP(Instruction instr) { |
| 712 | Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported"); | 720 | Compile_Assert(instr.flow_control.dest_offset >= program_counter, |
| 721 | "Backwards loops not supported"); | ||
| 713 | Compile_Assert(!looping, "Nested loops not supported"); | 722 | Compile_Assert(!looping, "Nested loops not supported"); |
| 714 | 723 | ||
| 715 | looping = true; | 724 | looping = true; |
| 716 | 725 | ||
| 717 | int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); | 726 | int offset = |
| 727 | ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); | ||
| 718 | MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); | 728 | MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); |
| 719 | MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); | 729 | MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); |
| 720 | SHR(32, R(LOOPCOUNT_REG), Imm8(8)); | 730 | SHR(32, R(LOOPCOUNT_REG), Imm8(8)); |
| 721 | AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start | 731 | AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start |
| 722 | MOV(32, R(LOOPINC), R(LOOPCOUNT)); | 732 | MOV(32, R(LOOPINC), R(LOOPCOUNT)); |
| 723 | SHR(32, R(LOOPINC), Imm8(16)); | 733 | SHR(32, R(LOOPINC), Imm8(16)); |
| 724 | MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer | 734 | MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer |
| 725 | MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count | 735 | MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count |
| 726 | ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1 | 736 | ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1 |
| 727 | 737 | ||
| 728 | auto loop_start = GetCodePtr(); | 738 | auto loop_start = GetCodePtr(); |
| 729 | 739 | ||
| 730 | Compile_Block(instr.flow_control.dest_offset + 1); | 740 | Compile_Block(instr.flow_control.dest_offset + 1); |
| 731 | 741 | ||
| 732 | ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component | 742 | ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component |
| 733 | SUB(32, R(LOOPCOUNT), Imm8(1)); // Decrement loop count by 1 | 743 | SUB(32, R(LOOPCOUNT), Imm8(1)); // Decrement loop count by 1 |
| 734 | J_CC(CC_NZ, loop_start); // Loop if not equal | 744 | J_CC(CC_NZ, loop_start); // Loop if not equal |
| 735 | 745 | ||
| 736 | looping = false; | 746 | looping = false; |
| 737 | } | 747 | } |
| @@ -744,11 +754,11 @@ void JitShader::Compile_JMP(Instruction instr) { | |||
| 744 | else | 754 | else |
| 745 | UNREACHABLE(); | 755 | UNREACHABLE(); |
| 746 | 756 | ||
| 747 | bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && | 757 | bool inverted_condition = |
| 748 | (instr.flow_control.num_instructions & 1); | 758 | (instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1); |
| 749 | 759 | ||
| 750 | FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); | 760 | FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); |
| 751 | fixup_branches.push_back({ b, instr.flow_control.dest_offset }); | 761 | fixup_branches.push_back({b, instr.flow_control.dest_offset}); |
| 752 | } | 762 | } |
| 753 | 763 | ||
| 754 | void JitShader::Compile_Block(unsigned end) { | 764 | void JitShader::Compile_Block(unsigned end) { |
| @@ -773,7 +783,8 @@ void JitShader::Compile_NextInstr() { | |||
| 773 | Compile_Return(); | 783 | Compile_Return(); |
| 774 | } | 784 | } |
| 775 | 785 | ||
| 776 | ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!"); | 786 | ASSERT_MSG(code_ptr[program_counter] == nullptr, |
| 787 | "Tried to compile already compiled shader location!"); | ||
| 777 | code_ptr[program_counter] = GetCodePtr(); | 788 | code_ptr[program_counter] = GetCodePtr(); |
| 778 | 789 | ||
| 779 | Instruction instr = GetVertexShaderInstruction(program_counter++); | 790 | Instruction instr = GetVertexShaderInstruction(program_counter++); |
| @@ -787,7 +798,7 @@ void JitShader::Compile_NextInstr() { | |||
| 787 | } else { | 798 | } else { |
| 788 | // Unhandled instruction | 799 | // Unhandled instruction |
| 789 | LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)", | 800 | LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)", |
| 790 | instr.opcode.Value().EffectiveOpCode(), instr.hex); | 801 | instr.opcode.Value().EffectiveOpCode(), instr.hex); |
| 791 | } | 802 | } |
| 792 | } | 803 | } |
| 793 | 804 | ||
| @@ -801,7 +812,8 @@ void JitShader::FindReturnOffsets() { | |||
| 801 | case OpCode::Id::CALL: | 812 | case OpCode::Id::CALL: |
| 802 | case OpCode::Id::CALLC: | 813 | case OpCode::Id::CALLC: |
| 803 | case OpCode::Id::CALLU: | 814 | case OpCode::Id::CALLU: |
| 804 | return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions); | 815 | return_offsets.push_back(instr.flow_control.dest_offset + |
| 816 | instr.flow_control.num_instructions); | ||
| 805 | break; | 817 | break; |
| 806 | default: | 818 | default: |
| 807 | break; | 819 | break; |
| @@ -835,12 +847,12 @@ void JitShader::Compile() { | |||
| 835 | XOR(64, R(LOOPCOUNT_REG), R(LOOPCOUNT_REG)); | 847 | XOR(64, R(LOOPCOUNT_REG), R(LOOPCOUNT_REG)); |
| 836 | 848 | ||
| 837 | // Used to set a register to one | 849 | // Used to set a register to one |
| 838 | static const __m128 one = { 1.f, 1.f, 1.f, 1.f }; | 850 | static const __m128 one = {1.f, 1.f, 1.f, 1.f}; |
| 839 | MOV(PTRBITS, R(RAX), ImmPtr(&one)); | 851 | MOV(PTRBITS, R(RAX), ImmPtr(&one)); |
| 840 | MOVAPS(ONE, MatR(RAX)); | 852 | MOVAPS(ONE, MatR(RAX)); |
| 841 | 853 | ||
| 842 | // Used to negate registers | 854 | // Used to negate registers |
| 843 | static const __m128 neg = { -0.f, -0.f, -0.f, -0.f }; | 855 | static const __m128 neg = {-0.f, -0.f, -0.f, -0.f}; |
| 844 | MOV(PTRBITS, R(RAX), ImmPtr(&neg)); | 856 | MOV(PTRBITS, R(RAX), ImmPtr(&neg)); |
| 845 | MOVAPS(NEGBIT, MatR(RAX)); | 857 | MOVAPS(NEGBIT, MatR(RAX)); |
| 846 | 858 | ||
| @@ -850,7 +862,8 @@ void JitShader::Compile() { | |||
| 850 | // Compile entire program | 862 | // Compile entire program |
| 851 | Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); | 863 | Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); |
| 852 | 864 | ||
| 853 | // Set the target for any incomplete branches now that the entire shader program has been emitted | 865 | // Set the target for any incomplete branches now that the entire shader program has been |
| 866 | // emitted | ||
| 854 | for (const auto& branch : fixup_branches) { | 867 | for (const auto& branch : fixup_branches) { |
| 855 | SetJumpTarget(branch.first, code_ptr[branch.second]); | 868 | SetJumpTarget(branch.first, code_ptr[branch.second]); |
| 856 | } | 869 | } |
| @@ -861,7 +874,8 @@ void JitShader::Compile() { | |||
| 861 | fixup_branches.clear(); | 874 | fixup_branches.clear(); |
| 862 | fixup_branches.shrink_to_fit(); | 875 | fixup_branches.shrink_to_fit(); |
| 863 | 876 | ||
| 864 | uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program); | 877 | uintptr_t size = |
| 878 | reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program); | ||
| 865 | ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); | 879 | ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); |
| 866 | 880 | ||
| 867 | LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size); | 881 | LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size); |
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 5468459d4..2f37ef8bf 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h | |||
| @@ -70,11 +70,11 @@ public: | |||
| 70 | void Compile_MAD(Instruction instr); | 70 | void Compile_MAD(Instruction instr); |
| 71 | 71 | ||
| 72 | private: | 72 | private: |
| 73 | |||
| 74 | void Compile_Block(unsigned end); | 73 | void Compile_Block(unsigned end); |
| 75 | void Compile_NextInstr(); | 74 | void Compile_NextInstr(); |
| 76 | 75 | ||
| 77 | void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); | 76 | void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, |
| 77 | Gen::X64Reg dest); | ||
| 78 | void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); | 78 | void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); |
| 79 | 79 | ||
| 80 | /** | 80 | /** |
| @@ -111,8 +111,8 @@ private: | |||
| 111 | /// Offsets in code where a return needs to be inserted | 111 | /// Offsets in code where a return needs to be inserted |
| 112 | std::vector<unsigned> return_offsets; | 112 | std::vector<unsigned> return_offsets; |
| 113 | 113 | ||
| 114 | unsigned program_counter = 0; ///< Offset of the next instruction to decode | 114 | unsigned program_counter = 0; ///< Offset of the next instruction to decode |
| 115 | bool looping = false; ///< True if compiling a loop, used to check for nested loops | 115 | bool looping = false; ///< True if compiling a loop, used to check for nested loops |
| 116 | 116 | ||
| 117 | /// Branches that need to be fixed up once the entire shader program is compiled | 117 | /// Branches that need to be fixed up once the entire shader program is compiled |
| 118 | std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; | 118 | std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; |