diff options
Diffstat (limited to 'src/video_core')
31 files changed, 1401 insertions, 352 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 0961a3251..82f47d8a9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | set(SRCS | 1 | set(SRCS |
| 2 | command_processor.cpp | 2 | command_processor.cpp |
| 3 | debug_utils/debug_utils.cpp | 3 | debug_utils/debug_utils.cpp |
| 4 | geometry_pipeline.cpp | ||
| 4 | pica.cpp | 5 | pica.cpp |
| 5 | primitive_assembly.cpp | 6 | primitive_assembly.cpp |
| 6 | regs.cpp | 7 | regs.cpp |
| @@ -15,6 +16,7 @@ set(SRCS | |||
| 15 | shader/shader_interpreter.cpp | 16 | shader/shader_interpreter.cpp |
| 16 | swrasterizer/clipper.cpp | 17 | swrasterizer/clipper.cpp |
| 17 | swrasterizer/framebuffer.cpp | 18 | swrasterizer/framebuffer.cpp |
| 19 | swrasterizer/lighting.cpp | ||
| 18 | swrasterizer/proctex.cpp | 20 | swrasterizer/proctex.cpp |
| 19 | swrasterizer/rasterizer.cpp | 21 | swrasterizer/rasterizer.cpp |
| 20 | swrasterizer/swrasterizer.cpp | 22 | swrasterizer/swrasterizer.cpp |
| @@ -28,6 +30,7 @@ set(SRCS | |||
| 28 | set(HEADERS | 30 | set(HEADERS |
| 29 | command_processor.h | 31 | command_processor.h |
| 30 | debug_utils/debug_utils.h | 32 | debug_utils/debug_utils.h |
| 33 | geometry_pipeline.h | ||
| 31 | gpu_debugger.h | 34 | gpu_debugger.h |
| 32 | pica.h | 35 | pica.h |
| 33 | pica_state.h | 36 | pica_state.h |
| @@ -55,6 +58,7 @@ set(HEADERS | |||
| 55 | shader/shader_interpreter.h | 58 | shader/shader_interpreter.h |
| 56 | swrasterizer/clipper.h | 59 | swrasterizer/clipper.h |
| 57 | swrasterizer/framebuffer.h | 60 | swrasterizer/framebuffer.h |
| 61 | swrasterizer/lighting.h | ||
| 58 | swrasterizer/proctex.h | 62 | swrasterizer/proctex.h |
| 59 | swrasterizer/rasterizer.h | 63 | swrasterizer/rasterizer.h |
| 60 | swrasterizer/swrasterizer.h | 64 | swrasterizer/swrasterizer.h |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 4633a1df1..caf9f7a06 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -119,24 +119,221 @@ static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup, | |||
| 119 | } | 119 | } |
| 120 | } | 120 | } |
| 121 | 121 | ||
| 122 | static void WriteProgramCode(ShaderRegs& config, Shader::ShaderSetup& setup, | 122 | static void LoadDefaultVertexAttributes(u32 register_value) { |
| 123 | unsigned max_program_code_length, u32 value) { | 123 | auto& regs = g_state.regs; |
| 124 | if (config.program.offset >= max_program_code_length) { | 124 | |
| 125 | LOG_ERROR(HW_GPU, "Invalid %s program offset %d", GetShaderSetupTypeName(setup), | 125 | // TODO: Does actual hardware indeed keep an intermediate buffer or does |
| 126 | (int)config.program.offset); | 126 | // it directly write the values? |
| 127 | } else { | 127 | default_attr_write_buffer[default_attr_counter++] = register_value; |
| 128 | setup.program_code[config.program.offset] = value; | 128 | |
| 129 | config.program.offset++; | 129 | // Default attributes are written in a packed format such that four float24 values are encoded |
| 130 | // in three 32-bit numbers. | ||
| 131 | // We write to internal memory once a full such vector is written. | ||
| 132 | if (default_attr_counter >= 3) { | ||
| 133 | default_attr_counter = 0; | ||
| 134 | |||
| 135 | auto& setup = regs.pipeline.vs_default_attributes_setup; | ||
| 136 | |||
| 137 | if (setup.index >= 16) { | ||
| 138 | LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); | ||
| 139 | return; | ||
| 140 | } | ||
| 141 | |||
| 142 | Math::Vec4<float24> attribute; | ||
| 143 | |||
| 144 | // NOTE: The destination component order indeed is "backwards" | ||
| 145 | attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8); | ||
| 146 | attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | | ||
| 147 | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); | ||
| 148 | attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | | ||
| 149 | ((default_attr_write_buffer[2] >> 24) & 0xFF)); | ||
| 150 | attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF); | ||
| 151 | |||
| 152 | LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, | ||
| 153 | attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), | ||
| 154 | attribute.w.ToFloat32()); | ||
| 155 | |||
| 156 | // TODO: Verify that this actually modifies the register! | ||
| 157 | if (setup.index < 15) { | ||
| 158 | g_state.input_default_attributes.attr[setup.index] = attribute; | ||
| 159 | setup.index++; | ||
| 160 | } else { | ||
| 161 | // Put each attribute into an immediate input buffer. When all specified immediate | ||
| 162 | // attributes are present, the Vertex Shader is invoked and everything is sent to | ||
| 163 | // the primitive assembler. | ||
| 164 | |||
| 165 | auto& immediate_input = g_state.immediate.input_vertex; | ||
| 166 | auto& immediate_attribute_id = g_state.immediate.current_attribute; | ||
| 167 | |||
| 168 | immediate_input.attr[immediate_attribute_id] = attribute; | ||
| 169 | |||
| 170 | if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) { | ||
| 171 | immediate_attribute_id += 1; | ||
| 172 | } else { | ||
| 173 | MICROPROFILE_SCOPE(GPU_Drawing); | ||
| 174 | immediate_attribute_id = 0; | ||
| 175 | |||
| 176 | auto* shader_engine = Shader::GetEngine(); | ||
| 177 | shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); | ||
| 178 | |||
| 179 | // Send to vertex shader | ||
| 180 | if (g_debug_context) | ||
| 181 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, | ||
| 182 | static_cast<void*>(&immediate_input)); | ||
| 183 | Shader::UnitState shader_unit; | ||
| 184 | Shader::AttributeBuffer output{}; | ||
| 185 | |||
| 186 | shader_unit.LoadInput(regs.vs, immediate_input); | ||
| 187 | shader_engine->Run(g_state.vs, shader_unit); | ||
| 188 | shader_unit.WriteOutput(regs.vs, output); | ||
| 189 | |||
| 190 | // Send to geometry pipeline | ||
| 191 | if (g_state.immediate.reset_geometry_pipeline) { | ||
| 192 | g_state.geometry_pipeline.Reconfigure(); | ||
| 193 | g_state.immediate.reset_geometry_pipeline = false; | ||
| 194 | } | ||
| 195 | ASSERT(!g_state.geometry_pipeline.NeedIndexInput()); | ||
| 196 | g_state.geometry_pipeline.Setup(shader_engine); | ||
| 197 | g_state.geometry_pipeline.SubmitVertex(output); | ||
| 198 | |||
| 199 | // TODO: If drawing after every immediate mode triangle kills performance, | ||
| 200 | // change it to flush triangles whenever a drawing config register changes | ||
| 201 | // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550 | ||
| 202 | VideoCore::g_renderer->Rasterizer()->DrawTriangles(); | ||
| 203 | if (g_debug_context) { | ||
| 204 | g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); | ||
| 205 | } | ||
| 206 | } | ||
| 207 | } | ||
| 130 | } | 208 | } |
| 131 | } | 209 | } |
| 132 | 210 | ||
| 133 | static void WriteSwizzlePatterns(ShaderRegs& config, Shader::ShaderSetup& setup, u32 value) { | 211 | static void Draw(u32 command_id) { |
| 134 | if (config.swizzle_patterns.offset >= setup.swizzle_data.size()) { | 212 | MICROPROFILE_SCOPE(GPU_Drawing); |
| 135 | LOG_ERROR(HW_GPU, "Invalid %s swizzle pattern offset %d", GetShaderSetupTypeName(setup), | 213 | auto& regs = g_state.regs; |
| 136 | (int)config.swizzle_patterns.offset); | 214 | |
| 137 | } else { | 215 | #if PICA_LOG_TEV |
| 138 | setup.swizzle_data[config.swizzle_patterns.offset] = value; | 216 | DebugUtils::DumpTevStageConfig(regs.GetTevStages()); |
| 139 | config.swizzle_patterns.offset++; | 217 | #endif |
| 218 | if (g_debug_context) | ||
| 219 | g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); | ||
| 220 | |||
| 221 | // Processes information about internal vertex attributes to figure out how a vertex is | ||
| 222 | // loaded. | ||
| 223 | // Later, these can be compiled and cached. | ||
| 224 | const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress(); | ||
| 225 | VertexLoader loader(regs.pipeline); | ||
| 226 | |||
| 227 | // Load vertices | ||
| 228 | bool is_indexed = (command_id == PICA_REG_INDEX(pipeline.trigger_draw_indexed)); | ||
| 229 | |||
| 230 | const auto& index_info = regs.pipeline.index_array; | ||
| 231 | const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); | ||
| 232 | const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); | ||
| 233 | bool index_u16 = index_info.format != 0; | ||
| 234 | |||
| 235 | PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler; | ||
| 236 | |||
| 237 | if (g_debug_context && g_debug_context->recorder) { | ||
| 238 | for (int i = 0; i < 3; ++i) { | ||
| 239 | const auto texture = regs.texturing.GetTextures()[i]; | ||
| 240 | if (!texture.enabled) | ||
| 241 | continue; | ||
| 242 | |||
| 243 | u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); | ||
| 244 | g_debug_context->recorder->MemoryAccessed( | ||
| 245 | texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) * | ||
| 246 | texture.config.width / 2 * texture.config.height, | ||
| 247 | texture.config.GetPhysicalAddress()); | ||
| 248 | } | ||
| 249 | } | ||
| 250 | |||
| 251 | DebugUtils::MemoryAccessTracker memory_accesses; | ||
| 252 | |||
| 253 | // Simple circular-replacement vertex cache | ||
| 254 | // The size has been tuned for optimal balance between hit-rate and the cost of lookup | ||
| 255 | const size_t VERTEX_CACHE_SIZE = 32; | ||
| 256 | std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; | ||
| 257 | std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache; | ||
| 258 | Shader::AttributeBuffer vs_output; | ||
| 259 | |||
| 260 | unsigned int vertex_cache_pos = 0; | ||
| 261 | vertex_cache_ids.fill(-1); | ||
| 262 | |||
| 263 | auto* shader_engine = Shader::GetEngine(); | ||
| 264 | Shader::UnitState shader_unit; | ||
| 265 | |||
| 266 | shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); | ||
| 267 | |||
| 268 | g_state.geometry_pipeline.Reconfigure(); | ||
| 269 | g_state.geometry_pipeline.Setup(shader_engine); | ||
| 270 | if (g_state.geometry_pipeline.NeedIndexInput()) | ||
| 271 | ASSERT(is_indexed); | ||
| 272 | |||
| 273 | for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { | ||
| 274 | // Indexed rendering doesn't use the start offset | ||
| 275 | unsigned int vertex = is_indexed | ||
| 276 | ? (index_u16 ? index_address_16[index] : index_address_8[index]) | ||
| 277 | : (index + regs.pipeline.vertex_offset); | ||
| 278 | |||
| 279 | // -1 is a common special value used for primitive restart. Since it's unknown if | ||
| 280 | // the PICA supports it, and it would mess up the caching, guard against it here. | ||
| 281 | ASSERT(vertex != -1); | ||
| 282 | |||
| 283 | bool vertex_cache_hit = false; | ||
| 284 | |||
| 285 | if (is_indexed) { | ||
| 286 | if (g_state.geometry_pipeline.NeedIndexInput()) { | ||
| 287 | g_state.geometry_pipeline.SubmitIndex(vertex); | ||
| 288 | continue; | ||
| 289 | } | ||
| 290 | |||
| 291 | if (g_debug_context && Pica::g_debug_context->recorder) { | ||
| 292 | int size = index_u16 ? 2 : 1; | ||
| 293 | memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); | ||
| 294 | } | ||
| 295 | |||
| 296 | for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { | ||
| 297 | if (vertex == vertex_cache_ids[i]) { | ||
| 298 | vs_output = vertex_cache[i]; | ||
| 299 | vertex_cache_hit = true; | ||
| 300 | break; | ||
| 301 | } | ||
| 302 | } | ||
| 303 | } | ||
| 304 | |||
| 305 | if (!vertex_cache_hit) { | ||
| 306 | // Initialize data for the current vertex | ||
| 307 | Shader::AttributeBuffer input; | ||
| 308 | loader.LoadVertex(base_address, index, vertex, input, memory_accesses); | ||
| 309 | |||
| 310 | // Send to vertex shader | ||
| 311 | if (g_debug_context) | ||
| 312 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, | ||
| 313 | (void*)&input); | ||
| 314 | shader_unit.LoadInput(regs.vs, input); | ||
| 315 | shader_engine->Run(g_state.vs, shader_unit); | ||
| 316 | shader_unit.WriteOutput(regs.vs, vs_output); | ||
| 317 | |||
| 318 | if (is_indexed) { | ||
| 319 | vertex_cache[vertex_cache_pos] = vs_output; | ||
| 320 | vertex_cache_ids[vertex_cache_pos] = vertex; | ||
| 321 | vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; | ||
| 322 | } | ||
| 323 | } | ||
| 324 | |||
| 325 | // Send to geometry pipeline | ||
| 326 | g_state.geometry_pipeline.SubmitVertex(vs_output); | ||
| 327 | } | ||
| 328 | |||
| 329 | for (auto& range : memory_accesses.ranges) { | ||
| 330 | g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first), | ||
| 331 | range.second, range.first); | ||
| 332 | } | ||
| 333 | |||
| 334 | VideoCore::g_renderer->Rasterizer()->DrawTriangles(); | ||
| 335 | if (g_debug_context) { | ||
| 336 | g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); | ||
| 140 | } | 337 | } |
| 141 | } | 338 | } |
| 142 | 339 | ||
| @@ -182,106 +379,19 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 182 | 379 | ||
| 183 | case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index): | 380 | case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index): |
| 184 | g_state.immediate.current_attribute = 0; | 381 | g_state.immediate.current_attribute = 0; |
| 382 | g_state.immediate.reset_geometry_pipeline = true; | ||
| 185 | default_attr_counter = 0; | 383 | default_attr_counter = 0; |
| 186 | break; | 384 | break; |
| 187 | 385 | ||
| 188 | // Load default vertex input attributes | 386 | // Load default vertex input attributes |
| 189 | case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[0], 0x233): | 387 | case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[0], 0x233): |
| 190 | case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[1], 0x234): | 388 | case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[1], 0x234): |
| 191 | case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235): { | 389 | case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235): |
| 192 | // TODO: Does actual hardware indeed keep an intermediate buffer or does | 390 | LoadDefaultVertexAttributes(value); |
| 193 | // it directly write the values? | ||
| 194 | default_attr_write_buffer[default_attr_counter++] = value; | ||
| 195 | |||
| 196 | // Default attributes are written in a packed format such that four float24 values are | ||
| 197 | // encoded in | ||
| 198 | // three 32-bit numbers. We write to internal memory once a full such vector is | ||
| 199 | // written. | ||
| 200 | if (default_attr_counter >= 3) { | ||
| 201 | default_attr_counter = 0; | ||
| 202 | |||
| 203 | auto& setup = regs.pipeline.vs_default_attributes_setup; | ||
| 204 | |||
| 205 | if (setup.index >= 16) { | ||
| 206 | LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); | ||
| 207 | break; | ||
| 208 | } | ||
| 209 | |||
| 210 | Math::Vec4<float24> attribute; | ||
| 211 | |||
| 212 | // NOTE: The destination component order indeed is "backwards" | ||
| 213 | attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8); | ||
| 214 | attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | | ||
| 215 | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); | ||
| 216 | attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | | ||
| 217 | ((default_attr_write_buffer[2] >> 24) & 0xFF)); | ||
| 218 | attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF); | ||
| 219 | |||
| 220 | LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, | ||
| 221 | attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), | ||
| 222 | attribute.w.ToFloat32()); | ||
| 223 | |||
| 224 | // TODO: Verify that this actually modifies the register! | ||
| 225 | if (setup.index < 15) { | ||
| 226 | g_state.input_default_attributes.attr[setup.index] = attribute; | ||
| 227 | setup.index++; | ||
| 228 | } else { | ||
| 229 | // Put each attribute into an immediate input buffer. When all specified immediate | ||
| 230 | // attributes are present, the Vertex Shader is invoked and everything is sent to | ||
| 231 | // the primitive assembler. | ||
| 232 | |||
| 233 | auto& immediate_input = g_state.immediate.input_vertex; | ||
| 234 | auto& immediate_attribute_id = g_state.immediate.current_attribute; | ||
| 235 | |||
| 236 | immediate_input.attr[immediate_attribute_id] = attribute; | ||
| 237 | |||
| 238 | if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) { | ||
| 239 | immediate_attribute_id += 1; | ||
| 240 | } else { | ||
| 241 | MICROPROFILE_SCOPE(GPU_Drawing); | ||
| 242 | immediate_attribute_id = 0; | ||
| 243 | |||
| 244 | auto* shader_engine = Shader::GetEngine(); | ||
| 245 | shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); | ||
| 246 | |||
| 247 | // Send to vertex shader | ||
| 248 | if (g_debug_context) | ||
| 249 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, | ||
| 250 | static_cast<void*>(&immediate_input)); | ||
| 251 | Shader::UnitState shader_unit; | ||
| 252 | Shader::AttributeBuffer output{}; | ||
| 253 | |||
| 254 | shader_unit.LoadInput(regs.vs, immediate_input); | ||
| 255 | shader_engine->Run(g_state.vs, shader_unit); | ||
| 256 | shader_unit.WriteOutput(regs.vs, output); | ||
| 257 | |||
| 258 | // Send to renderer | ||
| 259 | using Pica::Shader::OutputVertex; | ||
| 260 | auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, | ||
| 261 | const OutputVertex& v2) { | ||
| 262 | VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); | ||
| 263 | }; | ||
| 264 | |||
| 265 | g_state.primitive_assembler.SubmitVertex( | ||
| 266 | Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output), | ||
| 267 | AddTriangle); | ||
| 268 | } | ||
| 269 | } | ||
| 270 | } | ||
| 271 | break; | 391 | break; |
| 272 | } | ||
| 273 | 392 | ||
| 274 | case PICA_REG_INDEX(pipeline.gpu_mode): | 393 | case PICA_REG_INDEX(pipeline.gpu_mode): |
| 275 | if (regs.pipeline.gpu_mode == PipelineRegs::GPUMode::Configuring) { | 394 | // This register likely just enables vertex processing and doesn't need any special handling |
| 276 | MICROPROFILE_SCOPE(GPU_Drawing); | ||
| 277 | |||
| 278 | // Draw immediate mode triangles when GPU Mode is set to GPUMode::Configuring | ||
| 279 | VideoCore::g_renderer->Rasterizer()->DrawTriangles(); | ||
| 280 | |||
| 281 | if (g_debug_context) { | ||
| 282 | g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); | ||
| 283 | } | ||
| 284 | } | ||
| 285 | break; | 395 | break; |
| 286 | 396 | ||
| 287 | case PICA_REG_INDEX_WORKAROUND(pipeline.command_buffer.trigger[0], 0x23c): | 397 | case PICA_REG_INDEX_WORKAROUND(pipeline.command_buffer.trigger[0], 0x23c): |
| @@ -297,130 +407,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 297 | 407 | ||
| 298 | // It seems like these trigger vertex rendering | 408 | // It seems like these trigger vertex rendering |
| 299 | case PICA_REG_INDEX(pipeline.trigger_draw): | 409 | case PICA_REG_INDEX(pipeline.trigger_draw): |
| 300 | case PICA_REG_INDEX(pipeline.trigger_draw_indexed): { | 410 | case PICA_REG_INDEX(pipeline.trigger_draw_indexed): |
| 301 | MICROPROFILE_SCOPE(GPU_Drawing); | 411 | Draw(id); |
| 302 | |||
| 303 | #if PICA_LOG_TEV | ||
| 304 | DebugUtils::DumpTevStageConfig(regs.GetTevStages()); | ||
| 305 | #endif | ||
| 306 | if (g_debug_context) | ||
| 307 | g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); | ||
| 308 | |||
| 309 | // Processes information about internal vertex attributes to figure out how a vertex is | ||
| 310 | // loaded. | ||
| 311 | // Later, these can be compiled and cached. | ||
| 312 | const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress(); | ||
| 313 | VertexLoader loader(regs.pipeline); | ||
| 314 | |||
| 315 | // Load vertices | ||
| 316 | bool is_indexed = (id == PICA_REG_INDEX(pipeline.trigger_draw_indexed)); | ||
| 317 | |||
| 318 | const auto& index_info = regs.pipeline.index_array; | ||
| 319 | const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); | ||
| 320 | const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); | ||
| 321 | bool index_u16 = index_info.format != 0; | ||
| 322 | |||
| 323 | PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler; | ||
| 324 | |||
| 325 | if (g_debug_context && g_debug_context->recorder) { | ||
| 326 | for (int i = 0; i < 3; ++i) { | ||
| 327 | const auto texture = regs.texturing.GetTextures()[i]; | ||
| 328 | if (!texture.enabled) | ||
| 329 | continue; | ||
| 330 | |||
| 331 | u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); | ||
| 332 | g_debug_context->recorder->MemoryAccessed( | ||
| 333 | texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) * | ||
| 334 | texture.config.width / 2 * texture.config.height, | ||
| 335 | texture.config.GetPhysicalAddress()); | ||
| 336 | } | ||
| 337 | } | ||
| 338 | |||
| 339 | DebugUtils::MemoryAccessTracker memory_accesses; | ||
| 340 | |||
| 341 | // Simple circular-replacement vertex cache | ||
| 342 | // The size has been tuned for optimal balance between hit-rate and the cost of lookup | ||
| 343 | const size_t VERTEX_CACHE_SIZE = 32; | ||
| 344 | std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; | ||
| 345 | std::array<Shader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache; | ||
| 346 | Shader::OutputVertex output_vertex; | ||
| 347 | |||
| 348 | unsigned int vertex_cache_pos = 0; | ||
| 349 | vertex_cache_ids.fill(-1); | ||
| 350 | |||
| 351 | auto* shader_engine = Shader::GetEngine(); | ||
| 352 | Shader::UnitState shader_unit; | ||
| 353 | |||
| 354 | shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); | ||
| 355 | |||
| 356 | for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { | ||
| 357 | // Indexed rendering doesn't use the start offset | ||
| 358 | unsigned int vertex = | ||
| 359 | is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) | ||
| 360 | : (index + regs.pipeline.vertex_offset); | ||
| 361 | |||
| 362 | // -1 is a common special value used for primitive restart. Since it's unknown if | ||
| 363 | // the PICA supports it, and it would mess up the caching, guard against it here. | ||
| 364 | ASSERT(vertex != -1); | ||
| 365 | |||
| 366 | bool vertex_cache_hit = false; | ||
| 367 | |||
| 368 | if (is_indexed) { | ||
| 369 | if (g_debug_context && Pica::g_debug_context->recorder) { | ||
| 370 | int size = index_u16 ? 2 : 1; | ||
| 371 | memory_accesses.AddAccess(base_address + index_info.offset + size * index, | ||
| 372 | size); | ||
| 373 | } | ||
| 374 | |||
| 375 | for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { | ||
| 376 | if (vertex == vertex_cache_ids[i]) { | ||
| 377 | output_vertex = vertex_cache[i]; | ||
| 378 | vertex_cache_hit = true; | ||
| 379 | break; | ||
| 380 | } | ||
| 381 | } | ||
| 382 | } | ||
| 383 | |||
| 384 | if (!vertex_cache_hit) { | ||
| 385 | // Initialize data for the current vertex | ||
| 386 | Shader::AttributeBuffer input, output{}; | ||
| 387 | loader.LoadVertex(base_address, index, vertex, input, memory_accesses); | ||
| 388 | |||
| 389 | // Send to vertex shader | ||
| 390 | if (g_debug_context) | ||
| 391 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, | ||
| 392 | (void*)&input); | ||
| 393 | shader_unit.LoadInput(regs.vs, input); | ||
| 394 | shader_engine->Run(g_state.vs, shader_unit); | ||
| 395 | shader_unit.WriteOutput(regs.vs, output); | ||
| 396 | |||
| 397 | // Retrieve vertex from register data | ||
| 398 | output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output); | ||
| 399 | |||
| 400 | if (is_indexed) { | ||
| 401 | vertex_cache[vertex_cache_pos] = output_vertex; | ||
| 402 | vertex_cache_ids[vertex_cache_pos] = vertex; | ||
| 403 | vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; | ||
| 404 | } | ||
| 405 | } | ||
| 406 | |||
| 407 | // Send to renderer | ||
| 408 | using Pica::Shader::OutputVertex; | ||
| 409 | auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, | ||
| 410 | const OutputVertex& v2) { | ||
| 411 | VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); | ||
| 412 | }; | ||
| 413 | |||
| 414 | primitive_assembler.SubmitVertex(output_vertex, AddTriangle); | ||
| 415 | } | ||
| 416 | |||
| 417 | for (auto& range : memory_accesses.ranges) { | ||
| 418 | g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first), | ||
| 419 | range.second, range.first); | ||
| 420 | } | ||
| 421 | |||
| 422 | break; | 412 | break; |
| 423 | } | ||
| 424 | 413 | ||
| 425 | case PICA_REG_INDEX(gs.bool_uniforms): | 414 | case PICA_REG_INDEX(gs.bool_uniforms): |
| 426 | WriteUniformBoolReg(g_state.gs, g_state.regs.gs.bool_uniforms.Value()); | 415 | WriteUniformBoolReg(g_state.gs, g_state.regs.gs.bool_uniforms.Value()); |
| @@ -458,7 +447,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 458 | case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[5], 0x2a1): | 447 | case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[5], 0x2a1): |
| 459 | case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[6], 0x2a2): | 448 | case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[6], 0x2a2): |
| 460 | case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[7], 0x2a3): { | 449 | case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[7], 0x2a3): { |
| 461 | WriteProgramCode(g_state.regs.gs, g_state.gs, 4096, value); | 450 | u32& offset = g_state.regs.gs.program.offset; |
| 451 | if (offset >= 4096) { | ||
| 452 | LOG_ERROR(HW_GPU, "Invalid GS program offset %u", offset); | ||
| 453 | } else { | ||
| 454 | g_state.gs.program_code[offset] = value; | ||
| 455 | offset++; | ||
| 456 | } | ||
| 462 | break; | 457 | break; |
| 463 | } | 458 | } |
| 464 | 459 | ||
| @@ -470,11 +465,18 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 470 | case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[5], 0x2ab): | 465 | case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[5], 0x2ab): |
| 471 | case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[6], 0x2ac): | 466 | case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[6], 0x2ac): |
| 472 | case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[7], 0x2ad): { | 467 | case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[7], 0x2ad): { |
| 473 | WriteSwizzlePatterns(g_state.regs.gs, g_state.gs, value); | 468 | u32& offset = g_state.regs.gs.swizzle_patterns.offset; |
| 469 | if (offset >= g_state.gs.swizzle_data.size()) { | ||
| 470 | LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset %u", offset); | ||
| 471 | } else { | ||
| 472 | g_state.gs.swizzle_data[offset] = value; | ||
| 473 | offset++; | ||
| 474 | } | ||
| 474 | break; | 475 | break; |
| 475 | } | 476 | } |
| 476 | 477 | ||
| 477 | case PICA_REG_INDEX(vs.bool_uniforms): | 478 | case PICA_REG_INDEX(vs.bool_uniforms): |
| 479 | // TODO (wwylele): does regs.pipeline.gs_unit_exclusive_configuration affect this? | ||
| 478 | WriteUniformBoolReg(g_state.vs, g_state.regs.vs.bool_uniforms.Value()); | 480 | WriteUniformBoolReg(g_state.vs, g_state.regs.vs.bool_uniforms.Value()); |
| 479 | break; | 481 | break; |
| 480 | 482 | ||
| @@ -482,6 +484,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 482 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2): | 484 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2): |
| 483 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3): | 485 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3): |
| 484 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): { | 486 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): { |
| 487 | // TODO (wwylele): does regs.pipeline.gs_unit_exclusive_configuration affect this? | ||
| 485 | unsigned index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); | 488 | unsigned index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); |
| 486 | auto values = regs.vs.int_uniforms[index]; | 489 | auto values = regs.vs.int_uniforms[index]; |
| 487 | WriteUniformIntReg(g_state.vs, index, | 490 | WriteUniformIntReg(g_state.vs, index, |
| @@ -497,6 +500,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 497 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6): | 500 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6): |
| 498 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7): | 501 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7): |
| 499 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): { | 502 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): { |
| 503 | // TODO (wwylele): does regs.pipeline.gs_unit_exclusive_configuration affect this? | ||
| 500 | WriteUniformFloatReg(g_state.regs.vs, g_state.vs, vs_float_regs_counter, | 504 | WriteUniformFloatReg(g_state.regs.vs, g_state.vs, vs_float_regs_counter, |
| 501 | vs_uniform_write_buffer, value); | 505 | vs_uniform_write_buffer, value); |
| 502 | break; | 506 | break; |
| @@ -510,7 +514,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 510 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1): | 514 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1): |
| 511 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2): | 515 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2): |
| 512 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): { | 516 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): { |
| 513 | WriteProgramCode(g_state.regs.vs, g_state.vs, 512, value); | 517 | u32& offset = g_state.regs.vs.program.offset; |
| 518 | if (offset >= 512) { | ||
| 519 | LOG_ERROR(HW_GPU, "Invalid VS program offset %u", offset); | ||
| 520 | } else { | ||
| 521 | g_state.vs.program_code[offset] = value; | ||
| 522 | if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { | ||
| 523 | g_state.gs.program_code[offset] = value; | ||
| 524 | } | ||
| 525 | offset++; | ||
| 526 | } | ||
| 514 | break; | 527 | break; |
| 515 | } | 528 | } |
| 516 | 529 | ||
| @@ -522,7 +535,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 522 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db): | 535 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db): |
| 523 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc): | 536 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc): |
| 524 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): { | 537 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): { |
| 525 | WriteSwizzlePatterns(g_state.regs.vs, g_state.vs, value); | 538 | u32& offset = g_state.regs.vs.swizzle_patterns.offset; |
| 539 | if (offset >= g_state.vs.swizzle_data.size()) { | ||
| 540 | LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset %u", offset); | ||
| 541 | } else { | ||
| 542 | g_state.vs.swizzle_data[offset] = value; | ||
| 543 | if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { | ||
| 544 | g_state.gs.swizzle_data[offset] = value; | ||
| 545 | } | ||
| 546 | offset++; | ||
| 547 | } | ||
| 526 | break; | 548 | break; |
| 527 | } | 549 | } |
| 528 | 550 | ||
| @@ -620,6 +642,6 @@ void ProcessCommandList(const u32* list, u32 size) { | |||
| 620 | } | 642 | } |
| 621 | } | 643 | } |
| 622 | 644 | ||
| 623 | } // namespace | 645 | } // namespace CommandProcessor |
| 624 | 646 | ||
| 625 | } // namespace | 647 | } // namespace Pica |
diff --git a/src/video_core/geometry_pipeline.cpp b/src/video_core/geometry_pipeline.cpp new file mode 100644 index 000000000..98ff2ccd3 --- /dev/null +++ b/src/video_core/geometry_pipeline.cpp | |||
| @@ -0,0 +1,274 @@ | |||
| 1 | // Copyright 2017 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/geometry_pipeline.h" | ||
| 6 | #include "video_core/pica_state.h" | ||
| 7 | #include "video_core/regs.h" | ||
| 8 | #include "video_core/renderer_base.h" | ||
| 9 | #include "video_core/video_core.h" | ||
| 10 | |||
| 11 | namespace Pica { | ||
| 12 | |||
| 13 | /// An attribute buffering interface for different pipeline modes | ||
| 14 | class GeometryPipelineBackend { | ||
| 15 | public: | ||
| 16 | virtual ~GeometryPipelineBackend() = default; | ||
| 17 | |||
| 18 | /// Checks if there is no incomplete data transfer | ||
| 19 | virtual bool IsEmpty() const = 0; | ||
| 20 | |||
| 21 | /// Checks if the pipeline needs a direct input from index buffer | ||
| 22 | virtual bool NeedIndexInput() const = 0; | ||
| 23 | |||
| 24 | /// Submits an index from index buffer | ||
| 25 | virtual void SubmitIndex(unsigned int val) = 0; | ||
| 26 | |||
| 27 | /** | ||
| 28 | * Submits vertex attributes | ||
| 29 | * @param input attributes of a vertex output from vertex shader | ||
| 30 | * @return true if the buffer is full and the geometry shader should be invoked | ||
| 31 | */ | ||
| 32 | virtual bool SubmitVertex(const Shader::AttributeBuffer& input) = 0; | ||
| 33 | }; | ||
| 34 | |||
| 35 | // In the Point mode, vertex attributes are sent to the input registers in the geometry shader unit. | ||
| 36 | // The sizes of vertex shader outputs and geometry shader inputs are constant. The geometry shader | ||
| 37 | // is invoked once the input buffer is filled up by vertex shader outputs. For example, if we have | ||
| 38 | // a geometry shader that takes 6 inputs, and the vertex shader outputs 2 attributes, it would take | ||
| 39 | // 3 vertices for one geometry shader invocation. | ||
| 40 | // TODO: what happens when the input size is not divisible by the output size? | ||
| 41 | class GeometryPipeline_Point : public GeometryPipelineBackend { | ||
| 42 | public: | ||
| 43 | GeometryPipeline_Point(const Regs& regs, Shader::GSUnitState& unit) : regs(regs), unit(unit) { | ||
| 44 | ASSERT(regs.pipeline.variable_primitive == 0); | ||
| 45 | ASSERT(regs.gs.input_to_uniform == 0); | ||
| 46 | vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; | ||
| 47 | size_t gs_input_num = regs.gs.max_input_attribute_index + 1; | ||
| 48 | ASSERT(gs_input_num % vs_output_num == 0); | ||
| 49 | buffer_cur = attribute_buffer.attr; | ||
| 50 | buffer_end = attribute_buffer.attr + gs_input_num; | ||
| 51 | } | ||
| 52 | |||
| 53 | bool IsEmpty() const override { | ||
| 54 | return buffer_cur == attribute_buffer.attr; | ||
| 55 | } | ||
| 56 | |||
| 57 | bool NeedIndexInput() const override { | ||
| 58 | return false; | ||
| 59 | } | ||
| 60 | |||
| 61 | void SubmitIndex(unsigned int val) override { | ||
| 62 | UNREACHABLE(); | ||
| 63 | } | ||
| 64 | |||
| 65 | bool SubmitVertex(const Shader::AttributeBuffer& input) override { | ||
| 66 | buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); | ||
| 67 | if (buffer_cur == buffer_end) { | ||
| 68 | buffer_cur = attribute_buffer.attr; | ||
| 69 | unit.LoadInput(regs.gs, attribute_buffer); | ||
| 70 | return true; | ||
| 71 | } | ||
| 72 | return false; | ||
| 73 | } | ||
| 74 | |||
| 75 | private: | ||
| 76 | const Regs& regs; | ||
| 77 | Shader::GSUnitState& unit; | ||
| 78 | Shader::AttributeBuffer attribute_buffer; | ||
| 79 | Math::Vec4<float24>* buffer_cur; | ||
| 80 | Math::Vec4<float24>* buffer_end; | ||
| 81 | unsigned int vs_output_num; | ||
| 82 | }; | ||
| 83 | |||
| 84 | // In VariablePrimitive mode, vertex attributes are buffered into the uniform registers in the | ||
| 85 | // geometry shader unit. The number of vertices is variable and is specified by the first index | ||
| 86 | // value in the batch. This mode is usually used for subdivision. | ||
| 87 | class GeometryPipeline_VariablePrimitive : public GeometryPipelineBackend { | ||
| 88 | public: | ||
| 89 | GeometryPipeline_VariablePrimitive(const Regs& regs, Shader::ShaderSetup& setup) | ||
| 90 | : regs(regs), setup(setup) { | ||
| 91 | ASSERT(regs.pipeline.variable_primitive == 1); | ||
| 92 | ASSERT(regs.gs.input_to_uniform == 1); | ||
| 93 | vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; | ||
| 94 | } | ||
| 95 | |||
| 96 | bool IsEmpty() const override { | ||
| 97 | return need_index; | ||
| 98 | } | ||
| 99 | |||
| 100 | bool NeedIndexInput() const override { | ||
| 101 | return need_index; | ||
| 102 | } | ||
| 103 | |||
| 104 | void SubmitIndex(unsigned int val) override { | ||
| 105 | DEBUG_ASSERT(need_index); | ||
| 106 | |||
| 107 | // The number of input vertices is written to the first uniform register | ||
| 108 | float24 vertex_num = float24::FromFloat32(static_cast<float>(val)); | ||
| 109 | setup.uniforms.f[0] = Math::MakeVec(vertex_num, vertex_num, vertex_num, vertex_num); | ||
| 110 | |||
| 111 | // The second uniform register and so on are used for receiving input vertices | ||
| 112 | buffer_cur = setup.uniforms.f + 1; | ||
| 113 | |||
| 114 | main_vertex_num = regs.pipeline.variable_vertex_main_num_minus_1 + 1; | ||
| 115 | total_vertex_num = val; | ||
| 116 | need_index = false; | ||
| 117 | } | ||
| 118 | |||
| 119 | bool SubmitVertex(const Shader::AttributeBuffer& input) override { | ||
| 120 | DEBUG_ASSERT(!need_index); | ||
| 121 | if (main_vertex_num != 0) { | ||
| 122 | // For main vertices, receive all attributes | ||
| 123 | buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); | ||
| 124 | --main_vertex_num; | ||
| 125 | } else { | ||
| 126 | // For other vertices, only receive the first attribute (usually the position) | ||
| 127 | *(buffer_cur++) = input.attr[0]; | ||
| 128 | } | ||
| 129 | --total_vertex_num; | ||
| 130 | |||
| 131 | if (total_vertex_num == 0) { | ||
| 132 | need_index = true; | ||
| 133 | return true; | ||
| 134 | } | ||
| 135 | |||
| 136 | return false; | ||
| 137 | } | ||
| 138 | |||
| 139 | private: | ||
| 140 | bool need_index = true; | ||
| 141 | const Regs& regs; | ||
| 142 | Shader::ShaderSetup& setup; | ||
| 143 | unsigned int main_vertex_num; | ||
| 144 | unsigned int total_vertex_num; | ||
| 145 | Math::Vec4<float24>* buffer_cur; | ||
| 146 | unsigned int vs_output_num; | ||
| 147 | }; | ||
| 148 | |||
| 149 | // In FixedPrimitive mode, vertex attributes are buffered into the uniform registers in the geometry | ||
| 150 | // shader unit. The number of vertices per shader invocation is constant. This is usually used for | ||
| 151 | // particle systems. | ||
| 152 | class GeometryPipeline_FixedPrimitive : public GeometryPipelineBackend { | ||
| 153 | public: | ||
| 154 | GeometryPipeline_FixedPrimitive(const Regs& regs, Shader::ShaderSetup& setup) | ||
| 155 | : regs(regs), setup(setup) { | ||
| 156 | ASSERT(regs.pipeline.variable_primitive == 0); | ||
| 157 | ASSERT(regs.gs.input_to_uniform == 1); | ||
| 158 | vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; | ||
| 159 | ASSERT(vs_output_num == regs.pipeline.gs_config.stride_minus_1 + 1); | ||
| 160 | size_t vertex_num = regs.pipeline.gs_config.fixed_vertex_num_minus_1 + 1; | ||
| 161 | buffer_cur = buffer_begin = setup.uniforms.f + regs.pipeline.gs_config.start_index; | ||
| 162 | buffer_end = buffer_begin + vs_output_num * vertex_num; | ||
| 163 | } | ||
| 164 | |||
| 165 | bool IsEmpty() const override { | ||
| 166 | return buffer_cur == buffer_begin; | ||
| 167 | } | ||
| 168 | |||
| 169 | bool NeedIndexInput() const override { | ||
| 170 | return false; | ||
| 171 | } | ||
| 172 | |||
| 173 | void SubmitIndex(unsigned int val) override { | ||
| 174 | UNREACHABLE(); | ||
| 175 | } | ||
| 176 | |||
| 177 | bool SubmitVertex(const Shader::AttributeBuffer& input) override { | ||
| 178 | buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); | ||
| 179 | if (buffer_cur == buffer_end) { | ||
| 180 | buffer_cur = buffer_begin; | ||
| 181 | return true; | ||
| 182 | } | ||
| 183 | return false; | ||
| 184 | } | ||
| 185 | |||
| 186 | private: | ||
| 187 | const Regs& regs; | ||
| 188 | Shader::ShaderSetup& setup; | ||
| 189 | Math::Vec4<float24>* buffer_begin; | ||
| 190 | Math::Vec4<float24>* buffer_cur; | ||
| 191 | Math::Vec4<float24>* buffer_end; | ||
| 192 | unsigned int vs_output_num; | ||
| 193 | }; | ||
| 194 | |||
| 195 | GeometryPipeline::GeometryPipeline(State& state) : state(state) {} | ||
| 196 | |||
| 197 | GeometryPipeline::~GeometryPipeline() = default; | ||
| 198 | |||
| 199 | void GeometryPipeline::SetVertexHandler(Shader::VertexHandler vertex_handler) { | ||
| 200 | this->vertex_handler = vertex_handler; | ||
| 201 | } | ||
| 202 | |||
| 203 | void GeometryPipeline::Setup(Shader::ShaderEngine* shader_engine) { | ||
| 204 | if (!backend) | ||
| 205 | return; | ||
| 206 | |||
| 207 | this->shader_engine = shader_engine; | ||
| 208 | shader_engine->SetupBatch(state.gs, state.regs.gs.main_offset); | ||
| 209 | } | ||
| 210 | |||
| 211 | void GeometryPipeline::Reconfigure() { | ||
| 212 | ASSERT(!backend || backend->IsEmpty()); | ||
| 213 | |||
| 214 | if (state.regs.pipeline.use_gs == PipelineRegs::UseGS::No) { | ||
| 215 | backend = nullptr; | ||
| 216 | return; | ||
| 217 | } | ||
| 218 | |||
| 219 | ASSERT(state.regs.pipeline.use_gs == PipelineRegs::UseGS::Yes); | ||
| 220 | |||
| 221 | // The following assumes that when geometry shader is in use, the shader unit 3 is configured as | ||
| 222 | // a geometry shader unit. | ||
| 223 | // TODO: what happens if this is not true? | ||
| 224 | ASSERT(state.regs.pipeline.gs_unit_exclusive_configuration == 1); | ||
| 225 | ASSERT(state.regs.gs.shader_mode == ShaderRegs::ShaderMode::GS); | ||
| 226 | |||
| 227 | state.gs_unit.ConfigOutput(state.regs.gs); | ||
| 228 | |||
| 229 | ASSERT(state.regs.pipeline.vs_outmap_total_minus_1_a == | ||
| 230 | state.regs.pipeline.vs_outmap_total_minus_1_b); | ||
| 231 | |||
| 232 | switch (state.regs.pipeline.gs_config.mode) { | ||
| 233 | case PipelineRegs::GSMode::Point: | ||
| 234 | backend = std::make_unique<GeometryPipeline_Point>(state.regs, state.gs_unit); | ||
| 235 | break; | ||
| 236 | case PipelineRegs::GSMode::VariablePrimitive: | ||
| 237 | backend = std::make_unique<GeometryPipeline_VariablePrimitive>(state.regs, state.gs); | ||
| 238 | break; | ||
| 239 | case PipelineRegs::GSMode::FixedPrimitive: | ||
| 240 | backend = std::make_unique<GeometryPipeline_FixedPrimitive>(state.regs, state.gs); | ||
| 241 | break; | ||
| 242 | default: | ||
| 243 | UNREACHABLE(); | ||
| 244 | } | ||
| 245 | } | ||
| 246 | |||
| 247 | bool GeometryPipeline::NeedIndexInput() const { | ||
| 248 | if (!backend) | ||
| 249 | return false; | ||
| 250 | return backend->NeedIndexInput(); | ||
| 251 | } | ||
| 252 | |||
| 253 | void GeometryPipeline::SubmitIndex(unsigned int val) { | ||
| 254 | backend->SubmitIndex(val); | ||
| 255 | } | ||
| 256 | |||
| 257 | void GeometryPipeline::SubmitVertex(const Shader::AttributeBuffer& input) { | ||
| 258 | if (!backend) { | ||
| 259 | // No backend means the geometry shader is disabled, so we send the vertex shader output | ||
| 260 | // directly to the primitive assembler. | ||
| 261 | vertex_handler(input); | ||
| 262 | } else { | ||
| 263 | if (backend->SubmitVertex(input)) { | ||
| 264 | shader_engine->Run(state.gs, state.gs_unit); | ||
| 265 | |||
| 266 | // The uniform b15 is set to true after every geometry shader invocation. This is useful | ||
| 267 | // for the shader to know if this is the first invocation in a batch, if the program set | ||
| 268 | // b15 to false first. | ||
| 269 | state.gs.uniforms.b[15] = true; | ||
| 270 | } | ||
| 271 | } | ||
| 272 | } | ||
| 273 | |||
| 274 | } // namespace Pica | ||
diff --git a/src/video_core/geometry_pipeline.h b/src/video_core/geometry_pipeline.h new file mode 100644 index 000000000..91fdd3192 --- /dev/null +++ b/src/video_core/geometry_pipeline.h | |||
| @@ -0,0 +1,49 @@ | |||
| 1 | // Copyright 2017 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include "video_core/shader/shader.h" | ||
| 9 | |||
| 10 | namespace Pica { | ||
| 11 | |||
| 12 | struct State; | ||
| 13 | |||
| 14 | class GeometryPipelineBackend; | ||
| 15 | |||
| 16 | /// A pipeline receiving from vertex shader and sending to geometry shader and primitive assembler | ||
| 17 | class GeometryPipeline { | ||
| 18 | public: | ||
| 19 | explicit GeometryPipeline(State& state); | ||
| 20 | ~GeometryPipeline(); | ||
| 21 | |||
| 22 | /// Sets the handler for receiving vertex outputs from vertex shader | ||
| 23 | void SetVertexHandler(Shader::VertexHandler vertex_handler); | ||
| 24 | |||
| 25 | /** | ||
| 26 | * Sets up the geometry shader unit if it is in use | ||
| 27 | * @param shader_engine the shader engine for the geometry shader to run | ||
| 28 | */ | ||
| 29 | void Setup(Shader::ShaderEngine* shader_engine); | ||
| 30 | |||
| 31 | /// Reconfigures the pipeline according to current register settings | ||
| 32 | void Reconfigure(); | ||
| 33 | |||
| 34 | /// Checks if the pipeline needs a direct input from index buffer | ||
| 35 | bool NeedIndexInput() const; | ||
| 36 | |||
| 37 | /// Submits an index from index buffer. Call this only when NeedIndexInput returns true | ||
| 38 | void SubmitIndex(unsigned int val); | ||
| 39 | |||
| 40 | /// Submits vertex attributes output from vertex shader | ||
| 41 | void SubmitVertex(const Shader::AttributeBuffer& input); | ||
| 42 | |||
| 43 | private: | ||
| 44 | Shader::VertexHandler vertex_handler; | ||
| 45 | Shader::ShaderEngine* shader_engine; | ||
| 46 | std::unique_ptr<GeometryPipelineBackend> backend; | ||
| 47 | State& state; | ||
| 48 | }; | ||
| 49 | } // namespace Pica | ||
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index b95148a6a..218e06883 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp | |||
| @@ -3,9 +3,11 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | #include "video_core/geometry_pipeline.h" | ||
| 6 | #include "video_core/pica.h" | 7 | #include "video_core/pica.h" |
| 7 | #include "video_core/pica_state.h" | 8 | #include "video_core/pica_state.h" |
| 8 | #include "video_core/regs_pipeline.h" | 9 | #include "video_core/renderer_base.h" |
| 10 | #include "video_core/video_core.h" | ||
| 9 | 11 | ||
| 10 | namespace Pica { | 12 | namespace Pica { |
| 11 | 13 | ||
| @@ -24,6 +26,23 @@ void Zero(T& o) { | |||
| 24 | memset(&o, 0, sizeof(o)); | 26 | memset(&o, 0, sizeof(o)); |
| 25 | } | 27 | } |
| 26 | 28 | ||
| 29 | State::State() : geometry_pipeline(*this) { | ||
| 30 | auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) { | ||
| 31 | using Pica::Shader::OutputVertex; | ||
| 32 | auto AddTriangle = [this](const OutputVertex& v0, const OutputVertex& v1, | ||
| 33 | const OutputVertex& v2) { | ||
| 34 | VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); | ||
| 35 | }; | ||
| 36 | primitive_assembler.SubmitVertex( | ||
| 37 | Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, vertex), AddTriangle); | ||
| 38 | }; | ||
| 39 | |||
| 40 | auto SetWinding = [this]() { primitive_assembler.SetWinding(); }; | ||
| 41 | |||
| 42 | g_state.gs_unit.SetVertexHandler(SubmitVertex, SetWinding); | ||
| 43 | g_state.geometry_pipeline.SetVertexHandler(SubmitVertex); | ||
| 44 | } | ||
| 45 | |||
| 27 | void State::Reset() { | 46 | void State::Reset() { |
| 28 | Zero(regs); | 47 | Zero(regs); |
| 29 | Zero(vs); | 48 | Zero(vs); |
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index 2d23d34e6..c6634a0bc 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "common/bit_field.h" | 8 | #include "common/bit_field.h" |
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "common/vector_math.h" | 10 | #include "common/vector_math.h" |
| 11 | #include "video_core/geometry_pipeline.h" | ||
| 11 | #include "video_core/primitive_assembly.h" | 12 | #include "video_core/primitive_assembly.h" |
| 12 | #include "video_core/regs.h" | 13 | #include "video_core/regs.h" |
| 13 | #include "video_core/shader/shader.h" | 14 | #include "video_core/shader/shader.h" |
| @@ -16,6 +17,7 @@ namespace Pica { | |||
| 16 | 17 | ||
| 17 | /// Struct used to describe current Pica state | 18 | /// Struct used to describe current Pica state |
| 18 | struct State { | 19 | struct State { |
| 20 | State(); | ||
| 19 | void Reset(); | 21 | void Reset(); |
| 20 | 22 | ||
| 21 | /// Pica registers | 23 | /// Pica registers |
| @@ -79,7 +81,7 @@ struct State { | |||
| 79 | std::array<ColorDifferenceEntry, 256> color_diff_table; | 81 | std::array<ColorDifferenceEntry, 256> color_diff_table; |
| 80 | } proctex; | 82 | } proctex; |
| 81 | 83 | ||
| 82 | struct { | 84 | struct Lighting { |
| 83 | union LutEntry { | 85 | union LutEntry { |
| 84 | // Used for raw access | 86 | // Used for raw access |
| 85 | u32 raw; | 87 | u32 raw; |
| @@ -137,8 +139,17 @@ struct State { | |||
| 137 | Shader::AttributeBuffer input_vertex; | 139 | Shader::AttributeBuffer input_vertex; |
| 138 | // Index of the next attribute to be loaded into `input_vertex`. | 140 | // Index of the next attribute to be loaded into `input_vertex`. |
| 139 | u32 current_attribute = 0; | 141 | u32 current_attribute = 0; |
| 142 | // Indicates the immediate mode just started and the geometry pipeline needs to reconfigure | ||
| 143 | bool reset_geometry_pipeline = true; | ||
| 140 | } immediate; | 144 | } immediate; |
| 141 | 145 | ||
| 146 | // The geometry shader unit needs to be kept in the global state because some shaders rely on | ||
| 147 | // preserved register values across shader invocations. | ||
| 148 | // TODO: also bring the three vertex shader units here and implement the shader scheduler. | ||
| 149 | Shader::GSUnitState gs_unit; | ||
| 150 | |||
| 151 | GeometryPipeline geometry_pipeline; | ||
| 152 | |||
| 142 | // This is constructed with a dummy triangle topology | 153 | // This is constructed with a dummy triangle topology |
| 143 | PrimitiveAssembler<Shader::OutputVertex> primitive_assembler; | 154 | PrimitiveAssembler<Shader::OutputVertex> primitive_assembler; |
| 144 | }; | 155 | }; |
diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h index 5d7e10066..2eafa7e9e 100644 --- a/src/video_core/pica_types.h +++ b/src/video_core/pica_types.h | |||
| @@ -58,11 +58,12 @@ public: | |||
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | Float<M, E> operator*(const Float<M, E>& flt) const { | 60 | Float<M, E> operator*(const Float<M, E>& flt) const { |
| 61 | if ((this->value == 0.f && !std::isnan(flt.value)) || | 61 | float result = value * flt.ToFloat32(); |
| 62 | (flt.value == 0.f && !std::isnan(this->value))) | 62 | // PICA gives 0 instead of NaN when multiplying by inf |
| 63 | // PICA gives 0 instead of NaN when multiplying by inf | 63 | if (!std::isnan(value) && !std::isnan(flt.ToFloat32())) |
| 64 | return Zero(); | 64 | if (std::isnan(result)) |
| 65 | return Float<M, E>::FromFloat32(ToFloat32() * flt.ToFloat32()); | 65 | result = 0.f; |
| 66 | return Float<M, E>::FromFloat32(result); | ||
| 66 | } | 67 | } |
| 67 | 68 | ||
| 68 | Float<M, E> operator/(const Float<M, E>& flt) const { | 69 | Float<M, E> operator/(const Float<M, E>& flt) const { |
| @@ -78,12 +79,7 @@ public: | |||
| 78 | } | 79 | } |
| 79 | 80 | ||
| 80 | Float<M, E>& operator*=(const Float<M, E>& flt) { | 81 | Float<M, E>& operator*=(const Float<M, E>& flt) { |
| 81 | if ((this->value == 0.f && !std::isnan(flt.value)) || | 82 | value = operator*(flt).value; |
| 82 | (flt.value == 0.f && !std::isnan(this->value))) | ||
| 83 | // PICA gives 0 instead of NaN when multiplying by inf | ||
| 84 | *this = Zero(); | ||
| 85 | else | ||
| 86 | value *= flt.ToFloat32(); | ||
| 87 | return *this; | 83 | return *this; |
| 88 | } | 84 | } |
| 89 | 85 | ||
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index acd2ac5e2..9c3dd4cab 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp | |||
| @@ -17,15 +17,18 @@ template <typename VertexType> | |||
| 17 | void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx, | 17 | void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx, |
| 18 | TriangleHandler triangle_handler) { | 18 | TriangleHandler triangle_handler) { |
| 19 | switch (topology) { | 19 | switch (topology) { |
| 20 | // TODO: Figure out what's different with TriangleTopology::Shader. | ||
| 21 | case PipelineRegs::TriangleTopology::List: | 20 | case PipelineRegs::TriangleTopology::List: |
| 22 | case PipelineRegs::TriangleTopology::Shader: | 21 | case PipelineRegs::TriangleTopology::Shader: |
| 23 | if (buffer_index < 2) { | 22 | if (buffer_index < 2) { |
| 24 | buffer[buffer_index++] = vtx; | 23 | buffer[buffer_index++] = vtx; |
| 25 | } else { | 24 | } else { |
| 26 | buffer_index = 0; | 25 | buffer_index = 0; |
| 27 | 26 | if (topology == PipelineRegs::TriangleTopology::Shader && winding) { | |
| 28 | triangle_handler(buffer[0], buffer[1], vtx); | 27 | triangle_handler(buffer[1], buffer[0], vtx); |
| 28 | winding = false; | ||
| 29 | } else { | ||
| 30 | triangle_handler(buffer[0], buffer[1], vtx); | ||
| 31 | } | ||
| 29 | } | 32 | } |
| 30 | break; | 33 | break; |
| 31 | 34 | ||
| @@ -51,9 +54,15 @@ void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx, | |||
| 51 | } | 54 | } |
| 52 | 55 | ||
| 53 | template <typename VertexType> | 56 | template <typename VertexType> |
| 57 | void PrimitiveAssembler<VertexType>::SetWinding() { | ||
| 58 | winding = true; | ||
| 59 | } | ||
| 60 | |||
| 61 | template <typename VertexType> | ||
| 54 | void PrimitiveAssembler<VertexType>::Reset() { | 62 | void PrimitiveAssembler<VertexType>::Reset() { |
| 55 | buffer_index = 0; | 63 | buffer_index = 0; |
| 56 | strip_ready = false; | 64 | strip_ready = false; |
| 65 | winding = false; | ||
| 57 | } | 66 | } |
| 58 | 67 | ||
| 59 | template <typename VertexType> | 68 | template <typename VertexType> |
diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h index e8eccdf27..12de8e3b9 100644 --- a/src/video_core/primitive_assembly.h +++ b/src/video_core/primitive_assembly.h | |||
| @@ -30,6 +30,12 @@ struct PrimitiveAssembler { | |||
| 30 | void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler); | 30 | void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler); |
| 31 | 31 | ||
| 32 | /** | 32 | /** |
| 33 | * Invert the vertex order of the next triangle. Called by geometry shader emitter. | ||
| 34 | * This only takes effect for TriangleTopology::Shader. | ||
| 35 | */ | ||
| 36 | void SetWinding(); | ||
| 37 | |||
| 38 | /** | ||
| 33 | * Resets the internal state of the PrimitiveAssembler. | 39 | * Resets the internal state of the PrimitiveAssembler. |
| 34 | */ | 40 | */ |
| 35 | void Reset(); | 41 | void Reset(); |
| @@ -45,6 +51,7 @@ private: | |||
| 45 | int buffer_index; | 51 | int buffer_index; |
| 46 | VertexType buffer[2]; | 52 | VertexType buffer[2]; |
| 47 | bool strip_ready = false; | 53 | bool strip_ready = false; |
| 54 | bool winding = false; | ||
| 48 | }; | 55 | }; |
| 49 | 56 | ||
| 50 | } // namespace | 57 | } // namespace |
diff --git a/src/video_core/regs_framebuffer.h b/src/video_core/regs_framebuffer.h index a50bd4111..7b565f911 100644 --- a/src/video_core/regs_framebuffer.h +++ b/src/video_core/regs_framebuffer.h | |||
| @@ -256,10 +256,9 @@ struct FramebufferRegs { | |||
| 256 | return 3; | 256 | return 3; |
| 257 | case DepthFormat::D24S8: | 257 | case DepthFormat::D24S8: |
| 258 | return 4; | 258 | return 4; |
| 259 | default: | ||
| 260 | LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); | ||
| 261 | UNIMPLEMENTED(); | ||
| 262 | } | 259 | } |
| 260 | |||
| 261 | ASSERT_MSG(false, "Unknown depth format %u", format); | ||
| 263 | } | 262 | } |
| 264 | 263 | ||
| 265 | // Returns the number of bits per depth component of the specified depth format | 264 | // Returns the number of bits per depth component of the specified depth format |
| @@ -270,10 +269,9 @@ struct FramebufferRegs { | |||
| 270 | case DepthFormat::D24: | 269 | case DepthFormat::D24: |
| 271 | case DepthFormat::D24S8: | 270 | case DepthFormat::D24S8: |
| 272 | return 24; | 271 | return 24; |
| 273 | default: | ||
| 274 | LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); | ||
| 275 | UNIMPLEMENTED(); | ||
| 276 | } | 272 | } |
| 273 | |||
| 274 | ASSERT_MSG(false, "Unknown depth format %u", format); | ||
| 277 | } | 275 | } |
| 278 | 276 | ||
| 279 | INSERT_PADDING_WORDS(0x20); | 277 | INSERT_PADDING_WORDS(0x20); |
diff --git a/src/video_core/regs_pipeline.h b/src/video_core/regs_pipeline.h index 31c747d77..e78c3e331 100644 --- a/src/video_core/regs_pipeline.h +++ b/src/video_core/regs_pipeline.h | |||
| @@ -147,7 +147,15 @@ struct PipelineRegs { | |||
| 147 | // Number of vertices to render | 147 | // Number of vertices to render |
| 148 | u32 num_vertices; | 148 | u32 num_vertices; |
| 149 | 149 | ||
| 150 | INSERT_PADDING_WORDS(0x1); | 150 | enum class UseGS : u32 { |
| 151 | No = 0, | ||
| 152 | Yes = 2, | ||
| 153 | }; | ||
| 154 | |||
| 155 | union { | ||
| 156 | BitField<0, 2, UseGS> use_gs; | ||
| 157 | BitField<31, 1, u32> variable_primitive; | ||
| 158 | }; | ||
| 151 | 159 | ||
| 152 | // The index of the first vertex to render | 160 | // The index of the first vertex to render |
| 153 | u32 vertex_offset; | 161 | u32 vertex_offset; |
| @@ -202,7 +210,14 @@ struct PipelineRegs { | |||
| 202 | /// Number of input attributes to the vertex shader minus 1 | 210 | /// Number of input attributes to the vertex shader minus 1 |
| 203 | BitField<0, 4, u32> max_input_attrib_index; | 211 | BitField<0, 4, u32> max_input_attrib_index; |
| 204 | 212 | ||
| 205 | INSERT_PADDING_WORDS(2); | 213 | INSERT_PADDING_WORDS(1); |
| 214 | |||
| 215 | // The shader unit 3, which can be used for both vertex and geometry shader, gets its | ||
| 216 | // configuration depending on this register. If this is not set, unit 3 will share some | ||
| 217 | // configuration with other units. It is known that program code and swizzle pattern uploaded | ||
| 218 | // via regs.vs will be also uploaded to unit 3 if this is not set. Although very likely, it is | ||
| 219 | // still unclear whether uniforms and other configuration can be also shared. | ||
| 220 | BitField<0, 1, u32> gs_unit_exclusive_configuration; | ||
| 206 | 221 | ||
| 207 | enum class GPUMode : u32 { | 222 | enum class GPUMode : u32 { |
| 208 | Drawing = 0, | 223 | Drawing = 0, |
| @@ -211,7 +226,29 @@ struct PipelineRegs { | |||
| 211 | 226 | ||
| 212 | GPUMode gpu_mode; | 227 | GPUMode gpu_mode; |
| 213 | 228 | ||
| 214 | INSERT_PADDING_WORDS(0x18); | 229 | INSERT_PADDING_WORDS(0x4); |
| 230 | BitField<0, 4, u32> vs_outmap_total_minus_1_a; | ||
| 231 | INSERT_PADDING_WORDS(0x6); | ||
| 232 | BitField<0, 4, u32> vs_outmap_total_minus_1_b; | ||
| 233 | |||
| 234 | enum class GSMode : u32 { | ||
| 235 | Point = 0, | ||
| 236 | VariablePrimitive = 1, | ||
| 237 | FixedPrimitive = 2, | ||
| 238 | }; | ||
| 239 | |||
| 240 | union { | ||
| 241 | BitField<0, 8, GSMode> mode; | ||
| 242 | BitField<8, 4, u32> fixed_vertex_num_minus_1; | ||
| 243 | BitField<12, 4, u32> stride_minus_1; | ||
| 244 | BitField<16, 4, u32> start_index; | ||
| 245 | } gs_config; | ||
| 246 | |||
| 247 | INSERT_PADDING_WORDS(0x1); | ||
| 248 | |||
| 249 | u32 variable_vertex_main_num_minus_1; | ||
| 250 | |||
| 251 | INSERT_PADDING_WORDS(0x9); | ||
| 215 | 252 | ||
| 216 | enum class TriangleTopology : u32 { | 253 | enum class TriangleTopology : u32 { |
| 217 | List = 0, | 254 | List = 0, |
diff --git a/src/video_core/regs_rasterizer.h b/src/video_core/regs_rasterizer.h index 2874fd127..4fef00d76 100644 --- a/src/video_core/regs_rasterizer.h +++ b/src/video_core/regs_rasterizer.h | |||
| @@ -5,10 +5,10 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | |||
| 9 | #include "common/bit_field.h" | 8 | #include "common/bit_field.h" |
| 10 | #include "common/common_funcs.h" | 9 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/pica_types.h" | ||
| 12 | 12 | ||
| 13 | namespace Pica { | 13 | namespace Pica { |
| 14 | 14 | ||
| @@ -31,7 +31,17 @@ struct RasterizerRegs { | |||
| 31 | 31 | ||
| 32 | BitField<0, 24, u32> viewport_size_y; | 32 | BitField<0, 24, u32> viewport_size_y; |
| 33 | 33 | ||
| 34 | INSERT_PADDING_WORDS(0x9); | 34 | INSERT_PADDING_WORDS(0x3); |
| 35 | |||
| 36 | BitField<0, 1, u32> clip_enable; | ||
| 37 | BitField<0, 24, u32> clip_coef[4]; // float24 | ||
| 38 | |||
| 39 | Math::Vec4<float24> GetClipCoef() const { | ||
| 40 | return {float24::FromRaw(clip_coef[0]), float24::FromRaw(clip_coef[1]), | ||
| 41 | float24::FromRaw(clip_coef[2]), float24::FromRaw(clip_coef[3])}; | ||
| 42 | } | ||
| 43 | |||
| 44 | INSERT_PADDING_WORDS(0x1); | ||
| 35 | 45 | ||
| 36 | BitField<0, 24, u32> viewport_depth_range; // float24 | 46 | BitField<0, 24, u32> viewport_depth_range; // float24 |
| 37 | BitField<0, 24, u32> viewport_depth_near_plane; // float24 | 47 | BitField<0, 24, u32> viewport_depth_near_plane; // float24 |
diff --git a/src/video_core/regs_shader.h b/src/video_core/regs_shader.h index ddb1ee451..c15d4d162 100644 --- a/src/video_core/regs_shader.h +++ b/src/video_core/regs_shader.h | |||
| @@ -24,9 +24,16 @@ struct ShaderRegs { | |||
| 24 | 24 | ||
| 25 | INSERT_PADDING_WORDS(0x4); | 25 | INSERT_PADDING_WORDS(0x4); |
| 26 | 26 | ||
| 27 | enum ShaderMode { | ||
| 28 | GS = 0x08, | ||
| 29 | VS = 0xA0, | ||
| 30 | }; | ||
| 31 | |||
| 27 | union { | 32 | union { |
| 28 | // Number of input attributes to shader unit - 1 | 33 | // Number of input attributes to shader unit - 1 |
| 29 | BitField<0, 4, u32> max_input_attribute_index; | 34 | BitField<0, 4, u32> max_input_attribute_index; |
| 35 | BitField<8, 8, u32> input_to_uniform; | ||
| 36 | BitField<24, 8, ShaderMode> shader_mode; | ||
| 30 | }; | 37 | }; |
| 31 | 38 | ||
| 32 | // Offset to shader program entry point (in words) | 39 | // Offset to shader program entry point (in words) |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 1c6c15a58..7e09e4712 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -28,6 +28,9 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); | |||
| 28 | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); | 28 | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); |
| 29 | 29 | ||
| 30 | RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { | 30 | RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { |
| 31 | // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 | ||
| 32 | state.clip_distance[0] = true; | ||
| 33 | |||
| 31 | // Create sampler objects | 34 | // Create sampler objects |
| 32 | for (size_t i = 0; i < texture_samplers.size(); ++i) { | 35 | for (size_t i = 0; i < texture_samplers.size(); ++i) { |
| 33 | texture_samplers[i].Create(); | 36 | texture_samplers[i].Create(); |
| @@ -166,6 +169,8 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { | |||
| 166 | glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle); | 169 | glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle); |
| 167 | 170 | ||
| 168 | // Sync fixed function OpenGL state | 171 | // Sync fixed function OpenGL state |
| 172 | SyncClipEnabled(); | ||
| 173 | SyncClipCoef(); | ||
| 169 | SyncCullMode(); | 174 | SyncCullMode(); |
| 170 | SyncBlendEnabled(); | 175 | SyncBlendEnabled(); |
| 171 | SyncBlendFuncs(); | 176 | SyncBlendFuncs(); |
| @@ -232,13 +237,24 @@ void RasterizerOpenGL::DrawTriangles() { | |||
| 232 | 237 | ||
| 233 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, | 238 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, |
| 234 | color_surface != nullptr ? color_surface->texture.handle : 0, 0); | 239 | color_surface != nullptr ? color_surface->texture.handle : 0, 0); |
| 235 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | 240 | if (depth_surface != nullptr) { |
| 236 | depth_surface != nullptr ? depth_surface->texture.handle : 0, 0); | 241 | if (regs.framebuffer.framebuffer.depth_format == |
| 237 | bool has_stencil = | 242 | Pica::FramebufferRegs::DepthFormat::D24S8) { |
| 238 | regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; | 243 | // attach both depth and stencil |
| 239 | glFramebufferTexture2D( | 244 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, |
| 240 | GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | 245 | depth_surface->texture.handle, 0); |
| 241 | (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0); | 246 | } else { |
| 247 | // attach depth | ||
| 248 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | ||
| 249 | depth_surface->texture.handle, 0); | ||
| 250 | // clear stencil attachment | ||
| 251 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 252 | } | ||
| 253 | } else { | ||
| 254 | // clear both depth and stencil attachment | ||
| 255 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 256 | 0); | ||
| 257 | } | ||
| 242 | 258 | ||
| 243 | // Sync the viewport | 259 | // Sync the viewport |
| 244 | // These registers hold half-width and half-height, so must be multiplied by 2 | 260 | // These registers hold half-width and half-height, so must be multiplied by 2 |
| @@ -398,6 +414,18 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | |||
| 398 | SyncCullMode(); | 414 | SyncCullMode(); |
| 399 | break; | 415 | break; |
| 400 | 416 | ||
| 417 | // Clipping plane | ||
| 418 | case PICA_REG_INDEX(rasterizer.clip_enable): | ||
| 419 | SyncClipEnabled(); | ||
| 420 | break; | ||
| 421 | |||
| 422 | case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[0], 0x48): | ||
| 423 | case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[1], 0x49): | ||
| 424 | case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[2], 0x4a): | ||
| 425 | case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[3], 0x4b): | ||
| 426 | SyncClipCoef(); | ||
| 427 | break; | ||
| 428 | |||
| 401 | // Depth modifiers | 429 | // Depth modifiers |
| 402 | case PICA_REG_INDEX(rasterizer.viewport_depth_range): | 430 | case PICA_REG_INDEX(rasterizer.viewport_depth_range): |
| 403 | SyncDepthScale(); | 431 | SyncDepthScale(); |
| @@ -1277,6 +1305,20 @@ void RasterizerOpenGL::SetShader() { | |||
| 1277 | } | 1305 | } |
| 1278 | } | 1306 | } |
| 1279 | 1307 | ||
| 1308 | void RasterizerOpenGL::SyncClipEnabled() { | ||
| 1309 | state.clip_distance[1] = Pica::g_state.regs.rasterizer.clip_enable != 0; | ||
| 1310 | } | ||
| 1311 | |||
| 1312 | void RasterizerOpenGL::SyncClipCoef() { | ||
| 1313 | const auto raw_clip_coef = Pica::g_state.regs.rasterizer.GetClipCoef(); | ||
| 1314 | const GLvec4 new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(), | ||
| 1315 | raw_clip_coef.z.ToFloat32(), raw_clip_coef.w.ToFloat32()}; | ||
| 1316 | if (new_clip_coef != uniform_block_data.data.clip_coef) { | ||
| 1317 | uniform_block_data.data.clip_coef = new_clip_coef; | ||
| 1318 | uniform_block_data.dirty = true; | ||
| 1319 | } | ||
| 1320 | } | ||
| 1321 | |||
| 1280 | void RasterizerOpenGL::SyncCullMode() { | 1322 | void RasterizerOpenGL::SyncCullMode() { |
| 1281 | const auto& regs = Pica::g_state.regs; | 1323 | const auto& regs = Pica::g_state.regs; |
| 1282 | 1324 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 78e218efe..46c62961c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -151,14 +151,21 @@ private: | |||
| 151 | LightSrc light_src[8]; | 151 | LightSrc light_src[8]; |
| 152 | alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages | 152 | alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages |
| 153 | alignas(16) GLvec4 tev_combiner_buffer_color; | 153 | alignas(16) GLvec4 tev_combiner_buffer_color; |
| 154 | alignas(16) GLvec4 clip_coef; | ||
| 154 | }; | 155 | }; |
| 155 | 156 | ||
| 156 | static_assert( | 157 | static_assert( |
| 157 | sizeof(UniformData) == 0x460, | 158 | sizeof(UniformData) == 0x470, |
| 158 | "The size of the UniformData structure has changed, update the structure in the shader"); | 159 | "The size of the UniformData structure has changed, update the structure in the shader"); |
| 159 | static_assert(sizeof(UniformData) < 16384, | 160 | static_assert(sizeof(UniformData) < 16384, |
| 160 | "UniformData structure must be less than 16kb as per the OpenGL spec"); | 161 | "UniformData structure must be less than 16kb as per the OpenGL spec"); |
| 161 | 162 | ||
| 163 | /// Syncs the clip enabled status to match the PICA register | ||
| 164 | void SyncClipEnabled(); | ||
| 165 | |||
| 166 | /// Syncs the clip coefficients to match the PICA register | ||
| 167 | void SyncClipCoef(); | ||
| 168 | |||
| 162 | /// Sets the OpenGL shader in accordance with the current PICA register state | 169 | /// Sets the OpenGL shader in accordance with the current PICA register state |
| 163 | void SetShader(); | 170 | void SetShader(); |
| 164 | 171 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index bb192affd..9fe183944 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 9 | #include "common/bit_field.h" | 9 | #include "common/bit_field.h" |
| 10 | #include "common/logging/log.h" | 10 | #include "common/logging/log.h" |
| 11 | #include "core/core.h" | ||
| 11 | #include "video_core/regs_framebuffer.h" | 12 | #include "video_core/regs_framebuffer.h" |
| 12 | #include "video_core/regs_lighting.h" | 13 | #include "video_core/regs_lighting.h" |
| 13 | #include "video_core/regs_rasterizer.h" | 14 | #include "video_core/regs_rasterizer.h" |
| @@ -24,6 +25,42 @@ using TevStageConfig = TexturingRegs::TevStageConfig; | |||
| 24 | 25 | ||
| 25 | namespace GLShader { | 26 | namespace GLShader { |
| 26 | 27 | ||
| 28 | static const std::string UniformBlockDef = R"( | ||
| 29 | #define NUM_TEV_STAGES 6 | ||
| 30 | #define NUM_LIGHTS 8 | ||
| 31 | |||
| 32 | struct LightSrc { | ||
| 33 | vec3 specular_0; | ||
| 34 | vec3 specular_1; | ||
| 35 | vec3 diffuse; | ||
| 36 | vec3 ambient; | ||
| 37 | vec3 position; | ||
| 38 | vec3 spot_direction; | ||
| 39 | float dist_atten_bias; | ||
| 40 | float dist_atten_scale; | ||
| 41 | }; | ||
| 42 | |||
| 43 | layout (std140) uniform shader_data { | ||
| 44 | vec2 framebuffer_scale; | ||
| 45 | int alphatest_ref; | ||
| 46 | float depth_scale; | ||
| 47 | float depth_offset; | ||
| 48 | int scissor_x1; | ||
| 49 | int scissor_y1; | ||
| 50 | int scissor_x2; | ||
| 51 | int scissor_y2; | ||
| 52 | vec3 fog_color; | ||
| 53 | vec2 proctex_noise_f; | ||
| 54 | vec2 proctex_noise_a; | ||
| 55 | vec2 proctex_noise_p; | ||
| 56 | vec3 lighting_global_ambient; | ||
| 57 | LightSrc light_src[NUM_LIGHTS]; | ||
| 58 | vec4 const_color[NUM_TEV_STAGES]; | ||
| 59 | vec4 tev_combiner_buffer_color; | ||
| 60 | vec4 clip_coef; | ||
| 61 | }; | ||
| 62 | )"; | ||
| 63 | |||
| 27 | PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) { | 64 | PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) { |
| 28 | PicaShaderConfig res; | 65 | PicaShaderConfig res; |
| 29 | 66 | ||
| @@ -525,11 +562,12 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 525 | "float geo_factor = 1.0;\n"; | 562 | "float geo_factor = 1.0;\n"; |
| 526 | 563 | ||
| 527 | // Compute fragment normals and tangents | 564 | // Compute fragment normals and tangents |
| 528 | const std::string pertubation = | 565 | auto Perturbation = [&]() { |
| 529 | "2.0 * (" + SampleTexture(config, lighting.bump_selector) + ").rgb - 1.0"; | 566 | return "2.0 * (" + SampleTexture(config, lighting.bump_selector) + ").rgb - 1.0"; |
| 567 | }; | ||
| 530 | if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { | 568 | if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { |
| 531 | // Bump mapping is enabled using a normal map | 569 | // Bump mapping is enabled using a normal map |
| 532 | out += "vec3 surface_normal = " + pertubation + ";\n"; | 570 | out += "vec3 surface_normal = " + Perturbation() + ";\n"; |
| 533 | 571 | ||
| 534 | // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher | 572 | // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher |
| 535 | // precision result | 573 | // precision result |
| @@ -543,7 +581,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 543 | out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; | 581 | out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; |
| 544 | } else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) { | 582 | } else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) { |
| 545 | // Bump mapping is enabled using a tangent map | 583 | // Bump mapping is enabled using a tangent map |
| 546 | out += "vec3 surface_tangent = " + pertubation + ";\n"; | 584 | out += "vec3 surface_tangent = " + Perturbation() + ";\n"; |
| 547 | // Mathematically, recomputing Z-component of the tangent vector won't affect the relevant | 585 | // Mathematically, recomputing Z-component of the tangent vector won't affect the relevant |
| 548 | // computation below, which is also confirmed on 3DS. So we don't bother recomputing here | 586 | // computation below, which is also confirmed on 3DS. So we don't bother recomputing here |
| 549 | // even if 'renorm' is enabled. | 587 | // even if 'renorm' is enabled. |
| @@ -593,8 +631,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 593 | // Note: even if the normal vector is modified by normal map, which is not the | 631 | // Note: even if the normal vector is modified by normal map, which is not the |
| 594 | // normal of the tangent plane anymore, the half angle vector is still projected | 632 | // normal of the tangent plane anymore, the half angle vector is still projected |
| 595 | // using the modified normal vector. | 633 | // using the modified normal vector. |
| 596 | std::string half_angle_proj = "normalize(half_vector) - normal / dot(normal, " | 634 | std::string half_angle_proj = |
| 597 | "normal) * dot(normal, normalize(half_vector))"; | 635 | "normalize(half_vector) - normal * dot(normal, normalize(half_vector))"; |
| 598 | // Note: the half angle vector projection is confirmed not normalized before the dot | 636 | // Note: the half angle vector projection is confirmed not normalized before the dot |
| 599 | // product. The result is in fact not cos(phi) as the name suggested. | 637 | // product. The result is in fact not cos(phi) as the name suggested. |
| 600 | index = "dot(" + half_angle_proj + ", tangent)"; | 638 | index = "dot(" + half_angle_proj + ", tangent)"; |
| @@ -749,7 +787,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 749 | } | 787 | } |
| 750 | 788 | ||
| 751 | // Fresnel | 789 | // Fresnel |
| 752 | if (lighting.lut_fr.enable && | 790 | // Note: only the last entry in the light slots applies the Fresnel factor |
| 791 | if (light_index == lighting.src_num - 1 && lighting.lut_fr.enable && | ||
| 753 | LightingRegs::IsLightingSamplerSupported(lighting.config, | 792 | LightingRegs::IsLightingSamplerSupported(lighting.config, |
| 754 | LightingRegs::LightingSampler::Fresnel)) { | 793 | LightingRegs::LightingSampler::Fresnel)) { |
| 755 | // Lookup fresnel LUT value | 794 | // Lookup fresnel LUT value |
| @@ -758,17 +797,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 758 | lighting.lut_fr.type, lighting.lut_fr.abs_input); | 797 | lighting.lut_fr.type, lighting.lut_fr.abs_input); |
| 759 | value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + value + ")"; | 798 | value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + value + ")"; |
| 760 | 799 | ||
| 761 | // Enabled for difffuse lighting alpha component | 800 | // Enabled for diffuse lighting alpha component |
| 762 | if (lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::PrimaryAlpha || | 801 | if (lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::PrimaryAlpha || |
| 763 | lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { | 802 | lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { |
| 764 | out += "diffuse_sum.a *= " + value + ";\n"; | 803 | out += "diffuse_sum.a = " + value + ";\n"; |
| 765 | } | 804 | } |
| 766 | 805 | ||
| 767 | // Enabled for the specular lighting alpha component | 806 | // Enabled for the specular lighting alpha component |
| 768 | if (lighting.fresnel_selector == | 807 | if (lighting.fresnel_selector == |
| 769 | LightingRegs::LightingFresnelSelector::SecondaryAlpha || | 808 | LightingRegs::LightingFresnelSelector::SecondaryAlpha || |
| 770 | lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { | 809 | lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { |
| 771 | out += "specular_sum.a *= " + value + ";\n"; | 810 | out += "specular_sum.a = " + value + ";\n"; |
| 772 | } | 811 | } |
| 773 | } | 812 | } |
| 774 | 813 | ||
| @@ -1007,8 +1046,6 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) { | |||
| 1007 | 1046 | ||
| 1008 | std::string out = R"( | 1047 | std::string out = R"( |
| 1009 | #version 330 core | 1048 | #version 330 core |
| 1010 | #define NUM_TEV_STAGES 6 | ||
| 1011 | #define NUM_LIGHTS 8 | ||
| 1012 | 1049 | ||
| 1013 | in vec4 primary_color; | 1050 | in vec4 primary_color; |
| 1014 | in vec2 texcoord[3]; | 1051 | in vec2 texcoord[3]; |
| @@ -1020,36 +1057,6 @@ in vec4 gl_FragCoord; | |||
| 1020 | 1057 | ||
| 1021 | out vec4 color; | 1058 | out vec4 color; |
| 1022 | 1059 | ||
| 1023 | struct LightSrc { | ||
| 1024 | vec3 specular_0; | ||
| 1025 | vec3 specular_1; | ||
| 1026 | vec3 diffuse; | ||
| 1027 | vec3 ambient; | ||
| 1028 | vec3 position; | ||
| 1029 | vec3 spot_direction; | ||
| 1030 | float dist_atten_bias; | ||
| 1031 | float dist_atten_scale; | ||
| 1032 | }; | ||
| 1033 | |||
| 1034 | layout (std140) uniform shader_data { | ||
| 1035 | vec2 framebuffer_scale; | ||
| 1036 | int alphatest_ref; | ||
| 1037 | float depth_scale; | ||
| 1038 | float depth_offset; | ||
| 1039 | int scissor_x1; | ||
| 1040 | int scissor_y1; | ||
| 1041 | int scissor_x2; | ||
| 1042 | int scissor_y2; | ||
| 1043 | vec3 fog_color; | ||
| 1044 | vec2 proctex_noise_f; | ||
| 1045 | vec2 proctex_noise_a; | ||
| 1046 | vec2 proctex_noise_p; | ||
| 1047 | vec3 lighting_global_ambient; | ||
| 1048 | LightSrc light_src[NUM_LIGHTS]; | ||
| 1049 | vec4 const_color[NUM_TEV_STAGES]; | ||
| 1050 | vec4 tev_combiner_buffer_color; | ||
| 1051 | }; | ||
| 1052 | |||
| 1053 | uniform sampler2D tex[3]; | 1060 | uniform sampler2D tex[3]; |
| 1054 | uniform samplerBuffer lighting_lut; | 1061 | uniform samplerBuffer lighting_lut; |
| 1055 | uniform samplerBuffer fog_lut; | 1062 | uniform samplerBuffer fog_lut; |
| @@ -1058,7 +1065,11 @@ uniform samplerBuffer proctex_color_map; | |||
| 1058 | uniform samplerBuffer proctex_alpha_map; | 1065 | uniform samplerBuffer proctex_alpha_map; |
| 1059 | uniform samplerBuffer proctex_lut; | 1066 | uniform samplerBuffer proctex_lut; |
| 1060 | uniform samplerBuffer proctex_diff_lut; | 1067 | uniform samplerBuffer proctex_diff_lut; |
| 1068 | )"; | ||
| 1069 | |||
| 1070 | out += UniformBlockDef; | ||
| 1061 | 1071 | ||
| 1072 | out += R"( | ||
| 1062 | // Rotate the vector v by the quaternion q | 1073 | // Rotate the vector v by the quaternion q |
| 1063 | vec3 quaternion_rotate(vec4 q, vec3 v) { | 1074 | vec3 quaternion_rotate(vec4 q, vec3 v) { |
| 1064 | return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); | 1075 | return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); |
| @@ -1111,7 +1122,10 @@ vec4 secondary_fragment_color = vec4(0.0); | |||
| 1111 | "gl_FragCoord.y < scissor_y2)) discard;\n"; | 1122 | "gl_FragCoord.y < scissor_y2)) discard;\n"; |
| 1112 | } | 1123 | } |
| 1113 | 1124 | ||
| 1114 | out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n"; | 1125 | // After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use |
| 1126 | // default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then | ||
| 1127 | // do our own transformation according to PICA specification. | ||
| 1128 | out += "float z_over_w = 2.0 * gl_FragCoord.z - 1.0;\n"; | ||
| 1115 | out += "float depth = z_over_w * depth_scale + depth_offset;\n"; | 1129 | out += "float depth = z_over_w * depth_scale + depth_offset;\n"; |
| 1116 | if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) { | 1130 | if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) { |
| 1117 | out += "depth /= gl_FragCoord.w;\n"; | 1131 | out += "depth /= gl_FragCoord.w;\n"; |
| @@ -1151,6 +1165,11 @@ vec4 secondary_fragment_color = vec4(0.0); | |||
| 1151 | 1165 | ||
| 1152 | // Blend the fog | 1166 | // Blend the fog |
| 1153 | out += "last_tex_env_out.rgb = mix(fog_color.rgb, last_tex_env_out.rgb, fog_factor);\n"; | 1167 | out += "last_tex_env_out.rgb = mix(fog_color.rgb, last_tex_env_out.rgb, fog_factor);\n"; |
| 1168 | } else if (state.fog_mode == TexturingRegs::FogMode::Gas) { | ||
| 1169 | Core::Telemetry().AddField(Telemetry::FieldType::Session, "VideoCore_Pica_UseGasMode", | ||
| 1170 | true); | ||
| 1171 | LOG_CRITICAL(Render_OpenGL, "Unimplemented gas mode"); | ||
| 1172 | UNIMPLEMENTED(); | ||
| 1154 | } | 1173 | } |
| 1155 | 1174 | ||
| 1156 | out += "gl_FragDepth = depth;\n"; | 1175 | out += "gl_FragDepth = depth;\n"; |
| @@ -1186,6 +1205,12 @@ out float texcoord0_w; | |||
| 1186 | out vec4 normquat; | 1205 | out vec4 normquat; |
| 1187 | out vec3 view; | 1206 | out vec3 view; |
| 1188 | 1207 | ||
| 1208 | )"; | ||
| 1209 | |||
| 1210 | out += UniformBlockDef; | ||
| 1211 | |||
| 1212 | out += R"( | ||
| 1213 | |||
| 1189 | void main() { | 1214 | void main() { |
| 1190 | primary_color = vert_color; | 1215 | primary_color = vert_color; |
| 1191 | texcoord[0] = vert_texcoord0; | 1216 | texcoord[0] = vert_texcoord0; |
| @@ -1194,7 +1219,9 @@ void main() { | |||
| 1194 | texcoord0_w = vert_texcoord0_w; | 1219 | texcoord0_w = vert_texcoord0_w; |
| 1195 | normquat = vert_normquat; | 1220 | normquat = vert_normquat; |
| 1196 | view = vert_view; | 1221 | view = vert_view; |
| 1197 | gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); | 1222 | gl_Position = vert_position; |
| 1223 | gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0 | ||
| 1224 | gl_ClipDistance[1] = dot(clip_coef, vert_position); | ||
| 1198 | } | 1225 | } |
| 1199 | )"; | 1226 | )"; |
| 1200 | 1227 | ||
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index bc9d34b84..5770ae08f 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -68,6 +68,8 @@ OpenGLState::OpenGLState() { | |||
| 68 | draw.vertex_buffer = 0; | 68 | draw.vertex_buffer = 0; |
| 69 | draw.uniform_buffer = 0; | 69 | draw.uniform_buffer = 0; |
| 70 | draw.shader_program = 0; | 70 | draw.shader_program = 0; |
| 71 | |||
| 72 | clip_distance = {}; | ||
| 71 | } | 73 | } |
| 72 | 74 | ||
| 73 | void OpenGLState::Apply() const { | 75 | void OpenGLState::Apply() const { |
| @@ -261,6 +263,17 @@ void OpenGLState::Apply() const { | |||
| 261 | glUseProgram(draw.shader_program); | 263 | glUseProgram(draw.shader_program); |
| 262 | } | 264 | } |
| 263 | 265 | ||
| 266 | // Clip distance | ||
| 267 | for (size_t i = 0; i < clip_distance.size(); ++i) { | ||
| 268 | if (clip_distance[i] != cur_state.clip_distance[i]) { | ||
| 269 | if (clip_distance[i]) { | ||
| 270 | glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); | ||
| 271 | } else { | ||
| 272 | glDisable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); | ||
| 273 | } | ||
| 274 | } | ||
| 275 | } | ||
| 276 | |||
| 264 | cur_state = *this; | 277 | cur_state = *this; |
| 265 | } | 278 | } |
| 266 | 279 | ||
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 745a74479..437fe34c4 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 7 | #include <glad/glad.h> | 8 | #include <glad/glad.h> |
| 8 | 9 | ||
| 9 | namespace TextureUnits { | 10 | namespace TextureUnits { |
| @@ -123,6 +124,8 @@ public: | |||
| 123 | GLuint shader_program; // GL_CURRENT_PROGRAM | 124 | GLuint shader_program; // GL_CURRENT_PROGRAM |
| 124 | } draw; | 125 | } draw; |
| 125 | 126 | ||
| 127 | std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE | ||
| 128 | |||
| 126 | OpenGLState(); | 129 | OpenGLState(); |
| 127 | 130 | ||
| 128 | /// Get the currently active OpenGL state | 131 | /// Get the currently active OpenGL state |
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 67ed19ba8..2857d2829 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -21,7 +21,8 @@ namespace Pica { | |||
| 21 | 21 | ||
| 22 | namespace Shader { | 22 | namespace Shader { |
| 23 | 23 | ||
| 24 | OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& input) { | 24 | OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, |
| 25 | const AttributeBuffer& input) { | ||
| 25 | // Setup output data | 26 | // Setup output data |
| 26 | union { | 27 | union { |
| 27 | OutputVertex ret{}; | 28 | OutputVertex ret{}; |
| @@ -51,7 +52,8 @@ OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, Attri | |||
| 51 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing | 52 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing |
| 52 | // interpolation | 53 | // interpolation |
| 53 | for (unsigned i = 0; i < 4; ++i) { | 54 | for (unsigned i = 0; i < 4; ++i) { |
| 54 | ret.color[i] = float24::FromFloat32(std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); | 55 | float c = std::fabs(ret.color[i].ToFloat32()); |
| 56 | ret.color[i] = float24::FromFloat32(c < 1.0f ? c : 1.0f); | ||
| 55 | } | 57 | } |
| 56 | 58 | ||
| 57 | LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " | 59 | LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " |
| @@ -82,6 +84,44 @@ void UnitState::WriteOutput(const ShaderRegs& config, AttributeBuffer& output) { | |||
| 82 | } | 84 | } |
| 83 | } | 85 | } |
| 84 | 86 | ||
| 87 | UnitState::UnitState(GSEmitter* emitter) : emitter_ptr(emitter) {} | ||
| 88 | |||
| 89 | GSEmitter::GSEmitter() { | ||
| 90 | handlers = new Handlers; | ||
| 91 | } | ||
| 92 | |||
| 93 | GSEmitter::~GSEmitter() { | ||
| 94 | delete handlers; | ||
| 95 | } | ||
| 96 | |||
| 97 | void GSEmitter::Emit(Math::Vec4<float24> (&vertex)[16]) { | ||
| 98 | ASSERT(vertex_id < 3); | ||
| 99 | std::copy(std::begin(vertex), std::end(vertex), buffer[vertex_id].begin()); | ||
| 100 | if (prim_emit) { | ||
| 101 | if (winding) | ||
| 102 | handlers->winding_setter(); | ||
| 103 | for (size_t i = 0; i < buffer.size(); ++i) { | ||
| 104 | AttributeBuffer output; | ||
| 105 | unsigned int output_i = 0; | ||
| 106 | for (unsigned int reg : Common::BitSet<u32>(output_mask)) { | ||
| 107 | output.attr[output_i++] = buffer[i][reg]; | ||
| 108 | } | ||
| 109 | handlers->vertex_handler(output); | ||
| 110 | } | ||
| 111 | } | ||
| 112 | } | ||
| 113 | |||
| 114 | GSUnitState::GSUnitState() : UnitState(&emitter) {} | ||
| 115 | |||
| 116 | void GSUnitState::SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter) { | ||
| 117 | emitter.handlers->vertex_handler = std::move(vertex_handler); | ||
| 118 | emitter.handlers->winding_setter = std::move(winding_setter); | ||
| 119 | } | ||
| 120 | |||
| 121 | void GSUnitState::ConfigOutput(const ShaderRegs& config) { | ||
| 122 | emitter.output_mask = config.output_mask; | ||
| 123 | } | ||
| 124 | |||
| 85 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | 125 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); |
| 86 | 126 | ||
| 87 | #ifdef ARCHITECTURE_x86_64 | 127 | #ifdef ARCHITECTURE_x86_64 |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index e156f6aef..a3789da01 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <functional> | ||
| 9 | #include <type_traits> | 10 | #include <type_traits> |
| 10 | #include <nihstro/shader_bytecode.h> | 11 | #include <nihstro/shader_bytecode.h> |
| 11 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| @@ -31,6 +32,12 @@ struct AttributeBuffer { | |||
| 31 | alignas(16) Math::Vec4<float24> attr[16]; | 32 | alignas(16) Math::Vec4<float24> attr[16]; |
| 32 | }; | 33 | }; |
| 33 | 34 | ||
| 35 | /// Handler type for receiving vertex outputs from vertex shader or geometry shader | ||
| 36 | using VertexHandler = std::function<void(const AttributeBuffer&)>; | ||
| 37 | |||
| 38 | /// Handler type for signaling to invert the vertex order of the next triangle | ||
| 39 | using WindingSetter = std::function<void()>; | ||
| 40 | |||
| 34 | struct OutputVertex { | 41 | struct OutputVertex { |
| 35 | Math::Vec4<float24> pos; | 42 | Math::Vec4<float24> pos; |
| 36 | Math::Vec4<float24> quat; | 43 | Math::Vec4<float24> quat; |
| @@ -43,7 +50,8 @@ struct OutputVertex { | |||
| 43 | INSERT_PADDING_WORDS(1); | 50 | INSERT_PADDING_WORDS(1); |
| 44 | Math::Vec2<float24> tc2; | 51 | Math::Vec2<float24> tc2; |
| 45 | 52 | ||
| 46 | static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& output); | 53 | static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, |
| 54 | const AttributeBuffer& output); | ||
| 47 | }; | 55 | }; |
| 48 | #define ASSERT_POS(var, pos) \ | 56 | #define ASSERT_POS(var, pos) \ |
| 49 | static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \ | 57 | static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \ |
| @@ -61,12 +69,36 @@ static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | |||
| 61 | static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); | 69 | static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); |
| 62 | 70 | ||
| 63 | /** | 71 | /** |
| 72 | * This structure contains state information for primitive emitting in geometry shader. | ||
| 73 | */ | ||
| 74 | struct GSEmitter { | ||
| 75 | std::array<std::array<Math::Vec4<float24>, 16>, 3> buffer; | ||
| 76 | u8 vertex_id; | ||
| 77 | bool prim_emit; | ||
| 78 | bool winding; | ||
| 79 | u32 output_mask; | ||
| 80 | |||
| 81 | // Function objects are hidden behind a raw pointer to make the structure standard layout type, | ||
| 82 | // for JIT to use offsetof to access other members. | ||
| 83 | struct Handlers { | ||
| 84 | VertexHandler vertex_handler; | ||
| 85 | WindingSetter winding_setter; | ||
| 86 | } * handlers; | ||
| 87 | |||
| 88 | GSEmitter(); | ||
| 89 | ~GSEmitter(); | ||
| 90 | void Emit(Math::Vec4<float24> (&vertex)[16]); | ||
| 91 | }; | ||
| 92 | static_assert(std::is_standard_layout<GSEmitter>::value, "GSEmitter is not standard layout type"); | ||
| 93 | |||
| 94 | /** | ||
| 64 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS | 95 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS |
| 65 | * has four shader units that process shaders in parallel. At the present, Citra only implements a | 96 | * has four shader units that process shaders in parallel. At the present, Citra only implements a |
| 66 | * single shader unit that processes all shaders serially. Putting the state information in a struct | 97 | * single shader unit that processes all shaders serially. Putting the state information in a struct |
| 67 | * here will make it easier for us to parallelize the shader processing later. | 98 | * here will make it easier for us to parallelize the shader processing later. |
| 68 | */ | 99 | */ |
| 69 | struct UnitState { | 100 | struct UnitState { |
| 101 | explicit UnitState(GSEmitter* emitter = nullptr); | ||
| 70 | struct Registers { | 102 | struct Registers { |
| 71 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore | 103 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore |
| 72 | // required to be 16-byte aligned. | 104 | // required to be 16-byte aligned. |
| @@ -82,6 +114,8 @@ struct UnitState { | |||
| 82 | // TODO: How many bits do these actually have? | 114 | // TODO: How many bits do these actually have? |
| 83 | s32 address_registers[3]; | 115 | s32 address_registers[3]; |
| 84 | 116 | ||
| 117 | GSEmitter* emitter_ptr; | ||
| 118 | |||
| 85 | static size_t InputOffset(const SourceRegister& reg) { | 119 | static size_t InputOffset(const SourceRegister& reg) { |
| 86 | switch (reg.GetRegisterType()) { | 120 | switch (reg.GetRegisterType()) { |
| 87 | case RegisterType::Input: | 121 | case RegisterType::Input: |
| @@ -125,6 +159,19 @@ struct UnitState { | |||
| 125 | void WriteOutput(const ShaderRegs& config, AttributeBuffer& output); | 159 | void WriteOutput(const ShaderRegs& config, AttributeBuffer& output); |
| 126 | }; | 160 | }; |
| 127 | 161 | ||
| 162 | /** | ||
| 163 | * This is an extended shader unit state that represents the special unit that can run both vertex | |
| 164 | * shaders and geometry shaders. It contains an additional primitive emitter and utilities for | |
| 165 | * geometry shaders. | |
| 166 | */ | ||
| 167 | struct GSUnitState : public UnitState { | ||
| 168 | GSUnitState(); | ||
| 169 | void SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter); | ||
| 170 | void ConfigOutput(const ShaderRegs& config); | ||
| 171 | |||
| 172 | GSEmitter emitter; | ||
| 173 | }; | ||
| 174 | |||
| 128 | struct ShaderSetup { | 175 | struct ShaderSetup { |
| 129 | struct { | 176 | struct { |
| 130 | // The float uniforms are accessed by the shader JIT using SSE instructions, and are | 177 | // The float uniforms are accessed by the shader JIT using SSE instructions, and are |
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index aa1cec81f..9d4da4904 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -631,11 +631,27 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | |||
| 631 | state.address_registers[2] = loop_param.y; | 631 | state.address_registers[2] = loop_param.y; |
| 632 | 632 | ||
| 633 | Record<DebugDataRecord::LOOP_INT_IN>(debug_data, iteration, loop_param); | 633 | Record<DebugDataRecord::LOOP_INT_IN>(debug_data, iteration, loop_param); |
| 634 | call(program_counter + 1, instr.flow_control.dest_offset - program_counter + 1, | 634 | call(program_counter + 1, instr.flow_control.dest_offset - program_counter, |
| 635 | instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z); | 635 | instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z); |
| 636 | break; | 636 | break; |
| 637 | } | 637 | } |
| 638 | 638 | ||
| 639 | case OpCode::Id::EMIT: { | ||
| 640 | GSEmitter* emitter = state.emitter_ptr; | ||
| 641 | ASSERT_MSG(emitter, "Execute EMIT on VS"); | ||
| 642 | emitter->Emit(state.registers.output); | ||
| 643 | break; | ||
| 644 | } | ||
| 645 | |||
| 646 | case OpCode::Id::SETEMIT: { | ||
| 647 | GSEmitter* emitter = state.emitter_ptr; | ||
| 648 | ASSERT_MSG(emitter, "Execute SETEMIT on VS"); | ||
| 649 | emitter->vertex_id = instr.setemit.vertex_id; | ||
| 650 | emitter->prim_emit = instr.setemit.prim_emit != 0; | ||
| 651 | emitter->winding = instr.setemit.winding != 0; | ||
| 652 | break; | ||
| 653 | } | ||
| 654 | |||
| 639 | default: | 655 | default: |
| 640 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | 656 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", |
| 641 | (int)instr.opcode.Value().EffectiveOpCode(), | 657 | (int)instr.opcode.Value().EffectiveOpCode(), |
diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index 42a57aab1..1b31623bd 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp | |||
| @@ -75,8 +75,8 @@ const JitFunction instr_table[64] = { | |||
| 75 | &JitShader::Compile_IF, // ifu | 75 | &JitShader::Compile_IF, // ifu |
| 76 | &JitShader::Compile_IF, // ifc | 76 | &JitShader::Compile_IF, // ifc |
| 77 | &JitShader::Compile_LOOP, // loop | 77 | &JitShader::Compile_LOOP, // loop |
| 78 | nullptr, // emit | 78 | &JitShader::Compile_EMIT, // emit |
| 79 | nullptr, // sete | 79 | &JitShader::Compile_SETE, // sete |
| 80 | &JitShader::Compile_JMP, // jmpc | 80 | &JitShader::Compile_JMP, // jmpc |
| 81 | &JitShader::Compile_JMP, // jmpu | 81 | &JitShader::Compile_JMP, // jmpu |
| 82 | &JitShader::Compile_CMP, // cmp | 82 | &JitShader::Compile_CMP, // cmp |
| @@ -772,6 +772,51 @@ void JitShader::Compile_JMP(Instruction instr) { | |||
| 772 | } | 772 | } |
| 773 | } | 773 | } |
| 774 | 774 | ||
| 775 | static void Emit(GSEmitter* emitter, Math::Vec4<float24> (*output)[16]) { | ||
| 776 | emitter->Emit(*output); | ||
| 777 | } | ||
| 778 | |||
| 779 | void JitShader::Compile_EMIT(Instruction instr) { | ||
| 780 | Label have_emitter, end; | ||
| 781 | mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]); | ||
| 782 | test(rax, rax); | ||
| 783 | jnz(have_emitter); | ||
| 784 | |||
| 785 | ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 786 | mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute EMIT on VS")); | ||
| 787 | CallFarFunction(*this, LogCritical); | ||
| 788 | ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 789 | jmp(end); | ||
| 790 | |||
| 791 | L(have_emitter); | ||
| 792 | ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 793 | mov(ABI_PARAM1, rax); | ||
| 794 | mov(ABI_PARAM2, STATE); | ||
| 795 | add(ABI_PARAM2, static_cast<Xbyak::uint32>(offsetof(UnitState, registers.output))); | ||
| 796 | CallFarFunction(*this, Emit); | ||
| 797 | ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 798 | L(end); | ||
| 799 | } | ||
| 800 | |||
| 801 | void JitShader::Compile_SETE(Instruction instr) { | ||
| 802 | Label have_emitter, end; | ||
| 803 | mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]); | ||
| 804 | test(rax, rax); | ||
| 805 | jnz(have_emitter); | ||
| 806 | |||
| 807 | ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 808 | mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute SETEMIT on VS")); | ||
| 809 | CallFarFunction(*this, LogCritical); | ||
| 810 | ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 811 | jmp(end); | ||
| 812 | |||
| 813 | L(have_emitter); | ||
| 814 | mov(byte[rax + offsetof(GSEmitter, vertex_id)], instr.setemit.vertex_id); | ||
| 815 | mov(byte[rax + offsetof(GSEmitter, prim_emit)], instr.setemit.prim_emit); | ||
| 816 | mov(byte[rax + offsetof(GSEmitter, winding)], instr.setemit.winding); | ||
| 817 | L(end); | ||
| 818 | } | ||
| 819 | |||
| 775 | void JitShader::Compile_Block(unsigned end) { | 820 | void JitShader::Compile_Block(unsigned end) { |
| 776 | while (program_counter < end) { | 821 | while (program_counter < end) { |
| 777 | Compile_NextInstr(); | 822 | Compile_NextInstr(); |
diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h index 31af0ca48..4aee56b1d 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.h +++ b/src/video_core/shader/shader_jit_x64_compiler.h | |||
| @@ -66,6 +66,8 @@ public: | |||
| 66 | void Compile_JMP(Instruction instr); | 66 | void Compile_JMP(Instruction instr); |
| 67 | void Compile_CMP(Instruction instr); | 67 | void Compile_CMP(Instruction instr); |
| 68 | void Compile_MAD(Instruction instr); | 68 | void Compile_MAD(Instruction instr); |
| 69 | void Compile_EMIT(Instruction instr); | ||
| 70 | void Compile_SETE(Instruction instr); | ||
| 69 | 71 | ||
| 70 | private: | 72 | private: |
| 71 | void Compile_Block(unsigned end); | 73 | void Compile_Block(unsigned end); |
diff --git a/src/video_core/swrasterizer/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp index 6fb923756..c1ed48398 100644 --- a/src/video_core/swrasterizer/clipper.cpp +++ b/src/video_core/swrasterizer/clipper.cpp | |||
| @@ -31,7 +31,7 @@ public: | |||
| 31 | : coeffs(coeffs), bias(bias) {} | 31 | : coeffs(coeffs), bias(bias) {} |
| 32 | 32 | ||
| 33 | bool IsInside(const Vertex& vertex) const { | 33 | bool IsInside(const Vertex& vertex) const { |
| 34 | return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0); | 34 | return Math::Dot(vertex.pos + bias, coeffs) >= float24::FromFloat32(0); |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | bool IsOutSide(const Vertex& vertex) const { | 37 | bool IsOutSide(const Vertex& vertex) const { |
| @@ -95,6 +95,17 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu | |||
| 95 | static const size_t MAX_VERTICES = 9; | 95 | static const size_t MAX_VERTICES = 9; |
| 96 | static_vector<Vertex, MAX_VERTICES> buffer_a = {v0, v1, v2}; | 96 | static_vector<Vertex, MAX_VERTICES> buffer_a = {v0, v1, v2}; |
| 97 | static_vector<Vertex, MAX_VERTICES> buffer_b; | 97 | static_vector<Vertex, MAX_VERTICES> buffer_b; |
| 98 | |||
| 99 | auto FlipQuaternionIfOpposite = [](auto& a, const auto& b) { | ||
| 100 | if (Math::Dot(a, b) < float24::Zero()) | ||
| 101 | a = a * float24::FromFloat32(-1.0f); | ||
| 102 | }; | ||
| 103 | |||
| 104 | // Flip the quaternions if they are opposite to prevent interpolating them over the wrong | ||
| 105 | // direction. | ||
| 106 | FlipQuaternionIfOpposite(buffer_a[1].quat, buffer_a[0].quat); | ||
| 107 | FlipQuaternionIfOpposite(buffer_a[2].quat, buffer_a[0].quat); | ||
| 108 | |||
| 98 | auto* output_list = &buffer_a; | 109 | auto* output_list = &buffer_a; |
| 99 | auto* input_list = &buffer_b; | 110 | auto* input_list = &buffer_b; |
| 100 | 111 | ||
| @@ -105,23 +116,18 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu | |||
| 105 | static const float24 f0 = float24::FromFloat32(0.0); | 116 | static const float24 f0 = float24::FromFloat32(0.0); |
| 106 | static const float24 f1 = float24::FromFloat32(1.0); | 117 | static const float24 f1 = float24::FromFloat32(1.0); |
| 107 | static const std::array<ClippingEdge, 7> clipping_edges = {{ | 118 | static const std::array<ClippingEdge, 7> clipping_edges = {{ |
| 108 | {Math::MakeVec(f1, f0, f0, -f1)}, // x = +w | 119 | {Math::MakeVec(-f1, f0, f0, f1)}, // x = +w |
| 109 | {Math::MakeVec(-f1, f0, f0, -f1)}, // x = -w | 120 | {Math::MakeVec(f1, f0, f0, f1)}, // x = -w |
| 110 | {Math::MakeVec(f0, f1, f0, -f1)}, // y = +w | 121 | {Math::MakeVec(f0, -f1, f0, f1)}, // y = +w |
| 111 | {Math::MakeVec(f0, -f1, f0, -f1)}, // y = -w | 122 | {Math::MakeVec(f0, f1, f0, f1)}, // y = -w |
| 112 | {Math::MakeVec(f0, f0, f1, f0)}, // z = 0 | 123 | {Math::MakeVec(f0, f0, -f1, f0)}, // z = 0 |
| 113 | {Math::MakeVec(f0, f0, -f1, -f1)}, // z = -w | 124 | {Math::MakeVec(f0, f0, f1, f1)}, // z = -w |
| 114 | {Math::MakeVec(f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON)}, // w = EPSILON | 125 | {Math::MakeVec(f0, f0, f0, f1), Math::Vec4<float24>(f0, f0, f0, EPSILON)}, // w = EPSILON |
| 115 | }}; | 126 | }}; |
| 116 | 127 | ||
| 117 | // TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii) | ||
| 118 | // drop the whole primitive instead of clipping the primitive properly. We should test if | ||
| 119 | // this happens on the 3DS, too. | ||
| 120 | |||
| 121 | // Simple implementation of the Sutherland-Hodgman clipping algorithm. | 128 | // Simple implementation of the Sutherland-Hodgman clipping algorithm. |
| 122 | // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) | 129 | // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) |
| 123 | for (auto edge : clipping_edges) { | 130 | auto Clip = [&](const ClippingEdge& edge) { |
| 124 | |||
| 125 | std::swap(input_list, output_list); | 131 | std::swap(input_list, output_list); |
| 126 | output_list->clear(); | 132 | output_list->clear(); |
| 127 | 133 | ||
| @@ -140,12 +146,24 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu | |||
| 140 | } | 146 | } |
| 141 | reference_vertex = &vertex; | 147 | reference_vertex = &vertex; |
| 142 | } | 148 | } |
| 149 | }; | ||
| 150 | |||
| 151 | for (auto edge : clipping_edges) { | ||
| 152 | Clip(edge); | ||
| 143 | 153 | ||
| 144 | // Need to have at least a full triangle to continue... | 154 | // Need to have at least a full triangle to continue... |
| 145 | if (output_list->size() < 3) | 155 | if (output_list->size() < 3) |
| 146 | return; | 156 | return; |
| 147 | } | 157 | } |
| 148 | 158 | ||
| 159 | if (g_state.regs.rasterizer.clip_enable) { | ||
| 160 | ClippingEdge custom_edge{g_state.regs.rasterizer.GetClipCoef()}; | ||
| 161 | Clip(custom_edge); | ||
| 162 | |||
| 163 | if (output_list->size() < 3) | ||
| 164 | return; | ||
| 165 | } | ||
| 166 | |||
| 149 | InitScreenCoordinates((*output_list)[0]); | 167 | InitScreenCoordinates((*output_list)[0]); |
| 150 | InitScreenCoordinates((*output_list)[1]); | 168 | InitScreenCoordinates((*output_list)[1]); |
| 151 | 169 | ||
diff --git a/src/video_core/swrasterizer/framebuffer.cpp b/src/video_core/swrasterizer/framebuffer.cpp index 7de3aac75..f34eab6cf 100644 --- a/src/video_core/swrasterizer/framebuffer.cpp +++ b/src/video_core/swrasterizer/framebuffer.cpp | |||
| @@ -352,6 +352,8 @@ u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op) { | |||
| 352 | case FramebufferRegs::LogicOp::OrInverted: | 352 | case FramebufferRegs::LogicOp::OrInverted: |
| 353 | return ~src | dest; | 353 | return ~src | dest; |
| 354 | } | 354 | } |
| 355 | |||
| 356 | UNREACHABLE(); | ||
| 355 | }; | 357 | }; |
| 356 | 358 | ||
| 357 | } // namespace Rasterizer | 359 | } // namespace Rasterizer |
diff --git a/src/video_core/swrasterizer/lighting.cpp b/src/video_core/swrasterizer/lighting.cpp new file mode 100644 index 000000000..5fa748611 --- /dev/null +++ b/src/video_core/swrasterizer/lighting.cpp | |||
| @@ -0,0 +1,308 @@ | |||
| 1 | // Copyright 2017 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/math_util.h" | ||
| 6 | #include "video_core/swrasterizer/lighting.h" | ||
| 7 | |||
| 8 | namespace Pica { | ||
| 9 | |||
| 10 | static float LookupLightingLut(const Pica::State::Lighting& lighting, size_t lut_index, u8 index, | ||
| 11 | float delta) { | ||
| 12 | ASSERT_MSG(lut_index < lighting.luts.size(), "Out of range lut"); | ||
| 13 | ASSERT_MSG(index < lighting.luts[lut_index].size(), "Out of range index"); | ||
| 14 | |||
| 15 | const auto& lut = lighting.luts[lut_index][index]; | ||
| 16 | |||
| 17 | float lut_value = lut.ToFloat(); | ||
| 18 | float lut_diff = lut.DiffToFloat(); | ||
| 19 | |||
| 20 | return lut_value + lut_diff * delta; | ||
| 21 | } | ||
| 22 | |||
| 23 | std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors( | ||
| 24 | const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, | ||
| 25 | const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view, | ||
| 26 | const Math::Vec4<u8> (&texture_color)[4]) { | ||
| 27 | |||
| 28 | Math::Vec3<float> surface_normal; | ||
| 29 | Math::Vec3<float> surface_tangent; | ||
| 30 | |||
| 31 | if (lighting.config0.bump_mode != LightingRegs::LightingBumpMode::None) { | ||
| 32 | Math::Vec3<float> perturbation = | ||
| 33 | texture_color[lighting.config0.bump_selector].xyz().Cast<float>() / 127.5f - | ||
| 34 | Math::MakeVec(1.0f, 1.0f, 1.0f); | ||
| 35 | if (lighting.config0.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { | ||
| 36 | if (!lighting.config0.disable_bump_renorm) { | ||
| 37 | const float z_square = 1 - perturbation.xy().Length2(); | ||
| 38 | perturbation.z = std::sqrt(std::max(z_square, 0.0f)); | ||
| 39 | } | ||
| 40 | surface_normal = perturbation; | ||
| 41 | surface_tangent = Math::MakeVec(1.0f, 0.0f, 0.0f); | ||
| 42 | } else if (lighting.config0.bump_mode == LightingRegs::LightingBumpMode::TangentMap) { | ||
| 43 | surface_normal = Math::MakeVec(0.0f, 0.0f, 1.0f); | ||
| 44 | surface_tangent = perturbation; | ||
| 45 | } else { | ||
| 46 | LOG_ERROR(HW_GPU, "Unknown bump mode %u", lighting.config0.bump_mode.Value()); | ||
| 47 | } | ||
| 48 | } else { | ||
| 49 | surface_normal = Math::MakeVec(0.0f, 0.0f, 1.0f); | ||
| 50 | surface_tangent = Math::MakeVec(1.0f, 0.0f, 0.0f); | ||
| 51 | } | ||
| 52 | |||
| 53 | // Use the normalized the quaternion when performing the rotation | ||
| 54 | auto normal = Math::QuaternionRotate(normquat, surface_normal); | ||
| 55 | auto tangent = Math::QuaternionRotate(normquat, surface_tangent); | ||
| 56 | |||
| 57 | Math::Vec4<float> diffuse_sum = {0.0f, 0.0f, 0.0f, 1.0f}; | ||
| 58 | Math::Vec4<float> specular_sum = {0.0f, 0.0f, 0.0f, 1.0f}; | ||
| 59 | |||
| 60 | for (unsigned light_index = 0; light_index <= lighting.max_light_index; ++light_index) { | ||
| 61 | unsigned num = lighting.light_enable.GetNum(light_index); | ||
| 62 | const auto& light_config = lighting.light[num]; | ||
| 63 | |||
| 64 | Math::Vec3<float> refl_value = {}; | ||
| 65 | Math::Vec3<float> position = {float16::FromRaw(light_config.x).ToFloat32(), | ||
| 66 | float16::FromRaw(light_config.y).ToFloat32(), | ||
| 67 | float16::FromRaw(light_config.z).ToFloat32()}; | ||
| 68 | Math::Vec3<float> light_vector; | ||
| 69 | |||
| 70 | if (light_config.config.directional) | ||
| 71 | light_vector = position; | ||
| 72 | else | ||
| 73 | light_vector = position + view; | ||
| 74 | |||
| 75 | light_vector.Normalize(); | ||
| 76 | |||
| 77 | Math::Vec3<float> norm_view = view.Normalized(); | ||
| 78 | Math::Vec3<float> half_vector = norm_view + light_vector; | ||
| 79 | |||
| 80 | float dist_atten = 1.0f; | ||
| 81 | if (!lighting.IsDistAttenDisabled(num)) { | ||
| 82 | auto distance = (-view - position).Length(); | ||
| 83 | float scale = Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32(); | ||
| 84 | float bias = Pica::float20::FromRaw(light_config.dist_atten_bias).ToFloat32(); | ||
| 85 | size_t lut = | ||
| 86 | static_cast<size_t>(LightingRegs::LightingSampler::DistanceAttenuation) + num; | ||
| 87 | |||
| 88 | float sample_loc = MathUtil::Clamp(scale * distance + bias, 0.0f, 1.0f); | ||
| 89 | |||
| 90 | u8 lutindex = | ||
| 91 | static_cast<u8>(MathUtil::Clamp(std::floor(sample_loc * 256.0f), 0.0f, 255.0f)); | ||
| 92 | float delta = sample_loc * 256 - lutindex; | ||
| 93 | dist_atten = LookupLightingLut(lighting_state, lut, lutindex, delta); | ||
| 94 | } | ||
| 95 | |||
| 96 | auto GetLutValue = [&](LightingRegs::LightingLutInput input, bool abs, | ||
| 97 | LightingRegs::LightingScale scale_enum, | ||
| 98 | LightingRegs::LightingSampler sampler) { | ||
| 99 | float result = 0.0f; | ||
| 100 | |||
| 101 | switch (input) { | ||
| 102 | case LightingRegs::LightingLutInput::NH: | ||
| 103 | result = Math::Dot(normal, half_vector.Normalized()); | ||
| 104 | break; | ||
| 105 | |||
| 106 | case LightingRegs::LightingLutInput::VH: | ||
| 107 | result = Math::Dot(norm_view, half_vector.Normalized()); | ||
| 108 | break; | ||
| 109 | |||
| 110 | case LightingRegs::LightingLutInput::NV: | ||
| 111 | result = Math::Dot(normal, norm_view); | ||
| 112 | break; | ||
| 113 | |||
| 114 | case LightingRegs::LightingLutInput::LN: | ||
| 115 | result = Math::Dot(light_vector, normal); | ||
| 116 | break; | ||
| 117 | |||
| 118 | case LightingRegs::LightingLutInput::SP: { | ||
| 119 | Math::Vec3<s32> spot_dir{light_config.spot_x.Value(), light_config.spot_y.Value(), | ||
| 120 | light_config.spot_z.Value()}; | ||
| 121 | result = Math::Dot(light_vector, spot_dir.Cast<float>() / 2047.0f); | ||
| 122 | break; | ||
| 123 | } | ||
| 124 | case LightingRegs::LightingLutInput::CP: | ||
| 125 | if (lighting.config0.config == LightingRegs::LightingConfig::Config7) { | ||
| 126 | const Math::Vec3<float> norm_half_vector = half_vector.Normalized(); | ||
| 127 | const Math::Vec3<float> half_vector_proj = | ||
| 128 | norm_half_vector - normal * Math::Dot(normal, norm_half_vector); | ||
| 129 | result = Math::Dot(half_vector_proj, tangent); | ||
| 130 | } else { | ||
| 131 | result = 0.0f; | ||
| 132 | } | ||
| 133 | break; | ||
| 134 | default: | ||
| 135 | LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %u\n", static_cast<u32>(input)); | ||
| 136 | UNIMPLEMENTED(); | ||
| 137 | result = 0.0f; | ||
| 138 | } | ||
| 139 | |||
| 140 | u8 index; | ||
| 141 | float delta; | ||
| 142 | |||
| 143 | if (abs) { | ||
| 144 | if (light_config.config.two_sided_diffuse) | ||
| 145 | result = std::abs(result); | ||
| 146 | else | ||
| 147 | result = std::max(result, 0.0f); | ||
| 148 | |||
| 149 | float flr = std::floor(result * 256.0f); | ||
| 150 | index = static_cast<u8>(MathUtil::Clamp(flr, 0.0f, 255.0f)); | ||
| 151 | delta = result * 256 - index; | ||
| 152 | } else { | ||
| 153 | float flr = std::floor(result * 128.0f); | ||
| 154 | s8 signed_index = static_cast<s8>(MathUtil::Clamp(flr, -128.0f, 127.0f)); | ||
| 155 | delta = result * 128.0f - signed_index; | ||
| 156 | index = static_cast<u8>(signed_index); | ||
| 157 | } | ||
| 158 | |||
| 159 | float scale = lighting.lut_scale.GetScale(scale_enum); | ||
| 160 | return scale * | ||
| 161 | LookupLightingLut(lighting_state, static_cast<size_t>(sampler), index, delta); | ||
| 162 | }; | ||
| 163 | |||
| 164 | // If enabled, compute spot light attenuation value | ||
| 165 | float spot_atten = 1.0f; | ||
| 166 | if (!lighting.IsSpotAttenDisabled(num) && | ||
| 167 | LightingRegs::IsLightingSamplerSupported( | ||
| 168 | lighting.config0.config, LightingRegs::LightingSampler::SpotlightAttenuation)) { | ||
| 169 | auto lut = LightingRegs::SpotlightAttenuationSampler(num); | ||
| 170 | spot_atten = GetLutValue(lighting.lut_input.sp, lighting.abs_lut_input.disable_sp == 0, | ||
| 171 | lighting.lut_scale.sp, lut); | ||
| 172 | } | ||
| 173 | |||
| 174 | // Specular 0 component | ||
| 175 | float d0_lut_value = 1.0f; | ||
| 176 | if (lighting.config1.disable_lut_d0 == 0 && | ||
| 177 | LightingRegs::IsLightingSamplerSupported( | ||
| 178 | lighting.config0.config, LightingRegs::LightingSampler::Distribution0)) { | ||
| 179 | d0_lut_value = | ||
| 180 | GetLutValue(lighting.lut_input.d0, lighting.abs_lut_input.disable_d0 == 0, | ||
| 181 | lighting.lut_scale.d0, LightingRegs::LightingSampler::Distribution0); | ||
| 182 | } | ||
| 183 | |||
| 184 | Math::Vec3<float> specular_0 = d0_lut_value * light_config.specular_0.ToVec3f(); | ||
| 185 | |||
| 186 | // If enabled, lookup ReflectRed value, otherwise, 1.0 is used | ||
| 187 | if (lighting.config1.disable_lut_rr == 0 && | ||
| 188 | LightingRegs::IsLightingSamplerSupported(lighting.config0.config, | ||
| 189 | LightingRegs::LightingSampler::ReflectRed)) { | ||
| 190 | refl_value.x = | ||
| 191 | GetLutValue(lighting.lut_input.rr, lighting.abs_lut_input.disable_rr == 0, | ||
| 192 | lighting.lut_scale.rr, LightingRegs::LightingSampler::ReflectRed); | ||
| 193 | } else { | ||
| 194 | refl_value.x = 1.0f; | ||
| 195 | } | ||
| 196 | |||
| 197 | // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used | ||
| 198 | if (lighting.config1.disable_lut_rg == 0 && | ||
| 199 | LightingRegs::IsLightingSamplerSupported(lighting.config0.config, | ||
| 200 | LightingRegs::LightingSampler::ReflectGreen)) { | ||
| 201 | refl_value.y = | ||
| 202 | GetLutValue(lighting.lut_input.rg, lighting.abs_lut_input.disable_rg == 0, | ||
| 203 | lighting.lut_scale.rg, LightingRegs::LightingSampler::ReflectGreen); | ||
| 204 | } else { | ||
| 205 | refl_value.y = refl_value.x; | ||
| 206 | } | ||
| 207 | |||
| 208 | // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used | ||
| 209 | if (lighting.config1.disable_lut_rb == 0 && | ||
| 210 | LightingRegs::IsLightingSamplerSupported(lighting.config0.config, | ||
| 211 | LightingRegs::LightingSampler::ReflectBlue)) { | ||
| 212 | refl_value.z = | ||
| 213 | GetLutValue(lighting.lut_input.rb, lighting.abs_lut_input.disable_rb == 0, | ||
| 214 | lighting.lut_scale.rb, LightingRegs::LightingSampler::ReflectBlue); | ||
| 215 | } else { | ||
| 216 | refl_value.z = refl_value.x; | ||
| 217 | } | ||
| 218 | |||
| 219 | // Specular 1 component | ||
| 220 | float d1_lut_value = 1.0f; | ||
| 221 | if (lighting.config1.disable_lut_d1 == 0 && | ||
| 222 | LightingRegs::IsLightingSamplerSupported( | ||
| 223 | lighting.config0.config, LightingRegs::LightingSampler::Distribution1)) { | ||
| 224 | d1_lut_value = | ||
| 225 | GetLutValue(lighting.lut_input.d1, lighting.abs_lut_input.disable_d1 == 0, | ||
| 226 | lighting.lut_scale.d1, LightingRegs::LightingSampler::Distribution1); | ||
| 227 | } | ||
| 228 | |||
| 229 | Math::Vec3<float> specular_1 = | ||
| 230 | d1_lut_value * refl_value * light_config.specular_1.ToVec3f(); | ||
| 231 | |||
| 232 | // Fresnel | ||
| 233 | // Note: only the last entry in the light slots applies the Fresnel factor | ||
| 234 | if (light_index == lighting.max_light_index && lighting.config1.disable_lut_fr == 0 && | ||
| 235 | LightingRegs::IsLightingSamplerSupported(lighting.config0.config, | ||
| 236 | LightingRegs::LightingSampler::Fresnel)) { | ||
| 237 | |||
| 238 | float lut_value = | ||
| 239 | GetLutValue(lighting.lut_input.fr, lighting.abs_lut_input.disable_fr == 0, | ||
| 240 | lighting.lut_scale.fr, LightingRegs::LightingSampler::Fresnel); | ||
| 241 | |||
| 242 | // Enabled for diffuse lighting alpha component | ||
| 243 | if (lighting.config0.fresnel_selector == | ||
| 244 | LightingRegs::LightingFresnelSelector::PrimaryAlpha || | ||
| 245 | lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { | ||
| 246 | diffuse_sum.a() = lut_value; | ||
| 247 | } | ||
| 248 | |||
| 249 | // Enabled for the specular lighting alpha component | ||
| 250 | if (lighting.config0.fresnel_selector == | ||
| 251 | LightingRegs::LightingFresnelSelector::SecondaryAlpha || | ||
| 252 | lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { | ||
| 253 | specular_sum.a() = lut_value; | ||
| 254 | } | ||
| 255 | } | ||
| 256 | |||
| 257 | auto dot_product = Math::Dot(light_vector, normal); | ||
| 258 | |||
| 259 | // Calculate clamp highlights before applying the two-sided diffuse configuration to the dot | ||
| 260 | // product. | ||
| 261 | float clamp_highlights = 1.0f; | ||
| 262 | if (lighting.config0.clamp_highlights) { | ||
| 263 | if (dot_product <= 0.0f) | ||
| 264 | clamp_highlights = 0.0f; | ||
| 265 | else | ||
| 266 | clamp_highlights = 1.0f; | ||
| 267 | } | ||
| 268 | |||
| 269 | if (light_config.config.two_sided_diffuse) | ||
| 270 | dot_product = std::abs(dot_product); | ||
| 271 | else | ||
| 272 | dot_product = std::max(dot_product, 0.0f); | ||
| 273 | |||
| 274 | if (light_config.config.geometric_factor_0 || light_config.config.geometric_factor_1) { | ||
| 275 | float geo_factor = half_vector.Length2(); | ||
| 276 | geo_factor = geo_factor == 0.0f ? 0.0f : std::min(dot_product / geo_factor, 1.0f); | ||
| 277 | if (light_config.config.geometric_factor_0) { | ||
| 278 | specular_0 *= geo_factor; | ||
| 279 | } | ||
| 280 | if (light_config.config.geometric_factor_1) { | ||
| 281 | specular_1 *= geo_factor; | ||
| 282 | } | ||
| 283 | } | ||
| 284 | |||
| 285 | auto diffuse = | ||
| 286 | light_config.diffuse.ToVec3f() * dot_product + light_config.ambient.ToVec3f(); | ||
| 287 | diffuse_sum += Math::MakeVec(diffuse * dist_atten * spot_atten, 0.0f); | ||
| 288 | |||
| 289 | specular_sum += Math::MakeVec( | ||
| 290 | (specular_0 + specular_1) * clamp_highlights * dist_atten * spot_atten, 0.0f); | ||
| 291 | } | ||
| 292 | |||
| 293 | diffuse_sum += Math::MakeVec(lighting.global_ambient.ToVec3f(), 0.0f); | ||
| 294 | |||
| 295 | auto diffuse = Math::MakeVec<float>(MathUtil::Clamp(diffuse_sum.x, 0.0f, 1.0f) * 255, | ||
| 296 | MathUtil::Clamp(diffuse_sum.y, 0.0f, 1.0f) * 255, | ||
| 297 | MathUtil::Clamp(diffuse_sum.z, 0.0f, 1.0f) * 255, | ||
| 298 | MathUtil::Clamp(diffuse_sum.w, 0.0f, 1.0f) * 255) | ||
| 299 | .Cast<u8>(); | ||
| 300 | auto specular = Math::MakeVec<float>(MathUtil::Clamp(specular_sum.x, 0.0f, 1.0f) * 255, | ||
| 301 | MathUtil::Clamp(specular_sum.y, 0.0f, 1.0f) * 255, | ||
| 302 | MathUtil::Clamp(specular_sum.z, 0.0f, 1.0f) * 255, | ||
| 303 | MathUtil::Clamp(specular_sum.w, 0.0f, 1.0f) * 255) | ||
| 304 | .Cast<u8>(); | ||
| 305 | return std::make_tuple(diffuse, specular); | ||
| 306 | } | ||
| 307 | |||
| 308 | } // namespace Pica | ||
diff --git a/src/video_core/swrasterizer/lighting.h b/src/video_core/swrasterizer/lighting.h new file mode 100644 index 000000000..d807a3d94 --- /dev/null +++ b/src/video_core/swrasterizer/lighting.h | |||
| @@ -0,0 +1,19 @@ | |||
| 1 | // Copyright 2017 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <tuple> | ||
| 8 | #include "common/quaternion.h" | ||
| 9 | #include "common/vector_math.h" | ||
| 10 | #include "video_core/pica_state.h" | ||
| 11 | |||
| 12 | namespace Pica { | ||
| 13 | |||
| 14 | std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors( | ||
| 15 | const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, | ||
| 16 | const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view, | ||
| 17 | const Math::Vec4<u8> (&texture_color)[4]); | ||
| 18 | |||
| 19 | } // namespace Pica | ||
diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 512e81c08..862135614 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| 14 | #include "common/math_util.h" | 14 | #include "common/math_util.h" |
| 15 | #include "common/microprofile.h" | 15 | #include "common/microprofile.h" |
| 16 | #include "common/quaternion.h" | ||
| 16 | #include "common/vector_math.h" | 17 | #include "common/vector_math.h" |
| 17 | #include "core/hw/gpu.h" | 18 | #include "core/hw/gpu.h" |
| 18 | #include "core/memory.h" | 19 | #include "core/memory.h" |
| @@ -24,6 +25,7 @@ | |||
| 24 | #include "video_core/regs_texturing.h" | 25 | #include "video_core/regs_texturing.h" |
| 25 | #include "video_core/shader/shader.h" | 26 | #include "video_core/shader/shader.h" |
| 26 | #include "video_core/swrasterizer/framebuffer.h" | 27 | #include "video_core/swrasterizer/framebuffer.h" |
| 28 | #include "video_core/swrasterizer/lighting.h" | ||
| 27 | #include "video_core/swrasterizer/proctex.h" | 29 | #include "video_core/swrasterizer/proctex.h" |
| 28 | #include "video_core/swrasterizer/rasterizer.h" | 30 | #include "video_core/swrasterizer/rasterizer.h" |
| 29 | #include "video_core/swrasterizer/texturing.h" | 31 | #include "video_core/swrasterizer/texturing.h" |
| @@ -419,6 +421,26 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
| 419 | regs.texturing.tev_combiner_buffer_color.a, | 421 | regs.texturing.tev_combiner_buffer_color.a, |
| 420 | }; | 422 | }; |
| 421 | 423 | ||
| 424 | Math::Vec4<u8> primary_fragment_color = {0, 0, 0, 0}; | ||
| 425 | Math::Vec4<u8> secondary_fragment_color = {0, 0, 0, 0}; | ||
| 426 | |||
| 427 | if (!g_state.regs.lighting.disable) { | ||
| 428 | Math::Quaternion<float> normquat = Math::Quaternion<float>{ | ||
| 429 | {GetInterpolatedAttribute(v0.quat.x, v1.quat.x, v2.quat.x).ToFloat32(), | ||
| 430 | GetInterpolatedAttribute(v0.quat.y, v1.quat.y, v2.quat.y).ToFloat32(), | ||
| 431 | GetInterpolatedAttribute(v0.quat.z, v1.quat.z, v2.quat.z).ToFloat32()}, | ||
| 432 | GetInterpolatedAttribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(), | ||
| 433 | }.Normalized(); | ||
| 434 | |||
| 435 | Math::Vec3<float> view{ | ||
| 436 | GetInterpolatedAttribute(v0.view.x, v1.view.x, v2.view.x).ToFloat32(), | ||
| 437 | GetInterpolatedAttribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(), | ||
| 438 | GetInterpolatedAttribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(), | ||
| 439 | }; | ||
| 440 | std::tie(primary_fragment_color, secondary_fragment_color) = ComputeFragmentsColors( | ||
| 441 | g_state.regs.lighting, g_state.lighting, normquat, view, texture_color); | ||
| 442 | } | ||
| 443 | |||
| 422 | for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); | 444 | for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); |
| 423 | ++tev_stage_index) { | 445 | ++tev_stage_index) { |
| 424 | const auto& tev_stage = tev_stages[tev_stage_index]; | 446 | const auto& tev_stage = tev_stages[tev_stage_index]; |
| @@ -427,14 +449,13 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
| 427 | auto GetSource = [&](Source source) -> Math::Vec4<u8> { | 449 | auto GetSource = [&](Source source) -> Math::Vec4<u8> { |
| 428 | switch (source) { | 450 | switch (source) { |
| 429 | case Source::PrimaryColor: | 451 | case Source::PrimaryColor: |
| 452 | return primary_color; | ||
| 430 | 453 | ||
| 431 | // HACK: Until we implement fragment lighting, use primary_color | ||
| 432 | case Source::PrimaryFragmentColor: | 454 | case Source::PrimaryFragmentColor: |
| 433 | return primary_color; | 455 | return primary_fragment_color; |
| 434 | 456 | ||
| 435 | // HACK: Until we implement fragment lighting, use zero | ||
| 436 | case Source::SecondaryFragmentColor: | 457 | case Source::SecondaryFragmentColor: |
| 437 | return {0, 0, 0, 0}; | 458 | return secondary_fragment_color; |
| 438 | 459 | ||
| 439 | case Source::Texture0: | 460 | case Source::Texture0: |
| 440 | return texture_color[0]; | 461 | return texture_color[0]; |
diff --git a/src/video_core/swrasterizer/rasterizer.h b/src/video_core/swrasterizer/rasterizer.h index 2f0877581..66cd6cfd4 100644 --- a/src/video_core/swrasterizer/rasterizer.h +++ b/src/video_core/swrasterizer/rasterizer.h | |||
| @@ -19,10 +19,9 @@ struct Vertex : Shader::OutputVertex { | |||
| 19 | 19 | ||
| 20 | // Linear interpolation | 20 | // Linear interpolation |
| 21 | // factor: 0=this, 1=vtx | 21 | // factor: 0=this, 1=vtx |
| 22 | // Note: This function cannot be called after perspective divide | ||
| 22 | void Lerp(float24 factor, const Vertex& vtx) { | 23 | void Lerp(float24 factor, const Vertex& vtx) { |
| 23 | pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); | 24 | pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); |
| 24 | |||
| 25 | // TODO: Should perform perspective correct interpolation here... | ||
| 26 | quat = quat * factor + vtx.quat * (float24::FromFloat32(1) - factor); | 25 | quat = quat * factor + vtx.quat * (float24::FromFloat32(1) - factor); |
| 27 | color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); | 26 | color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); |
| 28 | tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); | 27 | tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); |
| @@ -30,12 +29,11 @@ struct Vertex : Shader::OutputVertex { | |||
| 30 | tc0_w = tc0_w * factor + vtx.tc0_w * (float24::FromFloat32(1) - factor); | 29 | tc0_w = tc0_w * factor + vtx.tc0_w * (float24::FromFloat32(1) - factor); |
| 31 | view = view * factor + vtx.view * (float24::FromFloat32(1) - factor); | 30 | view = view * factor + vtx.view * (float24::FromFloat32(1) - factor); |
| 32 | tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); | 31 | tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); |
| 33 | |||
| 34 | screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); | ||
| 35 | } | 32 | } |
| 36 | 33 | ||
| 37 | // Linear interpolation | 34 | // Linear interpolation |
| 38 | // factor: 0=v0, 1=v1 | 35 | // factor: 0=v0, 1=v1 |
| 36 | // Note: This function cannot be called after perspective divide | ||
| 39 | static Vertex Lerp(float24 factor, const Vertex& v0, const Vertex& v1) { | 37 | static Vertex Lerp(float24 factor, const Vertex& v0, const Vertex& v1) { |
| 40 | Vertex ret = v0; | 38 | Vertex ret = v0; |
| 41 | ret.Lerp(factor, v1); | 39 | ret.Lerp(factor, v1); |
diff --git a/src/video_core/swrasterizer/texturing.cpp b/src/video_core/swrasterizer/texturing.cpp index 4f02b93f2..79b1ce841 100644 --- a/src/video_core/swrasterizer/texturing.cpp +++ b/src/video_core/swrasterizer/texturing.cpp | |||
| @@ -89,6 +89,8 @@ Math::Vec3<u8> GetColorModifier(TevStageConfig::ColorModifier factor, | |||
| 89 | case ColorModifier::OneMinusSourceBlue: | 89 | case ColorModifier::OneMinusSourceBlue: |
| 90 | return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>(); | 90 | return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>(); |
| 91 | } | 91 | } |
| 92 | |||
| 93 | UNREACHABLE(); | ||
| 92 | }; | 94 | }; |
| 93 | 95 | ||
| 94 | u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Math::Vec4<u8>& values) { | 96 | u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Math::Vec4<u8>& values) { |
| @@ -119,6 +121,8 @@ u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Math::Vec4<u8>& | |||
| 119 | case AlphaModifier::OneMinusSourceBlue: | 121 | case AlphaModifier::OneMinusSourceBlue: |
| 120 | return 255 - values.b(); | 122 | return 255 - values.b(); |
| 121 | } | 123 | } |
| 124 | |||
| 125 | UNREACHABLE(); | ||
| 122 | }; | 126 | }; |
| 123 | 127 | ||
| 124 | Math::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Math::Vec3<u8> input[3]) { | 128 | Math::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Math::Vec3<u8> input[3]) { |
diff --git a/src/video_core/utils.h b/src/video_core/utils.h index 7ce83a055..d8567f314 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h | |||
| @@ -8,17 +8,11 @@ | |||
| 8 | 8 | ||
| 9 | namespace VideoCore { | 9 | namespace VideoCore { |
| 10 | 10 | ||
| 11 | /** | 11 | // 8x8 Z-Order coordinate from 2D coordinates |
| 12 | * Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are | ||
| 13 | * arranged in a Z-order curve. More details on the bit manipulation at: | ||
| 14 | * https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ | ||
| 15 | */ | ||
| 16 | static inline u32 MortonInterleave(u32 x, u32 y) { | 12 | static inline u32 MortonInterleave(u32 x, u32 y) { |
| 17 | u32 i = (x & 7) | ((y & 7) << 8); // ---- -210 | 13 | static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15}; |
| 18 | i = (i ^ (i << 2)) & 0x1313; // ---2 --10 | 14 | static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a}; |
| 19 | i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0 | 15 | return xlut[x % 8] + ylut[y % 8]; |
| 20 | i = (i | (i >> 7)) & 0x3F; | ||
| 21 | return i; | ||
| 22 | } | 16 | } |
| 23 | 17 | ||
| 24 | /** | 18 | /** |