2 files changed, 44 insertions, 42 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index ef9584abd..36c3b9947 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -221,57 +221,53 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
                // Initialize data for the current vertex
                VertexShader::InputVertex input;
-                // Load a debugging token to check whether this gets loaded by the running
-                // application or not.
-                static const float24 debug_token = float24::FromRawFloat24(0x00abcdef);
-                input.attr[0].w = debug_token;
                for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
-                    // Load the default attribute if we're configured to do so, this data will be overwritten by the loader data if it's set
+                    if (vertex_attribute_elements[i] != 0) {
-                    if (attribute_config.IsDefaultAttribute(i)) {
+                        // Default attribute values set if array elements have < 4 components. This
+                        // is *not* carried over from the default attribute settings even if they're
+                        // enabled for this attribute.
+                        static const float24 zero = float24::FromFloat32(0.0f);
+                        static const float24 one = float24::FromFloat32(1.0f);
+                        input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one);
+                        // Load per-vertex data from the loader arrays
+                        for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
+                            u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
+                            const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
+                            if (g_debug_context && Pica::g_debug_context->recorder) {
+                                memory_accesses.AddAccess(source_addr,
+                                    (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
+                                    : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
+                            }
+                            const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata :
+                                (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata :
+                                (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *(s16*)srcdata :
+                                *(float*)srcdata;
+                            input.attr[i][comp] = float24::FromFloat32(srcval);
+                            LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f",
+                                comp, i, vertex, index,
+                                attribute_config.GetPhysicalBaseAddress(),
+                                vertex_attribute_sources[i] - base_address,
+                                vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
+                                input.attr[i][comp].ToFloat32());
+                        }
+                    } else if (attribute_config.IsDefaultAttribute(i)) {
+                        // Load the default attribute if we're configured to do so
                        input.attr[i] = g_state.vs.default_attributes[i];
                        LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
                                  i, vertex, index,
                                  input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
                                  input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
-                    }
+                    } else {
+                        // TODO(yuriks): In this case, no data gets loaded and the vertex remains
-                    // Load per-vertex data from the loader arrays
+                        //              with the last value it had. This isn't currently maintained
-                    for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
+                        //              as global state, however, and so won't work in Citra yet.
-                        u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
-                        const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
-                        if (g_debug_context && Pica::g_debug_context->recorder) {
-                            memory_accesses.AddAccess(source_addr,
-                                    (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
-                                    : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
-                        }
-                        const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata :
-                            (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata :
-                            (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *(s16*)srcdata :
-                            *(float*)srcdata;
-                        input.attr[i][comp] = float24::FromFloat32(srcval);
-                        LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f",
-                            comp, i, vertex, index,
-                            attribute_config.GetPhysicalBaseAddress(),
-                            vertex_attribute_sources[i] - base_address,
-                            vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
-                            input.attr[i][comp].ToFloat32());
                    }
                }
-                // HACK: Some games do not initialize the vertex position's w component. This leads
-                //       to critical issues since it messes up perspective division. As a
-                //       workaround, we force the fourth component to 1.0 if we find this to be the
-                //       case.
-                //       To do this, we additionally have to assume that the first input attribute
-                //       is the vertex position, since there's no information about this other than
-                //       the empiric observation that this is usually the case.
-                if (input.attr[0].w == debug_token)
-                    input.attr[0].w = float24::FromFloat32(1.0);
                if (g_debug_context)
                    g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 960ae5779..5f66f3455 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -609,6 +609,12 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes, const Regs:
        }
    }
+    // The hardware takes the absolute value of vertex colors and saturates them like this, *before* doing interpolation
+    for (int i = 0; i < 4; ++i) {
+        ret.color[i] = float24::FromFloat32(
+            std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
+    }
    LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
        ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
        ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),

diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index ef9584abd..36c3b9947 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp
@@ -221,57 +221,53 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
221	// Initialize data for the current vertex	221	// Initialize data for the current vertex
222	VertexShader::InputVertex input;	222	VertexShader::InputVertex input;
223		223
224	// Load a debugging token to check whether this gets loaded by the running
225	// application or not.
226	static const float24 debug_token = float24::FromRawFloat24(0x00abcdef);
227	input.attr[0].w = debug_token;
228
229	for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {	224	for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
230	// Load the default attribute if we're configured to do so, this data will be overwritten by the loader data if it's set	225	if (vertex_attribute_elements[i] != 0) {
231	if (attribute_config.IsDefaultAttribute(i)) {	226	// Default attribute values set if array elements have < 4 components. This
		227	// is *not* carried over from the default attribute settings even if they're
		228	// enabled for this attribute.
		229	static const float24 zero = float24::FromFloat32(0.0f);
		230	static const float24 one = float24::FromFloat32(1.0f);
		231	input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one);
		232
		233	// Load per-vertex data from the loader arrays
		234	for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
		235	u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
		236	const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
		237
		238	if (g_debug_context && Pica::g_debug_context->recorder) {
		239	memory_accesses.AddAccess(source_addr,
		240	(vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
		241	: (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
		242	}
		243
		244	const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata :
		245	(vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata :
		246	(vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *(s16*)srcdata :
		247	*(float*)srcdata;
		248
		249	input.attr[i][comp] = float24::FromFloat32(srcval);
		250	LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f",
		251	comp, i, vertex, index,
		252	attribute_config.GetPhysicalBaseAddress(),
		253	vertex_attribute_sources[i] - base_address,
		254	vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
		255	input.attr[i][comp].ToFloat32());
		256	}
		257	} else if (attribute_config.IsDefaultAttribute(i)) {
		258	// Load the default attribute if we're configured to do so
232	input.attr[i] = g_state.vs.default_attributes[i];	259	input.attr[i] = g_state.vs.default_attributes[i];
233	LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",	260	LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
234	i, vertex, index,	261	i, vertex, index,
235	input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),	262	input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
236	input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());	263	input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
237	}	264	} else {
238		265	// TODO(yuriks): In this case, no data gets loaded and the vertex remains
239	// Load per-vertex data from the loader arrays	266	// with the last value it had. This isn't currently maintained
240	for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {	267	// as global state, however, and so won't work in Citra yet.
241	u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
242	const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
243
244	if (g_debug_context && Pica::g_debug_context->recorder) {
245	memory_accesses.AddAccess(source_addr,
246	(vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
247	: (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
248	}
249
250	const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata :
251	(vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata :
252	(vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *(s16*)srcdata :
253	*(float*)srcdata;
254
255	input.attr[i][comp] = float24::FromFloat32(srcval);
256	LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f",
257	comp, i, vertex, index,
258	attribute_config.GetPhysicalBaseAddress(),
259	vertex_attribute_sources[i] - base_address,
260	vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
261	input.attr[i][comp].ToFloat32());
262	}	268	}
263	}	269	}
264		270
265	// HACK: Some games do not initialize the vertex position's w component. This leads
266	// to critical issues since it messes up perspective division. As a
267	// workaround, we force the fourth component to 1.0 if we find this to be the
268	// case.
269	// To do this, we additionally have to assume that the first input attribute
270	// is the vertex position, since there's no information about this other than
271	// the empiric observation that this is usually the case.
272	if (input.attr[0].w == debug_token)
273	input.attr[0].w = float24::FromFloat32(1.0);
274
275	if (g_debug_context)	271	if (g_debug_context)
276	g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);	272	g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
277		273


diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 960ae5779..5f66f3455 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp
@@ -609,6 +609,12 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes, const Regs:
609	}	609	}
610	}	610	}
611		611
		612	// The hardware takes the absolute value of vertex colors and saturates them like this, *before* doing interpolation
		613	for (int i = 0; i < 4; ++i) {
		614	ret.color[i] = float24::FromFloat32(
		615	std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
		616	}
		617
612	LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",	618	LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
613	ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),	619	ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
614	ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),	620	ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),