GPU: Implemented more depth buffer formats.

This fixes the horizontal lines in Picross E, Cubic Ninja, Cave Story 3D and possibly others
author: Subv 2015-03-08 12:05:17 -0500
committer: Subv 2015-03-09 20:12:39 -0500
commit: 414b0741c445a7960f9ad1ee4a5672f8af4760db (patch)
tree: 5218b976cf8e15e745a9cf6037a7c133b7559fed /src
parent: GPU/Textures: Fixed ETC texture decoding. (diff)
download: yuzu-414b0741c445a7960f9ad1ee4a5672f8af4760db.tar.gz
yuzu-414b0741c445a7960f9ad1ee4a5672f8af4760db.tar.xz
yuzu-414b0741c445a7960f9ad1ee4a5672f8af4760db.zip
5 files changed, 120 insertions, 14 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 424ce2ca7..b7102b874 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -81,9 +81,9 @@ inline void Write(u32 addr, const T data) {
            if (config.fill_24bit) {
                // fill with 24-bit values
                for (u8* ptr = start; ptr < end; ptr += 3) {
-                    ptr[0] = config.value_24bit_b;
+                    ptr[0] = config.value_24bit_r;
                    ptr[1] = config.value_24bit_g;
-                    ptr[2] = config.value_24bit_r;
+                    ptr[2] = config.value_24bit_b;
                }
            } else if (config.fill_32bit) {
                // fill with 32-bit values
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index 737b1e968..5ca4a5450 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -100,10 +100,10 @@ struct Regs {
            // Set to 1 upon completion.
            BitField<0, 1, u32> finished;
-            // 0: fill with 16- or 32-bit wide values; 1: fill with 24-bit wide values
+            // If both of these bits are unset, then it will fill the memory with a 16 bit value
+            // 1: fill with 24-bit wide values
            BitField<8, 1, u32> fill_24bit;
+            // 1: fill with 32-bit wide values
-            // 0: fill with 16-bit wide values; 1: fill with 32-bit wide values
            BitField<9, 1, u32> fill_32bit;
        };
diff --git a/src/video_core/color.h b/src/video_core/color.h
index 35da901f2..35b56efc0 100644
--- a/src/video_core/color.h
+++ b/src/video_core/color.h
@@ -101,6 +101,33 @@ inline const Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
 }
 /**
+ * Decode a depth value stored in D16 format
+ * @param bytes Pointer to encoded source value
+ * @return Depth value as an u32
+ */
+inline const u32 DecodeD16(const u8* bytes) {
+    return *reinterpret_cast<const u16_le*>(bytes);
+}
+/**
+ * Decode a depth value stored in D24 format
+ * @param bytes Pointer to encoded source value
+ * @return Depth value as an u32
+ */
+inline const u32 DecodeD24(const u8* bytes) {
+    return (bytes[2] << 16) | (bytes[1] << 8) | bytes[0];
+}
+/**
+ * Decode a depth value and a stencil value stored in D24S8 format
+ * @param bytes Pointer to encoded source values
+ * @return Resulting values stored as a Math::Vec2
+ */
+inline const Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
+    return { (bytes[2] << 16) | (bytes[1] << 8) | bytes[0], bytes[3] };
+}
+/**
 * Encode a color as RGBA8 format
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
@@ -153,4 +180,34 @@ inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
        (Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
 }
+/**
+ * Encode a depth value as D16 format
+ * @param value Source depth value to encode
+ * @param bytes Pointer where to store the encoded value
+ */
+inline void EncodeD16(u32 value, u8* bytes) {
+    *reinterpret_cast<u16_le*>(bytes) = value & 0xFFFF;
+}
+/**
+ * Encode a depth value as D24 format
+ * @param value Source depth value to encode
+ * @param bytes Pointer where to store the encoded value
+ */
+inline void EncodeD24(u32 value, u8* bytes) {
+    bytes[0] = value & 0xFF;
+    bytes[1] = (value >> 8) & 0xFF;
+    bytes[2] = (value >> 16) & 0xFF;
+}
+/**
+ * Encode depth and stencil values as D24S8 format
+ * @param depth Source depth values to encode
+ * @param stencil Source stencil value to encode
+ * @param bytes Pointer where to store the encoded value
+ */
+inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) {
+    *reinterpret_cast<u32_le*>(bytes) = (stencil << 24) | depth;
+}
 } // namespace
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index b14de9278..6549693f5 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -418,6 +418,13 @@ struct Regs {
            RGBA4    = 4,
        };
+        enum DepthFormat : u32 {
+            D16     = 0,
+        
+            D24     = 2,
+            D24S8   = 3
+        };
        INSERT_PADDING_WORDS(0x6);
        u32 depth_format;
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 5861c1926..dc32128c6 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -91,7 +91,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
    }
    return {};
- }
+}
 static u32 GetDepth(int x, int y) {
    const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
@@ -100,23 +100,65 @@ static u32 GetDepth(int x, int y) {
    y = (registers.framebuffer.height - y);
    
    const u32 coarse_y = y & ~7;
-    u32 stride = registers.framebuffer.width * 2;
-    // Assuming 16-bit depth buffer format until actual format handling is implemented
+    switch (registers.framebuffer.depth_format) {
-    return *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
+        case registers.framebuffer.D16:
+        {
+            u32 stride = registers.framebuffer.width * 2;
+            return Color::DecodeD16(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
+        }
+        case registers.framebuffer.D24:
+        {
+            u32 stride = registers.framebuffer.width * 3;
+            u8* address = depth_buffer + VideoCore::GetMortonOffset(x, y, 3) + coarse_y * stride;
+            return Color::DecodeD24(address);
+        }
+        case registers.framebuffer.D24S8:
+        {
+            u32 stride = registers.framebuffer.width * 4;
+            return Color::DecodeD24S8(depth_buffer + VideoCore::GetMortonOffset(x, y, 4) + coarse_y * stride).x;
+        }
+        default:
+            LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
+            UNIMPLEMENTED();
+            return 0;
+    }
 }
-static void SetDepth(int x, int y, u16 value) {
+static void SetDepth(int x, int y, u32 value) {
    const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
    u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
    y = (registers.framebuffer.height - y);
    const u32 coarse_y = y & ~7;
-    u32 stride = registers.framebuffer.width * 2;
-    // Assuming 16-bit depth buffer format until actual format handling is implemented
+    switch (registers.framebuffer.depth_format) {
-    *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride) = value;
+        case registers.framebuffer.D16:
+        {
+            u32 stride = registers.framebuffer.width * 2;
+            Color::EncodeD16(value, depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
+            break;
+        }
+        case registers.framebuffer.D24:
+        {
+            u32 stride = registers.framebuffer.width * 3;
+            u8* address = depth_buffer + VideoCore::GetMortonOffset(x, y, 3) + coarse_y * stride;
+            Color::EncodeD24(value, address);
+            break;
+        }
+        case registers.framebuffer.D24S8:
+        {
+            u32 stride = registers.framebuffer.width * 4;
+            // TODO(Subv): Implement the stencil buffer
+            Color::EncodeD24S8(value, 0, depth_buffer + VideoCore::GetMortonOffset(x, y, 4) + coarse_y * stride);
+            break;
+        }
+        default:
+            LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
+            UNIMPLEMENTED();
+            break;
+    }
 }
 // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
@@ -595,7 +637,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
                u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
                            v1.screenpos[2].ToFloat32() * w1 +
                            v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
-                u16 ref_z = GetDepth(x >> 4, y >> 4);
+                u32 ref_z = GetDepth(x >> 4, y >> 4);
                bool pass = false;
author	Subv	2015-03-08 12:05:17 -0500
committer	Subv	2015-03-09 20:12:39 -0500
commit	414b0741c445a7960f9ad1ee4a5672f8af4760db (patch)
tree	5218b976cf8e15e745a9cf6037a7c133b7559fed /src
parent	GPU/Textures: Fixed ETC texture decoding. (diff)
download	yuzu-414b0741c445a7960f9ad1ee4a5672f8af4760db.tar.gz yuzu-414b0741c445a7960f9ad1ee4a5672f8af4760db.tar.xz yuzu-414b0741c445a7960f9ad1ee4a5672f8af4760db.zip

diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 424ce2ca7..b7102b874 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp
@@ -81,9 +81,9 @@ inline void Write(u32 addr, const T data) {
81	if (config.fill_24bit) {	81	if (config.fill_24bit) {
82	// fill with 24-bit values	82	// fill with 24-bit values
83	for (u8* ptr = start; ptr < end; ptr += 3) {	83	for (u8* ptr = start; ptr < end; ptr += 3) {
84	ptr[0] = config.value_24bit_b;	84	ptr[0] = config.value_24bit_r;
85	ptr[1] = config.value_24bit_g;	85	ptr[1] = config.value_24bit_g;
86	ptr[2] = config.value_24bit_r;	86	ptr[2] = config.value_24bit_b;
87	}	87	}
88	} else if (config.fill_32bit) {	88	} else if (config.fill_32bit) {
89	// fill with 32-bit values	89	// fill with 32-bit values


diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 737b1e968..5ca4a5450 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h
@@ -100,10 +100,10 @@ struct Regs {
100	// Set to 1 upon completion.	100	// Set to 1 upon completion.
101	BitField<0, 1, u32> finished;	101	BitField<0, 1, u32> finished;
102		102
103	// 0: fill with 16- or 32-bit wide values; 1: fill with 24-bit wide values	103	// If both of these bits are unset, then it will fill the memory with a 16 bit value
		104	// 1: fill with 24-bit wide values
104	BitField<8, 1, u32> fill_24bit;	105	BitField<8, 1, u32> fill_24bit;
105		106	// 1: fill with 32-bit wide values
106	// 0: fill with 16-bit wide values; 1: fill with 32-bit wide values
107	BitField<9, 1, u32> fill_32bit;	107	BitField<9, 1, u32> fill_32bit;
108	};	108	};
109		109


diff --git a/src/video_core/color.h b/src/video_core/color.h index 35da901f2..35b56efc0 100644 --- a/src/video_core/color.h +++ b/src/video_core/color.h
@@ -101,6 +101,33 @@ inline const Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
101	}	101	}
102		102
103	/**	103	/**
		104	* Decode a depth value stored in D16 format
		105	* @param bytes Pointer to encoded source value
		106	* @return Depth value as an u32
		107	*/
		108	inline const u32 DecodeD16(const u8* bytes) {
		109	return *reinterpret_cast<const u16_le*>(bytes);
		110	}
		111
		112	/**
		113	* Decode a depth value stored in D24 format
		114	* @param bytes Pointer to encoded source value
		115	* @return Depth value as an u32
		116	*/
		117	inline const u32 DecodeD24(const u8* bytes) {
		118	return (bytes[2] << 16) | (bytes[1] << 8) | bytes[0];
		119	}
		120
		121	/**
		122	* Decode a depth value and a stencil value stored in D24S8 format
		123	* @param bytes Pointer to encoded source values
		124	* @return Resulting values stored as a Math::Vec2
		125	*/
		126	inline const Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
		127	return { (bytes[2] << 16) | (bytes[1] << 8) | bytes[0], bytes[3] };
		128	}
		129
		130	/**
104	* Encode a color as RGBA8 format	131	* Encode a color as RGBA8 format
105	* @param color Source color to encode	132	* @param color Source color to encode
106	* @param bytes Destination pointer to store encoded color	133	* @param bytes Destination pointer to store encoded color
@@ -153,4 +180,34 @@ inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
153	(Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a());	180	(Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
154	}	181	}
155		182
		183	/**
		184	* Encode a depth value as D16 format
		185	* @param value Source depth value to encode
		186	* @param bytes Pointer where to store the encoded value
		187	*/
		188	inline void EncodeD16(u32 value, u8* bytes) {
		189	*reinterpret_cast<u16_le*>(bytes) = value & 0xFFFF;
		190	}
		191
		192	/**
		193	* Encode a depth value as D24 format
		194	* @param value Source depth value to encode
		195	* @param bytes Pointer where to store the encoded value
		196	*/
		197	inline void EncodeD24(u32 value, u8* bytes) {
		198	bytes[0] = value & 0xFF;
		199	bytes[1] = (value >> 8) & 0xFF;
		200	bytes[2] = (value >> 16) & 0xFF;
		201	}
		202
		203	/**
		204	* Encode depth and stencil values as D24S8 format
		205	* @param depth Source depth values to encode
		206	* @param stencil Source stencil value to encode
		207	* @param bytes Pointer where to store the encoded value
		208	*/
		209	inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) {
		210	*reinterpret_cast<u32_le*>(bytes) = (stencil << 24) | depth;
		211	}
		212
156	} // namespace	213	} // namespace


diff --git a/src/video_core/pica.h b/src/video_core/pica.h index b14de9278..6549693f5 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h
@@ -418,6 +418,13 @@ struct Regs {
418	RGBA4 = 4,	418	RGBA4 = 4,
419	};	419	};
420		420
		421	enum DepthFormat : u32 {
		422	D16 = 0,
		423
		424	D24 = 2,
		425	D24S8 = 3
		426	};
		427
421	INSERT_PADDING_WORDS(0x6);	428	INSERT_PADDING_WORDS(0x6);
422		429
423	u32 depth_format;	430	u32 depth_format;


diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 5861c1926..dc32128c6 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp
@@ -91,7 +91,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
91	}	91	}
92		92
93	return {};	93	return {};
94	}	94	}
95		95
96	static u32 GetDepth(int x, int y) {	96	static u32 GetDepth(int x, int y) {
97	const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();	97	const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
@@ -100,23 +100,65 @@ static u32 GetDepth(int x, int y) {
100	y = (registers.framebuffer.height - y);	100	y = (registers.framebuffer.height - y);
101		101
102	const u32 coarse_y = y & ~7;	102	const u32 coarse_y = y & ~7;
103	u32 stride = registers.framebuffer.width * 2;
104		103
105	// Assuming 16-bit depth buffer format until actual format handling is implemented	104	switch (registers.framebuffer.depth_format) {
106	return *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);	105	case registers.framebuffer.D16:
		106	{
		107	u32 stride = registers.framebuffer.width * 2;
		108	return Color::DecodeD16(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
		109	}
		110	case registers.framebuffer.D24:
		111	{
		112	u32 stride = registers.framebuffer.width * 3;
		113	u8* address = depth_buffer + VideoCore::GetMortonOffset(x, y, 3) + coarse_y * stride;
		114	return Color::DecodeD24(address);
		115	}
		116	case registers.framebuffer.D24S8:
		117	{
		118	u32 stride = registers.framebuffer.width * 4;
		119	return Color::DecodeD24S8(depth_buffer + VideoCore::GetMortonOffset(x, y, 4) + coarse_y * stride).x;
		120	}
		121	default:
		122	LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
		123	UNIMPLEMENTED();
		124	return 0;
		125	}
107	}	126	}
108		127
109	static void SetDepth(int x, int y, u16 value) {	128	static void SetDepth(int x, int y, u32 value) {
110	const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();	129	const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
111	u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));	130	u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
112		131
113	y = (registers.framebuffer.height - y);	132	y = (registers.framebuffer.height - y);
114		133
115	const u32 coarse_y = y & ~7;	134	const u32 coarse_y = y & ~7;
116	u32 stride = registers.framebuffer.width * 2;
117		135
118	// Assuming 16-bit depth buffer format until actual format handling is implemented	136	switch (registers.framebuffer.depth_format) {
119	*(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride) = value;	137	case registers.framebuffer.D16:
		138	{
		139	u32 stride = registers.framebuffer.width * 2;
		140	Color::EncodeD16(value, depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
		141	break;
		142	}
		143	case registers.framebuffer.D24:
		144	{
		145	u32 stride = registers.framebuffer.width * 3;
		146	u8* address = depth_buffer + VideoCore::GetMortonOffset(x, y, 3) + coarse_y * stride;
		147	Color::EncodeD24(value, address);
		148	break;
		149	}
		150	case registers.framebuffer.D24S8:
		151	{
		152	u32 stride = registers.framebuffer.width * 4;
		153	// TODO(Subv): Implement the stencil buffer
		154	Color::EncodeD24S8(value, 0, depth_buffer + VideoCore::GetMortonOffset(x, y, 4) + coarse_y * stride);
		155	break;
		156	}
		157	default:
		158	LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
		159	UNIMPLEMENTED();
		160	break;
		161	}
120	}	162	}
121		163
122	// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values	164	// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
@@ -595,7 +637,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
595	u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +	637	u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
596	v1.screenpos[2].ToFloat32() * w1 +	638	v1.screenpos[2].ToFloat32() * w1 +
597	v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);	639	v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
598	u16 ref_z = GetDepth(x >> 4, y >> 4);	640	u32 ref_z = GetDepth(x >> 4, y >> 4);
599		641
600	bool pass = false;	642	bool pass = false;
601		643