summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Subv, 2015-03-08 12:05:17 -0500
committer: Subv, 2015-03-09 20:12:39 -0500
commit 414b0741c445a7960f9ad1ee4a5672f8af4760db (patch)
tree 5218b976cf8e15e745a9cf6037a7c133b7559fed /src
parent GPU/Textures: Fixed ETC texture decoding. (diff)
downloadyuzu-414b0741c445a7960f9ad1ee4a5672f8af4760db.tar.gz
yuzu-414b0741c445a7960f9ad1ee4a5672f8af4760db.tar.xz
yuzu-414b0741c445a7960f9ad1ee4a5672f8af4760db.zip
GPU: Implemented more depth buffer formats.
This fixes the horizontal lines in Picross E, Cubic Ninja, Cave Story 3D, and possibly other games.
Diffstat (limited to 'src')
-rw-r--r-- src/core/hw/gpu.cpp 4
-rw-r--r-- src/core/hw/gpu.h 6
-rw-r--r-- src/video_core/color.h 57
-rw-r--r-- src/video_core/pica.h 7
-rw-r--r-- src/video_core/rasterizer.cpp 60
5 files changed, 120 insertions, 14 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 424ce2ca7..b7102b874 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -81,9 +81,9 @@ inline void Write(u32 addr, const T data) {
81 if (config.fill_24bit) { 81 if (config.fill_24bit) {
82 // fill with 24-bit values 82 // fill with 24-bit values
83 for (u8* ptr = start; ptr < end; ptr += 3) { 83 for (u8* ptr = start; ptr < end; ptr += 3) {
84 ptr[0] = config.value_24bit_b; 84 ptr[0] = config.value_24bit_r;
85 ptr[1] = config.value_24bit_g; 85 ptr[1] = config.value_24bit_g;
86 ptr[2] = config.value_24bit_r; 86 ptr[2] = config.value_24bit_b;
87 } 87 }
88 } else if (config.fill_32bit) { 88 } else if (config.fill_32bit) {
89 // fill with 32-bit values 89 // fill with 32-bit values
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index 737b1e968..5ca4a5450 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -100,10 +100,10 @@ struct Regs {
100 // Set to 1 upon completion. 100 // Set to 1 upon completion.
101 BitField<0, 1, u32> finished; 101 BitField<0, 1, u32> finished;
102 102
103 // 0: fill with 16- or 32-bit wide values; 1: fill with 24-bit wide values 103 // If both of these bits are unset, then it will fill the memory with a 16 bit value
104 // 1: fill with 24-bit wide values
104 BitField<8, 1, u32> fill_24bit; 105 BitField<8, 1, u32> fill_24bit;
105 106 // 1: fill with 32-bit wide values
106 // 0: fill with 16-bit wide values; 1: fill with 32-bit wide values
107 BitField<9, 1, u32> fill_32bit; 107 BitField<9, 1, u32> fill_32bit;
108 }; 108 };
109 109
diff --git a/src/video_core/color.h b/src/video_core/color.h
index 35da901f2..35b56efc0 100644
--- a/src/video_core/color.h
+++ b/src/video_core/color.h
@@ -101,6 +101,33 @@ inline const Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
101} 101}
102 102
103/** 103/**
104 * Decode a depth value stored in D16 format
105 * @param bytes Pointer to encoded source value
106 * @return Depth value as an u32
107 */
inline const u32 DecodeD16(const u8* bytes) {
    // Assemble the 16-bit value one byte at a time (least significant byte first).
    // Unlike a reinterpret_cast to a 16-bit type, this never performs a misaligned or
    // type-punned load, and it yields the same result regardless of host endianness.
    const u32 low = bytes[0];
    const u32 high = bytes[1];
    return (high << 8) | low;
}
111
/**
 * Read a 24-bit depth value (D24 format) from three consecutive bytes
 * @param bytes Pointer to the encoded source value, least significant byte first
 * @return Depth value widened to an u32
 */
inline const u32 DecodeD24(const u8* bytes) {
    const int low = bytes[0];
    const int middle = bytes[1] << 8;
    const int high = bytes[2] << 16;
    return high | middle | low;
}
120
121/**
122 * Decode a depth value and a stencil value stored in D24S8 format
123 * @param bytes Pointer to encoded source values
124 * @return Resulting values stored as a Math::Vec2
125 */
126inline const Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
127 return { (bytes[2] << 16) | (bytes[1] << 8) | bytes[0], bytes[3] };
128}
129
130/**
104 * Encode a color as RGBA8 format 131 * Encode a color as RGBA8 format
105 * @param color Source color to encode 132 * @param color Source color to encode
106 * @param bytes Destination pointer to store encoded color 133 * @param bytes Destination pointer to store encoded color
@@ -153,4 +180,34 @@ inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
153 (Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a()); 180 (Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
154} 181}
155 182
/**
 * Encode a depth value as D16 format
 * @param value Source depth value to encode; only the low 16 bits are stored
 * @param bytes Pointer where to store the encoded value (two bytes, least significant first)
 */
inline void EncodeD16(u32 value, u8* bytes) {
    // Store byte by byte so the write is valid for any destination address (no misaligned
    // or type-punned 16-bit store) and independent of host endianness.
    bytes[0] = static_cast<u8>(value & 0xFF);
    bytes[1] = static_cast<u8>((value >> 8) & 0xFF);
}
191
/**
 * Store a depth value in D24 format
 * @param value Source depth value; its low 24 bits are written
 * @param bytes Destination pointer receiving three bytes, least significant first
 */
inline void EncodeD24(u32 value, u8* bytes) {
    for (int index = 0; index < 3; ++index) {
        bytes[index] = (value >> (8 * index)) & 0xFF;
    }
}
202
/**
 * Encode depth and stencil values as D24S8 format
 * @param depth Source depth value to encode; only the low 24 bits are stored
 * @param stencil Source stencil value to encode
 * @param bytes Pointer where to store the encoded value: three depth bytes (least
 *              significant first) followed by the stencil byte
 */
inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) {
    // Write each byte individually. This avoids shifting a promoted (signed) stencil
    // value into the sign bit, keeps depth bits above bit 23 from overwriting the
    // stencil byte, and does not require the destination to be 4-byte aligned.
    bytes[0] = static_cast<u8>(depth & 0xFF);
    bytes[1] = static_cast<u8>((depth >> 8) & 0xFF);
    bytes[2] = static_cast<u8>((depth >> 16) & 0xFF);
    bytes[3] = stencil;
}
212
156} // namespace 213} // namespace
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index b14de9278..6549693f5 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -418,6 +418,13 @@ struct Regs {
418 RGBA4 = 4, 418 RGBA4 = 4,
419 }; 419 };
420 420
// Depth buffer formats selectable through the depth_format register.
// The value 1 has no enumerator here.
enum DepthFormat : u32 {
    D16   = 0x0, // depth only, 2 bytes per pixel
    D24   = 0x2, // depth only, 3 bytes per pixel
    D24S8 = 0x3, // 3 depth bytes plus 1 stencil byte per pixel
};
427
421 INSERT_PADDING_WORDS(0x6); 428 INSERT_PADDING_WORDS(0x6);
422 429
423 u32 depth_format; 430 u32 depth_format;
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 5861c1926..dc32128c6 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -91,7 +91,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
91 } 91 }
92 92
93 return {}; 93 return {};
94 } 94}
95 95
96static u32 GetDepth(int x, int y) { 96static u32 GetDepth(int x, int y) {
97 const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); 97 const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
@@ -100,23 +100,65 @@ static u32 GetDepth(int x, int y) {
100 y = (registers.framebuffer.height - y); 100 y = (registers.framebuffer.height - y);
101 101
102 const u32 coarse_y = y & ~7; 102 const u32 coarse_y = y & ~7;
103 u32 stride = registers.framebuffer.width * 2;
104 103
105 // Assuming 16-bit depth buffer format until actual format handling is implemented 104 switch (registers.framebuffer.depth_format) {
106 return *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride); 105 case registers.framebuffer.D16:
106 {
107 u32 stride = registers.framebuffer.width * 2;
108 return Color::DecodeD16(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
109 }
110 case registers.framebuffer.D24:
111 {
112 u32 stride = registers.framebuffer.width * 3;
113 u8* address = depth_buffer + VideoCore::GetMortonOffset(x, y, 3) + coarse_y * stride;
114 return Color::DecodeD24(address);
115 }
116 case registers.framebuffer.D24S8:
117 {
118 u32 stride = registers.framebuffer.width * 4;
119 return Color::DecodeD24S8(depth_buffer + VideoCore::GetMortonOffset(x, y, 4) + coarse_y * stride).x;
120 }
121 default:
122 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
123 UNIMPLEMENTED();
124 return 0;
125 }
107} 126}
108 127
109static void SetDepth(int x, int y, u16 value) { 128static void SetDepth(int x, int y, u32 value) {
110 const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); 129 const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
111 u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr)); 130 u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
112 131
113 y = (registers.framebuffer.height - y); 132 y = (registers.framebuffer.height - y);
114 133
115 const u32 coarse_y = y & ~7; 134 const u32 coarse_y = y & ~7;
116 u32 stride = registers.framebuffer.width * 2;
117 135
118 // Assuming 16-bit depth buffer format until actual format handling is implemented 136 switch (registers.framebuffer.depth_format) {
119 *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride) = value; 137 case registers.framebuffer.D16:
138 {
139 u32 stride = registers.framebuffer.width * 2;
140 Color::EncodeD16(value, depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
141 break;
142 }
143 case registers.framebuffer.D24:
144 {
145 u32 stride = registers.framebuffer.width * 3;
146 u8* address = depth_buffer + VideoCore::GetMortonOffset(x, y, 3) + coarse_y * stride;
147 Color::EncodeD24(value, address);
148 break;
149 }
150 case registers.framebuffer.D24S8:
151 {
152 u32 stride = registers.framebuffer.width * 4;
153 // TODO(Subv): Implement the stencil buffer
154 Color::EncodeD24S8(value, 0, depth_buffer + VideoCore::GetMortonOffset(x, y, 4) + coarse_y * stride);
155 break;
156 }
157 default:
158 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
159 UNIMPLEMENTED();
160 break;
161 }
120} 162}
121 163
122// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values 164// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
@@ -595,7 +637,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
595 u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 + 637 u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
596 v1.screenpos[2].ToFloat32() * w1 + 638 v1.screenpos[2].ToFloat32() * w1 +
597 v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); 639 v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
598 u16 ref_z = GetDepth(x >> 4, y >> 4); 640 u32 ref_z = GetDepth(x >> 4, y >> 4);
599 641
600 bool pass = false; 642 bool pass = false;
601 643