diff options
| author | 2015-03-09 22:06:30 -0400 | |
|---|---|---|
| committer | 2015-03-09 22:06:30 -0400 | |
| commit | ec5bc54575c03bed67e712a0508ee55c06ec652c (patch) | |
| tree | 0b12d5fe048826ab8411b2ebc38e9200f44c35ab /src/video_core | |
| parent | Merge pull request #647 from neobrain/rip_culling_hack (diff) | |
| parent | GPU: Added the stencil test structure to the Pica Regs struct. (diff) | |
| download | yuzu-ec5bc54575c03bed67e712a0508ee55c06ec652c.tar.gz yuzu-ec5bc54575c03bed67e712a0508ee55c06ec652c.tar.xz yuzu-ec5bc54575c03bed67e712a0508ee55c06ec652c.zip | |
Merge pull request #643 from Subv/dem_feels
GPU: Implemented more depth buffer formats.
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/color.h | 57 | ||||
| -rw-r--r-- | src/video_core/pica.h | 36 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 54 |
3 files changed, 134 insertions, 13 deletions
diff --git a/src/video_core/color.h b/src/video_core/color.h index 35da901f2..14ade74f2 100644 --- a/src/video_core/color.h +++ b/src/video_core/color.h | |||
| @@ -101,6 +101,33 @@ inline const Math::Vec4<u8> DecodeRGBA4(const u8* bytes) { | |||
| 101 | } | 101 | } |
| 102 | 102 | ||
| 103 | /** | 103 | /** |
| 104 | * Decode a depth value stored in D16 format | ||
| 105 | * @param bytes Pointer to encoded source value | ||
| 106 | * @return Depth value as a u32 | ||
| 107 | */ | ||
| 108 | inline u32 DecodeD16(const u8* bytes) { | ||
| 109 | return *reinterpret_cast<const u16_le*>(bytes); | ||
| 110 | } | ||
| 111 | |||
| 112 | /** | ||
| 113 | * Decode a depth value stored in D24 format | ||
| 114 | * @param bytes Pointer to encoded source value | ||
| 115 | * @return Depth value as a u32 | ||
| 116 | */ | ||
| 117 | inline u32 DecodeD24(const u8* bytes) { | ||
| 118 | return (bytes[2] << 16) | (bytes[1] << 8) | bytes[0]; | ||
| 119 | } | ||
| 120 | |||
| 121 | /** | ||
| 122 | * Decode a depth value and a stencil value stored in D24S8 format | ||
| 123 | * @param bytes Pointer to encoded source values | ||
| 124 | * @return Resulting values stored as a Math::Vec2 (24-bit depth first, 8-bit stencil second) | ||
| 125 | */ | ||
| 126 | inline const Math::Vec2<u32> DecodeD24S8(const u8* bytes) { | ||
| 127 | return { (bytes[2] << 16) | (bytes[1] << 8) | bytes[0], bytes[3] }; | ||
| 128 | } | ||
| 129 | |||
| 130 | /** | ||
| 104 | * Encode a color as RGBA8 format | 131 | * Encode a color as RGBA8 format |
| 105 | * @param color Source color to encode | 132 | * @param color Source color to encode |
| 106 | * @param bytes Destination pointer to store encoded color | 133 | * @param bytes Destination pointer to store encoded color |
| @@ -153,4 +180,34 @@ inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) { | |||
| 153 | (Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a()); | 180 | (Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a()); |
| 154 | } | 181 | } |
| 155 | 182 | ||
| 183 | /** | ||
| 184 | * Encode a 16 bit depth value as D16 format | ||
| 185 | * @param value 16 bit source depth value to encode | ||
| 186 | * @param bytes Pointer where to store the encoded value | ||
| 187 | */ | ||
| 188 | inline void EncodeD16(u32 value, u8* bytes) { | ||
| 189 | *reinterpret_cast<u16_le*>(bytes) = value & 0xFFFF; | ||
| 190 | } | ||
| 191 | |||
| 192 | /** | ||
| 193 | * Encode a 24 bit depth value as D24 format | ||
| 194 | * @param value 24 bit source depth value to encode | ||
| 195 | * @param bytes Pointer where to store the encoded value | ||
| 196 | */ | ||
| 197 | inline void EncodeD24(u32 value, u8* bytes) { | ||
| 198 | bytes[0] = value & 0xFF; | ||
| 199 | bytes[1] = (value >> 8) & 0xFF; | ||
| 200 | bytes[2] = (value >> 16) & 0xFF; | ||
| 201 | } | ||
| 202 | |||
| 203 | /** | ||
| 204 | * Encode a 24 bit depth and 8 bit stencil values as D24S8 format | ||
| 205 | * @param depth 24 bit source depth value to encode | ||
| 206 | * @param stencil 8 bit source stencil value to encode | ||
| 207 | * @param bytes Pointer where to store the encoded value | ||
| 208 | */ | ||
| 209 | inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) { | ||
| 210 | *reinterpret_cast<u32_le*>(bytes) = (stencil << 24) | depth; | ||
| 211 | } | ||
| 212 | |||
| 156 | } // namespace | 213 | } // namespace |
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index b14de9278..fe20cd77d 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h | |||
| @@ -393,7 +393,15 @@ struct Regs { | |||
| 393 | BitField< 8, 8, u32> ref; | 393 | BitField< 8, 8, u32> ref; |
| 394 | } alpha_test; | 394 | } alpha_test; |
| 395 | 395 | ||
| 396 | INSERT_PADDING_WORDS(0x2); | 396 | union { |
| 397 | BitField< 0, 1, u32> stencil_test_enable; | ||
| 398 | BitField< 4, 3, CompareFunc> stencil_test_func; | ||
| 399 | BitField< 8, 8, u32> stencil_replacement_value; | ||
| 400 | BitField<16, 8, u32> stencil_reference_value; | ||
| 401 | BitField<24, 8, u32> stencil_mask; | ||
| 402 | } stencil_test; | ||
| 403 | |||
| 404 | INSERT_PADDING_WORDS(0x1); | ||
| 397 | 405 | ||
| 398 | union { | 406 | union { |
| 399 | BitField< 0, 1, u32> depth_test_enable; | 407 | BitField< 0, 1, u32> depth_test_enable; |
| @@ -408,6 +416,30 @@ struct Regs { | |||
| 408 | INSERT_PADDING_WORDS(0x8); | 416 | INSERT_PADDING_WORDS(0x8); |
| 409 | } output_merger; | 417 | } output_merger; |
| 410 | 418 | ||
| 419 | enum DepthFormat : u32 { | ||
| 420 | D16 = 0, | ||
| 421 | |||
| 422 | D24 = 2, | ||
| 423 | D24S8 = 3 | ||
| 424 | }; | ||
| 425 | |||
| 426 | /* | ||
| 427 | * Returns the number of bytes per pixel in the specified depth format | ||
| 428 | */ | ||
| 429 | static u32 BytesPerDepthPixel(DepthFormat format) { | ||
| 430 | switch (format) { | ||
| 431 | case DepthFormat::D16: | ||
| 432 | return 2; | ||
| 433 | case DepthFormat::D24: | ||
| 434 | return 3; | ||
| 435 | case DepthFormat::D24S8: | ||
| 436 | return 4; | ||
| 437 | default: | ||
| 438 | LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); | ||
| 439 | UNIMPLEMENTED(); | ||
| 440 | } | ||
| 441 | } | ||
| 442 | |||
| 411 | struct { | 443 | struct { |
| 412 | // Components are laid out in reverse byte order, most significant bits first. | 444 | // Components are laid out in reverse byte order, most significant bits first. |
| 413 | enum ColorFormat : u32 { | 445 | enum ColorFormat : u32 { |
| @@ -420,7 +452,7 @@ struct Regs { | |||
| 420 | 452 | ||
| 421 | INSERT_PADDING_WORDS(0x6); | 453 | INSERT_PADDING_WORDS(0x6); |
| 422 | 454 | ||
| 423 | u32 depth_format; | 455 | DepthFormat depth_format; |
| 424 | BitField<16, 3, u32> color_format; | 456 | BitField<16, 3, u32> color_format; |
| 425 | 457 | ||
| 426 | INSERT_PADDING_WORDS(0x4); | 458 | INSERT_PADDING_WORDS(0x4); |
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 5861c1926..dd46f0ec3 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -91,7 +91,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |||
| 91 | } | 91 | } |
| 92 | 92 | ||
| 93 | return {}; | 93 | return {}; |
| 94 | } | 94 | } |
| 95 | 95 | ||
| 96 | static u32 GetDepth(int x, int y) { | 96 | static u32 GetDepth(int x, int y) { |
| 97 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); | 97 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); |
| @@ -100,23 +100,55 @@ static u32 GetDepth(int x, int y) { | |||
| 100 | y = (registers.framebuffer.height - y); | 100 | y = (registers.framebuffer.height - y); |
| 101 | 101 | ||
| 102 | const u32 coarse_y = y & ~7; | 102 | const u32 coarse_y = y & ~7; |
| 103 | u32 stride = registers.framebuffer.width * 2; | 103 | u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format); |
| 104 | 104 | u32 stride = registers.framebuffer.width * bytes_per_pixel; | |
| 105 | // Assuming 16-bit depth buffer format until actual format handling is implemented | 105 | |
| 106 | return *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride); | 106 | u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; |
| 107 | u8* src_pixel = depth_buffer + src_offset; | ||
| 108 | |||
| 109 | switch (registers.framebuffer.depth_format) { | ||
| 110 | case Pica::Regs::DepthFormat::D16: | ||
| 111 | return Color::DecodeD16(src_pixel); | ||
| 112 | case Pica::Regs::DepthFormat::D24: | ||
| 113 | return Color::DecodeD24(src_pixel); | ||
| 114 | case Pica::Regs::DepthFormat::D24S8: | ||
| 115 | return Color::DecodeD24S8(src_pixel).x; | ||
| 116 | default: | ||
| 117 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format); | ||
| 118 | UNIMPLEMENTED(); | ||
| 119 | return 0; | ||
| 120 | } | ||
| 107 | } | 121 | } |
| 108 | 122 | ||
| 109 | static void SetDepth(int x, int y, u16 value) { | 123 | static void SetDepth(int x, int y, u32 value) { |
| 110 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); | 124 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); |
| 111 | u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr)); | 125 | u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr)); |
| 112 | 126 | ||
| 113 | y = (registers.framebuffer.height - y); | 127 | y = (registers.framebuffer.height - y); |
| 114 | 128 | ||
| 115 | const u32 coarse_y = y & ~7; | 129 | const u32 coarse_y = y & ~7; |
| 116 | u32 stride = registers.framebuffer.width * 2; | 130 | u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format); |
| 117 | 131 | u32 stride = registers.framebuffer.width * bytes_per_pixel; | |
| 118 | // Assuming 16-bit depth buffer format until actual format handling is implemented | 132 | |
| 119 | *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride) = value; | 133 | u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; |
| 134 | u8* dst_pixel = depth_buffer + dst_offset; | ||
| 135 | |||
| 136 | switch (registers.framebuffer.depth_format) { | ||
| 137 | case Pica::Regs::DepthFormat::D16: | ||
| 138 | Color::EncodeD16(value, dst_pixel); | ||
| 139 | break; | ||
| 140 | case Pica::Regs::DepthFormat::D24: | ||
| 141 | Color::EncodeD24(value, dst_pixel); | ||
| 142 | break; | ||
| 143 | case Pica::Regs::DepthFormat::D24S8: | ||
| 144 | // TODO(Subv): Implement the stencil buffer | ||
| 145 | Color::EncodeD24S8(value, 0, dst_pixel); | ||
| 146 | break; | ||
| 147 | default: | ||
| 148 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format); | ||
| 149 | UNIMPLEMENTED(); | ||
| 150 | break; | ||
| 151 | } | ||
| 120 | } | 152 | } |
| 121 | 153 | ||
| 122 | // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values | 154 | // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values |
| @@ -595,7 +627,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 595 | u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 + | 627 | u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 + |
| 596 | v1.screenpos[2].ToFloat32() * w1 + | 628 | v1.screenpos[2].ToFloat32() * w1 + |
| 597 | v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); | 629 | v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); |
| 598 | u16 ref_z = GetDepth(x >> 4, y >> 4); | 630 | u32 ref_z = GetDepth(x >> 4, y >> 4); |
| 599 | 631 | ||
| 600 | bool pass = false; | 632 | bool pass = false; |
| 601 | 633 | ||