Merge pull request #643 from Subv/dem_feels

GPU: Implemented more depth buffer formats.
author: bunnei 2015-03-09 22:06:30 -0400
committer: bunnei 2015-03-09 22:06:30 -0400
commit: ec5bc54575c03bed67e712a0508ee55c06ec652c (patch)
tree: 0b12d5fe048826ab8411b2ebc38e9200f44c35ab /src
parent: Merge pull request #647 from neobrain/rip_culling_hack (diff)
parent: GPU: Added the stencil test structure to the Pica Regs struct. (diff)
download: yuzu-ec5bc54575c03bed67e712a0508ee55c06ec652c.tar.gz
yuzu-ec5bc54575c03bed67e712a0508ee55c06ec652c.tar.xz
yuzu-ec5bc54575c03bed67e712a0508ee55c06ec652c.zip
5 files changed, 202 insertions, 20 deletions
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index 5bd6c0235..d621d7204 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -27,6 +27,7 @@ GraphicsFramebufferWidget::GraphicsFramebufferWidget(std::shared_ptr<Pica::Debug
    framebuffer_source_list = new QComboBox;
    framebuffer_source_list->addItem(tr("Active Render Target"));
+    framebuffer_source_list->addItem(tr("Active Depth Buffer"));
    framebuffer_source_list->addItem(tr("Custom"));
    framebuffer_source_list->setCurrentIndex(static_cast<int>(framebuffer_source));
@@ -49,6 +50,9 @@ GraphicsFramebufferWidget::GraphicsFramebufferWidget(std::shared_ptr<Pica::Debug
    framebuffer_format_control->addItem(tr("RGB5A1"));
    framebuffer_format_control->addItem(tr("RGB565"));
    framebuffer_format_control->addItem(tr("RGBA4"));
+    framebuffer_format_control->addItem(tr("D16"));
+    framebuffer_format_control->addItem(tr("D24"));
+    framebuffer_format_control->addItem(tr("D24S8"));
    // TODO: This QLabel should shrink the image to the available space rather than just expanding...
    framebuffer_picture_label = new QLabel;
@@ -172,8 +176,7 @@ void GraphicsFramebufferWidget::OnUpdate()
    {
        // TODO: Store a reference to the registers in the debug context instead of accessing them directly...
-        auto framebuffer = Pica::registers.framebuffer;
+        const auto& framebuffer = Pica::registers.framebuffer;
-        using Framebuffer = decltype(framebuffer);
        framebuffer_address = framebuffer.GetColorBufferPhysicalAddress();
        framebuffer_width = framebuffer.GetWidth();
@@ -184,6 +187,18 @@ void GraphicsFramebufferWidget::OnUpdate()
        break;
    }
+    case Source::DepthBuffer:
+    {
+        const auto& framebuffer = Pica::registers.framebuffer;
+        framebuffer_address = framebuffer.GetDepthBufferPhysicalAddress();
+        framebuffer_width = framebuffer.GetWidth();
+        framebuffer_height = framebuffer.GetHeight();
+        framebuffer_format = Format::D16;
+        break;
+    }
    case Source::Custom:
    {
        // Keep user-specified values
@@ -197,15 +212,16 @@ void GraphicsFramebufferWidget::OnUpdate()
    // TODO: Implement a good way to visualize alpha components!
    // TODO: Unify this decoding code with the texture decoder
-    u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer_format));
+    u32 bytes_per_pixel = GraphicsFramebufferWidget::BytesPerPixel(framebuffer_format);
    QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32);
-    u8* color_buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
+    u8* buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
    for (unsigned int y = 0; y < framebuffer_height; ++y) {
        for (unsigned int x = 0; x < framebuffer_width; ++x) {
            const u32 coarse_y = y & ~7;
            u32 offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer_width * bytes_per_pixel;
-            const u8* pixel = color_buffer + offset;
+            const u8* pixel = buffer + offset;
            Math::Vec4<u8> color = { 0, 0, 0, 0 };
            switch (framebuffer_format) {
@@ -224,6 +240,29 @@ void GraphicsFramebufferWidget::OnUpdate()
            case Format::RGBA4:
                color = Color::DecodeRGBA4(pixel);
                break;
+            case Format::D16:
+            {
+                u32 data = Color::DecodeD16(pixel);
+                color.r() = data & 0xFF;
+                color.g() = (data >> 8) & 0xFF;
+                break;
+            }
+            case Format::D24:
+            {
+                u32 data = Color::DecodeD24(pixel);
+                color.r() = data & 0xFF;
+                color.g() = (data >> 8) & 0xFF;
+                color.b() = (data >> 16) & 0xFF;
+                break;
+            }
+            case Format::D24S8:
+            {
+                Math::Vec2<u32> data = Color::DecodeD24S8(pixel);
+                color.r() = data.x & 0xFF;
+                color.g() = (data.x >> 8) & 0xFF;
+                color.b() = (data.x >> 16) & 0xFF;
+                break;
+            }
            default:
                qDebug() << "Unknown fb color format " << static_cast<int>(framebuffer_format);
                break;
@@ -240,3 +279,19 @@ void GraphicsFramebufferWidget::OnUpdate()
    framebuffer_format_control->setCurrentIndex(static_cast<int>(framebuffer_format));
    framebuffer_picture_label->setPixmap(pixmap);
 }
+u32 GraphicsFramebufferWidget::BytesPerPixel(GraphicsFramebufferWidget::Format format) {
+    switch (format) {
+        case Format::RGBA8:
+        case Format::D24S8:
+            return 4;
+        case Format::RGB8:
+        case Format::D24:
+            return 3;
+        case Format::RGB5A1:
+        case Format::RGB565:
+        case Format::RGBA4:
+        case Format::D16:
+            return 2;
+    }
+}
diff --git a/src/citra_qt/debugger/graphics_framebuffer.h b/src/citra_qt/debugger/graphics_framebuffer.h
index 15ebd1f7d..4cb396ffe 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.h
+++ b/src/citra_qt/debugger/graphics_framebuffer.h
@@ -20,8 +20,9 @@ class GraphicsFramebufferWidget : public BreakPointObserverDock {
    using Event = Pica::DebugContext::Event;
    enum class Source {
-        PicaTarget = 0,
+        PicaTarget   = 0,
-        Custom = 1,
+        DepthBuffer  = 1,
+        Custom       = 2,
        // TODO: Add GPU framebuffer sources!
    };
@@ -32,8 +33,13 @@ class GraphicsFramebufferWidget : public BreakPointObserverDock {
        RGB5A1   = 2,
        RGB565   = 3,
        RGBA4    = 4,
+        D16      = 5,
+        D24      = 6,
+        D24S8    = 7
    };
+    static u32 BytesPerPixel(Format format);
 public:
    GraphicsFramebufferWidget(std::shared_ptr<Pica::DebugContext> debug_context, QWidget* parent = nullptr);
diff --git a/src/video_core/color.h b/src/video_core/color.h
index 35da901f2..14ade74f2 100644
--- a/src/video_core/color.h
+++ b/src/video_core/color.h
@@ -101,6 +101,33 @@ inline const Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
 }
 /**
+ * Decode a depth value stored in D16 format
+ * @param bytes Pointer to encoded source value
+ * @return Depth value as an u32
+ */
+inline u32 DecodeD16(const u8* bytes) {
+    return *reinterpret_cast<const u16_le*>(bytes);
+}
+/**
+ * Decode a depth value stored in D24 format
+ * @param bytes Pointer to encoded source value
+ * @return Depth value as an u32
+ */
+inline u32 DecodeD24(const u8* bytes) {
+    return (bytes[2] << 16) | (bytes[1] << 8) | bytes[0];
+}
+/**
+ * Decode a depth value and a stencil value stored in D24S8 format
+ * @param bytes Pointer to encoded source values
+ * @return Resulting values stored as a Math::Vec2
+ */
+inline const Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
+    return { (bytes[2] << 16) | (bytes[1] << 8) | bytes[0], bytes[3] };
+}
+/**
 * Encode a color as RGBA8 format
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
@@ -153,4 +180,34 @@ inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
        (Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
 }
+/**
+ * Encode a 16 bit depth value as D16 format
+ * @param value 16 bit source depth value to encode
+ * @param bytes Pointer where to store the encoded value
+ */
+inline void EncodeD16(u32 value, u8* bytes) {
+    *reinterpret_cast<u16_le*>(bytes) = value & 0xFFFF;
+}
+/**
+ * Encode a 24 bit depth value as D24 format
+ * @param value 24 bit source depth value to encode
+ * @param bytes Pointer where to store the encoded value
+ */
+inline void EncodeD24(u32 value, u8* bytes) {
+    bytes[0] = value & 0xFF;
+    bytes[1] = (value >> 8) & 0xFF;
+    bytes[2] = (value >> 16) & 0xFF;
+}
+/**
+ * Encode a 24 bit depth and 8 bit stencil values as D24S8 format
+ * @param depth 24 bit source depth value to encode
+ * @param stencil 8 bit source stencil value to encode
+ * @param bytes Pointer where to store the encoded value
+ */
+inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) {
+    *reinterpret_cast<u32_le*>(bytes) = (stencil << 24) | depth;
+}
 } // namespace
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index b14de9278..fe20cd77d 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -393,7 +393,15 @@ struct Regs {
            BitField< 8, 8, u32> ref;
        } alpha_test;
-        INSERT_PADDING_WORDS(0x2);
+        union {
+            BitField< 0, 1, u32> stencil_test_enable;
+            BitField< 4, 3, CompareFunc> stencil_test_func;
+            BitField< 8, 8, u32> stencil_replacement_value;
+            BitField<16, 8, u32> stencil_reference_value;
+            BitField<24, 8, u32> stencil_mask;
+        } stencil_test;
+        INSERT_PADDING_WORDS(0x1);
        union {
            BitField< 0, 1, u32> depth_test_enable;
@@ -408,6 +416,30 @@ struct Regs {
        INSERT_PADDING_WORDS(0x8);
    } output_merger;
+    enum DepthFormat : u32 {
+        D16    = 0,
+        D24    = 2,
+        D24S8  = 3
+    };
+    /*
+     * Returns the number of bytes in the specified depth format
+     */
+    static u32 BytesPerDepthPixel(DepthFormat format) {
+        switch (format) {
+        case DepthFormat::D16:
+            return 2;
+        case DepthFormat::D24:
+            return 3;
+        case DepthFormat::D24S8:
+            return 4;
+        default:
+            LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
+            UNIMPLEMENTED();
+        }
+    }
    struct {
        // Components are laid out in reverse byte order, most significant bits first.
        enum ColorFormat : u32 {
@@ -420,7 +452,7 @@ struct Regs {
        INSERT_PADDING_WORDS(0x6);
-        u32 depth_format;
+        DepthFormat depth_format;
        BitField<16, 3, u32> color_format;
        INSERT_PADDING_WORDS(0x4);
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 5861c1926..dd46f0ec3 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -91,7 +91,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
    }
    return {};
- }
+}
 static u32 GetDepth(int x, int y) {
    const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
@@ -100,23 +100,55 @@ static u32 GetDepth(int x, int y) {
    y = (registers.framebuffer.height - y);
    
    const u32 coarse_y = y & ~7;
-    u32 stride = registers.framebuffer.width * 2;
+    u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format);
+    u32 stride = registers.framebuffer.width * bytes_per_pixel;
-    // Assuming 16-bit depth buffer format until actual format handling is implemented
-    return *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
+    u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+    u8* src_pixel = depth_buffer + src_offset;
+    switch (registers.framebuffer.depth_format) {
+        case Pica::Regs::DepthFormat::D16:
+            return Color::DecodeD16(src_pixel);
+        case Pica::Regs::DepthFormat::D24:
+            return Color::DecodeD24(src_pixel);
+        case Pica::Regs::DepthFormat::D24S8:
+            return Color::DecodeD24S8(src_pixel).x;
+        default:
+            LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
+            UNIMPLEMENTED();
+            return 0;
+    }
 }
-static void SetDepth(int x, int y, u16 value) {
+static void SetDepth(int x, int y, u32 value) {
    const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
    u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
    y = (registers.framebuffer.height - y);
    const u32 coarse_y = y & ~7;
-    u32 stride = registers.framebuffer.width * 2;
+    u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format);
+    u32 stride = registers.framebuffer.width * bytes_per_pixel;
-    // Assuming 16-bit depth buffer format until actual format handling is implemented
-    *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride) = value;
+    u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+    u8* dst_pixel = depth_buffer + dst_offset;
+    switch (registers.framebuffer.depth_format) {
+        case Pica::Regs::DepthFormat::D16:
+            Color::EncodeD16(value, dst_pixel);
+            break;
+        case Pica::Regs::DepthFormat::D24:
+            Color::EncodeD24(value, dst_pixel);
+            break;
+        case Pica::Regs::DepthFormat::D24S8:
+            // TODO(Subv): Implement the stencil buffer
+            Color::EncodeD24S8(value, 0, dst_pixel);
+            break;
+        default:
+            LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
+            UNIMPLEMENTED();
+            break;
+    }
 }
 // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
@@ -595,7 +627,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
                u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
                            v1.screenpos[2].ToFloat32() * w1 +
                            v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
-                u16 ref_z = GetDepth(x >> 4, y >> 4);
+                u32 ref_z = GetDepth(x >> 4, y >> 4);
                bool pass = false;
author	bunnei	2015-03-09 22:06:30 -0400
committer	bunnei	2015-03-09 22:06:30 -0400
commit	ec5bc54575c03bed67e712a0508ee55c06ec652c (patch)
tree	0b12d5fe048826ab8411b2ebc38e9200f44c35ab /src
parent	Merge pull request #647 from neobrain/rip_culling_hack (diff)
parent	GPU: Added the stencil test structure to the Pica Regs struct. (diff)
download	yuzu-ec5bc54575c03bed67e712a0508ee55c06ec652c.tar.gz yuzu-ec5bc54575c03bed67e712a0508ee55c06ec652c.tar.xz yuzu-ec5bc54575c03bed67e712a0508ee55c06ec652c.zip

diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp index 5bd6c0235..d621d7204 100644 --- a/src/citra_qt/debugger/graphics_framebuffer.cpp +++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -27,6 +27,7 @@ GraphicsFramebufferWidget::GraphicsFramebufferWidget(std::shared_ptr<Pica::Debug
27		27
28	framebuffer_source_list = new QComboBox;	28	framebuffer_source_list = new QComboBox;
29	framebuffer_source_list->addItem(tr("Active Render Target"));	29	framebuffer_source_list->addItem(tr("Active Render Target"));
		30	framebuffer_source_list->addItem(tr("Active Depth Buffer"));
30	framebuffer_source_list->addItem(tr("Custom"));	31	framebuffer_source_list->addItem(tr("Custom"));
31	framebuffer_source_list->setCurrentIndex(static_cast<int>(framebuffer_source));	32	framebuffer_source_list->setCurrentIndex(static_cast<int>(framebuffer_source));
32		33
@@ -49,6 +50,9 @@ GraphicsFramebufferWidget::GraphicsFramebufferWidget(std::shared_ptr<Pica::Debug
49	framebuffer_format_control->addItem(tr("RGB5A1"));	50	framebuffer_format_control->addItem(tr("RGB5A1"));
50	framebuffer_format_control->addItem(tr("RGB565"));	51	framebuffer_format_control->addItem(tr("RGB565"));
51	framebuffer_format_control->addItem(tr("RGBA4"));	52	framebuffer_format_control->addItem(tr("RGBA4"));
		53	framebuffer_format_control->addItem(tr("D16"));
		54	framebuffer_format_control->addItem(tr("D24"));
		55	framebuffer_format_control->addItem(tr("D24S8"));
52		56
53	// TODO: This QLabel should shrink the image to the available space rather than just expanding...	57	// TODO: This QLabel should shrink the image to the available space rather than just expanding...
54	framebuffer_picture_label = new QLabel;	58	framebuffer_picture_label = new QLabel;
@@ -172,8 +176,7 @@ void GraphicsFramebufferWidget::OnUpdate()
172	{	176	{
173	// TODO: Store a reference to the registers in the debug context instead of accessing them directly...	177	// TODO: Store a reference to the registers in the debug context instead of accessing them directly...
174		178
175	auto framebuffer = Pica::registers.framebuffer;	179	const auto& framebuffer = Pica::registers.framebuffer;
176	using Framebuffer = decltype(framebuffer);
177		180
178	framebuffer_address = framebuffer.GetColorBufferPhysicalAddress();	181	framebuffer_address = framebuffer.GetColorBufferPhysicalAddress();
179	framebuffer_width = framebuffer.GetWidth();	182	framebuffer_width = framebuffer.GetWidth();
@@ -184,6 +187,18 @@ void GraphicsFramebufferWidget::OnUpdate()
184	break;	187	break;
185	}	188	}
186		189
		190	case Source::DepthBuffer:
		191	{
		192	const auto& framebuffer = Pica::registers.framebuffer;
		193
		194	framebuffer_address = framebuffer.GetDepthBufferPhysicalAddress();
		195	framebuffer_width = framebuffer.GetWidth();
		196	framebuffer_height = framebuffer.GetHeight();
		197	framebuffer_format = Format::D16;
		198
		199	break;
		200	}
		201
187	case Source::Custom:	202	case Source::Custom:
188	{	203	{
189	// Keep user-specified values	204	// Keep user-specified values
@@ -197,15 +212,16 @@ void GraphicsFramebufferWidget::OnUpdate()
197		212
198	// TODO: Implement a good way to visualize alpha components!	213	// TODO: Implement a good way to visualize alpha components!
199	// TODO: Unify this decoding code with the texture decoder	214	// TODO: Unify this decoding code with the texture decoder
200	u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer_format));	215	u32 bytes_per_pixel = GraphicsFramebufferWidget::BytesPerPixel(framebuffer_format);
201		216
202	QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32);	217	QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32);
203	u8* color_buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));	218	u8* buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
		219
204	for (unsigned int y = 0; y < framebuffer_height; ++y) {	220	for (unsigned int y = 0; y < framebuffer_height; ++y) {
205	for (unsigned int x = 0; x < framebuffer_width; ++x) {	221	for (unsigned int x = 0; x < framebuffer_width; ++x) {
206	const u32 coarse_y = y & ~7;	222	const u32 coarse_y = y & ~7;
207	u32 offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer_width * bytes_per_pixel;	223	u32 offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer_width * bytes_per_pixel;
208	const u8* pixel = color_buffer + offset;	224	const u8* pixel = buffer + offset;
209	Math::Vec4<u8> color = { 0, 0, 0, 0 };	225	Math::Vec4<u8> color = { 0, 0, 0, 0 };
210		226
211	switch (framebuffer_format) {	227	switch (framebuffer_format) {
@@ -224,6 +240,29 @@ void GraphicsFramebufferWidget::OnUpdate()
224	case Format::RGBA4:	240	case Format::RGBA4:
225	color = Color::DecodeRGBA4(pixel);	241	color = Color::DecodeRGBA4(pixel);
226	break;	242	break;
		243	case Format::D16:
		244	{
		245	u32 data = Color::DecodeD16(pixel);
		246	color.r() = data & 0xFF;
		247	color.g() = (data >> 8) & 0xFF;
		248	break;
		249	}
		250	case Format::D24:
		251	{
		252	u32 data = Color::DecodeD24(pixel);
		253	color.r() = data & 0xFF;
		254	color.g() = (data >> 8) & 0xFF;
		255	color.b() = (data >> 16) & 0xFF;
		256	break;
		257	}
		258	case Format::D24S8:
		259	{
		260	Math::Vec2<u32> data = Color::DecodeD24S8(pixel);
		261	color.r() = data.x & 0xFF;
		262	color.g() = (data.x >> 8) & 0xFF;
		263	color.b() = (data.x >> 16) & 0xFF;
		264	break;
		265	}
227	default:	266	default:
228	qDebug() << "Unknown fb color format " << static_cast<int>(framebuffer_format);	267	qDebug() << "Unknown fb color format " << static_cast<int>(framebuffer_format);
229	break;	268	break;
@@ -240,3 +279,19 @@ void GraphicsFramebufferWidget::OnUpdate()
240	framebuffer_format_control->setCurrentIndex(static_cast<int>(framebuffer_format));	279	framebuffer_format_control->setCurrentIndex(static_cast<int>(framebuffer_format));
241	framebuffer_picture_label->setPixmap(pixmap);	280	framebuffer_picture_label->setPixmap(pixmap);
242	}	281	}
		282
		283	u32 GraphicsFramebufferWidget::BytesPerPixel(GraphicsFramebufferWidget::Format format) {
		284	switch (format) {
		285	case Format::RGBA8:
		286	case Format::D24S8:
		287	return 4;
		288	case Format::RGB8:
		289	case Format::D24:
		290	return 3;
		291	case Format::RGB5A1:
		292	case Format::RGB565:
		293	case Format::RGBA4:
		294	case Format::D16:
		295	return 2;
		296	}
		297	}


diff --git a/src/citra_qt/debugger/graphics_framebuffer.h b/src/citra_qt/debugger/graphics_framebuffer.h index 15ebd1f7d..4cb396ffe 100644 --- a/src/citra_qt/debugger/graphics_framebuffer.h +++ b/src/citra_qt/debugger/graphics_framebuffer.h
@@ -20,8 +20,9 @@ class GraphicsFramebufferWidget : public BreakPointObserverDock {
20	using Event = Pica::DebugContext::Event;	20	using Event = Pica::DebugContext::Event;
21		21
22	enum class Source {	22	enum class Source {
23	PicaTarget = 0,	23	PicaTarget = 0,
24	Custom = 1,	24	DepthBuffer = 1,
		25	Custom = 2,
25		26
26	// TODO: Add GPU framebuffer sources!	27	// TODO: Add GPU framebuffer sources!
27	};	28	};
@@ -32,8 +33,13 @@ class GraphicsFramebufferWidget : public BreakPointObserverDock {
32	RGB5A1 = 2,	33	RGB5A1 = 2,
33	RGB565 = 3,	34	RGB565 = 3,
34	RGBA4 = 4,	35	RGBA4 = 4,
		36	D16 = 5,
		37	D24 = 6,
		38	D24S8 = 7
35	};	39	};
36		40
		41	static u32 BytesPerPixel(Format format);
		42
37	public:	43	public:
38	GraphicsFramebufferWidget(std::shared_ptr<Pica::DebugContext> debug_context, QWidget* parent = nullptr);	44	GraphicsFramebufferWidget(std::shared_ptr<Pica::DebugContext> debug_context, QWidget* parent = nullptr);
39		45


diff --git a/src/video_core/color.h b/src/video_core/color.h index 35da901f2..14ade74f2 100644 --- a/src/video_core/color.h +++ b/src/video_core/color.h
@@ -101,6 +101,33 @@ inline const Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
101	}	101	}
102		102
103	/**	103	/**
		104	* Decode a depth value stored in D16 format
		105	* @param bytes Pointer to encoded source value
		106	* @return Depth value as an u32
		107	*/
		108	inline u32 DecodeD16(const u8* bytes) {
		109	return *reinterpret_cast<const u16_le*>(bytes);
		110	}
		111
		112	/**
		113	* Decode a depth value stored in D24 format
		114	* @param bytes Pointer to encoded source value
		115	* @return Depth value as an u32
		116	*/
		117	inline u32 DecodeD24(const u8* bytes) {
		118	return (bytes[2] << 16) | (bytes[1] << 8) | bytes[0];
		119	}
		120
		121	/**
		122	* Decode a depth value and a stencil value stored in D24S8 format
		123	* @param bytes Pointer to encoded source values
		124	* @return Resulting values stored as a Math::Vec2
		125	*/
		126	inline const Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
		127	return { (bytes[2] << 16) | (bytes[1] << 8) | bytes[0], bytes[3] };
		128	}
		129
		130	/**
104	* Encode a color as RGBA8 format	131	* Encode a color as RGBA8 format
105	* @param color Source color to encode	132	* @param color Source color to encode
106	* @param bytes Destination pointer to store encoded color	133	* @param bytes Destination pointer to store encoded color
@@ -153,4 +180,34 @@ inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
153	(Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a());	180	(Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
154	}	181	}
155		182
		183	/**
		184	* Encode a 16 bit depth value as D16 format
		185	* @param value 16 bit source depth value to encode
		186	* @param bytes Pointer where to store the encoded value
		187	*/
		188	inline void EncodeD16(u32 value, u8* bytes) {
		189	*reinterpret_cast<u16_le*>(bytes) = value & 0xFFFF;
		190	}
		191
		192	/**
		193	* Encode a 24 bit depth value as D24 format
		194	* @param value 24 bit source depth value to encode
		195	* @param bytes Pointer where to store the encoded value
		196	*/
		197	inline void EncodeD24(u32 value, u8* bytes) {
		198	bytes[0] = value & 0xFF;
		199	bytes[1] = (value >> 8) & 0xFF;
		200	bytes[2] = (value >> 16) & 0xFF;
		201	}
		202
		203	/**
		204	* Encode a 24 bit depth and 8 bit stencil values as D24S8 format
		205	* @param depth 24 bit source depth value to encode
		206	* @param stencil 8 bit source stencil value to encode
		207	* @param bytes Pointer where to store the encoded value
		208	*/
		209	inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) {
		210	*reinterpret_cast<u32_le*>(bytes) = (stencil << 24) | depth;
		211	}
		212
156	} // namespace	213	} // namespace


diff --git a/src/video_core/pica.h b/src/video_core/pica.h index b14de9278..fe20cd77d 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h
@@ -393,7 +393,15 @@ struct Regs {
393	BitField< 8, 8, u32> ref;	393	BitField< 8, 8, u32> ref;
394	} alpha_test;	394	} alpha_test;
395		395
396	INSERT_PADDING_WORDS(0x2);	396	union {
		397	BitField< 0, 1, u32> stencil_test_enable;
		398	BitField< 4, 3, CompareFunc> stencil_test_func;
		399	BitField< 8, 8, u32> stencil_replacement_value;
		400	BitField<16, 8, u32> stencil_reference_value;
		401	BitField<24, 8, u32> stencil_mask;
		402	} stencil_test;
		403
		404	INSERT_PADDING_WORDS(0x1);
397		405
398	union {	406	union {
399	BitField< 0, 1, u32> depth_test_enable;	407	BitField< 0, 1, u32> depth_test_enable;
@@ -408,6 +416,30 @@ struct Regs {
408	INSERT_PADDING_WORDS(0x8);	416	INSERT_PADDING_WORDS(0x8);
409	} output_merger;	417	} output_merger;
410		418
		419	enum DepthFormat : u32 {
		420	D16 = 0,
		421
		422	D24 = 2,
		423	D24S8 = 3
		424	};
		425
		426	/*
		427	* Returns the number of bytes in the specified depth format
		428	*/
		429	static u32 BytesPerDepthPixel(DepthFormat format) {
		430	switch (format) {
		431	case DepthFormat::D16:
		432	return 2;
		433	case DepthFormat::D24:
		434	return 3;
		435	case DepthFormat::D24S8:
		436	return 4;
		437	default:
		438	LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
		439	UNIMPLEMENTED();
		440	}
		441	}
		442
411	struct {	443	struct {
412	// Components are laid out in reverse byte order, most significant bits first.	444	// Components are laid out in reverse byte order, most significant bits first.
413	enum ColorFormat : u32 {	445	enum ColorFormat : u32 {
@@ -420,7 +452,7 @@ struct Regs {
420		452
421	INSERT_PADDING_WORDS(0x6);	453	INSERT_PADDING_WORDS(0x6);
422		454
423	u32 depth_format;	455	DepthFormat depth_format;
424	BitField<16, 3, u32> color_format;	456	BitField<16, 3, u32> color_format;
425		457
426	INSERT_PADDING_WORDS(0x4);	458	INSERT_PADDING_WORDS(0x4);


diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 5861c1926..dd46f0ec3 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp
@@ -91,7 +91,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
91	}	91	}
92		92
93	return {};	93	return {};
94	}	94	}
95		95
96	static u32 GetDepth(int x, int y) {	96	static u32 GetDepth(int x, int y) {
97	const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();	97	const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
@@ -100,23 +100,55 @@ static u32 GetDepth(int x, int y) {
100	y = (registers.framebuffer.height - y);	100	y = (registers.framebuffer.height - y);
101		101
102	const u32 coarse_y = y & ~7;	102	const u32 coarse_y = y & ~7;
103	u32 stride = registers.framebuffer.width * 2;	103	u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format);
104		104	u32 stride = registers.framebuffer.width * bytes_per_pixel;
105	// Assuming 16-bit depth buffer format until actual format handling is implemented	105
106	return *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);	106	u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
		107	u8* src_pixel = depth_buffer + src_offset;
		108
		109	switch (registers.framebuffer.depth_format) {
		110	case Pica::Regs::DepthFormat::D16:
		111	return Color::DecodeD16(src_pixel);
		112	case Pica::Regs::DepthFormat::D24:
		113	return Color::DecodeD24(src_pixel);
		114	case Pica::Regs::DepthFormat::D24S8:
		115	return Color::DecodeD24S8(src_pixel).x;
		116	default:
		117	LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
		118	UNIMPLEMENTED();
		119	return 0;
		120	}
107	}	121	}
108		122
109	static void SetDepth(int x, int y, u16 value) {	123	static void SetDepth(int x, int y, u32 value) {
110	const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();	124	const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
111	u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));	125	u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
112		126
113	y = (registers.framebuffer.height - y);	127	y = (registers.framebuffer.height - y);
114		128
115	const u32 coarse_y = y & ~7;	129	const u32 coarse_y = y & ~7;
116	u32 stride = registers.framebuffer.width * 2;	130	u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format);
117		131	u32 stride = registers.framebuffer.width * bytes_per_pixel;
118	// Assuming 16-bit depth buffer format until actual format handling is implemented	132
119	*(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride) = value;	133	u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
		134	u8* dst_pixel = depth_buffer + dst_offset;
		135
		136	switch (registers.framebuffer.depth_format) {
		137	case Pica::Regs::DepthFormat::D16:
		138	Color::EncodeD16(value, dst_pixel);
		139	break;
		140	case Pica::Regs::DepthFormat::D24:
		141	Color::EncodeD24(value, dst_pixel);
		142	break;
		143	case Pica::Regs::DepthFormat::D24S8:
		144	// TODO(Subv): Implement the stencil buffer
		145	Color::EncodeD24S8(value, 0, dst_pixel);
		146	break;
		147	default:
		148	LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
		149	UNIMPLEMENTED();
		150	break;
		151	}
120	}	152	}
121		153
122	// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values	154	// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
@@ -595,7 +627,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
595	u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +	627	u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
596	v1.screenpos[2].ToFloat32() * w1 +	628	v1.screenpos[2].ToFloat32() * w1 +
597	v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);	629	v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
598	u16 ref_z = GetDepth(x >> 4, y >> 4);	630	u32 ref_z = GetDepth(x >> 4, y >> 4);
599		631
600	bool pass = false;	632	bool pass = false;
601		633