3 files changed, 91 insertions, 54 deletions
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index f436aa541..27c246a99 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -23,6 +23,7 @@
 #include "video_core/color.h"
 #include "video_core/math.h"
 #include "video_core/pica.h"
+#include "video_core/utils.h"
 #include "debug_utils.h"
@@ -306,63 +307,33 @@ std::unique_ptr<PicaTrace> FinishPicaTracing()
 }
 const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) {
-    // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
-    // of which is composed of four 2x2 subtiles each of which is composed of four texels.
-    // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
-    // texels are laid out in a 2x2 subtile like this:
-    // 2 3
-    // 0 1
-    //
-    // The full 8x8 tile has the texels arranged like this:
-    //
-    // 42 43 46 47 58 59 62 63
-    // 40 41 44 45 56 57 60 61
-    // 34 35 38 39 50 51 54 55
-    // 32 33 36 37 48 49 52 53
-    // 10 11 14 15 26 27 30 31
-    // 08 09 12 13 24 25 28 29
-    // 02 03 06 07 18 19 22 23
-    // 00 01 04 05 16 17 20 21
-    const unsigned int block_width = 8;
-    const unsigned int block_height = 8;
    const unsigned int coarse_x = x & ~7;
    const unsigned int coarse_y = y & ~7;
-    // Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are
-    // arranged in a Z-order curve. More details on the bit manipulation at:
-    // https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
-    unsigned int i = (x & 7) | ((y & 7) << 8); // ---- -210
-    i = (i ^ (i << 2)) & 0x1313;               // ---2 --10
-    i = (i ^ (i << 1)) & 0x1515;               // ---2 -1-0
-    i = (i | (i >> 7)) & 0x3F;
    if (info.format != Regs::TextureFormat::ETC1 &&
        info.format != Regs::TextureFormat::ETC1A4) {
        // TODO(neobrain): Fix code design to unify vertical block offsets!
        source += coarse_y * info.stride;
    }
-    const unsigned int offset = coarse_x * block_height;
+    
    // TODO: Assert that width/height are multiples of block dimensions
    switch (info.format) {
    case Regs::TextureFormat::RGBA8:
    {
-        const u8* source_ptr = source + offset * 4 + i * 4;
+        const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 4);
        return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] };
    }
    case Regs::TextureFormat::RGB8:
    {
-        const u8* source_ptr = source + offset * 3 + i * 3;
+        const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 3);
        return { source_ptr[2], source_ptr[1], source_ptr[0], 255 };
    }
    case Regs::TextureFormat::RGBA5551:
    {
-        const u16 source_ptr = *(const u16*)(source + offset * 2 + i * 2);
+        const u16 source_ptr = *(const u16*)(source + VideoCore::GetMortonOffset(x, y, 2));
        u8 r = (source_ptr >> 11) & 0x1F;
        u8 g = ((source_ptr) >> 6) & 0x1F;
        u8 b = (source_ptr >> 1) & 0x1F;
@@ -373,7 +344,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
    case Regs::TextureFormat::RGB565:
    {
-        const u16 source_ptr = *(const u16*)(source + offset * 2 + i * 2);
+        const u16 source_ptr = *(const u16*)(source + VideoCore::GetMortonOffset(x, y, 2));
        u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F);
        u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F);
        u8 b = Color::Convert5To8((source_ptr) & 0x1F);
@@ -382,7 +353,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
    case Regs::TextureFormat::RGBA4:
    {
-        const u8* source_ptr = source + offset * 2 + i * 2;
+        const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2);
        u8 r = Color::Convert4To8(source_ptr[1] >> 4);
        u8 g = Color::Convert4To8(source_ptr[1] & 0xF);
        u8 b = Color::Convert4To8(source_ptr[0] >> 4);
@@ -392,7 +363,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
    case Regs::TextureFormat::IA8:
    {
-        const u8* source_ptr = source + offset * 2 + i * 2;
+        const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2);
        if (disable_alpha) {
            // Show intensity as red, alpha as green
@@ -404,13 +375,13 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
    case Regs::TextureFormat::I8:
    {
-        const u8* source_ptr = source + offset + i;
+        const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
        return { *source_ptr, *source_ptr, *source_ptr, 255 };
    }
    case Regs::TextureFormat::A8:
    {
-        const u8* source_ptr = source + offset + i;
+        const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
        if (disable_alpha) {
            return { *source_ptr, *source_ptr, *source_ptr, 255 };
@@ -421,7 +392,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
    case Regs::TextureFormat::IA4:
    {
-        const u8* source_ptr = source + offset + i;
+        const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
        u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4);
        u8 a = Color::Convert4To8((*source_ptr) & 0xF);
@@ -436,9 +407,10 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
    case Regs::TextureFormat::A4:
    {
-        const u8* source_ptr = source + (offset + i) / 2;
+        u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1);
+        const u8* source_ptr = source + morton_offset / 2;
-        u8 a = (i % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF);
+        u8 a = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF);
        a = Color::Convert4To8(a);
        if (disable_alpha) {
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 24dc37856..a7bb0612f 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -7,13 +7,14 @@
 #include "common/common_types.h"
 #include "common/math_util.h"
+#include "core/hw/gpu.h"
+#include "debug_utils/debug_utils.h"
 #include "math.h"
 #include "color.h"
 #include "pica.h"
 #include "rasterizer.h"
 #include "vertex_shader.h"
+#include "video_core/utils.h"
-#include "debug_utils/debug_utils.h"
 namespace Pica {
@@ -27,10 +28,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
    // NOTE: The framebuffer height register contains the actual FB height minus one.
    y = (registers.framebuffer.height - y);
+    const u32 coarse_y = y & ~7;
+    u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value()));
+    u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel;
    switch (registers.framebuffer.color_format) {
    case registers.framebuffer.RGBA8:
    {
-        u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4;
+        u8* pixel = color_buffer + dst_offset;
        pixel[3] = color.r();
        pixel[2] = color.g();
        pixel[1] = color.b();
@@ -40,14 +45,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
    case registers.framebuffer.RGBA4:
    {
-        u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 2;
+        u8* pixel = color_buffer + dst_offset;
        pixel[1] = (color.r() & 0xF0) | (color.g() >> 4);
        pixel[0] = (color.b() & 0xF0) | (color.a() >> 4);
        break;
    }
    default:
-        LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format);
+        LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value());
        UNIMPLEMENTED();
    }
 }
@@ -58,11 +63,15 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
    y = (registers.framebuffer.height - y);
+    const u32 coarse_y = y & ~7;
+    u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value()));
+    u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel;
    switch (registers.framebuffer.color_format) {
    case registers.framebuffer.RGBA8:
    {
        Math::Vec4<u8> ret;
-        u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4;
+        u8* pixel = color_buffer + src_offset;
        ret.r() = pixel[3];
        ret.g() = pixel[2];
        ret.b() = pixel[1];
@@ -73,7 +82,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
    case registers.framebuffer.RGBA4:
    {
        Math::Vec4<u8> ret;
-        u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 2;
+        u8* pixel = color_buffer + src_offset;
        ret.r() = Color::Convert4To8(pixel[1] >> 4);
        ret.g() = Color::Convert4To8(pixel[1] & 0x0F);
        ret.b() = Color::Convert4To8(pixel[0] >> 4);
@@ -82,7 +91,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
    }
    default:
-        LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format);
+        LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value());
        UNIMPLEMENTED();
    }
@@ -91,22 +100,28 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
 static u32 GetDepth(int x, int y) {
    const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
-    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
+    u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr)); // byte pointer: the offsets added below are byte offsets
    y = (registers.framebuffer.height - y);
+    
+    const u32 coarse_y = y & ~7; // y with its low three bits cleared (start of the 8-row band containing y)
+    u32 stride = registers.framebuffer.width * 2; // 2 bytes per value under the 16-bit assumption below; NOTE(review): removed code used GetWidth() - confirm .width holds the same value
    // Assuming 16-bit depth buffer format until actual format handling is implemented
-    return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
+    return *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride); // in-band Morton offset plus the offset of the band itself
 }
 static void SetDepth(int x, int y, u16 value) {
    const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
-    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
+    u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr)); // byte pointer: the offsets added below are byte offsets
    y = (registers.framebuffer.height - y);
+    const u32 coarse_y = y & ~7; // y with its low three bits cleared (start of the 8-row band containing y)
+    u32 stride = registers.framebuffer.width * 2; // 2 bytes per value under the 16-bit assumption below; NOTE(review): removed code used GetWidth() - confirm .width holds the same value
    // Assuming 16-bit depth buffer format until actual format handling is implemented
-    *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
+    *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride) = value; // in-band Morton offset plus the offset of the band itself
 }
 // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index 6fd640425..bda793fa5 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -35,4 +35,54 @@ struct TGAHeader {
 */
 void DumpTGA(std::string filename, short width, short height, u8* raw_data);
+/**
+ * Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are
+ * arranged in a Z-order curve. More details on the bit manipulation at:
+ * https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
+ *
+ * @param x Horizontal coordinate; only bits 0-2 are used.
+ * @param y Vertical coordinate; only bits 0-2 are used.
+ * @return A 6-bit value (0-63) holding the bits of x in bit positions 0, 2 and 4 and the
+ *         bits of y in bit positions 1, 3 and 5.
+ */
+static inline u32 MortonInterleave(u32 x, u32 y) {
+    u32 i = (x & 7) | ((y & 7) << 8); // ---- -210
+    i = (i ^ (i << 2)) & 0x1313;      // ---2 --10
+    i = (i ^ (i << 1)) & 0x1515;      // ---2 -1-0
+    i = (i | (i >> 7)) & 0x3F;        // fold the spread y bits (high byte) into the odd positions
+    return i;
+}
+/**
+ * Calculates the offset of the position of the pixel in Morton order
+ *
+ * The result depends on x and on the low three bits of y only. No offset for the band of
+ * rows selected by (y & ~7) is added here, so a caller that needs one has to add it itself.
+ *
+ * @param x Horizontal pixel coordinate.
+ * @param y Vertical pixel coordinate; only bits 0-2 influence the result.
+ * @param bytes_per_pixel Factor by which the pixel index is multiplied.
+ * @return (MortonInterleave(x, y) + (x & ~7) * 8) * bytes_per_pixel
+ */
+static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
+    // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
+    // of which is composed of four 2x2 subtiles each of which is composed of four texels.
+    // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
+    // texels are laid out in a 2x2 subtile like this:
+    // 2 3
+    // 0 1
+    //
+    // The full 8x8 tile has the texels arranged like this:
+    //
+    // 42 43 46 47 58 59 62 63
+    // 40 41 44 45 56 57 60 61
+    // 34 35 38 39 50 51 54 55
+    // 32 33 36 37 48 49 52 53
+    // 10 11 14 15 26 27 30 31
+    // 08 09 12 13 24 25 28 29
+    // 02 03 06 07 18 19 22 23
+    // 00 01 04 05 16 17 20 21
+    //
+    // This pattern is what's called Z-order curve, or Morton order.
+    const unsigned int block_width = 8; // NOTE(review): not referenced anywhere below
+    const unsigned int block_height = 8;
+    const unsigned int coarse_x = x & ~7; // x with its low three bits cleared
+    const unsigned int coarse_y = y & ~7; // NOTE(review): not referenced anywhere below
+    u32 i = VideoCore::MortonInterleave(x, y); // index of the pixel inside its 8x8 tile
+    const unsigned int offset = coarse_x * block_height; // 64 pixels for every whole tile to the left
+    return (i + offset) * bytes_per_pixel;
+}
 } // namespace

diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index f436aa541..27c246a99 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -23,6 +23,7 @@
23	#include "video_core/color.h"	23	#include "video_core/color.h"
24	#include "video_core/math.h"	24	#include "video_core/math.h"
25	#include "video_core/pica.h"	25	#include "video_core/pica.h"
		26	#include "video_core/utils.h"
26		27
27	#include "debug_utils.h"	28	#include "debug_utils.h"
28		29
@@ -306,63 +307,33 @@ std::unique_ptr<PicaTrace> FinishPicaTracing()
306	}	307	}
307		308
308	const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) {	309	const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) {
309	// Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
310	// of which is composed of four 2x2 subtiles each of which is composed of four texels.
311	// Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
312	// texels are laid out in a 2x2 subtile like this:
313	// 2 3
314	// 0 1
315	//
316	// The full 8x8 tile has the texels arranged like this:
317	//
318	// 42 43 46 47 58 59 62 63
319	// 40 41 44 45 56 57 60 61
320	// 34 35 38 39 50 51 54 55
321	// 32 33 36 37 48 49 52 53
322	// 10 11 14 15 26 27 30 31
323	// 08 09 12 13 24 25 28 29
324	// 02 03 06 07 18 19 22 23
325	// 00 01 04 05 16 17 20 21
326
327	const unsigned int block_width = 8;
328	const unsigned int block_height = 8;
329
330	const unsigned int coarse_x = x & ~7;	310	const unsigned int coarse_x = x & ~7;
331	const unsigned int coarse_y = y & ~7;	311	const unsigned int coarse_y = y & ~7;
332		312
333	// Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are
334	// arranged in a Z-order curve. More details on the bit manipulation at:
335	// https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
336	unsigned int i = (x & 7) \| ((y & 7) << 8); // ---- -210
337	i = (i ^ (i << 2)) & 0x1313; // ---2 --10
338	i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0
339	i = (i \| (i >> 7)) & 0x3F;
340
341	if (info.format != Regs::TextureFormat::ETC1 &&	313	if (info.format != Regs::TextureFormat::ETC1 &&
342	info.format != Regs::TextureFormat::ETC1A4) {	314	info.format != Regs::TextureFormat::ETC1A4) {
343	// TODO(neobrain): Fix code design to unify vertical block offsets!	315	// TODO(neobrain): Fix code design to unify vertical block offsets!
344	source += coarse_y * info.stride;	316	source += coarse_y * info.stride;
345	}	317	}
346	const unsigned int offset = coarse_x * block_height;	318
347
348	// TODO: Assert that width/height are multiples of block dimensions	319	// TODO: Assert that width/height are multiples of block dimensions
349		320
350	switch (info.format) {	321	switch (info.format) {
351	case Regs::TextureFormat::RGBA8:	322	case Regs::TextureFormat::RGBA8:
352	{	323	{
353	const u8* source_ptr = source + offset * 4 + i * 4;	324	const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 4);
354	return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] };	325	return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] };
355	}	326	}
356		327
357	case Regs::TextureFormat::RGB8:	328	case Regs::TextureFormat::RGB8:
358	{	329	{
359	const u8* source_ptr = source + offset * 3 + i * 3;	330	const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 3);
360	return { source_ptr[2], source_ptr[1], source_ptr[0], 255 };	331	return { source_ptr[2], source_ptr[1], source_ptr[0], 255 };
361	}	332	}
362		333
363	case Regs::TextureFormat::RGBA5551:	334	case Regs::TextureFormat::RGBA5551:
364	{	335	{
365	const u16 source_ptr = (const u16)(source + offset * 2 + i * 2);	336	const u16 source_ptr = (const u16)(source + VideoCore::GetMortonOffset(x, y, 2));
366	u8 r = (source_ptr >> 11) & 0x1F;	337	u8 r = (source_ptr >> 11) & 0x1F;
367	u8 g = ((source_ptr) >> 6) & 0x1F;	338	u8 g = ((source_ptr) >> 6) & 0x1F;
368	u8 b = (source_ptr >> 1) & 0x1F;	339	u8 b = (source_ptr >> 1) & 0x1F;
@@ -373,7 +344,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
373		344
374	case Regs::TextureFormat::RGB565:	345	case Regs::TextureFormat::RGB565:
375	{	346	{
376	const u16 source_ptr = (const u16)(source + offset * 2 + i * 2);	347	const u16 source_ptr = (const u16)(source + VideoCore::GetMortonOffset(x, y, 2));
377	u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F);	348	u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F);
378	u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F);	349	u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F);
379	u8 b = Color::Convert5To8((source_ptr) & 0x1F);	350	u8 b = Color::Convert5To8((source_ptr) & 0x1F);
@@ -382,7 +353,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
382		353
383	case Regs::TextureFormat::RGBA4:	354	case Regs::TextureFormat::RGBA4:
384	{	355	{
385	const u8* source_ptr = source + offset * 2 + i * 2;	356	const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2);
386	u8 r = Color::Convert4To8(source_ptr[1] >> 4);	357	u8 r = Color::Convert4To8(source_ptr[1] >> 4);
387	u8 g = Color::Convert4To8(source_ptr[1] & 0xF);	358	u8 g = Color::Convert4To8(source_ptr[1] & 0xF);
388	u8 b = Color::Convert4To8(source_ptr[0] >> 4);	359	u8 b = Color::Convert4To8(source_ptr[0] >> 4);
@@ -392,7 +363,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
392		363
393	case Regs::TextureFormat::IA8:	364	case Regs::TextureFormat::IA8:
394	{	365	{
395	const u8* source_ptr = source + offset * 2 + i * 2;	366	const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2);
396		367
397	if (disable_alpha) {	368	if (disable_alpha) {
398	// Show intensity as red, alpha as green	369	// Show intensity as red, alpha as green
@@ -404,13 +375,13 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
404		375
405	case Regs::TextureFormat::I8:	376	case Regs::TextureFormat::I8:
406	{	377	{
407	const u8* source_ptr = source + offset + i;	378	const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
408	return { source_ptr, source_ptr, *source_ptr, 255 };	379	return { source_ptr, source_ptr, *source_ptr, 255 };
409	}	380	}
410		381
411	case Regs::TextureFormat::A8:	382	case Regs::TextureFormat::A8:
412	{	383	{
413	const u8* source_ptr = source + offset + i;	384	const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
414		385
415	if (disable_alpha) {	386	if (disable_alpha) {
416	return { source_ptr, source_ptr, *source_ptr, 255 };	387	return { source_ptr, source_ptr, *source_ptr, 255 };
@@ -421,7 +392,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
421		392
422	case Regs::TextureFormat::IA4:	393	case Regs::TextureFormat::IA4:
423	{	394	{
424	const u8* source_ptr = source + offset + i;	395	const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
425		396
426	u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4);	397	u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4);
427	u8 a = Color::Convert4To8((*source_ptr) & 0xF);	398	u8 a = Color::Convert4To8((*source_ptr) & 0xF);
@@ -436,9 +407,10 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
436		407
437	case Regs::TextureFormat::A4:	408	case Regs::TextureFormat::A4:
438	{	409	{
439	const u8* source_ptr = source + (offset + i) / 2;	410	u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1);
		411	const u8* source_ptr = source + morton_offset / 2;
440		412
441	u8 a = (i % 2) ? ((source_ptr & 0xF0) >> 4) : (source_ptr & 0xF);	413	u8 a = (morton_offset % 2) ? ((source_ptr & 0xF0) >> 4) : (source_ptr & 0xF);
442	a = Color::Convert4To8(a);	414	a = Color::Convert4To8(a);
443		415
444	if (disable_alpha) {	416	if (disable_alpha) {


diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 24dc37856..a7bb0612f 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp
@@ -7,13 +7,14 @@
7	#include "common/common_types.h"	7	#include "common/common_types.h"
8	#include "common/math_util.h"	8	#include "common/math_util.h"
9		9
		10	#include "core/hw/gpu.h"
		11	#include "debug_utils/debug_utils.h"
10	#include "math.h"	12	#include "math.h"
11	#include "color.h"	13	#include "color.h"
12	#include "pica.h"	14	#include "pica.h"
13	#include "rasterizer.h"	15	#include "rasterizer.h"
14	#include "vertex_shader.h"	16	#include "vertex_shader.h"
15		17	#include "video_core/utils.h"
16	#include "debug_utils/debug_utils.h"
17		18
18	namespace Pica {	19	namespace Pica {
19		20
@@ -27,10 +28,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
27	// NOTE: The framebuffer height register contains the actual FB height minus one.	28	// NOTE: The framebuffer height register contains the actual FB height minus one.
28	y = (registers.framebuffer.height - y);	29	y = (registers.framebuffer.height - y);
29		30
		31	const u32 coarse_y = y & ~7;
		32	u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value()));
		33	u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel;
		34
30	switch (registers.framebuffer.color_format) {	35	switch (registers.framebuffer.color_format) {
31	case registers.framebuffer.RGBA8:	36	case registers.framebuffer.RGBA8:
32	{	37	{
33	u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4;	38	u8* pixel = color_buffer + dst_offset;
34	pixel[3] = color.r();	39	pixel[3] = color.r();
35	pixel[2] = color.g();	40	pixel[2] = color.g();
36	pixel[1] = color.b();	41	pixel[1] = color.b();
@@ -40,14 +45,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
40		45
41	case registers.framebuffer.RGBA4:	46	case registers.framebuffer.RGBA4:
42	{	47	{
43	u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 2;	48	u8* pixel = color_buffer + dst_offset;
44	pixel[1] = (color.r() & 0xF0) \| (color.g() >> 4);	49	pixel[1] = (color.r() & 0xF0) \| (color.g() >> 4);
45	pixel[0] = (color.b() & 0xF0) \| (color.a() >> 4);	50	pixel[0] = (color.b() & 0xF0) \| (color.a() >> 4);
46	break;	51	break;
47	}	52	}
48		53
49	default:	54	default:
50	LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format);	55	LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value());
51	UNIMPLEMENTED();	56	UNIMPLEMENTED();
52	}	57	}
53	}	58	}
@@ -58,11 +63,15 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
58		63
59	y = (registers.framebuffer.height - y);	64	y = (registers.framebuffer.height - y);
60		65
		66	const u32 coarse_y = y & ~7;
		67	u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value()));
		68	u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel;
		69
61	switch (registers.framebuffer.color_format) {	70	switch (registers.framebuffer.color_format) {
62	case registers.framebuffer.RGBA8:	71	case registers.framebuffer.RGBA8:
63	{	72	{
64	Math::Vec4<u8> ret;	73	Math::Vec4<u8> ret;
65	u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4;	74	u8* pixel = color_buffer + src_offset;
66	ret.r() = pixel[3];	75	ret.r() = pixel[3];
67	ret.g() = pixel[2];	76	ret.g() = pixel[2];
68	ret.b() = pixel[1];	77	ret.b() = pixel[1];
@@ -73,7 +82,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
73	case registers.framebuffer.RGBA4:	82	case registers.framebuffer.RGBA4:
74	{	83	{
75	Math::Vec4<u8> ret;	84	Math::Vec4<u8> ret;
76	u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 2;	85	u8* pixel = color_buffer + src_offset;
77	ret.r() = Color::Convert4To8(pixel[1] >> 4);	86	ret.r() = Color::Convert4To8(pixel[1] >> 4);
78	ret.g() = Color::Convert4To8(pixel[1] & 0x0F);	87	ret.g() = Color::Convert4To8(pixel[1] & 0x0F);
79	ret.b() = Color::Convert4To8(pixel[0] >> 4);	88	ret.b() = Color::Convert4To8(pixel[0] >> 4);
@@ -82,7 +91,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
82	}	91	}
83		92
84	default:	93	default:
85	LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format);	94	LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value());
86	UNIMPLEMENTED();	95	UNIMPLEMENTED();
87	}	96	}
88		97
@@ -91,22 +100,28 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
91		100
92	static u32 GetDepth(int x, int y) {	101	static u32 GetDepth(int x, int y) {
93	const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();	102	const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
94	u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));	103	u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
95		104
96	y = (registers.framebuffer.height - y);	105	y = (registers.framebuffer.height - y);
		106
		107	const u32 coarse_y = y & ~7;
		108	u32 stride = registers.framebuffer.width * 2;
97		109
98	// Assuming 16-bit depth buffer format until actual format handling is implemented	110	// Assuming 16-bit depth buffer format until actual format handling is implemented
99	return (depth_buffer + x + y registers.framebuffer.GetWidth());	111	return (u16)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
100	}	112	}
101		113
102	static void SetDepth(int x, int y, u16 value) {	114	static void SetDepth(int x, int y, u16 value) {
103	const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();	115	const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
104	u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));	116	u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
105		117
106	y = (registers.framebuffer.height - y);	118	y = (registers.framebuffer.height - y);
107		119
		120	const u32 coarse_y = y & ~7;
		121	u32 stride = registers.framebuffer.width * 2;
		122
108	// Assuming 16-bit depth buffer format until actual format handling is implemented	123	// Assuming 16-bit depth buffer format until actual format handling is implemented
109	(depth_buffer + x + y registers.framebuffer.GetWidth()) = value;	124	(u16)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride) = value;
110	}	125	}
111		126
112	// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values	127	// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values


diff --git a/src/video_core/utils.h b/src/video_core/utils.h index 6fd640425..bda793fa5 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h
@@ -35,4 +35,54 @@ struct TGAHeader {
35	*/	35	*/
36	void DumpTGA(std::string filename, short width, short height, u8* raw_data);	36	void DumpTGA(std::string filename, short width, short height, u8* raw_data);
37		37
		38	/**
		39	* Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are
		40	* arranged in a Z-order curve. More details on the bit manipulation at:
		41	* https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
		42	*/
		43	static inline u32 MortonInterleave(u32 x, u32 y) {
		44	u32 i = (x & 7) \| ((y & 7) << 8); // ---- -210
		45	i = (i ^ (i << 2)) & 0x1313; // ---2 --10
		46	i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0
		47	i = (i \| (i >> 7)) & 0x3F;
		48	return i;
		49	}
		50
		51	/**
		52	* Calculates the offset of the position of the pixel in Morton order
		53	*/
		54	static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
		55	// Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
		56	// of which is composed of four 2x2 subtiles each of which is composed of four texels.
		57	// Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
		58	// texels are laid out in a 2x2 subtile like this:
		59	// 2 3
		60	// 0 1
		61	//
		62	// The full 8x8 tile has the texels arranged like this:
		63	//
		64	// 42 43 46 47 58 59 62 63
		65	// 40 41 44 45 56 57 60 61
		66	// 34 35 38 39 50 51 54 55
		67	// 32 33 36 37 48 49 52 53
		68	// 10 11 14 15 26 27 30 31
		69	// 08 09 12 13 24 25 28 29
		70	// 02 03 06 07 18 19 22 23
		71	// 00 01 04 05 16 17 20 21
		72	//
		73	// This pattern is what's called Z-order curve, or Morton order.
		74
		75	const unsigned int block_width = 8;
		76	const unsigned int block_height = 8;
		77
		78	const unsigned int coarse_x = x & ~7;
		79	const unsigned int coarse_y = y & ~7;
		80
		81	u32 i = VideoCore::MortonInterleave(x, y);
		82
		83	const unsigned int offset = coarse_x * block_height;
		84
		85	return (i + offset) * bytes_per_pixel;
		86	}
		87
38	} // namespace	88	} // namespace