summaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp56
-rw-r--r--src/video_core/rasterizer.cpp39
-rw-r--r--src/video_core/utils.h50
3 files changed, 91 insertions, 54 deletions
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index f436aa541..27c246a99 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -23,6 +23,7 @@
23#include "video_core/color.h" 23#include "video_core/color.h"
24#include "video_core/math.h" 24#include "video_core/math.h"
25#include "video_core/pica.h" 25#include "video_core/pica.h"
26#include "video_core/utils.h"
26 27
27#include "debug_utils.h" 28#include "debug_utils.h"
28 29
@@ -306,63 +307,33 @@ std::unique_ptr<PicaTrace> FinishPicaTracing()
306} 307}
307 308
308const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) { 309const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) {
309 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
310 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
311 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
312 // texels are laid out in a 2x2 subtile like this:
313 // 2 3
314 // 0 1
315 //
316 // The full 8x8 tile has the texels arranged like this:
317 //
318 // 42 43 46 47 58 59 62 63
319 // 40 41 44 45 56 57 60 61
320 // 34 35 38 39 50 51 54 55
321 // 32 33 36 37 48 49 52 53
322 // 10 11 14 15 26 27 30 31
323 // 08 09 12 13 24 25 28 29
324 // 02 03 06 07 18 19 22 23
325 // 00 01 04 05 16 17 20 21
326
327 const unsigned int block_width = 8;
328 const unsigned int block_height = 8;
329
330 const unsigned int coarse_x = x & ~7; 310 const unsigned int coarse_x = x & ~7;
331 const unsigned int coarse_y = y & ~7; 311 const unsigned int coarse_y = y & ~7;
332 312
333 // Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are
334 // arranged in a Z-order curve. More details on the bit manipulation at:
335 // https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
336 unsigned int i = (x & 7) | ((y & 7) << 8); // ---- -210
337 i = (i ^ (i << 2)) & 0x1313; // ---2 --10
338 i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0
339 i = (i | (i >> 7)) & 0x3F;
340
341 if (info.format != Regs::TextureFormat::ETC1 && 313 if (info.format != Regs::TextureFormat::ETC1 &&
342 info.format != Regs::TextureFormat::ETC1A4) { 314 info.format != Regs::TextureFormat::ETC1A4) {
343 // TODO(neobrain): Fix code design to unify vertical block offsets! 315 // TODO(neobrain): Fix code design to unify vertical block offsets!
344 source += coarse_y * info.stride; 316 source += coarse_y * info.stride;
345 } 317 }
346 const unsigned int offset = coarse_x * block_height; 318
347
348 // TODO: Assert that width/height are multiples of block dimensions 319 // TODO: Assert that width/height are multiples of block dimensions
349 320
350 switch (info.format) { 321 switch (info.format) {
351 case Regs::TextureFormat::RGBA8: 322 case Regs::TextureFormat::RGBA8:
352 { 323 {
353 const u8* source_ptr = source + offset * 4 + i * 4; 324 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 4);
354 return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] }; 325 return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] };
355 } 326 }
356 327
357 case Regs::TextureFormat::RGB8: 328 case Regs::TextureFormat::RGB8:
358 { 329 {
359 const u8* source_ptr = source + offset * 3 + i * 3; 330 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 3);
360 return { source_ptr[2], source_ptr[1], source_ptr[0], 255 }; 331 return { source_ptr[2], source_ptr[1], source_ptr[0], 255 };
361 } 332 }
362 333
363 case Regs::TextureFormat::RGBA5551: 334 case Regs::TextureFormat::RGBA5551:
364 { 335 {
365 const u16 source_ptr = *(const u16*)(source + offset * 2 + i * 2); 336 const u16 source_ptr = *(const u16*)(source + VideoCore::GetMortonOffset(x, y, 2));
366 u8 r = (source_ptr >> 11) & 0x1F; 337 u8 r = (source_ptr >> 11) & 0x1F;
367 u8 g = ((source_ptr) >> 6) & 0x1F; 338 u8 g = ((source_ptr) >> 6) & 0x1F;
368 u8 b = (source_ptr >> 1) & 0x1F; 339 u8 b = (source_ptr >> 1) & 0x1F;
@@ -373,7 +344,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
373 344
374 case Regs::TextureFormat::RGB565: 345 case Regs::TextureFormat::RGB565:
375 { 346 {
376 const u16 source_ptr = *(const u16*)(source + offset * 2 + i * 2); 347 const u16 source_ptr = *(const u16*)(source + VideoCore::GetMortonOffset(x, y, 2));
377 u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F); 348 u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F);
378 u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F); 349 u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F);
379 u8 b = Color::Convert5To8((source_ptr) & 0x1F); 350 u8 b = Color::Convert5To8((source_ptr) & 0x1F);
@@ -382,7 +353,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
382 353
383 case Regs::TextureFormat::RGBA4: 354 case Regs::TextureFormat::RGBA4:
384 { 355 {
385 const u8* source_ptr = source + offset * 2 + i * 2; 356 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2);
386 u8 r = Color::Convert4To8(source_ptr[1] >> 4); 357 u8 r = Color::Convert4To8(source_ptr[1] >> 4);
387 u8 g = Color::Convert4To8(source_ptr[1] & 0xF); 358 u8 g = Color::Convert4To8(source_ptr[1] & 0xF);
388 u8 b = Color::Convert4To8(source_ptr[0] >> 4); 359 u8 b = Color::Convert4To8(source_ptr[0] >> 4);
@@ -392,7 +363,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
392 363
393 case Regs::TextureFormat::IA8: 364 case Regs::TextureFormat::IA8:
394 { 365 {
395 const u8* source_ptr = source + offset * 2 + i * 2; 366 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2);
396 367
397 if (disable_alpha) { 368 if (disable_alpha) {
398 // Show intensity as red, alpha as green 369 // Show intensity as red, alpha as green
@@ -404,13 +375,13 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
404 375
405 case Regs::TextureFormat::I8: 376 case Regs::TextureFormat::I8:
406 { 377 {
407 const u8* source_ptr = source + offset + i; 378 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
408 return { *source_ptr, *source_ptr, *source_ptr, 255 }; 379 return { *source_ptr, *source_ptr, *source_ptr, 255 };
409 } 380 }
410 381
411 case Regs::TextureFormat::A8: 382 case Regs::TextureFormat::A8:
412 { 383 {
413 const u8* source_ptr = source + offset + i; 384 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
414 385
415 if (disable_alpha) { 386 if (disable_alpha) {
416 return { *source_ptr, *source_ptr, *source_ptr, 255 }; 387 return { *source_ptr, *source_ptr, *source_ptr, 255 };
@@ -421,7 +392,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
421 392
422 case Regs::TextureFormat::IA4: 393 case Regs::TextureFormat::IA4:
423 { 394 {
424 const u8* source_ptr = source + offset + i; 395 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
425 396
426 u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); 397 u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4);
427 u8 a = Color::Convert4To8((*source_ptr) & 0xF); 398 u8 a = Color::Convert4To8((*source_ptr) & 0xF);
@@ -436,9 +407,10 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
436 407
437 case Regs::TextureFormat::A4: 408 case Regs::TextureFormat::A4:
438 { 409 {
439 const u8* source_ptr = source + (offset + i) / 2; 410 u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1);
411 const u8* source_ptr = source + morton_offset / 2;
440 412
441 u8 a = (i % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF); 413 u8 a = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF);
442 a = Color::Convert4To8(a); 414 a = Color::Convert4To8(a);
443 415
444 if (disable_alpha) { 416 if (disable_alpha) {
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 24dc37856..a7bb0612f 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -7,13 +7,14 @@
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "common/math_util.h" 8#include "common/math_util.h"
9 9
10#include "core/hw/gpu.h"
11#include "debug_utils/debug_utils.h"
10#include "math.h" 12#include "math.h"
11#include "color.h" 13#include "color.h"
12#include "pica.h" 14#include "pica.h"
13#include "rasterizer.h" 15#include "rasterizer.h"
14#include "vertex_shader.h" 16#include "vertex_shader.h"
15 17#include "video_core/utils.h"
16#include "debug_utils/debug_utils.h"
17 18
18namespace Pica { 19namespace Pica {
19 20
@@ -27,10 +28,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
27 // NOTE: The framebuffer height register contains the actual FB height minus one. 28 // NOTE: The framebuffer height register contains the actual FB height minus one.
28 y = (registers.framebuffer.height - y); 29 y = (registers.framebuffer.height - y);
29 30
31 const u32 coarse_y = y & ~7;
32 u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value()));
33 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel;
34
30 switch (registers.framebuffer.color_format) { 35 switch (registers.framebuffer.color_format) {
31 case registers.framebuffer.RGBA8: 36 case registers.framebuffer.RGBA8:
32 { 37 {
33 u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4; 38 u8* pixel = color_buffer + dst_offset;
34 pixel[3] = color.r(); 39 pixel[3] = color.r();
35 pixel[2] = color.g(); 40 pixel[2] = color.g();
36 pixel[1] = color.b(); 41 pixel[1] = color.b();
@@ -40,14 +45,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
40 45
41 case registers.framebuffer.RGBA4: 46 case registers.framebuffer.RGBA4:
42 { 47 {
43 u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 2; 48 u8* pixel = color_buffer + dst_offset;
44 pixel[1] = (color.r() & 0xF0) | (color.g() >> 4); 49 pixel[1] = (color.r() & 0xF0) | (color.g() >> 4);
45 pixel[0] = (color.b() & 0xF0) | (color.a() >> 4); 50 pixel[0] = (color.b() & 0xF0) | (color.a() >> 4);
46 break; 51 break;
47 } 52 }
48 53
49 default: 54 default:
50 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format); 55 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value());
51 UNIMPLEMENTED(); 56 UNIMPLEMENTED();
52 } 57 }
53} 58}
@@ -58,11 +63,15 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
58 63
59 y = (registers.framebuffer.height - y); 64 y = (registers.framebuffer.height - y);
60 65
66 const u32 coarse_y = y & ~7;
67 u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value()));
68 u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel;
69
61 switch (registers.framebuffer.color_format) { 70 switch (registers.framebuffer.color_format) {
62 case registers.framebuffer.RGBA8: 71 case registers.framebuffer.RGBA8:
63 { 72 {
64 Math::Vec4<u8> ret; 73 Math::Vec4<u8> ret;
65 u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4; 74 u8* pixel = color_buffer + src_offset;
66 ret.r() = pixel[3]; 75 ret.r() = pixel[3];
67 ret.g() = pixel[2]; 76 ret.g() = pixel[2];
68 ret.b() = pixel[1]; 77 ret.b() = pixel[1];
@@ -73,7 +82,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
73 case registers.framebuffer.RGBA4: 82 case registers.framebuffer.RGBA4:
74 { 83 {
75 Math::Vec4<u8> ret; 84 Math::Vec4<u8> ret;
76 u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 2; 85 u8* pixel = color_buffer + src_offset;
77 ret.r() = Color::Convert4To8(pixel[1] >> 4); 86 ret.r() = Color::Convert4To8(pixel[1] >> 4);
78 ret.g() = Color::Convert4To8(pixel[1] & 0x0F); 87 ret.g() = Color::Convert4To8(pixel[1] & 0x0F);
79 ret.b() = Color::Convert4To8(pixel[0] >> 4); 88 ret.b() = Color::Convert4To8(pixel[0] >> 4);
@@ -82,7 +91,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
82 } 91 }
83 92
84 default: 93 default:
85 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format); 94 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value());
86 UNIMPLEMENTED(); 95 UNIMPLEMENTED();
87 } 96 }
88 97
@@ -91,22 +100,28 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
91 100
92static u32 GetDepth(int x, int y) { 101static u32 GetDepth(int x, int y) {
93 const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); 102 const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
94 u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); 103 u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
95 104
96 y = (registers.framebuffer.height - y); 105 y = (registers.framebuffer.height - y);
106
107 const u32 coarse_y = y & ~7;
108 u32 stride = registers.framebuffer.width * 2;
97 109
98 // Assuming 16-bit depth buffer format until actual format handling is implemented 110 // Assuming 16-bit depth buffer format until actual format handling is implemented
99 return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); 111 return *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
100} 112}
101 113
102static void SetDepth(int x, int y, u16 value) { 114static void SetDepth(int x, int y, u16 value) {
103 const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); 115 const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
104 u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); 116 u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
105 117
106 y = (registers.framebuffer.height - y); 118 y = (registers.framebuffer.height - y);
107 119
120 const u32 coarse_y = y & ~7;
121 u32 stride = registers.framebuffer.width * 2;
122
108 // Assuming 16-bit depth buffer format until actual format handling is implemented 123 // Assuming 16-bit depth buffer format until actual format handling is implemented
109 *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; 124 *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride) = value;
110} 125}
111 126
112// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values 127// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index 6fd640425..bda793fa5 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -35,4 +35,54 @@ struct TGAHeader {
35 */ 35 */
36void DumpTGA(std::string filename, short width, short height, u8* raw_data); 36void DumpTGA(std::string filename, short width, short height, u8* raw_data);
37 37
38/**
39 * Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are
40 * arranged in a Z-order curve. More details on the bit manipulation at:
41 * https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
42 */
43static inline u32 MortonInterleave(u32 x, u32 y) {
44 u32 i = (x & 7) | ((y & 7) << 8); // ---- -210
45 i = (i ^ (i << 2)) & 0x1313; // ---2 --10
46 i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0
47 i = (i | (i >> 7)) & 0x3F;
48 return i;
49}
50
51/**
52 * Calculates the offset of the position of the pixel in Morton order
53 */
54static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
55 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
56 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
57 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
58 // texels are laid out in a 2x2 subtile like this:
59 // 2 3
60 // 0 1
61 //
62 // The full 8x8 tile has the texels arranged like this:
63 //
64 // 42 43 46 47 58 59 62 63
65 // 40 41 44 45 56 57 60 61
66 // 34 35 38 39 50 51 54 55
67 // 32 33 36 37 48 49 52 53
68 // 10 11 14 15 26 27 30 31
69 // 08 09 12 13 24 25 28 29
70 // 02 03 06 07 18 19 22 23
71 // 00 01 04 05 16 17 20 21
72 //
73 // This pattern is what's called Z-order curve, or Morton order.
74
75 const unsigned int block_width = 8;
76 const unsigned int block_height = 8;
77
78 const unsigned int coarse_x = x & ~7;
79 const unsigned int coarse_y = y & ~7;
80
81 u32 i = VideoCore::MortonInterleave(x, y);
82
83 const unsigned int offset = coarse_x * block_height;
84
85 return (i + offset) * bytes_per_pixel;
86}
87
38} // namespace 88} // namespace