summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: bunnei, 2015-02-26 22:40:27 -0500
committer: bunnei, 2015-02-26 22:40:27 -0500
commit: c9ef377afaa038797de6c08da9f204acf67ed1fc (patch)
tree: ab2c741325e2e81e4352b4934972151fceae8d5e /src
parent: Merge pull request #614 from lioncash/mcr (diff)
parent: GPU: Implemented bits 3 and 1 from the display transfer flags. (diff)
downloadyuzu-c9ef377afaa038797de6c08da9f204acf67ed1fc.tar.gz
yuzu-c9ef377afaa038797de6c08da9f204acf67ed1fc.tar.xz
yuzu-c9ef377afaa038797de6c08da9f204acf67ed1fc.zip
Merge pull request #599 from Subv/morton
GPU: Implemented bits 3 and 1 from the display transfer flags.
Diffstat (limited to 'src')
-rw-r--r-- src/citra_qt/debugger/graphics_framebuffer.cpp  22
-rw-r--r-- src/core/hw/gpu.cpp  82
-rw-r--r-- src/core/hw/gpu.h  5
-rw-r--r-- src/video_core/debug_utils/debug_utils.cpp  56
-rw-r--r-- src/video_core/rasterizer.cpp  39
-rw-r--r-- src/video_core/utils.h  50
6 files changed, 171 insertions, 83 deletions
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index 1ba60021f..574f19cc1 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -9,8 +9,10 @@
9#include <QPushButton> 9#include <QPushButton>
10#include <QSpinBox> 10#include <QSpinBox>
11 11
12#include "core/hw/gpu.h"
12#include "video_core/color.h" 13#include "video_core/color.h"
13#include "video_core/pica.h" 14#include "video_core/pica.h"
15#include "video_core/utils.h"
14 16
15#include "graphics_framebuffer.h" 17#include "graphics_framebuffer.h"
16 18
@@ -195,16 +197,20 @@ void GraphicsFramebufferWidget::OnUpdate()
195 197
196 // TODO: Implement a good way to visualize alpha components! 198 // TODO: Implement a good way to visualize alpha components!
197 // TODO: Unify this decoding code with the texture decoder 199 // TODO: Unify this decoding code with the texture decoder
200 u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer_format));
201
198 switch (framebuffer_format) { 202 switch (framebuffer_format) {
199 case Format::RGBA8: 203 case Format::RGBA8:
200 { 204 {
201 QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32); 205 QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32);
202 u32* color_buffer = (u32*)Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address)); 206 u8* color_buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
203 for (unsigned int y = 0; y < framebuffer_height; ++y) { 207 for (unsigned int y = 0; y < framebuffer_height; ++y) {
204 for (unsigned int x = 0; x < framebuffer_width; ++x) { 208 for (unsigned int x = 0; x < framebuffer_width; ++x) {
205 u32 value = *(color_buffer + x + y * framebuffer_width); 209 const u32 coarse_y = y & ~7;
210 u32 offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer_width * bytes_per_pixel;
211 u8* value = color_buffer + offset;
206 212
207 decoded_image.setPixel(x, y, qRgba((value >> 16) & 0xFF, (value >> 8) & 0xFF, value & 0xFF, 255/*value >> 24*/)); 213 decoded_image.setPixel(x, y, qRgba(value[3], value[2], value[1], 255/*value >> 24*/));
208 } 214 }
209 } 215 }
210 pixmap = QPixmap::fromImage(decoded_image); 216 pixmap = QPixmap::fromImage(decoded_image);
@@ -217,7 +223,9 @@ void GraphicsFramebufferWidget::OnUpdate()
217 u8* color_buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address)); 223 u8* color_buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
218 for (unsigned int y = 0; y < framebuffer_height; ++y) { 224 for (unsigned int y = 0; y < framebuffer_height; ++y) {
219 for (unsigned int x = 0; x < framebuffer_width; ++x) { 225 for (unsigned int x = 0; x < framebuffer_width; ++x) {
220 u8* pixel_pointer = color_buffer + x * 3 + y * 3 * framebuffer_width; 226 const u32 coarse_y = y & ~7;
227 u32 offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer_width * bytes_per_pixel;
228 u8* pixel_pointer = color_buffer + offset;
221 229
222 decoded_image.setPixel(x, y, qRgba(pixel_pointer[0], pixel_pointer[1], pixel_pointer[2], 255/*value >> 24*/)); 230 decoded_image.setPixel(x, y, qRgba(pixel_pointer[0], pixel_pointer[1], pixel_pointer[2], 255/*value >> 24*/));
223 } 231 }
@@ -229,10 +237,12 @@ void GraphicsFramebufferWidget::OnUpdate()
229 case Format::RGBA5551: 237 case Format::RGBA5551:
230 { 238 {
231 QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32); 239 QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32);
232 u32* color_buffer = (u32*)Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address)); 240 u8* color_buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
233 for (unsigned int y = 0; y < framebuffer_height; ++y) { 241 for (unsigned int y = 0; y < framebuffer_height; ++y) {
234 for (unsigned int x = 0; x < framebuffer_width; ++x) { 242 for (unsigned int x = 0; x < framebuffer_width; ++x) {
235 u16 value = *(u16*)(((u8*)color_buffer) + x * 2 + y * framebuffer_width * 2); 243 const u32 coarse_y = y & ~7;
244 u32 offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer_width * bytes_per_pixel;
245 u16 value = *(u16*)(color_buffer + offset);
236 u8 r = Color::Convert5To8((value >> 11) & 0x1F); 246 u8 r = Color::Convert5To8((value >> 11) & 0x1F);
237 u8 g = Color::Convert5To8((value >> 6) & 0x1F); 247 u8 g = Color::Convert5To8((value >> 6) & 0x1F);
238 u8 b = Color::Convert5To8((value >> 1) & 0x1F); 248 u8 b = Color::Convert5To8((value >> 1) & 0x1F);
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index e6022d69f..2f1a69d90 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -18,10 +18,10 @@
18#include "core/hw/gpu.h" 18#include "core/hw/gpu.h"
19 19
20#include "video_core/command_processor.h" 20#include "video_core/command_processor.h"
21#include "video_core/utils.h"
21#include "video_core/video_core.h" 22#include "video_core/video_core.h"
22#include <video_core/color.h> 23#include <video_core/color.h>
23 24
24
25namespace GPU { 25namespace GPU {
26 26
27Regs g_regs; 27Regs g_regs;
@@ -116,24 +116,64 @@ inline void Write(u32 addr, const T data) {
116 u8* source_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalInputAddress())); 116 u8* source_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalInputAddress()));
117 u8* dest_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalOutputAddress())); 117 u8* dest_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalOutputAddress()));
118 118
119 // Cheap emulation of horizontal scaling: Just skip each second pixel of the 119 unsigned horizontal_scale = (config.scale_horizontally != 0) ? 2 : 1;
120 // input framebuffer. We keep track of this in the pixel_skip variable. 120 unsigned vertical_scale = (config.scale_vertically != 0) ? 2 : 1;
121 unsigned pixel_skip = (config.scale_horizontally != 0) ? 2 : 1; 121
122 122 u32 output_width = config.output_width / horizontal_scale;
123 u32 output_width = config.output_width / pixel_skip; 123 u32 output_height = config.output_height / vertical_scale;
124 124
125 for (u32 y = 0; y < config.output_height; ++y) { 125 if (config.raw_copy) {
126 // TODO: Why does the register seem to hold twice the framebuffer width? 126 // Raw copies do not perform color conversion nor tiled->linear / linear->tiled conversions
127 // TODO(Subv): Verify if raw copies perform scaling
128 memcpy(dest_pointer, source_pointer, config.output_width * config.output_height *
129 GPU::Regs::BytesPerPixel(config.output_format));
130
131 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), flags 0x%08X, Raw copy",
132 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format),
133 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
134 config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(),
135 config.output_format.Value(), config.flags);
136
137 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
138 break;
139 }
127 140
141 // TODO(Subv): Blend the pixels when horizontal / vertical scaling is enabled,
142 // right now we're just skipping the extra pixels.
143 for (u32 y = 0; y < output_height; ++y) {
128 for (u32 x = 0; x < output_width; ++x) { 144 for (u32 x = 0; x < output_width; ++x) {
129 struct { 145 struct {
130 int r, g, b, a; 146 int r, g, b, a;
131 } source_color = { 0, 0, 0, 0 }; 147 } source_color = { 0, 0, 0, 0 };
132 148
149 u32 scaled_x = x * horizontal_scale;
150 u32 scaled_y = y * vertical_scale;
151
152 u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format);
153 u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format);
154 u32 src_offset;
155 u32 dst_offset;
156
157 if (config.output_tiled) {
158 // Interpret the input as linear and the output as tiled
159 u32 coarse_y = y & ~7;
160 u32 stride = output_width * dst_bytes_per_pixel;
161
162 src_offset = (scaled_x + scaled_y * config.input_width) * src_bytes_per_pixel;
163 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride;
164 } else {
165 // Interpret the input as tiled and the output as linear
166 u32 coarse_y = scaled_y & ~7;
167 u32 stride = config.input_width * src_bytes_per_pixel;
168
169 src_offset = VideoCore::GetMortonOffset(scaled_x, scaled_y, src_bytes_per_pixel) + coarse_y * stride;
170 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
171 }
172
133 switch (config.input_format) { 173 switch (config.input_format) {
134 case Regs::PixelFormat::RGBA8: 174 case Regs::PixelFormat::RGBA8:
135 { 175 {
136 u8* srcptr = source_pointer + (x * pixel_skip + y * config.input_width) * 4; 176 u8* srcptr = source_pointer + src_offset;
137 source_color.r = srcptr[3]; // red 177 source_color.r = srcptr[3]; // red
138 source_color.g = srcptr[2]; // green 178 source_color.g = srcptr[2]; // green
139 source_color.b = srcptr[1]; // blue 179 source_color.b = srcptr[1]; // blue
@@ -143,7 +183,7 @@ inline void Write(u32 addr, const T data) {
143 183
144 case Regs::PixelFormat::RGB5A1: 184 case Regs::PixelFormat::RGB5A1:
145 { 185 {
146 u16 srcval = *(u16*)(source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip); 186 u16 srcval = *(u16*)(source_pointer + src_offset);
147 source_color.r = Color::Convert5To8((srcval >> 11) & 0x1F); // red 187 source_color.r = Color::Convert5To8((srcval >> 11) & 0x1F); // red
148 source_color.g = Color::Convert5To8((srcval >> 6) & 0x1F); // green 188 source_color.g = Color::Convert5To8((srcval >> 6) & 0x1F); // green
149 source_color.b = Color::Convert5To8((srcval >> 1) & 0x1F); // blue 189 source_color.b = Color::Convert5To8((srcval >> 1) & 0x1F); // blue
@@ -153,7 +193,7 @@ inline void Write(u32 addr, const T data) {
153 193
154 case Regs::PixelFormat::RGBA4: 194 case Regs::PixelFormat::RGBA4:
155 { 195 {
156 u16 srcval = *(u16*)(source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip); 196 u16 srcval = *(u16*)(source_pointer + src_offset);
157 source_color.r = Color::Convert4To8((srcval >> 12) & 0xF); // red 197 source_color.r = Color::Convert4To8((srcval >> 12) & 0xF); // red
158 source_color.g = Color::Convert4To8((srcval >> 8) & 0xF); // green 198 source_color.g = Color::Convert4To8((srcval >> 8) & 0xF); // green
159 source_color.b = Color::Convert4To8((srcval >> 4) & 0xF); // blue 199 source_color.b = Color::Convert4To8((srcval >> 4) & 0xF); // blue
@@ -169,7 +209,7 @@ inline void Write(u32 addr, const T data) {
169 switch (config.output_format) { 209 switch (config.output_format) {
170 case Regs::PixelFormat::RGBA8: 210 case Regs::PixelFormat::RGBA8:
171 { 211 {
172 u8* dstptr = dest_pointer + (x * pixel_skip + y * config.output_width) * 4; 212 u8* dstptr = dest_pointer + dst_offset;
173 dstptr[3] = source_color.r; 213 dstptr[3] = source_color.r;
174 dstptr[2] = source_color.g; 214 dstptr[2] = source_color.g;
175 dstptr[1] = source_color.b; 215 dstptr[1] = source_color.b;
@@ -179,7 +219,7 @@ inline void Write(u32 addr, const T data) {
179 219
180 case Regs::PixelFormat::RGB8: 220 case Regs::PixelFormat::RGB8:
181 { 221 {
182 u8* dstptr = dest_pointer + (x + y * output_width) * 3; 222 u8* dstptr = dest_pointer + dst_offset;
183 dstptr[2] = source_color.r; // red 223 dstptr[2] = source_color.r; // red
184 dstptr[1] = source_color.g; // green 224 dstptr[1] = source_color.g; // green
185 dstptr[0] = source_color.b; // blue 225 dstptr[0] = source_color.b; // blue
@@ -188,7 +228,7 @@ inline void Write(u32 addr, const T data) {
188 228
189 case Regs::PixelFormat::RGB5A1: 229 case Regs::PixelFormat::RGB5A1:
190 { 230 {
191 u16* dstptr = (u16*)(dest_pointer + x * 2 + y * config.output_width * 2); 231 u16* dstptr = (u16*)(dest_pointer + dst_offset);
192 *dstptr = ((source_color.r >> 3) << 11) | ((source_color.g >> 3) << 6) 232 *dstptr = ((source_color.r >> 3) << 11) | ((source_color.g >> 3) << 6)
193 | ((source_color.b >> 3) << 1) | ( source_color.a >> 7); 233 | ((source_color.b >> 3) << 1) | ( source_color.a >> 7);
194 break; 234 break;
@@ -196,7 +236,7 @@ inline void Write(u32 addr, const T data) {
196 236
197 case Regs::PixelFormat::RGBA4: 237 case Regs::PixelFormat::RGBA4:
198 { 238 {
199 u16* dstptr = (u16*)(dest_pointer + x * 2 + y * config.output_width * 2); 239 u16* dstptr = (u16*)(dest_pointer + dst_offset);
200 *dstptr = ((source_color.r >> 4) << 12) | ((source_color.g >> 4) << 8) 240 *dstptr = ((source_color.r >> 4) << 12) | ((source_color.g >> 4) << 8)
201 | ((source_color.b >> 4) << 4) | ( source_color.a >> 4); 241 | ((source_color.b >> 4) << 4) | ( source_color.a >> 4);
202 break; 242 break;
@@ -209,11 +249,11 @@ inline void Write(u32 addr, const T data) {
209 } 249 }
210 } 250 }
211 251
212 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x", 252 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X",
213 config.output_height * output_width * 4, 253 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format),
214 config.GetPhysicalInputAddress(), (u32)config.input_width, (u32)config.input_height, 254 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
215 config.GetPhysicalOutputAddress(), (u32)output_width, (u32)config.output_height, 255 config.GetPhysicalOutputAddress(), output_width, output_height,
216 config.output_format.Value()); 256 config.output_format.Value(), config.flags);
217 257
218 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); 258 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
219 } 259 }
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index 75f524465..ab1dcf91d 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -192,12 +192,13 @@ struct Regs {
192 u32 flags; 192 u32 flags;
193 193
194 BitField< 0, 1, u32> flip_data; // flips input data horizontally (TODO) if true 194 BitField< 0, 1, u32> flip_data; // flips input data horizontally (TODO) if true
195 BitField< 1, 1, u32> output_tiled; // Converts from linear to tiled format
196 BitField< 3, 1, u32> raw_copy; // Copies the data without performing any processing
195 BitField< 8, 3, PixelFormat> input_format; 197 BitField< 8, 3, PixelFormat> input_format;
196 BitField<12, 3, PixelFormat> output_format; 198 BitField<12, 3, PixelFormat> output_format;
197 BitField<16, 1, u32> output_tiled; // stores output in a tiled format
198 199
199 // TODO: Not really sure if this actually scales, or even resizes at all.
200 BitField<24, 1, u32> scale_horizontally; 200 BitField<24, 1, u32> scale_horizontally;
201 BitField<25, 1, u32> scale_vertically;
201 }; 202 };
202 203
203 INSERT_PADDING_WORDS(0x1); 204 INSERT_PADDING_WORDS(0x1);
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index f436aa541..27c246a99 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -23,6 +23,7 @@
23#include "video_core/color.h" 23#include "video_core/color.h"
24#include "video_core/math.h" 24#include "video_core/math.h"
25#include "video_core/pica.h" 25#include "video_core/pica.h"
26#include "video_core/utils.h"
26 27
27#include "debug_utils.h" 28#include "debug_utils.h"
28 29
@@ -306,63 +307,33 @@ std::unique_ptr<PicaTrace> FinishPicaTracing()
306} 307}
307 308
308const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) { 309const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) {
309 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
310 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
311 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
312 // texels are laid out in a 2x2 subtile like this:
313 // 2 3
314 // 0 1
315 //
316 // The full 8x8 tile has the texels arranged like this:
317 //
318 // 42 43 46 47 58 59 62 63
319 // 40 41 44 45 56 57 60 61
320 // 34 35 38 39 50 51 54 55
321 // 32 33 36 37 48 49 52 53
322 // 10 11 14 15 26 27 30 31
323 // 08 09 12 13 24 25 28 29
324 // 02 03 06 07 18 19 22 23
325 // 00 01 04 05 16 17 20 21
326
327 const unsigned int block_width = 8;
328 const unsigned int block_height = 8;
329
330 const unsigned int coarse_x = x & ~7; 310 const unsigned int coarse_x = x & ~7;
331 const unsigned int coarse_y = y & ~7; 311 const unsigned int coarse_y = y & ~7;
332 312
333 // Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are
334 // arranged in a Z-order curve. More details on the bit manipulation at:
335 // https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
336 unsigned int i = (x & 7) | ((y & 7) << 8); // ---- -210
337 i = (i ^ (i << 2)) & 0x1313; // ---2 --10
338 i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0
339 i = (i | (i >> 7)) & 0x3F;
340
341 if (info.format != Regs::TextureFormat::ETC1 && 313 if (info.format != Regs::TextureFormat::ETC1 &&
342 info.format != Regs::TextureFormat::ETC1A4) { 314 info.format != Regs::TextureFormat::ETC1A4) {
343 // TODO(neobrain): Fix code design to unify vertical block offsets! 315 // TODO(neobrain): Fix code design to unify vertical block offsets!
344 source += coarse_y * info.stride; 316 source += coarse_y * info.stride;
345 } 317 }
346 const unsigned int offset = coarse_x * block_height; 318
347
348 // TODO: Assert that width/height are multiples of block dimensions 319 // TODO: Assert that width/height are multiples of block dimensions
349 320
350 switch (info.format) { 321 switch (info.format) {
351 case Regs::TextureFormat::RGBA8: 322 case Regs::TextureFormat::RGBA8:
352 { 323 {
353 const u8* source_ptr = source + offset * 4 + i * 4; 324 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 4);
354 return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] }; 325 return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] };
355 } 326 }
356 327
357 case Regs::TextureFormat::RGB8: 328 case Regs::TextureFormat::RGB8:
358 { 329 {
359 const u8* source_ptr = source + offset * 3 + i * 3; 330 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 3);
360 return { source_ptr[2], source_ptr[1], source_ptr[0], 255 }; 331 return { source_ptr[2], source_ptr[1], source_ptr[0], 255 };
361 } 332 }
362 333
363 case Regs::TextureFormat::RGBA5551: 334 case Regs::TextureFormat::RGBA5551:
364 { 335 {
365 const u16 source_ptr = *(const u16*)(source + offset * 2 + i * 2); 336 const u16 source_ptr = *(const u16*)(source + VideoCore::GetMortonOffset(x, y, 2));
366 u8 r = (source_ptr >> 11) & 0x1F; 337 u8 r = (source_ptr >> 11) & 0x1F;
367 u8 g = ((source_ptr) >> 6) & 0x1F; 338 u8 g = ((source_ptr) >> 6) & 0x1F;
368 u8 b = (source_ptr >> 1) & 0x1F; 339 u8 b = (source_ptr >> 1) & 0x1F;
@@ -373,7 +344,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
373 344
374 case Regs::TextureFormat::RGB565: 345 case Regs::TextureFormat::RGB565:
375 { 346 {
376 const u16 source_ptr = *(const u16*)(source + offset * 2 + i * 2); 347 const u16 source_ptr = *(const u16*)(source + VideoCore::GetMortonOffset(x, y, 2));
377 u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F); 348 u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F);
378 u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F); 349 u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F);
379 u8 b = Color::Convert5To8((source_ptr) & 0x1F); 350 u8 b = Color::Convert5To8((source_ptr) & 0x1F);
@@ -382,7 +353,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
382 353
383 case Regs::TextureFormat::RGBA4: 354 case Regs::TextureFormat::RGBA4:
384 { 355 {
385 const u8* source_ptr = source + offset * 2 + i * 2; 356 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2);
386 u8 r = Color::Convert4To8(source_ptr[1] >> 4); 357 u8 r = Color::Convert4To8(source_ptr[1] >> 4);
387 u8 g = Color::Convert4To8(source_ptr[1] & 0xF); 358 u8 g = Color::Convert4To8(source_ptr[1] & 0xF);
388 u8 b = Color::Convert4To8(source_ptr[0] >> 4); 359 u8 b = Color::Convert4To8(source_ptr[0] >> 4);
@@ -392,7 +363,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
392 363
393 case Regs::TextureFormat::IA8: 364 case Regs::TextureFormat::IA8:
394 { 365 {
395 const u8* source_ptr = source + offset * 2 + i * 2; 366 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2);
396 367
397 if (disable_alpha) { 368 if (disable_alpha) {
398 // Show intensity as red, alpha as green 369 // Show intensity as red, alpha as green
@@ -404,13 +375,13 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
404 375
405 case Regs::TextureFormat::I8: 376 case Regs::TextureFormat::I8:
406 { 377 {
407 const u8* source_ptr = source + offset + i; 378 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
408 return { *source_ptr, *source_ptr, *source_ptr, 255 }; 379 return { *source_ptr, *source_ptr, *source_ptr, 255 };
409 } 380 }
410 381
411 case Regs::TextureFormat::A8: 382 case Regs::TextureFormat::A8:
412 { 383 {
413 const u8* source_ptr = source + offset + i; 384 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
414 385
415 if (disable_alpha) { 386 if (disable_alpha) {
416 return { *source_ptr, *source_ptr, *source_ptr, 255 }; 387 return { *source_ptr, *source_ptr, *source_ptr, 255 };
@@ -421,7 +392,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
421 392
422 case Regs::TextureFormat::IA4: 393 case Regs::TextureFormat::IA4:
423 { 394 {
424 const u8* source_ptr = source + offset + i; 395 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
425 396
426 u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); 397 u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4);
427 u8 a = Color::Convert4To8((*source_ptr) & 0xF); 398 u8 a = Color::Convert4To8((*source_ptr) & 0xF);
@@ -436,9 +407,10 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
436 407
437 case Regs::TextureFormat::A4: 408 case Regs::TextureFormat::A4:
438 { 409 {
439 const u8* source_ptr = source + (offset + i) / 2; 410 u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1);
411 const u8* source_ptr = source + morton_offset / 2;
440 412
441 u8 a = (i % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF); 413 u8 a = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF);
442 a = Color::Convert4To8(a); 414 a = Color::Convert4To8(a);
443 415
444 if (disable_alpha) { 416 if (disable_alpha) {
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 24dc37856..a7bb0612f 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -7,13 +7,14 @@
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "common/math_util.h" 8#include "common/math_util.h"
9 9
10#include "core/hw/gpu.h"
11#include "debug_utils/debug_utils.h"
10#include "math.h" 12#include "math.h"
11#include "color.h" 13#include "color.h"
12#include "pica.h" 14#include "pica.h"
13#include "rasterizer.h" 15#include "rasterizer.h"
14#include "vertex_shader.h" 16#include "vertex_shader.h"
15 17#include "video_core/utils.h"
16#include "debug_utils/debug_utils.h"
17 18
18namespace Pica { 19namespace Pica {
19 20
@@ -27,10 +28,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
27 // NOTE: The framebuffer height register contains the actual FB height minus one. 28 // NOTE: The framebuffer height register contains the actual FB height minus one.
28 y = (registers.framebuffer.height - y); 29 y = (registers.framebuffer.height - y);
29 30
31 const u32 coarse_y = y & ~7;
32 u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value()));
33 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel;
34
30 switch (registers.framebuffer.color_format) { 35 switch (registers.framebuffer.color_format) {
31 case registers.framebuffer.RGBA8: 36 case registers.framebuffer.RGBA8:
32 { 37 {
33 u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4; 38 u8* pixel = color_buffer + dst_offset;
34 pixel[3] = color.r(); 39 pixel[3] = color.r();
35 pixel[2] = color.g(); 40 pixel[2] = color.g();
36 pixel[1] = color.b(); 41 pixel[1] = color.b();
@@ -40,14 +45,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
40 45
41 case registers.framebuffer.RGBA4: 46 case registers.framebuffer.RGBA4:
42 { 47 {
43 u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 2; 48 u8* pixel = color_buffer + dst_offset;
44 pixel[1] = (color.r() & 0xF0) | (color.g() >> 4); 49 pixel[1] = (color.r() & 0xF0) | (color.g() >> 4);
45 pixel[0] = (color.b() & 0xF0) | (color.a() >> 4); 50 pixel[0] = (color.b() & 0xF0) | (color.a() >> 4);
46 break; 51 break;
47 } 52 }
48 53
49 default: 54 default:
50 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format); 55 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value());
51 UNIMPLEMENTED(); 56 UNIMPLEMENTED();
52 } 57 }
53} 58}
@@ -58,11 +63,15 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
58 63
59 y = (registers.framebuffer.height - y); 64 y = (registers.framebuffer.height - y);
60 65
66 const u32 coarse_y = y & ~7;
67 u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value()));
68 u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel;
69
61 switch (registers.framebuffer.color_format) { 70 switch (registers.framebuffer.color_format) {
62 case registers.framebuffer.RGBA8: 71 case registers.framebuffer.RGBA8:
63 { 72 {
64 Math::Vec4<u8> ret; 73 Math::Vec4<u8> ret;
65 u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4; 74 u8* pixel = color_buffer + src_offset;
66 ret.r() = pixel[3]; 75 ret.r() = pixel[3];
67 ret.g() = pixel[2]; 76 ret.g() = pixel[2];
68 ret.b() = pixel[1]; 77 ret.b() = pixel[1];
@@ -73,7 +82,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
73 case registers.framebuffer.RGBA4: 82 case registers.framebuffer.RGBA4:
74 { 83 {
75 Math::Vec4<u8> ret; 84 Math::Vec4<u8> ret;
76 u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 2; 85 u8* pixel = color_buffer + src_offset;
77 ret.r() = Color::Convert4To8(pixel[1] >> 4); 86 ret.r() = Color::Convert4To8(pixel[1] >> 4);
78 ret.g() = Color::Convert4To8(pixel[1] & 0x0F); 87 ret.g() = Color::Convert4To8(pixel[1] & 0x0F);
79 ret.b() = Color::Convert4To8(pixel[0] >> 4); 88 ret.b() = Color::Convert4To8(pixel[0] >> 4);
@@ -82,7 +91,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
82 } 91 }
83 92
84 default: 93 default:
85 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format); 94 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value());
86 UNIMPLEMENTED(); 95 UNIMPLEMENTED();
87 } 96 }
88 97
@@ -91,22 +100,28 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
91 100
92static u32 GetDepth(int x, int y) { 101static u32 GetDepth(int x, int y) {
93 const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); 102 const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
94 u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); 103 u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
95 104
96 y = (registers.framebuffer.height - y); 105 y = (registers.framebuffer.height - y);
106
107 const u32 coarse_y = y & ~7;
108 u32 stride = registers.framebuffer.width * 2;
97 109
98 // Assuming 16-bit depth buffer format until actual format handling is implemented 110 // Assuming 16-bit depth buffer format until actual format handling is implemented
99 return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); 111 return *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
100} 112}
101 113
102static void SetDepth(int x, int y, u16 value) { 114static void SetDepth(int x, int y, u16 value) {
103 const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); 115 const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
104 u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); 116 u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
105 117
106 y = (registers.framebuffer.height - y); 118 y = (registers.framebuffer.height - y);
107 119
120 const u32 coarse_y = y & ~7;
121 u32 stride = registers.framebuffer.width * 2;
122
108 // Assuming 16-bit depth buffer format until actual format handling is implemented 123 // Assuming 16-bit depth buffer format until actual format handling is implemented
109 *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; 124 *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride) = value;
110} 125}
111 126
112// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values 127// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index 6fd640425..bda793fa5 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -35,4 +35,54 @@ struct TGAHeader {
35 */ 35 */
36void DumpTGA(std::string filename, short width, short height, u8* raw_data); 36void DumpTGA(std::string filename, short width, short height, u8* raw_data);
37 37
/**
 * Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are
 * arranged in a Z-order curve. More details on the bit manipulation at:
 * https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
 *
 * @param x Horizontal coordinate; only bits 0-2 contribute to the result.
 * @param y Vertical coordinate; only bits 0-2 contribute to the result.
 * @return Value in the range [0, 63] holding the bits of x at positions 0, 2 and 4 and the
 *         bits of y at positions 1, 3 and 5.
 */
static inline u32 MortonInterleave(u32 x, u32 y) {
    // x occupies the low byte and y the next byte, so both coordinates are spread in
    // parallel. The bit patterns below describe the layout of each of those two bytes.
    u32 i = (x & 7) | ((y & 7) << 8); // ---- -210
    i = (i ^ (i << 2)) & 0x1313;      // ---2 --10
    i = (i ^ (i << 1)) & 0x1515;      // ---2 -1-0

    // Shifting by 7 moves the y byte down onto the odd bit positions (1, 3, 5), which are
    // exactly the gaps left between the bits of x; the mask drops the leftover high bits.
    i = (i | (i >> 7)) & 0x3F;
    return i;
}
50
51/**
52 * Calculates the offset of the position of the pixel in Morton order
53 */
54static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
55 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
56 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
57 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
58 // texels are laid out in a 2x2 subtile like this:
59 // 2 3
60 // 0 1
61 //
62 // The full 8x8 tile has the texels arranged like this:
63 //
64 // 42 43 46 47 58 59 62 63
65 // 40 41 44 45 56 57 60 61
66 // 34 35 38 39 50 51 54 55
67 // 32 33 36 37 48 49 52 53
68 // 10 11 14 15 26 27 30 31
69 // 08 09 12 13 24 25 28 29
70 // 02 03 06 07 18 19 22 23
71 // 00 01 04 05 16 17 20 21
72 //
73 // This pattern is what's called Z-order curve, or Morton order.
74
75 const unsigned int block_width = 8;
76 const unsigned int block_height = 8;
77
78 const unsigned int coarse_x = x & ~7;
79 const unsigned int coarse_y = y & ~7;
80
81 u32 i = VideoCore::MortonInterleave(x, y);
82
83 const unsigned int offset = coarse_x * block_height;
84
85 return (i + offset) * bytes_per_pixel;
86}
87
38} // namespace 88} // namespace