diff options
| author | 2015-02-26 22:40:27 -0500 | |
|---|---|---|
| committer | 2015-02-26 22:40:27 -0500 | |
| commit | c9ef377afaa038797de6c08da9f204acf67ed1fc (patch) | |
| tree | ab2c741325e2e81e4352b4934972151fceae8d5e /src | |
| parent | Merge pull request #614 from lioncash/mcr (diff) | |
| parent | GPU: Implemented bits 3 and 1 from the display transfer flags. (diff) | |
| download | yuzu-c9ef377afaa038797de6c08da9f204acf67ed1fc.tar.gz yuzu-c9ef377afaa038797de6c08da9f204acf67ed1fc.tar.xz yuzu-c9ef377afaa038797de6c08da9f204acf67ed1fc.zip | |
Merge pull request #599 from Subv/morton
GPU: Implemented bits 3 and 1 from the display transfer flags.
Diffstat (limited to 'src')
| -rw-r--r-- | src/citra_qt/debugger/graphics_framebuffer.cpp | 22 | ||||
| -rw-r--r-- | src/core/hw/gpu.cpp | 82 | ||||
| -rw-r--r-- | src/core/hw/gpu.h | 5 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 56 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 39 | ||||
| -rw-r--r-- | src/video_core/utils.h | 50 |
6 files changed, 171 insertions, 83 deletions
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp index 1ba60021f..574f19cc1 100644 --- a/src/citra_qt/debugger/graphics_framebuffer.cpp +++ b/src/citra_qt/debugger/graphics_framebuffer.cpp | |||
| @@ -9,8 +9,10 @@ | |||
| 9 | #include <QPushButton> | 9 | #include <QPushButton> |
| 10 | #include <QSpinBox> | 10 | #include <QSpinBox> |
| 11 | 11 | ||
| 12 | #include "core/hw/gpu.h" | ||
| 12 | #include "video_core/color.h" | 13 | #include "video_core/color.h" |
| 13 | #include "video_core/pica.h" | 14 | #include "video_core/pica.h" |
| 15 | #include "video_core/utils.h" | ||
| 14 | 16 | ||
| 15 | #include "graphics_framebuffer.h" | 17 | #include "graphics_framebuffer.h" |
| 16 | 18 | ||
| @@ -195,16 +197,20 @@ void GraphicsFramebufferWidget::OnUpdate() | |||
| 195 | 197 | ||
| 196 | // TODO: Implement a good way to visualize alpha components! | 198 | // TODO: Implement a good way to visualize alpha components! |
| 197 | // TODO: Unify this decoding code with the texture decoder | 199 | // TODO: Unify this decoding code with the texture decoder |
| 200 | u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer_format)); | ||
| 201 | |||
| 198 | switch (framebuffer_format) { | 202 | switch (framebuffer_format) { |
| 199 | case Format::RGBA8: | 203 | case Format::RGBA8: |
| 200 | { | 204 | { |
| 201 | QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32); | 205 | QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32); |
| 202 | u32* color_buffer = (u32*)Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address)); | 206 | u8* color_buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address)); |
| 203 | for (unsigned int y = 0; y < framebuffer_height; ++y) { | 207 | for (unsigned int y = 0; y < framebuffer_height; ++y) { |
| 204 | for (unsigned int x = 0; x < framebuffer_width; ++x) { | 208 | for (unsigned int x = 0; x < framebuffer_width; ++x) { |
| 205 | u32 value = *(color_buffer + x + y * framebuffer_width); | 209 | const u32 coarse_y = y & ~7; |
| 210 | u32 offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer_width * bytes_per_pixel; | ||
| 211 | u8* value = color_buffer + offset; | ||
| 206 | 212 | ||
| 207 | decoded_image.setPixel(x, y, qRgba((value >> 16) & 0xFF, (value >> 8) & 0xFF, value & 0xFF, 255/*value >> 24*/)); | 213 | decoded_image.setPixel(x, y, qRgba(value[3], value[2], value[1], 255/*value >> 24*/)); |
| 208 | } | 214 | } |
| 209 | } | 215 | } |
| 210 | pixmap = QPixmap::fromImage(decoded_image); | 216 | pixmap = QPixmap::fromImage(decoded_image); |
| @@ -217,7 +223,9 @@ void GraphicsFramebufferWidget::OnUpdate() | |||
| 217 | u8* color_buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address)); | 223 | u8* color_buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address)); |
| 218 | for (unsigned int y = 0; y < framebuffer_height; ++y) { | 224 | for (unsigned int y = 0; y < framebuffer_height; ++y) { |
| 219 | for (unsigned int x = 0; x < framebuffer_width; ++x) { | 225 | for (unsigned int x = 0; x < framebuffer_width; ++x) { |
| 220 | u8* pixel_pointer = color_buffer + x * 3 + y * 3 * framebuffer_width; | 226 | const u32 coarse_y = y & ~7; |
| 227 | u32 offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer_width * bytes_per_pixel; | ||
| 228 | u8* pixel_pointer = color_buffer + offset; | ||
| 221 | 229 | ||
| 222 | decoded_image.setPixel(x, y, qRgba(pixel_pointer[0], pixel_pointer[1], pixel_pointer[2], 255/*value >> 24*/)); | 230 | decoded_image.setPixel(x, y, qRgba(pixel_pointer[0], pixel_pointer[1], pixel_pointer[2], 255/*value >> 24*/)); |
| 223 | } | 231 | } |
| @@ -229,10 +237,12 @@ void GraphicsFramebufferWidget::OnUpdate() | |||
| 229 | case Format::RGBA5551: | 237 | case Format::RGBA5551: |
| 230 | { | 238 | { |
| 231 | QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32); | 239 | QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32); |
| 232 | u32* color_buffer = (u32*)Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address)); | 240 | u8* color_buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address)); |
| 233 | for (unsigned int y = 0; y < framebuffer_height; ++y) { | 241 | for (unsigned int y = 0; y < framebuffer_height; ++y) { |
| 234 | for (unsigned int x = 0; x < framebuffer_width; ++x) { | 242 | for (unsigned int x = 0; x < framebuffer_width; ++x) { |
| 235 | u16 value = *(u16*)(((u8*)color_buffer) + x * 2 + y * framebuffer_width * 2); | 243 | const u32 coarse_y = y & ~7; |
| 244 | u32 offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer_width * bytes_per_pixel; | ||
| 245 | u16 value = *(u16*)(color_buffer + offset); | ||
| 236 | u8 r = Color::Convert5To8((value >> 11) & 0x1F); | 246 | u8 r = Color::Convert5To8((value >> 11) & 0x1F); |
| 237 | u8 g = Color::Convert5To8((value >> 6) & 0x1F); | 247 | u8 g = Color::Convert5To8((value >> 6) & 0x1F); |
| 238 | u8 b = Color::Convert5To8((value >> 1) & 0x1F); | 248 | u8 b = Color::Convert5To8((value >> 1) & 0x1F); |
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index e6022d69f..2f1a69d90 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp | |||
| @@ -18,10 +18,10 @@ | |||
| 18 | #include "core/hw/gpu.h" | 18 | #include "core/hw/gpu.h" |
| 19 | 19 | ||
| 20 | #include "video_core/command_processor.h" | 20 | #include "video_core/command_processor.h" |
| 21 | #include "video_core/utils.h" | ||
| 21 | #include "video_core/video_core.h" | 22 | #include "video_core/video_core.h" |
| 22 | #include <video_core/color.h> | 23 | #include <video_core/color.h> |
| 23 | 24 | ||
| 24 | |||
| 25 | namespace GPU { | 25 | namespace GPU { |
| 26 | 26 | ||
| 27 | Regs g_regs; | 27 | Regs g_regs; |
| @@ -116,24 +116,64 @@ inline void Write(u32 addr, const T data) { | |||
| 116 | u8* source_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalInputAddress())); | 116 | u8* source_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalInputAddress())); |
| 117 | u8* dest_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalOutputAddress())); | 117 | u8* dest_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalOutputAddress())); |
| 118 | 118 | ||
| 119 | // Cheap emulation of horizontal scaling: Just skip each second pixel of the | 119 | unsigned horizontal_scale = (config.scale_horizontally != 0) ? 2 : 1; |
| 120 | // input framebuffer. We keep track of this in the pixel_skip variable. | 120 | unsigned vertical_scale = (config.scale_vertically != 0) ? 2 : 1; |
| 121 | unsigned pixel_skip = (config.scale_horizontally != 0) ? 2 : 1; | 121 | |
| 122 | 122 | u32 output_width = config.output_width / horizontal_scale; | |
| 123 | u32 output_width = config.output_width / pixel_skip; | 123 | u32 output_height = config.output_height / vertical_scale; |
| 124 | 124 | ||
| 125 | for (u32 y = 0; y < config.output_height; ++y) { | 125 | if (config.raw_copy) { |
| 126 | // TODO: Why does the register seem to hold twice the framebuffer width? | 126 | // Raw copies do not perform color conversion nor tiled->linear / linear->tiled conversions |
| 127 | // TODO(Subv): Verify if raw copies perform scaling | ||
| 128 | memcpy(dest_pointer, source_pointer, config.output_width * config.output_height * | ||
| 129 | GPU::Regs::BytesPerPixel(config.output_format)); | ||
| 130 | |||
| 131 | LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), flags 0x%08X, Raw copy", | ||
| 132 | config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format), | ||
| 133 | config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), | ||
| 134 | config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(), | ||
| 135 | config.output_format.Value(), config.flags); | ||
| 136 | |||
| 137 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); | ||
| 138 | break; | ||
| 139 | } | ||
| 127 | 140 | ||
| 141 | // TODO(Subv): Blend the pixels when horizontal / vertical scaling is enabled, | ||
| 142 | // right now we're just skipping the extra pixels. | ||
| 143 | for (u32 y = 0; y < output_height; ++y) { | ||
| 128 | for (u32 x = 0; x < output_width; ++x) { | 144 | for (u32 x = 0; x < output_width; ++x) { |
| 129 | struct { | 145 | struct { |
| 130 | int r, g, b, a; | 146 | int r, g, b, a; |
| 131 | } source_color = { 0, 0, 0, 0 }; | 147 | } source_color = { 0, 0, 0, 0 }; |
| 132 | 148 | ||
| 149 | u32 scaled_x = x * horizontal_scale; | ||
| 150 | u32 scaled_y = y * vertical_scale; | ||
| 151 | |||
| 152 | u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format); | ||
| 153 | u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format); | ||
| 154 | u32 src_offset; | ||
| 155 | u32 dst_offset; | ||
| 156 | |||
| 157 | if (config.output_tiled) { | ||
| 158 | // Interpret the input as linear and the output as tiled | ||
| 159 | u32 coarse_y = y & ~7; | ||
| 160 | u32 stride = output_width * dst_bytes_per_pixel; | ||
| 161 | |||
| 162 | src_offset = (scaled_x + scaled_y * config.input_width) * src_bytes_per_pixel; | ||
| 163 | dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride; | ||
| 164 | } else { | ||
| 165 | // Interpret the input as tiled and the output as linear | ||
| 166 | u32 coarse_y = scaled_y & ~7; | ||
| 167 | u32 stride = config.input_width * src_bytes_per_pixel; | ||
| 168 | |||
| 169 | src_offset = VideoCore::GetMortonOffset(scaled_x, scaled_y, src_bytes_per_pixel) + coarse_y * stride; | ||
| 170 | dst_offset = (x + y * output_width) * dst_bytes_per_pixel; | ||
| 171 | } | ||
| 172 | |||
| 133 | switch (config.input_format) { | 173 | switch (config.input_format) { |
| 134 | case Regs::PixelFormat::RGBA8: | 174 | case Regs::PixelFormat::RGBA8: |
| 135 | { | 175 | { |
| 136 | u8* srcptr = source_pointer + (x * pixel_skip + y * config.input_width) * 4; | 176 | u8* srcptr = source_pointer + src_offset; |
| 137 | source_color.r = srcptr[3]; // red | 177 | source_color.r = srcptr[3]; // red |
| 138 | source_color.g = srcptr[2]; // green | 178 | source_color.g = srcptr[2]; // green |
| 139 | source_color.b = srcptr[1]; // blue | 179 | source_color.b = srcptr[1]; // blue |
| @@ -143,7 +183,7 @@ inline void Write(u32 addr, const T data) { | |||
| 143 | 183 | ||
| 144 | case Regs::PixelFormat::RGB5A1: | 184 | case Regs::PixelFormat::RGB5A1: |
| 145 | { | 185 | { |
| 146 | u16 srcval = *(u16*)(source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip); | 186 | u16 srcval = *(u16*)(source_pointer + src_offset); |
| 147 | source_color.r = Color::Convert5To8((srcval >> 11) & 0x1F); // red | 187 | source_color.r = Color::Convert5To8((srcval >> 11) & 0x1F); // red |
| 148 | source_color.g = Color::Convert5To8((srcval >> 6) & 0x1F); // green | 188 | source_color.g = Color::Convert5To8((srcval >> 6) & 0x1F); // green |
| 149 | source_color.b = Color::Convert5To8((srcval >> 1) & 0x1F); // blue | 189 | source_color.b = Color::Convert5To8((srcval >> 1) & 0x1F); // blue |
| @@ -153,7 +193,7 @@ inline void Write(u32 addr, const T data) { | |||
| 153 | 193 | ||
| 154 | case Regs::PixelFormat::RGBA4: | 194 | case Regs::PixelFormat::RGBA4: |
| 155 | { | 195 | { |
| 156 | u16 srcval = *(u16*)(source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip); | 196 | u16 srcval = *(u16*)(source_pointer + src_offset); |
| 157 | source_color.r = Color::Convert4To8((srcval >> 12) & 0xF); // red | 197 | source_color.r = Color::Convert4To8((srcval >> 12) & 0xF); // red |
| 158 | source_color.g = Color::Convert4To8((srcval >> 8) & 0xF); // green | 198 | source_color.g = Color::Convert4To8((srcval >> 8) & 0xF); // green |
| 159 | source_color.b = Color::Convert4To8((srcval >> 4) & 0xF); // blue | 199 | source_color.b = Color::Convert4To8((srcval >> 4) & 0xF); // blue |
| @@ -169,7 +209,7 @@ inline void Write(u32 addr, const T data) { | |||
| 169 | switch (config.output_format) { | 209 | switch (config.output_format) { |
| 170 | case Regs::PixelFormat::RGBA8: | 210 | case Regs::PixelFormat::RGBA8: |
| 171 | { | 211 | { |
| 172 | u8* dstptr = dest_pointer + (x * pixel_skip + y * config.output_width) * 4; | 212 | u8* dstptr = dest_pointer + dst_offset; |
| 173 | dstptr[3] = source_color.r; | 213 | dstptr[3] = source_color.r; |
| 174 | dstptr[2] = source_color.g; | 214 | dstptr[2] = source_color.g; |
| 175 | dstptr[1] = source_color.b; | 215 | dstptr[1] = source_color.b; |
| @@ -179,7 +219,7 @@ inline void Write(u32 addr, const T data) { | |||
| 179 | 219 | ||
| 180 | case Regs::PixelFormat::RGB8: | 220 | case Regs::PixelFormat::RGB8: |
| 181 | { | 221 | { |
| 182 | u8* dstptr = dest_pointer + (x + y * output_width) * 3; | 222 | u8* dstptr = dest_pointer + dst_offset; |
| 183 | dstptr[2] = source_color.r; // red | 223 | dstptr[2] = source_color.r; // red |
| 184 | dstptr[1] = source_color.g; // green | 224 | dstptr[1] = source_color.g; // green |
| 185 | dstptr[0] = source_color.b; // blue | 225 | dstptr[0] = source_color.b; // blue |
| @@ -188,7 +228,7 @@ inline void Write(u32 addr, const T data) { | |||
| 188 | 228 | ||
| 189 | case Regs::PixelFormat::RGB5A1: | 229 | case Regs::PixelFormat::RGB5A1: |
| 190 | { | 230 | { |
| 191 | u16* dstptr = (u16*)(dest_pointer + x * 2 + y * config.output_width * 2); | 231 | u16* dstptr = (u16*)(dest_pointer + dst_offset); |
| 192 | *dstptr = ((source_color.r >> 3) << 11) | ((source_color.g >> 3) << 6) | 232 | *dstptr = ((source_color.r >> 3) << 11) | ((source_color.g >> 3) << 6) |
| 193 | | ((source_color.b >> 3) << 1) | ( source_color.a >> 7); | 233 | | ((source_color.b >> 3) << 1) | ( source_color.a >> 7); |
| 194 | break; | 234 | break; |
| @@ -196,7 +236,7 @@ inline void Write(u32 addr, const T data) { | |||
| 196 | 236 | ||
| 197 | case Regs::PixelFormat::RGBA4: | 237 | case Regs::PixelFormat::RGBA4: |
| 198 | { | 238 | { |
| 199 | u16* dstptr = (u16*)(dest_pointer + x * 2 + y * config.output_width * 2); | 239 | u16* dstptr = (u16*)(dest_pointer + dst_offset); |
| 200 | *dstptr = ((source_color.r >> 4) << 12) | ((source_color.g >> 4) << 8) | 240 | *dstptr = ((source_color.r >> 4) << 12) | ((source_color.g >> 4) << 8) |
| 201 | | ((source_color.b >> 4) << 4) | ( source_color.a >> 4); | 241 | | ((source_color.b >> 4) << 4) | ( source_color.a >> 4); |
| 202 | break; | 242 | break; |
| @@ -209,11 +249,11 @@ inline void Write(u32 addr, const T data) { | |||
| 209 | } | 249 | } |
| 210 | } | 250 | } |
| 211 | 251 | ||
| 212 | LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x", | 252 | LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X", |
| 213 | config.output_height * output_width * 4, | 253 | config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format), |
| 214 | config.GetPhysicalInputAddress(), (u32)config.input_width, (u32)config.input_height, | 254 | config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), |
| 215 | config.GetPhysicalOutputAddress(), (u32)output_width, (u32)config.output_height, | 255 | config.GetPhysicalOutputAddress(), output_width, output_height, |
| 216 | config.output_format.Value()); | 256 | config.output_format.Value(), config.flags); |
| 217 | 257 | ||
| 218 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); | 258 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); |
| 219 | } | 259 | } |
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 75f524465..ab1dcf91d 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h | |||
| @@ -192,12 +192,13 @@ struct Regs { | |||
| 192 | u32 flags; | 192 | u32 flags; |
| 193 | 193 | ||
| 194 | BitField< 0, 1, u32> flip_data; // flips input data horizontally (TODO) if true | 194 | BitField< 0, 1, u32> flip_data; // flips input data horizontally (TODO) if true |
| 195 | BitField< 1, 1, u32> output_tiled; // Converts from linear to tiled format | ||
| 196 | BitField< 3, 1, u32> raw_copy; // Copies the data without performing any processing | ||
| 195 | BitField< 8, 3, PixelFormat> input_format; | 197 | BitField< 8, 3, PixelFormat> input_format; |
| 196 | BitField<12, 3, PixelFormat> output_format; | 198 | BitField<12, 3, PixelFormat> output_format; |
| 197 | BitField<16, 1, u32> output_tiled; // stores output in a tiled format | ||
| 198 | 199 | ||
| 199 | // TODO: Not really sure if this actually scales, or even resizes at all. | ||
| 200 | BitField<24, 1, u32> scale_horizontally; | 200 | BitField<24, 1, u32> scale_horizontally; |
| 201 | BitField<25, 1, u32> scale_vertically; | ||
| 201 | }; | 202 | }; |
| 202 | 203 | ||
| 203 | INSERT_PADDING_WORDS(0x1); | 204 | INSERT_PADDING_WORDS(0x1); |
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index f436aa541..27c246a99 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include "video_core/color.h" | 23 | #include "video_core/color.h" |
| 24 | #include "video_core/math.h" | 24 | #include "video_core/math.h" |
| 25 | #include "video_core/pica.h" | 25 | #include "video_core/pica.h" |
| 26 | #include "video_core/utils.h" | ||
| 26 | 27 | ||
| 27 | #include "debug_utils.h" | 28 | #include "debug_utils.h" |
| 28 | 29 | ||
| @@ -306,63 +307,33 @@ std::unique_ptr<PicaTrace> FinishPicaTracing() | |||
| 306 | } | 307 | } |
| 307 | 308 | ||
| 308 | const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) { | 309 | const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) { |
| 309 | // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each | ||
| 310 | // of which is composed of four 2x2 subtiles each of which is composed of four texels. | ||
| 311 | // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. | ||
| 312 | // texels are laid out in a 2x2 subtile like this: | ||
| 313 | // 2 3 | ||
| 314 | // 0 1 | ||
| 315 | // | ||
| 316 | // The full 8x8 tile has the texels arranged like this: | ||
| 317 | // | ||
| 318 | // 42 43 46 47 58 59 62 63 | ||
| 319 | // 40 41 44 45 56 57 60 61 | ||
| 320 | // 34 35 38 39 50 51 54 55 | ||
| 321 | // 32 33 36 37 48 49 52 53 | ||
| 322 | // 10 11 14 15 26 27 30 31 | ||
| 323 | // 08 09 12 13 24 25 28 29 | ||
| 324 | // 02 03 06 07 18 19 22 23 | ||
| 325 | // 00 01 04 05 16 17 20 21 | ||
| 326 | |||
| 327 | const unsigned int block_width = 8; | ||
| 328 | const unsigned int block_height = 8; | ||
| 329 | |||
| 330 | const unsigned int coarse_x = x & ~7; | 310 | const unsigned int coarse_x = x & ~7; |
| 331 | const unsigned int coarse_y = y & ~7; | 311 | const unsigned int coarse_y = y & ~7; |
| 332 | 312 | ||
| 333 | // Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are | ||
| 334 | // arranged in a Z-order curve. More details on the bit manipulation at: | ||
| 335 | // https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ | ||
| 336 | unsigned int i = (x & 7) | ((y & 7) << 8); // ---- -210 | ||
| 337 | i = (i ^ (i << 2)) & 0x1313; // ---2 --10 | ||
| 338 | i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0 | ||
| 339 | i = (i | (i >> 7)) & 0x3F; | ||
| 340 | |||
| 341 | if (info.format != Regs::TextureFormat::ETC1 && | 313 | if (info.format != Regs::TextureFormat::ETC1 && |
| 342 | info.format != Regs::TextureFormat::ETC1A4) { | 314 | info.format != Regs::TextureFormat::ETC1A4) { |
| 343 | // TODO(neobrain): Fix code design to unify vertical block offsets! | 315 | // TODO(neobrain): Fix code design to unify vertical block offsets! |
| 344 | source += coarse_y * info.stride; | 316 | source += coarse_y * info.stride; |
| 345 | } | 317 | } |
| 346 | const unsigned int offset = coarse_x * block_height; | 318 | |
| 347 | |||
| 348 | // TODO: Assert that width/height are multiples of block dimensions | 319 | // TODO: Assert that width/height are multiples of block dimensions |
| 349 | 320 | ||
| 350 | switch (info.format) { | 321 | switch (info.format) { |
| 351 | case Regs::TextureFormat::RGBA8: | 322 | case Regs::TextureFormat::RGBA8: |
| 352 | { | 323 | { |
| 353 | const u8* source_ptr = source + offset * 4 + i * 4; | 324 | const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 4); |
| 354 | return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] }; | 325 | return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] }; |
| 355 | } | 326 | } |
| 356 | 327 | ||
| 357 | case Regs::TextureFormat::RGB8: | 328 | case Regs::TextureFormat::RGB8: |
| 358 | { | 329 | { |
| 359 | const u8* source_ptr = source + offset * 3 + i * 3; | 330 | const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 3); |
| 360 | return { source_ptr[2], source_ptr[1], source_ptr[0], 255 }; | 331 | return { source_ptr[2], source_ptr[1], source_ptr[0], 255 }; |
| 361 | } | 332 | } |
| 362 | 333 | ||
| 363 | case Regs::TextureFormat::RGBA5551: | 334 | case Regs::TextureFormat::RGBA5551: |
| 364 | { | 335 | { |
| 365 | const u16 source_ptr = *(const u16*)(source + offset * 2 + i * 2); | 336 | const u16 source_ptr = *(const u16*)(source + VideoCore::GetMortonOffset(x, y, 2)); |
| 366 | u8 r = (source_ptr >> 11) & 0x1F; | 337 | u8 r = (source_ptr >> 11) & 0x1F; |
| 367 | u8 g = ((source_ptr) >> 6) & 0x1F; | 338 | u8 g = ((source_ptr) >> 6) & 0x1F; |
| 368 | u8 b = (source_ptr >> 1) & 0x1F; | 339 | u8 b = (source_ptr >> 1) & 0x1F; |
| @@ -373,7 +344,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 373 | 344 | ||
| 374 | case Regs::TextureFormat::RGB565: | 345 | case Regs::TextureFormat::RGB565: |
| 375 | { | 346 | { |
| 376 | const u16 source_ptr = *(const u16*)(source + offset * 2 + i * 2); | 347 | const u16 source_ptr = *(const u16*)(source + VideoCore::GetMortonOffset(x, y, 2)); |
| 377 | u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F); | 348 | u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F); |
| 378 | u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F); | 349 | u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F); |
| 379 | u8 b = Color::Convert5To8((source_ptr) & 0x1F); | 350 | u8 b = Color::Convert5To8((source_ptr) & 0x1F); |
| @@ -382,7 +353,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 382 | 353 | ||
| 383 | case Regs::TextureFormat::RGBA4: | 354 | case Regs::TextureFormat::RGBA4: |
| 384 | { | 355 | { |
| 385 | const u8* source_ptr = source + offset * 2 + i * 2; | 356 | const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2); |
| 386 | u8 r = Color::Convert4To8(source_ptr[1] >> 4); | 357 | u8 r = Color::Convert4To8(source_ptr[1] >> 4); |
| 387 | u8 g = Color::Convert4To8(source_ptr[1] & 0xF); | 358 | u8 g = Color::Convert4To8(source_ptr[1] & 0xF); |
| 388 | u8 b = Color::Convert4To8(source_ptr[0] >> 4); | 359 | u8 b = Color::Convert4To8(source_ptr[0] >> 4); |
| @@ -392,7 +363,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 392 | 363 | ||
| 393 | case Regs::TextureFormat::IA8: | 364 | case Regs::TextureFormat::IA8: |
| 394 | { | 365 | { |
| 395 | const u8* source_ptr = source + offset * 2 + i * 2; | 366 | const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2); |
| 396 | 367 | ||
| 397 | if (disable_alpha) { | 368 | if (disable_alpha) { |
| 398 | // Show intensity as red, alpha as green | 369 | // Show intensity as red, alpha as green |
| @@ -404,13 +375,13 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 404 | 375 | ||
| 405 | case Regs::TextureFormat::I8: | 376 | case Regs::TextureFormat::I8: |
| 406 | { | 377 | { |
| 407 | const u8* source_ptr = source + offset + i; | 378 | const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); |
| 408 | return { *source_ptr, *source_ptr, *source_ptr, 255 }; | 379 | return { *source_ptr, *source_ptr, *source_ptr, 255 }; |
| 409 | } | 380 | } |
| 410 | 381 | ||
| 411 | case Regs::TextureFormat::A8: | 382 | case Regs::TextureFormat::A8: |
| 412 | { | 383 | { |
| 413 | const u8* source_ptr = source + offset + i; | 384 | const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); |
| 414 | 385 | ||
| 415 | if (disable_alpha) { | 386 | if (disable_alpha) { |
| 416 | return { *source_ptr, *source_ptr, *source_ptr, 255 }; | 387 | return { *source_ptr, *source_ptr, *source_ptr, 255 }; |
| @@ -421,7 +392,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 421 | 392 | ||
| 422 | case Regs::TextureFormat::IA4: | 393 | case Regs::TextureFormat::IA4: |
| 423 | { | 394 | { |
| 424 | const u8* source_ptr = source + offset + i; | 395 | const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); |
| 425 | 396 | ||
| 426 | u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); | 397 | u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); |
| 427 | u8 a = Color::Convert4To8((*source_ptr) & 0xF); | 398 | u8 a = Color::Convert4To8((*source_ptr) & 0xF); |
| @@ -436,9 +407,10 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 436 | 407 | ||
| 437 | case Regs::TextureFormat::A4: | 408 | case Regs::TextureFormat::A4: |
| 438 | { | 409 | { |
| 439 | const u8* source_ptr = source + (offset + i) / 2; | 410 | u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1); |
| 411 | const u8* source_ptr = source + morton_offset / 2; | ||
| 440 | 412 | ||
| 441 | u8 a = (i % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF); | 413 | u8 a = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF); |
| 442 | a = Color::Convert4To8(a); | 414 | a = Color::Convert4To8(a); |
| 443 | 415 | ||
| 444 | if (disable_alpha) { | 416 | if (disable_alpha) { |
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 24dc37856..a7bb0612f 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -7,13 +7,14 @@ | |||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | #include "common/math_util.h" | 8 | #include "common/math_util.h" |
| 9 | 9 | ||
| 10 | #include "core/hw/gpu.h" | ||
| 11 | #include "debug_utils/debug_utils.h" | ||
| 10 | #include "math.h" | 12 | #include "math.h" |
| 11 | #include "color.h" | 13 | #include "color.h" |
| 12 | #include "pica.h" | 14 | #include "pica.h" |
| 13 | #include "rasterizer.h" | 15 | #include "rasterizer.h" |
| 14 | #include "vertex_shader.h" | 16 | #include "vertex_shader.h" |
| 15 | 17 | #include "video_core/utils.h" | |
| 16 | #include "debug_utils/debug_utils.h" | ||
| 17 | 18 | ||
| 18 | namespace Pica { | 19 | namespace Pica { |
| 19 | 20 | ||
| @@ -27,10 +28,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | |||
| 27 | // NOTE: The framebuffer height register contains the actual FB height minus one. | 28 | // NOTE: The framebuffer height register contains the actual FB height minus one. |
| 28 | y = (registers.framebuffer.height - y); | 29 | y = (registers.framebuffer.height - y); |
| 29 | 30 | ||
| 31 | const u32 coarse_y = y & ~7; | ||
| 32 | u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value())); | ||
| 33 | u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel; | ||
| 34 | |||
| 30 | switch (registers.framebuffer.color_format) { | 35 | switch (registers.framebuffer.color_format) { |
| 31 | case registers.framebuffer.RGBA8: | 36 | case registers.framebuffer.RGBA8: |
| 32 | { | 37 | { |
| 33 | u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4; | 38 | u8* pixel = color_buffer + dst_offset; |
| 34 | pixel[3] = color.r(); | 39 | pixel[3] = color.r(); |
| 35 | pixel[2] = color.g(); | 40 | pixel[2] = color.g(); |
| 36 | pixel[1] = color.b(); | 41 | pixel[1] = color.b(); |
| @@ -40,14 +45,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | |||
| 40 | 45 | ||
| 41 | case registers.framebuffer.RGBA4: | 46 | case registers.framebuffer.RGBA4: |
| 42 | { | 47 | { |
| 43 | u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 2; | 48 | u8* pixel = color_buffer + dst_offset; |
| 44 | pixel[1] = (color.r() & 0xF0) | (color.g() >> 4); | 49 | pixel[1] = (color.r() & 0xF0) | (color.g() >> 4); |
| 45 | pixel[0] = (color.b() & 0xF0) | (color.a() >> 4); | 50 | pixel[0] = (color.b() & 0xF0) | (color.a() >> 4); |
| 46 | break; | 51 | break; |
| 47 | } | 52 | } |
| 48 | 53 | ||
| 49 | default: | 54 | default: |
| 50 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format); | 55 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value()); |
| 51 | UNIMPLEMENTED(); | 56 | UNIMPLEMENTED(); |
| 52 | } | 57 | } |
| 53 | } | 58 | } |
| @@ -58,11 +63,15 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |||
| 58 | 63 | ||
| 59 | y = (registers.framebuffer.height - y); | 64 | y = (registers.framebuffer.height - y); |
| 60 | 65 | ||
| 66 | const u32 coarse_y = y & ~7; | ||
| 67 | u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value())); | ||
| 68 | u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel; | ||
| 69 | |||
| 61 | switch (registers.framebuffer.color_format) { | 70 | switch (registers.framebuffer.color_format) { |
| 62 | case registers.framebuffer.RGBA8: | 71 | case registers.framebuffer.RGBA8: |
| 63 | { | 72 | { |
| 64 | Math::Vec4<u8> ret; | 73 | Math::Vec4<u8> ret; |
| 65 | u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 4; | 74 | u8* pixel = color_buffer + src_offset; |
| 66 | ret.r() = pixel[3]; | 75 | ret.r() = pixel[3]; |
| 67 | ret.g() = pixel[2]; | 76 | ret.g() = pixel[2]; |
| 68 | ret.b() = pixel[1]; | 77 | ret.b() = pixel[1]; |
| @@ -73,7 +82,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |||
| 73 | case registers.framebuffer.RGBA4: | 82 | case registers.framebuffer.RGBA4: |
| 74 | { | 83 | { |
| 75 | Math::Vec4<u8> ret; | 84 | Math::Vec4<u8> ret; |
| 76 | u8* pixel = color_buffer + (x + y * registers.framebuffer.GetWidth()) * 2; | 85 | u8* pixel = color_buffer + src_offset; |
| 77 | ret.r() = Color::Convert4To8(pixel[1] >> 4); | 86 | ret.r() = Color::Convert4To8(pixel[1] >> 4); |
| 78 | ret.g() = Color::Convert4To8(pixel[1] & 0x0F); | 87 | ret.g() = Color::Convert4To8(pixel[1] & 0x0F); |
| 79 | ret.b() = Color::Convert4To8(pixel[0] >> 4); | 88 | ret.b() = Color::Convert4To8(pixel[0] >> 4); |
| @@ -82,7 +91,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |||
| 82 | } | 91 | } |
| 83 | 92 | ||
| 84 | default: | 93 | default: |
| 85 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format); | 94 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value()); |
| 86 | UNIMPLEMENTED(); | 95 | UNIMPLEMENTED(); |
| 87 | } | 96 | } |
| 88 | 97 | ||
| @@ -91,22 +100,28 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |||
| 91 | 100 | ||
| 92 | static u32 GetDepth(int x, int y) { | 101 | static u32 GetDepth(int x, int y) { |
| 93 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); | 102 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); |
| 94 | u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); | 103 | u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr)); |
| 95 | 104 | ||
| 96 | y = (registers.framebuffer.height - y); | 105 | y = (registers.framebuffer.height - y); |
| 106 | |||
| 107 | const u32 coarse_y = y & ~7; | ||
| 108 | u32 stride = registers.framebuffer.width * 2; | ||
| 97 | 109 | ||
| 98 | // Assuming 16-bit depth buffer format until actual format handling is implemented | 110 | // Assuming 16-bit depth buffer format until actual format handling is implemented |
| 99 | return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); | 111 | return *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride); |
| 100 | } | 112 | } |
| 101 | 113 | ||
| 102 | static void SetDepth(int x, int y, u16 value) { | 114 | static void SetDepth(int x, int y, u16 value) { |
| 103 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); | 115 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); |
| 104 | u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); | 116 | u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr)); |
| 105 | 117 | ||
| 106 | y = (registers.framebuffer.height - y); | 118 | y = (registers.framebuffer.height - y); |
| 107 | 119 | ||
| 120 | const u32 coarse_y = y & ~7; | ||
| 121 | u32 stride = registers.framebuffer.width * 2; | ||
| 122 | |||
| 108 | // Assuming 16-bit depth buffer format until actual format handling is implemented | 123 | // Assuming 16-bit depth buffer format until actual format handling is implemented |
| 109 | *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; | 124 | *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride) = value; |
| 110 | } | 125 | } |
| 111 | 126 | ||
| 112 | // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values | 127 | // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values |
diff --git a/src/video_core/utils.h b/src/video_core/utils.h index 6fd640425..bda793fa5 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h | |||
| @@ -35,4 +35,54 @@ struct TGAHeader { | |||
| 35 | */ | 35 | */ |
| 36 | void DumpTGA(std::string filename, short width, short height, u8* raw_data); | 36 | void DumpTGA(std::string filename, short width, short height, u8* raw_data); |
| 37 | 37 | ||
| 38 | /** | ||
| 39 | * Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are | ||
| 40 | * arranged in a Z-order curve. More details on the bit manipulation at: | ||
| 41 | * https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ | ||
| 42 | */ | ||
| 43 | static inline u32 MortonInterleave(u32 x, u32 y) { | ||
| 44 | u32 i = (x & 7) | ((y & 7) << 8); // ---- -210 | ||
| 45 | i = (i ^ (i << 2)) & 0x1313; // ---2 --10 | ||
| 46 | i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0 | ||
| 47 | i = (i | (i >> 7)) & 0x3F; | ||
| 48 | return i; | ||
| 49 | } | ||
| 50 | |||
| 51 | /** | ||
| 52 | * Calculates the offset of the position of the pixel in Morton order | ||
| 53 | */ | ||
| 54 | static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) { | ||
| 55 | // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each | ||
| 56 | // of which is composed of four 2x2 subtiles each of which is composed of four texels. | ||
| 57 | // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. | ||
| 58 | // texels are laid out in a 2x2 subtile like this: | ||
| 59 | // 2 3 | ||
| 60 | // 0 1 | ||
| 61 | // | ||
| 62 | // The full 8x8 tile has the texels arranged like this: | ||
| 63 | // | ||
| 64 | // 42 43 46 47 58 59 62 63 | ||
| 65 | // 40 41 44 45 56 57 60 61 | ||
| 66 | // 34 35 38 39 50 51 54 55 | ||
| 67 | // 32 33 36 37 48 49 52 53 | ||
| 68 | // 10 11 14 15 26 27 30 31 | ||
| 69 | // 08 09 12 13 24 25 28 29 | ||
| 70 | // 02 03 06 07 18 19 22 23 | ||
| 71 | // 00 01 04 05 16 17 20 21 | ||
| 72 | // | ||
| 73 | // This pattern is what's called Z-order curve, or Morton order. | ||
| 74 | |||
| 75 | const unsigned int block_width = 8; | ||
| 76 | const unsigned int block_height = 8; | ||
| 77 | |||
| 78 | const unsigned int coarse_x = x & ~7; | ||
| 79 | const unsigned int coarse_y = y & ~7; | ||
| 80 | |||
| 81 | u32 i = VideoCore::MortonInterleave(x, y); | ||
| 82 | |||
| 83 | const unsigned int offset = coarse_x * block_height; | ||
| 84 | |||
| 85 | return (i + offset) * bytes_per_pixel; | ||
| 86 | } | ||
| 87 | |||
| 38 | } // namespace | 88 | } // namespace |