diff options
| author | 2015-01-01 20:54:45 -0500 | |
|---|---|---|
| committer | 2015-01-01 20:54:45 -0500 | |
| commit | 7c8f6ca0511b35c8a56dce466df01f6364728581 (patch) | |
| tree | bd1fa3b50c090786dd0f91669702ebf010d2a900 | |
| parent | Merge pull request #379 from lioncash/sh (diff) | |
| parent | Pica/Rasterizer: Remove some redundant casts. (diff) | |
| download | yuzu-7c8f6ca0511b35c8a56dce466df01f6364728581.tar.gz yuzu-7c8f6ca0511b35c8a56dce466df01f6364728581.tar.xz yuzu-7c8f6ca0511b35c8a56dce466df01f6364728581.zip | |
Merge pull request #358 from neobrain/pica_progress2
pica_progress followups
| -rw-r--r-- | src/citra_qt/debugger/graphics_framebuffer.cpp | 12 | ||||
| -rw-r--r-- | src/core/hw/gpu.cpp | 6 | ||||
| -rw-r--r-- | src/core/hw/gpu.h | 3 | ||||
| -rw-r--r-- | src/video_core/color.h | 32 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 28 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 48 | ||||
| -rw-r--r-- | src/video_core/pica.h | 84 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 241 | ||||
| -rw-r--r-- | src/video_core/utils.h | 26 | ||||
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 27 | ||||
| -rw-r--r-- | src/video_core/vertex_shader.h | 1 |
11 files changed, 384 insertions, 124 deletions
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp index dd41c3880..a9e9de652 100644 --- a/src/citra_qt/debugger/graphics_framebuffer.cpp +++ b/src/citra_qt/debugger/graphics_framebuffer.cpp | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <QPushButton> | 10 | #include <QPushButton> |
| 11 | #include <QSpinBox> | 11 | #include <QSpinBox> |
| 12 | 12 | ||
| 13 | #include "video_core/color.h" | ||
| 13 | #include "video_core/pica.h" | 14 | #include "video_core/pica.h" |
| 14 | 15 | ||
| 15 | #include "graphics_framebuffer.hxx" | 16 | #include "graphics_framebuffer.hxx" |
| @@ -202,7 +203,8 @@ void GraphicsFramebufferWidget::OnUpdate() | |||
| 202 | framebuffer_address = framebuffer.GetColorBufferPhysicalAddress(); | 203 | framebuffer_address = framebuffer.GetColorBufferPhysicalAddress(); |
| 203 | framebuffer_width = framebuffer.GetWidth(); | 204 | framebuffer_width = framebuffer.GetWidth(); |
| 204 | framebuffer_height = framebuffer.GetHeight(); | 205 | framebuffer_height = framebuffer.GetHeight(); |
| 205 | framebuffer_format = static_cast<Format>(framebuffer.color_format); | 206 | // TODO: It's unknown how this format is actually specified |
| 207 | framebuffer_format = Format::RGBA8; | ||
| 206 | 208 | ||
| 207 | break; | 209 | break; |
| 208 | } | 210 | } |
| @@ -258,10 +260,10 @@ void GraphicsFramebufferWidget::OnUpdate() | |||
| 258 | for (unsigned y = 0; y < framebuffer_height; ++y) { | 260 | for (unsigned y = 0; y < framebuffer_height; ++y) { |
| 259 | for (unsigned x = 0; x < framebuffer_width; ++x) { | 261 | for (unsigned x = 0; x < framebuffer_width; ++x) { |
| 260 | u16 value = *(u16*)(((u8*)color_buffer) + x * 2 + y * framebuffer_width * 2); | 262 | u16 value = *(u16*)(((u8*)color_buffer) + x * 2 + y * framebuffer_width * 2); |
| 261 | u8 r = (value >> 11) & 0x1F; | 263 | u8 r = Color::Convert5To8((value >> 11) & 0x1F); |
| 262 | u8 g = (value >> 6) & 0x1F; | 264 | u8 g = Color::Convert5To8((value >> 6) & 0x1F); |
| 263 | u8 b = (value >> 1) & 0x1F; | 265 | u8 b = Color::Convert5To8((value >> 1) & 0x1F); |
| 264 | u8 a = value & 1; | 266 | u8 a = Color::Convert1To8(value & 1); |
| 265 | 267 | ||
| 266 | decoded_image.setPixel(x, y, qRgba(r, g, b, 255/*a*/)); | 268 | decoded_image.setPixel(x, y, qRgba(r, g, b, 255/*a*/)); |
| 267 | } | 269 | } |
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index dd619cb16..0ff6c6cde 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp | |||
| @@ -94,11 +94,15 @@ inline void Write(u32 addr, const T data) { | |||
| 94 | int r, g, b, a; | 94 | int r, g, b, a; |
| 95 | } source_color = { 0, 0, 0, 0 }; | 95 | } source_color = { 0, 0, 0, 0 }; |
| 96 | 96 | ||
| 97 | // Cheap emulation of horizontal scaling: Just skip every second pixel of the | ||
| 98 | // input framebuffer. We keep track of this in the pixel_skip variable. | ||
| 99 | unsigned pixel_skip = (config.scale_horizontally != 0) ? 2 : 1; | ||
| 100 | |||
| 97 | switch (config.input_format) { | 101 | switch (config.input_format) { |
| 98 | case Regs::PixelFormat::RGBA8: | 102 | case Regs::PixelFormat::RGBA8: |
| 99 | { | 103 | { |
| 100 | // TODO: Most likely got the component order messed up. | 104 | // TODO: Most likely got the component order messed up. |
| 101 | u8* srcptr = source_pointer + x * 4 + y * config.input_width * 4; | 105 | u8* srcptr = source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip; |
| 102 | source_color.r = srcptr[0]; // blue | 106 | source_color.r = srcptr[0]; // blue |
| 103 | source_color.g = srcptr[1]; // green | 107 | source_color.g = srcptr[1]; // green |
| 104 | source_color.b = srcptr[2]; // red | 108 | source_color.b = srcptr[2]; // red |
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 292f496c1..7de055232 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h | |||
| @@ -157,6 +157,9 @@ struct Regs { | |||
| 157 | BitField< 8, 3, PixelFormat> input_format; | 157 | BitField< 8, 3, PixelFormat> input_format; |
| 158 | BitField<12, 3, PixelFormat> output_format; | 158 | BitField<12, 3, PixelFormat> output_format; |
| 159 | BitField<16, 1, u32> output_tiled; // stores output in a tiled format | 159 | BitField<16, 1, u32> output_tiled; // stores output in a tiled format |
| 160 | |||
| 161 | // TODO: Not really sure if this actually scales, or even resizes at all. | ||
| 162 | BitField<24, 1, u32> scale_horizontally; | ||
| 160 | }; | 163 | }; |
| 161 | 164 | ||
| 162 | INSERT_PADDING_WORDS(0x1); | 165 | INSERT_PADDING_WORDS(0x1); |
diff --git a/src/video_core/color.h b/src/video_core/color.h new file mode 100644 index 000000000..e86ac1265 --- /dev/null +++ b/src/video_core/color.h | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Color { | ||
| 10 | |||
| 11 | /// Convert a 1-bit color component to 8 bit | ||
| 12 | static inline u8 Convert1To8(u8 value) { | ||
| 13 | return value * 255; | ||
| 14 | } | ||
| 15 | |||
| 16 | /// Convert a 4-bit color component to 8 bit | ||
| 17 | static inline u8 Convert4To8(u8 value) { | ||
| 18 | return (value << 4) | value; | ||
| 19 | } | ||
| 20 | |||
| 21 | /// Convert a 5-bit color component to 8 bit | ||
| 22 | static inline u8 Convert5To8(u8 value) { | ||
| 23 | return (value << 3) | (value >> 2); | ||
| 24 | } | ||
| 25 | |||
| 26 | /// Convert a 6-bit color component to 8 bit | ||
| 27 | static inline u8 Convert6To8(u8 value) { | ||
| 28 | return (value << 2) | (value >> 4); | ||
| 29 | } | ||
| 30 | |||
| 31 | |||
| 32 | } // namespace | ||
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 9602779f4..0d9f4ba66 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -112,6 +112,11 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 112 | // Initialize data for the current vertex | 112 | // Initialize data for the current vertex |
| 113 | VertexShader::InputVertex input; | 113 | VertexShader::InputVertex input; |
| 114 | 114 | ||
| 115 | // Store a sentinel token in attr[0].w so we can later check whether the running | ||
| 116 | // application's vertex data overwrote it or not. | ||
| 117 | static const float24 debug_token = float24::FromRawFloat24(0x00abcdef); | ||
| 118 | input.attr[0].w = debug_token; | ||
| 119 | |||
| 115 | for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { | 120 | for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { |
| 116 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | 121 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { |
| 117 | const u8* srcdata = Memory::GetPointer(PAddrToVAddr(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i])); | 122 | const u8* srcdata = Memory::GetPointer(PAddrToVAddr(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i])); |
| @@ -136,6 +141,16 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 136 | } | 141 | } |
| 137 | } | 142 | } |
| 138 | 143 | ||
| 144 | // HACK: Some games do not initialize the vertex position's w component. This leads | ||
| 145 | // to critical issues since it messes up perspective division. As a | ||
| 146 | // workaround, we force the fourth component to 1.0 if we find this to be the | ||
| 147 | // case. | ||
| 148 | // To do this, we additionally have to assume that the first input attribute | ||
| 149 | // is the vertex position, since there's no information about this other than | ||
| 150 | // the empirical observation that this is usually the case. | ||
| 151 | if (input.attr[0].w == debug_token) | ||
| 152 | input.attr[0].w = float24::FromFloat32(1.0); | ||
| 153 | |||
| 139 | if (g_debug_context) | 154 | if (g_debug_context) |
| 140 | g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); | 155 | g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); |
| 141 | 156 | ||
| @@ -173,6 +188,19 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 173 | 188 | ||
| 174 | break; | 189 | break; |
| 175 | 190 | ||
| 191 | case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1): | ||
| 192 | case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[1], 0x2b2): | ||
| 193 | case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[2], 0x2b3): | ||
| 194 | case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4): | ||
| 195 | { | ||
| 196 | int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1)); | ||
| 197 | auto values = registers.vs_int_uniforms[index]; | ||
| 198 | VertexShader::GetIntUniform(index) = Math::Vec4<u8>(values.x, values.y, values.z, values.w); | ||
| 199 | LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", | ||
| 200 | index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value()); | ||
| 201 | break; | ||
| 202 | } | ||
| 203 | |||
| 176 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1): | 204 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1): |
| 177 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2): | 205 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2): |
| 178 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3): | 206 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3): |
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 5921185a6..a494465b9 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "common/log.h" | 19 | #include "common/log.h" |
| 20 | #include "common/file_util.h" | 20 | #include "common/file_util.h" |
| 21 | 21 | ||
| 22 | #include "video_core/color.h" | ||
| 22 | #include "video_core/math.h" | 23 | #include "video_core/math.h" |
| 23 | #include "video_core/pica.h" | 24 | #include "video_core/pica.h" |
| 24 | 25 | ||
| @@ -359,29 +360,26 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 359 | u8 g = ((source_ptr) >> 6) & 0x1F; | 360 | u8 g = ((source_ptr) >> 6) & 0x1F; |
| 360 | u8 b = (source_ptr >> 1) & 0x1F; | 361 | u8 b = (source_ptr >> 1) & 0x1F; |
| 361 | u8 a = source_ptr & 1; | 362 | u8 a = source_ptr & 1; |
| 362 | return Math::MakeVec<u8>((r << 3) | (r >> 2), (g << 3) | (g >> 2), (b << 3) | (b >> 2), disable_alpha ? 255 : (a * 255)); | 363 | return Math::MakeVec<u8>(Color::Convert5To8(r), Color::Convert5To8(g), |
| 364 | Color::Convert5To8(b), disable_alpha ? 255 : Color::Convert1To8(a)); | ||
| 363 | } | 365 | } |
| 364 | 366 | ||
| 365 | case Regs::TextureFormat::RGB565: | 367 | case Regs::TextureFormat::RGB565: |
| 366 | { | 368 | { |
| 367 | const u16 source_ptr = *(const u16*)(source + offset * 2); | 369 | const u16 source_ptr = *(const u16*)(source + offset * 2); |
| 368 | u8 r = (source_ptr >> 11) & 0x1F; | 370 | u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F); |
| 369 | u8 g = ((source_ptr) >> 5) & 0x3F; | 371 | u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F); |
| 370 | u8 b = (source_ptr) & 0x1F; | 372 | u8 b = Color::Convert5To8((source_ptr) & 0x1F); |
| 371 | return Math::MakeVec<u8>((r << 3) | (r >> 2), (g << 2) | (g >> 4), (b << 3) | (b >> 2), 255); | 373 | return Math::MakeVec<u8>(r, g, b, 255); |
| 372 | } | 374 | } |
| 373 | 375 | ||
| 374 | case Regs::TextureFormat::RGBA4: | 376 | case Regs::TextureFormat::RGBA4: |
| 375 | { | 377 | { |
| 376 | const u8* source_ptr = source + offset * 2; | 378 | const u8* source_ptr = source + offset * 2; |
| 377 | u8 r = source_ptr[1] >> 4; | 379 | u8 r = Color::Convert4To8(source_ptr[1] >> 4); |
| 378 | u8 g = source_ptr[1] & 0xFF; | 380 | u8 g = Color::Convert4To8(source_ptr[1] & 0xF); |
| 379 | u8 b = source_ptr[0] >> 4; | 381 | u8 b = Color::Convert4To8(source_ptr[0] >> 4); |
| 380 | u8 a = source_ptr[0] & 0xFF; | 382 | u8 a = Color::Convert4To8(source_ptr[0] & 0xF); |
| 381 | r = (r << 4) | r; | ||
| 382 | g = (g << 4) | g; | ||
| 383 | b = (b << 4) | b; | ||
| 384 | a = (a << 4) | a; | ||
| 385 | return { r, g, b, disable_alpha ? (u8)255 : a }; | 383 | return { r, g, b, disable_alpha ? (u8)255 : a }; |
| 386 | } | 384 | } |
| 387 | 385 | ||
| @@ -389,13 +387,11 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 389 | { | 387 | { |
| 390 | const u8* source_ptr = source + offset * 2; | 388 | const u8* source_ptr = source + offset * 2; |
| 391 | 389 | ||
| 392 | // TODO: component order not verified | ||
| 393 | |||
| 394 | if (disable_alpha) { | 390 | if (disable_alpha) { |
| 395 | // Show intensity as red, alpha as green | 391 | // Show intensity as red, alpha as green |
| 396 | return { source_ptr[0], source_ptr[1], 0, 255 }; | 392 | return { source_ptr[1], source_ptr[0], 0, 255 }; |
| 397 | } else { | 393 | } else { |
| 398 | return { source_ptr[0], source_ptr[0], source_ptr[0], source_ptr[1]}; | 394 | return { source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]}; |
| 399 | } | 395 | } |
| 400 | } | 396 | } |
| 401 | 397 | ||
| @@ -418,14 +414,10 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 418 | 414 | ||
| 419 | case Regs::TextureFormat::IA4: | 415 | case Regs::TextureFormat::IA4: |
| 420 | { | 416 | { |
| 421 | const u8* source_ptr = source + offset / 2; | 417 | const u8* source_ptr = source + offset; |
| 422 | |||
| 423 | // TODO: component order not verified | ||
| 424 | 418 | ||
| 425 | u8 i = (*source_ptr) & 0xF; | 419 | u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); |
| 426 | u8 a = ((*source_ptr) & 0xF0) >> 4; | 420 | u8 a = Color::Convert4To8((*source_ptr) & 0xF); |
| 427 | a |= a << 4; | ||
| 428 | i |= i << 4; | ||
| 429 | 421 | ||
| 430 | if (disable_alpha) { | 422 | if (disable_alpha) { |
| 431 | // Show intensity as red, alpha as green | 423 | // Show intensity as red, alpha as green |
| @@ -439,15 +431,13 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 439 | { | 431 | { |
| 440 | const u8* source_ptr = source + offset / 2; | 432 | const u8* source_ptr = source + offset / 2; |
| 441 | 433 | ||
| 442 | // TODO: component order not verified | ||
| 443 | |||
| 444 | u8 a = (coarse_x % 2) ? ((*source_ptr)&0xF) : (((*source_ptr) & 0xF0) >> 4); | 434 | u8 a = (coarse_x % 2) ? ((*source_ptr)&0xF) : (((*source_ptr) & 0xF0) >> 4); |
| 445 | a |= a << 4; | 435 | a = Color::Convert4To8(a); |
| 446 | 436 | ||
| 447 | if (disable_alpha) { | 437 | if (disable_alpha) { |
| 448 | return { *source_ptr, *source_ptr, *source_ptr, 255 }; | 438 | return { a, a, a, 255 }; |
| 449 | } else { | 439 | } else { |
| 450 | return { 0, 0, 0, *source_ptr }; | 440 | return { 0, 0, 0, a }; |
| 451 | } | 441 | } |
| 452 | } | 442 | } |
| 453 | 443 | ||
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 38bac748c..f5771ed84 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h | |||
| @@ -50,7 +50,19 @@ struct Regs { | |||
| 50 | 50 | ||
| 51 | u32 trigger_irq; | 51 | u32 trigger_irq; |
| 52 | 52 | ||
| 53 | INSERT_PADDING_WORDS(0x30); | 53 | INSERT_PADDING_WORDS(0x2f); |
| 54 | |||
| 55 | enum class CullMode : u32 { | ||
| 56 | // Select which polygons are considered to be "frontfacing". | ||
| 57 | KeepAll = 0, | ||
| 58 | KeepClockWise = 1, | ||
| 59 | KeepCounterClockWise = 2, | ||
| 60 | // TODO: What does the remaining value (3) imply? | ||
| 61 | }; | ||
| 62 | |||
| 63 | union { | ||
| 64 | BitField<0, 2, CullMode> cull_mode; | ||
| 65 | }; | ||
| 54 | 66 | ||
| 55 | BitField<0, 24, u32> viewport_size_x; | 67 | BitField<0, 24, u32> viewport_size_x; |
| 56 | 68 | ||
| @@ -289,7 +301,7 @@ struct Regs { | |||
| 289 | TevStageConfig tev_stage4; | 301 | TevStageConfig tev_stage4; |
| 290 | INSERT_PADDING_WORDS(0x3); | 302 | INSERT_PADDING_WORDS(0x3); |
| 291 | TevStageConfig tev_stage5; | 303 | TevStageConfig tev_stage5; |
| 292 | INSERT_PADDING_WORDS(0x13); | 304 | INSERT_PADDING_WORDS(0x3); |
| 293 | 305 | ||
| 294 | const std::array<Regs::TevStageConfig,6> GetTevStages() const { | 306 | const std::array<Regs::TevStageConfig,6> GetTevStages() const { |
| 295 | return { tev_stage0, tev_stage1, | 307 | return { tev_stage0, tev_stage1, |
| @@ -298,6 +310,60 @@ struct Regs { | |||
| 298 | }; | 310 | }; |
| 299 | 311 | ||
| 300 | struct { | 312 | struct { |
| 313 | enum DepthFunc : u32 { | ||
| 314 | Always = 1, | ||
| 315 | LessThan = 4, | ||
| 316 | GreaterThan = 6, | ||
| 317 | }; | ||
| 318 | |||
| 319 | union { | ||
| 320 | // If false, logic blending is used | ||
| 321 | BitField<8, 1, u32> alphablend_enable; | ||
| 322 | }; | ||
| 323 | |||
| 324 | union { | ||
| 325 | enum BlendEquation : u32 { | ||
| 326 | Add = 0, | ||
| 327 | }; | ||
| 328 | |||
| 329 | enum BlendFactor : u32 { | ||
| 330 | Zero = 0, | ||
| 331 | One = 1, | ||
| 332 | |||
| 333 | SourceAlpha = 6, | ||
| 334 | OneMinusSourceAlpha = 7, | ||
| 335 | }; | ||
| 336 | |||
| 337 | BitField< 0, 8, BlendEquation> blend_equation_rgb; | ||
| 338 | BitField< 8, 8, BlendEquation> blend_equation_a; | ||
| 339 | |||
| 340 | BitField<16, 4, BlendFactor> factor_source_rgb; | ||
| 341 | BitField<20, 4, BlendFactor> factor_dest_rgb; | ||
| 342 | |||
| 343 | BitField<24, 4, BlendFactor> factor_source_a; | ||
| 344 | BitField<28, 4, BlendFactor> factor_dest_a; | ||
| 345 | } alpha_blending; | ||
| 346 | |||
| 347 | union { | ||
| 348 | enum Op { | ||
| 349 | Set = 4, | ||
| 350 | }; | ||
| 351 | |||
| 352 | BitField<0, 4, Op> op; | ||
| 353 | } logic_op; | ||
| 354 | |||
| 355 | INSERT_PADDING_WORDS(0x4); | ||
| 356 | |||
| 357 | union { | ||
| 358 | BitField< 0, 1, u32> depth_test_enable; | ||
| 359 | BitField< 4, 3, DepthFunc> depth_test_func; | ||
| 360 | BitField<12, 1, u32> depth_write_enable; | ||
| 361 | }; | ||
| 362 | |||
| 363 | INSERT_PADDING_WORDS(0x8); | ||
| 364 | } output_merger; | ||
| 365 | |||
| 366 | struct { | ||
| 301 | enum ColorFormat : u32 { | 367 | enum ColorFormat : u32 { |
| 302 | RGBA8 = 0, | 368 | RGBA8 = 0, |
| 303 | RGB8 = 1, | 369 | RGB8 = 1, |
| @@ -495,8 +561,14 @@ struct Regs { | |||
| 495 | INSERT_PADDING_WORDS(0x51); | 561 | INSERT_PADDING_WORDS(0x51); |
| 496 | 562 | ||
| 497 | BitField<0, 16, u32> vs_bool_uniforms; | 563 | BitField<0, 16, u32> vs_bool_uniforms; |
| 564 | union { | ||
| 565 | BitField< 0, 8, u32> x; | ||
| 566 | BitField< 8, 8, u32> y; | ||
| 567 | BitField<16, 8, u32> z; | ||
| 568 | BitField<24, 8, u32> w; | ||
| 569 | } vs_int_uniforms[4]; | ||
| 498 | 570 | ||
| 499 | INSERT_PADDING_WORDS(0x9); | 571 | INSERT_PADDING_WORDS(0x5); |
| 500 | 572 | ||
| 501 | // Offset to shader program entry point (in words) | 573 | // Offset to shader program entry point (in words) |
| 502 | BitField<0, 16, u32> vs_main_offset; | 574 | BitField<0, 16, u32> vs_main_offset; |
| @@ -599,6 +671,7 @@ struct Regs { | |||
| 599 | } while(false) | 671 | } while(false) |
| 600 | 672 | ||
| 601 | ADD_FIELD(trigger_irq); | 673 | ADD_FIELD(trigger_irq); |
| 674 | ADD_FIELD(cull_mode); | ||
| 602 | ADD_FIELD(viewport_size_x); | 675 | ADD_FIELD(viewport_size_x); |
| 603 | ADD_FIELD(viewport_size_y); | 676 | ADD_FIELD(viewport_size_y); |
| 604 | ADD_FIELD(viewport_depth_range); | 677 | ADD_FIELD(viewport_depth_range); |
| @@ -617,6 +690,7 @@ struct Regs { | |||
| 617 | ADD_FIELD(tev_stage3); | 690 | ADD_FIELD(tev_stage3); |
| 618 | ADD_FIELD(tev_stage4); | 691 | ADD_FIELD(tev_stage4); |
| 619 | ADD_FIELD(tev_stage5); | 692 | ADD_FIELD(tev_stage5); |
| 693 | ADD_FIELD(output_merger); | ||
| 620 | ADD_FIELD(framebuffer); | 694 | ADD_FIELD(framebuffer); |
| 621 | ADD_FIELD(vertex_attributes); | 695 | ADD_FIELD(vertex_attributes); |
| 622 | ADD_FIELD(index_array); | 696 | ADD_FIELD(index_array); |
| @@ -625,6 +699,7 @@ struct Regs { | |||
| 625 | ADD_FIELD(trigger_draw_indexed); | 699 | ADD_FIELD(trigger_draw_indexed); |
| 626 | ADD_FIELD(triangle_topology); | 700 | ADD_FIELD(triangle_topology); |
| 627 | ADD_FIELD(vs_bool_uniforms); | 701 | ADD_FIELD(vs_bool_uniforms); |
| 702 | ADD_FIELD(vs_int_uniforms); | ||
| 628 | ADD_FIELD(vs_main_offset); | 703 | ADD_FIELD(vs_main_offset); |
| 629 | ADD_FIELD(vs_input_register_map); | 704 | ADD_FIELD(vs_input_register_map); |
| 630 | ADD_FIELD(vs_uniform_setup); | 705 | ADD_FIELD(vs_uniform_setup); |
| @@ -668,6 +743,7 @@ private: | |||
| 668 | #define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position") | 743 | #define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position") |
| 669 | 744 | ||
| 670 | ASSERT_REG_POSITION(trigger_irq, 0x10); | 745 | ASSERT_REG_POSITION(trigger_irq, 0x10); |
| 746 | ASSERT_REG_POSITION(cull_mode, 0x40); | ||
| 671 | ASSERT_REG_POSITION(viewport_size_x, 0x41); | 747 | ASSERT_REG_POSITION(viewport_size_x, 0x41); |
| 672 | ASSERT_REG_POSITION(viewport_size_y, 0x43); | 748 | ASSERT_REG_POSITION(viewport_size_y, 0x43); |
| 673 | ASSERT_REG_POSITION(viewport_depth_range, 0x4d); | 749 | ASSERT_REG_POSITION(viewport_depth_range, 0x4d); |
| @@ -688,6 +764,7 @@ ASSERT_REG_POSITION(tev_stage2, 0xd0); | |||
| 688 | ASSERT_REG_POSITION(tev_stage3, 0xd8); | 764 | ASSERT_REG_POSITION(tev_stage3, 0xd8); |
| 689 | ASSERT_REG_POSITION(tev_stage4, 0xf0); | 765 | ASSERT_REG_POSITION(tev_stage4, 0xf0); |
| 690 | ASSERT_REG_POSITION(tev_stage5, 0xf8); | 766 | ASSERT_REG_POSITION(tev_stage5, 0xf8); |
| 767 | ASSERT_REG_POSITION(output_merger, 0x100); | ||
| 691 | ASSERT_REG_POSITION(framebuffer, 0x110); | 768 | ASSERT_REG_POSITION(framebuffer, 0x110); |
| 692 | ASSERT_REG_POSITION(vertex_attributes, 0x200); | 769 | ASSERT_REG_POSITION(vertex_attributes, 0x200); |
| 693 | ASSERT_REG_POSITION(index_array, 0x227); | 770 | ASSERT_REG_POSITION(index_array, 0x227); |
| @@ -696,6 +773,7 @@ ASSERT_REG_POSITION(trigger_draw, 0x22e); | |||
| 696 | ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); | 773 | ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); |
| 697 | ASSERT_REG_POSITION(triangle_topology, 0x25e); | 774 | ASSERT_REG_POSITION(triangle_topology, 0x25e); |
| 698 | ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0); | 775 | ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0); |
| 776 | ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1); | ||
| 699 | ASSERT_REG_POSITION(vs_main_offset, 0x2ba); | 777 | ASSERT_REG_POSITION(vs_main_offset, 0x2ba); |
| 700 | ASSERT_REG_POSITION(vs_input_register_map, 0x2bb); | 778 | ASSERT_REG_POSITION(vs_input_register_map, 0x2bb); |
| 701 | ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0); | 779 | ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0); |
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index a80148872..025d4e484 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -18,51 +18,82 @@ namespace Pica { | |||
| 18 | namespace Rasterizer { | 18 | namespace Rasterizer { |
| 19 | 19 | ||
| 20 | static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | 20 | static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { |
| 21 | u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress()))); | 21 | const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); |
| 22 | u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr))); | ||
| 22 | u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); | 23 | u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); |
| 23 | 24 | ||
| 24 | // Assuming RGBA8 format until actual framebuffer format handling is implemented | 25 | // Assuming RGBA8 format until actual framebuffer format handling is implemented |
| 25 | *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value; | 26 | *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value; |
| 26 | } | 27 | } |
| 27 | 28 | ||
| 29 | static const Math::Vec4<u8> GetPixel(int x, int y) { | ||
| 30 | const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); | ||
| 31 | u32* color_buffer_u32 = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr))); | ||
| 32 | |||
| 33 | u32 value = *(color_buffer_u32 + x + y * registers.framebuffer.GetWidth()); | ||
| 34 | Math::Vec4<u8> ret; | ||
| 35 | ret.a() = value >> 24; | ||
| 36 | ret.r() = (value >> 16) & 0xFF; | ||
| 37 | ret.g() = (value >> 8) & 0xFF; | ||
| 38 | ret.b() = value & 0xFF; | ||
| 39 | return ret; | ||
| 40 | } | ||
| 41 | |||
| 28 | static u32 GetDepth(int x, int y) { | 42 | static u32 GetDepth(int x, int y) { |
| 29 | u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress()))); | 43 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); |
| 44 | u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); | ||
| 30 | 45 | ||
| 31 | // Assuming 16-bit depth buffer format until actual format handling is implemented | 46 | // Assuming 16-bit depth buffer format until actual format handling is implemented |
| 32 | return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); | 47 | return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); |
| 33 | } | 48 | } |
| 34 | 49 | ||
| 35 | static void SetDepth(int x, int y, u16 value) { | 50 | static void SetDepth(int x, int y, u16 value) { |
| 36 | u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress()))); | 51 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); |
| 52 | u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); | ||
| 37 | 53 | ||
| 38 | // Assuming 16-bit depth buffer format until actual format handling is implemented | 54 | // Assuming 16-bit depth buffer format until actual format handling is implemented |
| 39 | *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; | 55 | *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; |
| 40 | } | 56 | } |
| 41 | 57 | ||
| 42 | void ProcessTriangle(const VertexShader::OutputVertex& v0, | 58 | // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values |
| 43 | const VertexShader::OutputVertex& v1, | 59 | struct Fix12P4 { |
| 44 | const VertexShader::OutputVertex& v2) | 60 | Fix12P4() {} |
| 45 | { | 61 | Fix12P4(u16 val) : val(val) {} |
| 46 | // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values | ||
| 47 | struct Fix12P4 { | ||
| 48 | Fix12P4() {} | ||
| 49 | Fix12P4(u16 val) : val(val) {} | ||
| 50 | 62 | ||
| 51 | static u16 FracMask() { return 0xF; } | 63 | static u16 FracMask() { return 0xF; } |
| 52 | static u16 IntMask() { return (u16)~0xF; } | 64 | static u16 IntMask() { return (u16)~0xF; } |
| 53 | 65 | ||
| 54 | operator u16() const { | 66 | operator u16() const { |
| 55 | return val; | 67 | return val; |
| 56 | } | 68 | } |
| 57 | 69 | ||
| 58 | bool operator < (const Fix12P4& oth) const { | 70 | bool operator < (const Fix12P4& oth) const { |
| 59 | return (u16)*this < (u16)oth; | 71 | return (u16)*this < (u16)oth; |
| 60 | } | 72 | } |
| 61 | 73 | ||
| 62 | private: | 74 | private: |
| 63 | u16 val; | 75 | u16 val; |
| 64 | }; | 76 | }; |
| 77 | |||
| 78 | /** | ||
| 79 | * Calculate signed area of the triangle spanned by the three argument vertices. | ||
| 80 | * The sign denotes an orientation. | ||
| 81 | * | ||
| 82 | * @todo define orientation concretely. | ||
| 83 | */ | ||
| 84 | static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, | ||
| 85 | const Math::Vec2<Fix12P4>& vtx2, | ||
| 86 | const Math::Vec2<Fix12P4>& vtx3) { | ||
| 87 | const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); | ||
| 88 | const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); | ||
| 89 | // TODO: There is a very small chance this will overflow for sizeof(int) == 4 | ||
| 90 | return Math::Cross(vec1, vec2).z; | ||
| 91 | }; | ||
| 65 | 92 | ||
| 93 | void ProcessTriangle(const VertexShader::OutputVertex& v0, | ||
| 94 | const VertexShader::OutputVertex& v1, | ||
| 95 | const VertexShader::OutputVertex& v2) | ||
| 96 | { | ||
| 66 | // vertex positions in rasterizer coordinates | 97 | // vertex positions in rasterizer coordinates |
| 67 | auto FloatToFix = [](float24 flt) { | 98 | auto FloatToFix = [](float24 flt) { |
| 68 | return Fix12P4(static_cast<unsigned short>(flt.ToFloat32() * 16.0f)); | 99 | return Fix12P4(static_cast<unsigned short>(flt.ToFloat32() * 16.0f)); |
| @@ -70,10 +101,23 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 70 | auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) { | 101 | auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) { |
| 71 | return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; | 102 | return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; |
| 72 | }; | 103 | }; |
| 104 | |||
| 73 | Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), | 105 | Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), |
| 74 | ScreenToRasterizerCoordinates(v1.screenpos), | 106 | ScreenToRasterizerCoordinates(v1.screenpos), |
| 75 | ScreenToRasterizerCoordinates(v2.screenpos) }; | 107 | ScreenToRasterizerCoordinates(v2.screenpos) }; |
| 76 | 108 | ||
| 109 | if (registers.cull_mode == Regs::CullMode::KeepClockWise) { | ||
| 110 | // Reverse vertex order and use the CCW code path. | ||
| 111 | std::swap(vtxpos[1], vtxpos[2]); | ||
| 112 | } | ||
| 113 | |||
| 114 | if (registers.cull_mode != Regs::CullMode::KeepAll) { | ||
| 115 | // Cull away triangles which are wound clockwise. | ||
| 116 | // TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll | ||
| 117 | if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) | ||
| 118 | return; | ||
| 119 | } | ||
| 120 | |||
| 77 | // TODO: Proper scissor rect test! | 121 | // TODO: Proper scissor rect test! |
| 78 | u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); | 122 | u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); |
| 79 | u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); | 123 | u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); |
| @@ -116,18 +160,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 116 | for (u16 x = min_x; x < max_x; x += 0x10) { | 160 | for (u16 x = min_x; x < max_x; x += 0x10) { |
| 117 | 161 | ||
| 118 | // Calculate the barycentric coordinates w0, w1 and w2 | 162 | // Calculate the barycentric coordinates w0, w1 and w2 |
| 119 | auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1, | 163 | int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); |
| 120 | const Math::Vec2<Fix12P4>& vtx2, | 164 | int w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); |
| 121 | const Math::Vec2<Fix12P4>& vtx3) { | 165 | int w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); |
| 122 | const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); | ||
| 123 | const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); | ||
| 124 | // TODO: There is a very small chance this will overflow for sizeof(int) == 4 | ||
| 125 | return Math::Cross(vec1, vec2).z; | ||
| 126 | }; | ||
| 127 | |||
| 128 | int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); | ||
| 129 | int w1 = bias1 + orient2d(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); | ||
| 130 | int w2 = bias2 + orient2d(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); | ||
| 131 | int wsum = w0 + w1 + w2; | 166 | int wsum = w0 + w1 + w2; |
| 132 | 167 | ||
| 133 | // If current pixel is not covered by the current primitive | 168 | // If current pixel is not covered by the current primitive |
| @@ -201,8 +236,8 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 201 | return 0; | 236 | return 0; |
| 202 | } | 237 | } |
| 203 | }; | 238 | }; |
| 204 | s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width); | 239 | s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); |
| 205 | t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height); | 240 | t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); |
| 206 | 241 | ||
| 207 | u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress())); | 242 | u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress())); |
| 208 | auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); | 243 | auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); |
| @@ -279,12 +314,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 279 | } | 314 | } |
| 280 | }; | 315 | }; |
| 281 | 316 | ||
| 282 | auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { | 317 | static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { |
| 283 | switch (factor) | 318 | switch (factor) |
| 284 | { | 319 | { |
| 285 | case ColorModifier::SourceColor: | 320 | case ColorModifier::SourceColor: |
| 286 | return values.rgb(); | 321 | return values.rgb(); |
| 287 | 322 | ||
| 323 | case ColorModifier::OneMinusSourceColor: | ||
| 324 | return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>(); | ||
| 325 | |||
| 288 | case ColorModifier::SourceAlpha: | 326 | case ColorModifier::SourceAlpha: |
| 289 | return { values.a(), values.a(), values.a() }; | 327 | return { values.a(), values.a(), values.a() }; |
| 290 | 328 | ||
| @@ -295,7 +333,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 295 | } | 333 | } |
| 296 | }; | 334 | }; |
| 297 | 335 | ||
| 298 | auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 { | 336 | static auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 { |
| 299 | switch (factor) { | 337 | switch (factor) { |
| 300 | case AlphaModifier::SourceAlpha: | 338 | case AlphaModifier::SourceAlpha: |
| 301 | return value; | 339 | return value; |
| @@ -310,7 +348,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 310 | } | 348 | } |
| 311 | }; | 349 | }; |
| 312 | 350 | ||
| 313 | auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { | 351 | static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { |
| 314 | switch (op) { | 352 | switch (op) { |
| 315 | case Operation::Replace: | 353 | case Operation::Replace: |
| 316 | return input[0]; | 354 | return input[0]; |
| @@ -330,6 +368,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 330 | case Operation::Lerp: | 368 | case Operation::Lerp: |
| 331 | return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); | 369 | return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); |
| 332 | 370 | ||
| 371 | case Operation::Subtract: | ||
| 372 | { | ||
| 373 | auto result = input[0].Cast<int>() - input[1].Cast<int>(); | ||
| 374 | result.r() = std::max(0, result.r()); | ||
| 375 | result.g() = std::max(0, result.g()); | ||
| 376 | result.b() = std::max(0, result.b()); | ||
| 377 | return result.Cast<u8>(); | ||
| 378 | } | ||
| 379 | |||
| 333 | default: | 380 | default: |
| 334 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); | 381 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); |
| 335 | _dbg_assert_(HW_GPU, 0); | 382 | _dbg_assert_(HW_GPU, 0); |
| @@ -337,7 +384,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 337 | } | 384 | } |
| 338 | }; | 385 | }; |
| 339 | 386 | ||
| 340 | auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { | 387 | static auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { |
| 341 | switch (op) { | 388 | switch (op) { |
| 342 | case Operation::Replace: | 389 | case Operation::Replace: |
| 343 | return input[0]; | 390 | return input[0]; |
| @@ -351,6 +398,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 351 | case Operation::Lerp: | 398 | case Operation::Lerp: |
| 352 | return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; | 399 | return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; |
| 353 | 400 | ||
| 401 | case Operation::Subtract: | ||
| 402 | return std::max(0, (int)input[0] - (int)input[1]); | ||
| 403 | |||
| 354 | default: | 404 | default: |
| 355 | LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op); | 405 | LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op); |
| 356 | _dbg_assert_(HW_GPU, 0); | 406 | _dbg_assert_(HW_GPU, 0); |
| @@ -381,12 +431,111 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 381 | combiner_output = Math::MakeVec(color_output, alpha_output); | 431 | combiner_output = Math::MakeVec(color_output, alpha_output); |
| 382 | } | 432 | } |
| 383 | 433 | ||
| 384 | // TODO: Not sure if the multiplication by 65535 has already been taken care | 434 | // TODO: Does depth indeed only get written even if depth testing is enabled? |
| 385 | // of when transforming to screen coordinates or not. | 435 | if (registers.output_merger.depth_test_enable) { |
| 386 | u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + | 436 | u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 + |
| 387 | (float)v1.screenpos[2].ToFloat32() * w1 + | 437 | v1.screenpos[2].ToFloat32() * w1 + |
| 388 | (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); | 438 | v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); |
| 389 | SetDepth(x >> 4, y >> 4, z); | 439 | u16 ref_z = GetDepth(x >> 4, y >> 4); |
| 440 | |||
| 441 | bool pass = false; | ||
| 442 | |||
| 443 | switch (registers.output_merger.depth_test_func) { | ||
| 444 | case registers.output_merger.Always: | ||
| 445 | pass = true; | ||
| 446 | break; | ||
| 447 | |||
| 448 | case registers.output_merger.LessThan: | ||
| 449 | pass = z < ref_z; | ||
| 450 | break; | ||
| 451 | |||
| 452 | case registers.output_merger.GreaterThan: | ||
| 453 | pass = z > ref_z; | ||
| 454 | break; | ||
| 455 | |||
| 456 | default: | ||
| 457 | LOG_ERROR(HW_GPU, "Unknown depth test function %x", registers.output_merger.depth_test_func.Value()); | ||
| 458 | break; | ||
| 459 | } | ||
| 460 | |||
| 461 | if (!pass) | ||
| 462 | continue; | ||
| 463 | |||
| 464 | if (registers.output_merger.depth_write_enable) | ||
| 465 | SetDepth(x >> 4, y >> 4, z); | ||
| 466 | } | ||
| 467 | |||
| 468 | auto dest = GetPixel(x >> 4, y >> 4); | ||
| 469 | |||
| 470 | if (registers.output_merger.alphablend_enable) { | ||
| 471 | auto params = registers.output_merger.alpha_blending; | ||
| 472 | |||
| 473 | auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3<u8> { | ||
| 474 | switch(factor) { | ||
| 475 | case params.Zero: | ||
| 476 | return Math::Vec3<u8>(0, 0, 0); | ||
| 477 | |||
| 478 | case params.One: | ||
| 479 | return Math::Vec3<u8>(255, 255, 255); | ||
| 480 | |||
| 481 | case params.SourceAlpha: | ||
| 482 | return Math::MakeVec(combiner_output.a(), combiner_output.a(), combiner_output.a()); | ||
| 483 | |||
| 484 | case params.OneMinusSourceAlpha: | ||
| 485 | return Math::Vec3<u8>(255-combiner_output.a(), 255-combiner_output.a(), 255-combiner_output.a()); | ||
| 486 | |||
| 487 | default: | ||
| 488 | LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); | ||
| 489 | exit(0); | ||
| 490 | break; | ||
| 491 | } | ||
| 492 | }; | ||
| 493 | |||
| 494 | auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 { | ||
| 495 | switch(factor) { | ||
| 496 | case params.Zero: | ||
| 497 | return 0; | ||
| 498 | |||
| 499 | case params.One: | ||
| 500 | return 255; | ||
| 501 | |||
| 502 | case params.SourceAlpha: | ||
| 503 | return combiner_output.a(); | ||
| 504 | |||
| 505 | case params.OneMinusSourceAlpha: | ||
| 506 | return 255 - combiner_output.a(); | ||
| 507 | |||
| 508 | default: | ||
| 509 | LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); | ||
| 510 | exit(0); | ||
| 511 | break; | ||
| 512 | } | ||
| 513 | }; | ||
| 514 | |||
| 515 | auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb), | ||
| 516 | LookupFactorA(params.factor_source_a)); | ||
| 517 | auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb), | ||
| 518 | LookupFactorA(params.factor_dest_a)); | ||
| 519 | |||
| 520 | switch (params.blend_equation_rgb) { | ||
| 521 | case params.Add: | ||
| 522 | { | ||
| 523 | auto result = (combiner_output * srcfactor + dest * dstfactor) / 255; | ||
| 524 | result.r() = std::min(255, result.r()); | ||
| 525 | result.g() = std::min(255, result.g()); | ||
| 526 | result.b() = std::min(255, result.b()); | ||
| 527 | combiner_output = result.Cast<u8>(); | ||
| 528 | break; | ||
| 529 | } | ||
| 530 | |||
| 531 | default: | ||
| 532 | LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", params.blend_equation_rgb.Value()); | ||
| 533 | exit(0); | ||
| 534 | } | ||
| 535 | } else { | ||
| 536 | LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op); | ||
| 537 | exit(0); | ||
| 538 | } | ||
| 390 | 539 | ||
| 391 | DrawPixel(x >> 4, y >> 4, combiner_output); | 540 | DrawPixel(x >> 4, y >> 4, combiner_output); |
| 392 | } | 541 | } |
diff --git a/src/video_core/utils.h b/src/video_core/utils.h index 63ebccbde..6fd640425 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h | |||
| @@ -8,32 +8,6 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | 10 | ||
| 11 | namespace FormatPrecision { | ||
| 12 | |||
| 13 | /// Adjust RGBA8 color with RGBA6 precision | ||
| 14 | static inline u32 rgba8_with_rgba6(u32 src) { | ||
| 15 | u32 color = src; | ||
| 16 | color &= 0xFCFCFCFC; | ||
| 17 | color |= (color >> 6) & 0x03030303; | ||
| 18 | return color; | ||
| 19 | } | ||
| 20 | |||
| 21 | /// Adjust RGBA8 color with RGB565 precision | ||
| 22 | static inline u32 rgba8_with_rgb565(u32 src) { | ||
| 23 | u32 color = (src & 0xF8FCF8); | ||
| 24 | color |= (color >> 5) & 0x070007; | ||
| 25 | color |= (color >> 6) & 0x000300; | ||
| 26 | color |= 0xFF000000; | ||
| 27 | return color; | ||
| 28 | } | ||
| 29 | |||
| 30 | /// Adjust Z24 depth value with Z16 precision | ||
| 31 | static inline u32 z24_with_z16(u32 src) { | ||
| 32 | return (src & 0xFFFF00) | (src >> 16); | ||
| 33 | } | ||
| 34 | |||
| 35 | } // namespace | ||
| 36 | |||
| 37 | namespace VideoCore { | 11 | namespace VideoCore { |
| 38 | 12 | ||
| 39 | /// Structure for the TGA texture format (for dumping) | 13 | /// Structure for the TGA texture format (for dumping) |
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index bed5081a0..ff825e2e1 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp | |||
| @@ -30,6 +30,8 @@ static struct { | |||
| 30 | Math::Vec4<float24> f[96]; | 30 | Math::Vec4<float24> f[96]; |
| 31 | 31 | ||
| 32 | std::array<bool,16> b; | 32 | std::array<bool,16> b; |
| 33 | |||
| 34 | std::array<Math::Vec4<u8>,4> i; | ||
| 33 | } shader_uniforms; | 35 | } shader_uniforms; |
| 34 | 36 | ||
| 35 | // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! | 37 | // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! |
| @@ -37,33 +39,31 @@ static struct { | |||
| 37 | static std::array<u32, 1024> shader_memory; | 39 | static std::array<u32, 1024> shader_memory; |
| 38 | static std::array<u32, 1024> swizzle_data; | 40 | static std::array<u32, 1024> swizzle_data; |
| 39 | 41 | ||
| 40 | void SubmitShaderMemoryChange(u32 addr, u32 value) | 42 | void SubmitShaderMemoryChange(u32 addr, u32 value) { |
| 41 | { | ||
| 42 | shader_memory[addr] = value; | 43 | shader_memory[addr] = value; |
| 43 | } | 44 | } |
| 44 | 45 | ||
| 45 | void SubmitSwizzleDataChange(u32 addr, u32 value) | 46 | void SubmitSwizzleDataChange(u32 addr, u32 value) { |
| 46 | { | ||
| 47 | swizzle_data[addr] = value; | 47 | swizzle_data[addr] = value; |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | Math::Vec4<float24>& GetFloatUniform(u32 index) | 50 | Math::Vec4<float24>& GetFloatUniform(u32 index) { |
| 51 | { | ||
| 52 | return shader_uniforms.f[index]; | 51 | return shader_uniforms.f[index]; |
| 53 | } | 52 | } |
| 54 | 53 | ||
| 55 | bool& GetBoolUniform(u32 index) | 54 | bool& GetBoolUniform(u32 index) { |
| 56 | { | ||
| 57 | return shader_uniforms.b[index]; | 55 | return shader_uniforms.b[index]; |
| 58 | } | 56 | } |
| 59 | 57 | ||
| 60 | const std::array<u32, 1024>& GetShaderBinary() | 58 | Math::Vec4<u8>& GetIntUniform(u32 index) { |
| 61 | { | 59 | return shader_uniforms.i[index]; |
| 60 | } | ||
| 61 | |||
| 62 | const std::array<u32, 1024>& GetShaderBinary() { | ||
| 62 | return shader_memory; | 63 | return shader_memory; |
| 63 | } | 64 | } |
| 64 | 65 | ||
| 65 | const std::array<u32, 1024>& GetSwizzlePatterns() | 66 | const std::array<u32, 1024>& GetSwizzlePatterns() { |
| 66 | { | ||
| 67 | return swizzle_data; | 67 | return swizzle_data; |
| 68 | } | 68 | } |
| 69 | 69 | ||
| @@ -437,8 +437,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 437 | } | 437 | } |
| 438 | } | 438 | } |
| 439 | 439 | ||
| 440 | OutputVertex RunShader(const InputVertex& input, int num_attributes) | 440 | OutputVertex RunShader(const InputVertex& input, int num_attributes) { |
| 441 | { | ||
| 442 | VertexShaderState state; | 441 | VertexShaderState state; |
| 443 | 442 | ||
| 444 | const u32* main = &shader_memory[registers.vs_main_offset]; | 443 | const u32* main = &shader_memory[registers.vs_main_offset]; |
diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index af3fb2a2f..3a68a3409 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h | |||
| @@ -73,6 +73,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes); | |||
| 73 | 73 | ||
| 74 | Math::Vec4<float24>& GetFloatUniform(u32 index); | 74 | Math::Vec4<float24>& GetFloatUniform(u32 index); |
| 75 | bool& GetBoolUniform(u32 index); | 75 | bool& GetBoolUniform(u32 index); |
| 76 | Math::Vec4<u8>& GetIntUniform(u32 index); | ||
| 76 | 77 | ||
| 77 | const std::array<u32, 1024>& GetShaderBinary(); | 78 | const std::array<u32, 1024>& GetShaderBinary(); |
| 78 | const std::array<u32, 1024>& GetSwizzlePatterns(); | 79 | const std::array<u32, 1024>& GetSwizzlePatterns(); |