diff options
| author | 2014-08-25 16:12:10 -0400 | |
|---|---|---|
| committer | 2014-08-25 16:12:10 -0400 | |
| commit | 97fd8fc38d4f9c288779cddb06538860124c6263 (patch) | |
| tree | bc99e0fceaae732f9c8d4831fcdb8f661b49ccb8 /src/video_core/rasterizer.cpp | |
| parent | Merge pull request #75 from xsacha/qt5 (diff) | |
| parent | Pica/Rasterizer: Clarify a TODO. (diff) | |
| download | yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.tar.gz yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.tar.xz yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.zip | |
Merge pull request #50 from neobrain/pica
Further work on Pica emulation
Diffstat (limited to 'src/video_core/rasterizer.cpp')
| -rw-r--r-- | src/video_core/rasterizer.cpp | 222 |
1 files changed, 204 insertions, 18 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index a7c1bab3e..cdfdb6215 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -11,6 +11,8 @@ | |||
| 11 | #include "rasterizer.h" | 11 | #include "rasterizer.h" |
| 12 | #include "vertex_shader.h" | 12 | #include "vertex_shader.h" |
| 13 | 13 | ||
| 14 | #include "debug_utils/debug_utils.h" | ||
| 15 | |||
| 14 | namespace Pica { | 16 | namespace Pica { |
| 15 | 17 | ||
| 16 | namespace Rasterizer { | 18 | namespace Rasterizer { |
| @@ -78,10 +80,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 78 | u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); | 80 | u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); |
| 79 | u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); | 81 | u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); |
| 80 | 82 | ||
| 81 | min_x = min_x & Fix12P4::IntMask(); | 83 | min_x &= Fix12P4::IntMask(); |
| 82 | min_y = min_y & Fix12P4::IntMask(); | 84 | min_y &= Fix12P4::IntMask(); |
| 83 | max_x = (max_x + Fix12P4::FracMask()) & Fix12P4::IntMask(); | 85 | max_x = ((max_x + Fix12P4::FracMask()) & Fix12P4::IntMask()); |
| 84 | max_y = (max_y + Fix12P4::FracMask()) & Fix12P4::IntMask(); | 86 | max_y = ((max_y + Fix12P4::FracMask()) & Fix12P4::IntMask()); |
| 85 | 87 | ||
| 86 | // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not | 88 | // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not |
| 87 | // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias | 89 | // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias |
| @@ -112,10 +114,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 112 | auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1, | 114 | auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1, |
| 113 | const Math::Vec2<Fix12P4>& vtx2, | 115 | const Math::Vec2<Fix12P4>& vtx2, |
| 114 | const Math::Vec2<Fix12P4>& vtx3) { | 116 | const Math::Vec2<Fix12P4>& vtx3) { |
| 115 | const auto vec1 = (vtx2.Cast<int>() - vtx1.Cast<int>()).Append(0); | 117 | const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); |
| 116 | const auto vec2 = (vtx3.Cast<int>() - vtx1.Cast<int>()).Append(0); | 118 | const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); |
| 117 | // TODO: There is a very small chance this will overflow for sizeof(int) == 4 | 119 | // TODO: There is a very small chance this will overflow for sizeof(int) == 4 |
| 118 | return Cross(vec1, vec2).z; | 120 | return Math::Cross(vec1, vec2).z; |
| 119 | }; | 121 | }; |
| 120 | 122 | ||
| 121 | int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); | 123 | int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); |
| @@ -143,15 +145,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 143 | // | 145 | // |
| 144 | // The generalization to three vertices is straightforward in barycentric coordinates. | 146 | // The generalization to three vertices is straightforward in barycentric coordinates. |
| 145 | auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { | 147 | auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { |
| 146 | auto attr_over_w = Math::MakeVec3(attr0 / v0.pos.w, | 148 | auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w, |
| 147 | attr1 / v1.pos.w, | 149 | attr1 / v1.pos.w, |
| 148 | attr2 / v2.pos.w); | 150 | attr2 / v2.pos.w); |
| 149 | auto w_inverse = Math::MakeVec3(float24::FromFloat32(1.f) / v0.pos.w, | 151 | auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w, |
| 150 | float24::FromFloat32(1.f) / v1.pos.w, | 152 | float24::FromFloat32(1.f) / v1.pos.w, |
| 151 | float24::FromFloat32(1.f) / v2.pos.w); | 153 | float24::FromFloat32(1.f) / v2.pos.w); |
| 152 | auto baricentric_coordinates = Math::MakeVec3(float24::FromFloat32(w0), | 154 | auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(w0), |
| 153 | float24::FromFloat32(w1), | 155 | float24::FromFloat32(w1), |
| 154 | float24::FromFloat32(w2)); | 156 | float24::FromFloat32(w2)); |
| 155 | 157 | ||
| 156 | float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); | 158 | float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); |
| 157 | float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); | 159 | float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); |
| @@ -165,12 +167,196 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 165 | (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) | 167 | (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) |
| 166 | }; | 168 | }; |
| 167 | 169 | ||
| 170 | Math::Vec4<u8> texture_color{}; | ||
| 171 | float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); | ||
| 172 | float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); | ||
| 173 | if (registers.texturing_enable) { | ||
| 174 | // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each | ||
| 175 | // of which is composed of four 2x2 subtiles each of which is composed of four texels. | ||
| 176 | // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. | ||
| 177 | // texels are laid out in a 2x2 subtile like this: | ||
| 178 | // 2 3 | ||
| 179 | // 0 1 | ||
| 180 | // | ||
| 181 | // The full 8x8 tile has the texels arranged like this: | ||
| 182 | // | ||
| 183 | // 42 43 46 47 58 59 62 63 | ||
| 184 | // 40 41 44 45 56 57 60 61 | ||
| 185 | // 34 35 38 39 50 51 54 55 | ||
| 186 | // 32 33 36 37 48 49 52 53 | ||
| 187 | // 10 11 14 15 26 27 30 31 | ||
| 188 | // 08 09 12 13 24 25 28 29 | ||
| 189 | // 02 03 06 07 18 19 22 23 | ||
| 190 | // 00 01 04 05 16 17 20 21 | ||
| 191 | |||
| 192 | // TODO: This is currently hardcoded for RGB8 | ||
| 193 | u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress()); | ||
| 194 | |||
| 195 | // TODO(neobrain): Not sure if this swizzling pattern is used for all textures. | ||
| 196 | // To be flexible in case different but similar patterns are used, we keep this | ||
| 197 | // somewhat inefficient code around for now. | ||
| 198 | int s = (int)(u * float24::FromFloat32(registers.texture0.width)).ToFloat32(); | ||
| 199 | int t = (int)(v * float24::FromFloat32(registers.texture0.height)).ToFloat32(); | ||
| 200 | int texel_index_within_tile = 0; | ||
| 201 | for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { | ||
| 202 | int sub_tile_width = 1 << block_size_index; | ||
| 203 | int sub_tile_height = 1 << block_size_index; | ||
| 204 | |||
| 205 | int sub_tile_index = (s & sub_tile_width) << block_size_index; | ||
| 206 | sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index); | ||
| 207 | texel_index_within_tile += sub_tile_index; | ||
| 208 | } | ||
| 209 | |||
| 210 | const int block_width = 8; | ||
| 211 | const int block_height = 8; | ||
| 212 | |||
| 213 | int coarse_s = (s / block_width) * block_width; | ||
| 214 | int coarse_t = (t / block_height) * block_height; | ||
| 215 | |||
| 216 | const int row_stride = registers.texture0.width * 3; | ||
| 217 | u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3; | ||
| 218 | texture_color.r() = source_ptr[2]; | ||
| 219 | texture_color.g() = source_ptr[1]; | ||
| 220 | texture_color.b() = source_ptr[0]; | ||
| 221 | texture_color.a() = 0xFF; | ||
| 222 | |||
| 223 | DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data); | ||
| 224 | } | ||
| 225 | |||
| 226 | // Texture environment - consists of 6 stages of color and alpha combining. | ||
| 227 | // | ||
| 228 | // Color combiners take three input color values from some source (e.g. interpolated | ||
| 229 | // vertex color, texture color, previous stage, etc), perform some very simple | ||
| 230 | // operations on each of them (e.g. inversion) and then calculate the output color | ||
| 231 | // with some basic arithmetic. Alpha combiners can be configured separately but work | ||
| 232 | // analogously. | ||
| 233 | Math::Vec4<u8> combiner_output; | ||
| 234 | for (auto tev_stage : registers.GetTevStages()) { | ||
| 235 | using Source = Regs::TevStageConfig::Source; | ||
| 236 | using ColorModifier = Regs::TevStageConfig::ColorModifier; | ||
| 237 | using AlphaModifier = Regs::TevStageConfig::AlphaModifier; | ||
| 238 | using Operation = Regs::TevStageConfig::Operation; | ||
| 239 | |||
| 240 | auto GetColorSource = [&](Source source) -> Math::Vec3<u8> { | ||
| 241 | switch (source) { | ||
| 242 | case Source::PrimaryColor: | ||
| 243 | return primary_color.rgb(); | ||
| 244 | |||
| 245 | case Source::Texture0: | ||
| 246 | return texture_color.rgb(); | ||
| 247 | |||
| 248 | case Source::Constant: | ||
| 249 | return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b}; | ||
| 250 | |||
| 251 | case Source::Previous: | ||
| 252 | return combiner_output.rgb(); | ||
| 253 | |||
| 254 | default: | ||
| 255 | ERROR_LOG(GPU, "Unknown color combiner source %d\n", (int)source); | ||
| 256 | return {}; | ||
| 257 | } | ||
| 258 | }; | ||
| 259 | |||
| 260 | auto GetAlphaSource = [&](Source source) -> u8 { | ||
| 261 | switch (source) { | ||
| 262 | case Source::PrimaryColor: | ||
| 263 | return primary_color.a(); | ||
| 264 | |||
| 265 | case Source::Texture0: | ||
| 266 | return texture_color.a(); | ||
| 267 | |||
| 268 | case Source::Constant: | ||
| 269 | return tev_stage.const_a; | ||
| 270 | |||
| 271 | case Source::Previous: | ||
| 272 | return combiner_output.a(); | ||
| 273 | |||
| 274 | default: | ||
| 275 | ERROR_LOG(GPU, "Unknown alpha combiner source %d\n", (int)source); | ||
| 276 | return 0; | ||
| 277 | } | ||
| 278 | }; | ||
| 279 | |||
| 280 | auto GetColorModifier = [](ColorModifier factor, const Math::Vec3<u8>& values) -> Math::Vec3<u8> { | ||
| 281 | switch (factor) | ||
| 282 | { | ||
| 283 | case ColorModifier::SourceColor: | ||
| 284 | return values; | ||
| 285 | default: | ||
| 286 | ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor); | ||
| 287 | return {}; | ||
| 288 | } | ||
| 289 | }; | ||
| 290 | |||
| 291 | auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 { | ||
| 292 | switch (factor) { | ||
| 293 | case AlphaModifier::SourceAlpha: | ||
| 294 | return value; | ||
| 295 | default: | ||
| 296 | ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor); | ||
| 297 | return 0; | ||
| 298 | } | ||
| 299 | }; | ||
| 300 | |||
| 301 | auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { | ||
| 302 | switch (op) { | ||
| 303 | case Operation::Replace: | ||
| 304 | return input[0]; | ||
| 305 | |||
| 306 | case Operation::Modulate: | ||
| 307 | return ((input[0] * input[1]) / 255).Cast<u8>(); | ||
| 308 | |||
| 309 | default: | ||
| 310 | ERROR_LOG(GPU, "Unknown color combiner operation %d\n", (int)op); | ||
| 311 | return {}; | ||
| 312 | } | ||
| 313 | }; | ||
| 314 | |||
| 315 | auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { | ||
| 316 | switch (op) { | ||
| 317 | case Operation::Replace: | ||
| 318 | return input[0]; | ||
| 319 | |||
| 320 | case Operation::Modulate: | ||
| 321 | return input[0] * input[1] / 255; | ||
| 322 | |||
| 323 | default: | ||
| 324 | ERROR_LOG(GPU, "Unknown alpha combiner operation %d\n", (int)op); | ||
| 325 | return 0; | ||
| 326 | } | ||
| 327 | }; | ||
| 328 | |||
| 329 | // color combiner | ||
| 330 | // NOTE: Not sure if the alpha combiner might use the color output of the previous | ||
| 331 | // stage as input. Hence, we currently don't directly write the result to | ||
| 332 | // combiner_output.rgb(), but instead store it in a temporary variable until | ||
| 333 | // alpha combining has been done. | ||
| 334 | Math::Vec3<u8> color_result[3] = { | ||
| 335 | GetColorModifier(tev_stage.color_modifier1, GetColorSource(tev_stage.color_source1)), | ||
| 336 | GetColorModifier(tev_stage.color_modifier2, GetColorSource(tev_stage.color_source2)), | ||
| 337 | GetColorModifier(tev_stage.color_modifier3, GetColorSource(tev_stage.color_source3)) | ||
| 338 | }; | ||
| 339 | auto color_output = ColorCombine(tev_stage.color_op, color_result); | ||
| 340 | |||
| 341 | // alpha combiner | ||
| 342 | std::array<u8,3> alpha_result = { | ||
| 343 | GetAlphaModifier(tev_stage.alpha_modifier1, GetAlphaSource(tev_stage.alpha_source1)), | ||
| 344 | GetAlphaModifier(tev_stage.alpha_modifier2, GetAlphaSource(tev_stage.alpha_source2)), | ||
| 345 | GetAlphaModifier(tev_stage.alpha_modifier3, GetAlphaSource(tev_stage.alpha_source3)) | ||
| 346 | }; | ||
| 347 | auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); | ||
| 348 | |||
| 349 | combiner_output = Math::MakeVec(color_output, alpha_output); | ||
| 350 | } | ||
| 351 | |||
| 352 | // TODO: Not sure if the multiplication by 65535 has already been taken care | ||
| 353 | // of when transforming to screen coordinates or not. | ||
| 168 | u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + | 354 | u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + |
| 169 | (float)v1.screenpos[2].ToFloat32() * w1 + | 355 | (float)v1.screenpos[2].ToFloat32() * w1 + |
| 170 | (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536? | 356 | (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); |
| 171 | SetDepth(x >> 4, y >> 4, z); | 357 | SetDepth(x >> 4, y >> 4, z); |
| 172 | 358 | ||
| 173 | DrawPixel(x >> 4, y >> 4, primary_color); | 359 | DrawPixel(x >> 4, y >> 4, combiner_output); |
| 174 | } | 360 | } |
| 175 | } | 361 | } |
| 176 | } | 362 | } |