diff options
| -rw-r--r-- | CMakeLists.txt | 4 | ||||
| m--------- | externals/boost | 0 | ||||
| -rw-r--r-- | src/citra_qt/main.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/clipper.cpp | 77 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 51 | ||||
| -rw-r--r-- | src/video_core/pica.h | 20 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 28 | ||||
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 4 |
8 files changed, 107 insertions, 80 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 5bb87d50d..884520cef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt | |||
| @@ -41,11 +41,11 @@ else() | |||
| 41 | message(STATUS "libpng not found. Some debugging features have been disabled.") | 41 | message(STATUS "libpng not found. Some debugging features have been disabled.") |
| 42 | endif() | 42 | endif() |
| 43 | 43 | ||
| 44 | find_package(Boost) | 44 | find_package(Boost 1.57.0) |
| 45 | if (Boost_FOUND) | 45 | if (Boost_FOUND) |
| 46 | include_directories(${Boost_INCLUDE_DIRS}) | 46 | include_directories(${Boost_INCLUDE_DIRS}) |
| 47 | else() | 47 | else() |
| 48 | message(STATUS "Boost not found, falling back to externals") | 48 | message(STATUS "Boost 1.57.0 or newer not found, falling back to externals") |
| 49 | include_directories(externals/boost) | 49 | include_directories(externals/boost) |
| 50 | endif() | 50 | endif() |
| 51 | 51 | ||
diff --git a/externals/boost b/externals/boost | |||
| Subproject b060148c08ae87a3a5809c4f48cb26ac667487a | Subproject 97052c28acb141dbf3c5e14114af99045344b69 | ||
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp index 23d4925b8..b12e6a02b 100644 --- a/src/citra_qt/main.cpp +++ b/src/citra_qt/main.cpp | |||
| @@ -230,12 +230,15 @@ void GMainWindow::ToggleWindowMode() | |||
| 230 | render_window->setParent(nullptr); | 230 | render_window->setParent(nullptr); |
| 231 | render_window->setVisible(true); | 231 | render_window->setVisible(true); |
| 232 | render_window->RestoreGeometry(); | 232 | render_window->RestoreGeometry(); |
| 233 | render_window->setFocusPolicy(Qt::NoFocus); | ||
| 233 | } | 234 | } |
| 234 | else if (!enable && render_window->parent() == nullptr) | 235 | else if (!enable && render_window->parent() == nullptr) |
| 235 | { | 236 | { |
| 236 | render_window->BackupGeometry(); | 237 | render_window->BackupGeometry(); |
| 237 | ui.horizontalLayout->addWidget(render_window); | 238 | ui.horizontalLayout->addWidget(render_window); |
| 238 | render_window->setVisible(true); | 239 | render_window->setVisible(true); |
| 240 | render_window->setFocusPolicy(Qt::ClickFocus); | ||
| 241 | render_window->setFocus(); | ||
| 239 | } | 242 | } |
| 240 | } | 243 | } |
| 241 | 244 | ||
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 0bcd0b895..1744066ba 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <vector> | 5 | #include <boost/container/static_vector.hpp> |
| 6 | 6 | ||
| 7 | #include "clipper.h" | 7 | #include "clipper.h" |
| 8 | #include "pica.h" | 8 | #include "pica.h" |
| @@ -91,25 +91,31 @@ static void InitScreenCoordinates(OutputVertex& vtx) | |||
| 91 | viewport.zscale = float24::FromRawFloat24(registers.viewport_depth_range); | 91 | viewport.zscale = float24::FromRawFloat24(registers.viewport_depth_range); |
| 92 | viewport.offset_z = float24::FromRawFloat24(registers.viewport_depth_far_plane); | 92 | viewport.offset_z = float24::FromRawFloat24(registers.viewport_depth_far_plane); |
| 93 | 93 | ||
| 94 | float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; | ||
| 95 | vtx.color *= inv_w; | ||
| 96 | vtx.tc0 *= inv_w; | ||
| 97 | vtx.tc1 *= inv_w; | ||
| 98 | vtx.tc2 *= inv_w; | ||
| 99 | vtx.pos.w = inv_w; | ||
| 100 | |||
| 94 | // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not | 101 | // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not |
| 95 | vtx.screenpos[0] = (vtx.pos.x / vtx.pos.w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; | 102 | vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; |
| 96 | vtx.screenpos[1] = (vtx.pos.y / vtx.pos.w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; | 103 | vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; |
| 97 | vtx.screenpos[2] = viewport.offset_z - vtx.pos.z / vtx.pos.w * viewport.zscale; | 104 | vtx.screenpos[2] = viewport.offset_z - vtx.pos.z * inv_w * viewport.zscale; |
| 98 | } | 105 | } |
| 99 | 106 | ||
| 100 | void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { | 107 | void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { |
| 101 | 108 | using boost::container::static_vector; | |
| 102 | // TODO (neobrain): | 109 | |
| 103 | // The list of output vertices has some fixed maximum size, | 110 | // Clipping a planar n-gon against a plane will remove at least 1 vertex and introduces 2 at |
| 104 | // however I haven't taken the time to figure out what it is exactly. | 111 | // the new edge (or less in degenerate cases). As such, we can say that each clipping plane |
| 105 | // For now, we hence just assume a maximal size of 1000 vertices. | 112 | // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a |
| 106 | const size_t max_vertices = 1000; | 113 | // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9. |
| 107 | std::vector<OutputVertex> buffer_vertices; | 114 | static const size_t MAX_VERTICES = 9; |
| 108 | std::vector<OutputVertex*> output_list{ &v0, &v1, &v2 }; | 115 | static_vector<OutputVertex, MAX_VERTICES> buffer_a = { v0, v1, v2 }; |
| 109 | 116 | static_vector<OutputVertex, MAX_VERTICES> buffer_b; | |
| 110 | // Make sure to reserve space for all vertices. | 117 | auto* output_list = &buffer_a; |
| 111 | // Without this, buffer reallocation would invalidate references. | 118 | auto* input_list = &buffer_b; |
| 112 | buffer_vertices.reserve(max_vertices); | ||
| 113 | 119 | ||
| 114 | // Simple implementation of the Sutherland-Hodgman clipping algorithm. | 120 | // Simple implementation of the Sutherland-Hodgman clipping algorithm. |
| 115 | // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) | 121 | // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) |
| @@ -120,48 +126,45 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { | |||
| 120 | ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)), | 126 | ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)), |
| 121 | ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) { | 127 | ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) { |
| 122 | 128 | ||
| 123 | const std::vector<OutputVertex*> input_list = output_list; | 129 | std::swap(input_list, output_list); |
| 124 | output_list.clear(); | 130 | output_list->clear(); |
| 125 | 131 | ||
| 126 | const OutputVertex* reference_vertex = input_list.back(); | 132 | const OutputVertex* reference_vertex = &input_list->back(); |
| 127 | 133 | ||
| 128 | for (const auto& vertex : input_list) { | 134 | for (const auto& vertex : *input_list) { |
| 129 | // NOTE: This algorithm changes vertex order in some cases! | 135 | // NOTE: This algorithm changes vertex order in some cases! |
| 130 | if (edge.IsInside(*vertex)) { | 136 | if (edge.IsInside(vertex)) { |
| 131 | if (edge.IsOutSide(*reference_vertex)) { | 137 | if (edge.IsOutSide(*reference_vertex)) { |
| 132 | buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex)); | 138 | output_list->push_back(edge.GetIntersection(vertex, *reference_vertex)); |
| 133 | output_list.push_back(&(buffer_vertices.back())); | ||
| 134 | } | 139 | } |
| 135 | 140 | ||
| 136 | output_list.push_back(vertex); | 141 | output_list->push_back(vertex); |
| 137 | } else if (edge.IsInside(*reference_vertex)) { | 142 | } else if (edge.IsInside(*reference_vertex)) { |
| 138 | buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex)); | 143 | output_list->push_back(edge.GetIntersection(vertex, *reference_vertex)); |
| 139 | output_list.push_back(&(buffer_vertices.back())); | ||
| 140 | } | 144 | } |
| 141 | 145 | reference_vertex = &vertex; | |
| 142 | reference_vertex = vertex; | ||
| 143 | } | 146 | } |
| 144 | 147 | ||
| 145 | // Need to have at least a full triangle to continue... | 148 | // Need to have at least a full triangle to continue... |
| 146 | if (output_list.size() < 3) | 149 | if (output_list->size() < 3) |
| 147 | return; | 150 | return; |
| 148 | } | 151 | } |
| 149 | 152 | ||
| 150 | InitScreenCoordinates(*(output_list[0])); | 153 | InitScreenCoordinates((*output_list)[0]); |
| 151 | InitScreenCoordinates(*(output_list[1])); | 154 | InitScreenCoordinates((*output_list)[1]); |
| 152 | 155 | ||
| 153 | for (size_t i = 0; i < output_list.size() - 2; i ++) { | 156 | for (size_t i = 0; i < output_list->size() - 2; i ++) { |
| 154 | OutputVertex& vtx0 = *(output_list[0]); | 157 | OutputVertex& vtx0 = (*output_list)[0]; |
| 155 | OutputVertex& vtx1 = *(output_list[i+1]); | 158 | OutputVertex& vtx1 = (*output_list)[i+1]; |
| 156 | OutputVertex& vtx2 = *(output_list[i+2]); | 159 | OutputVertex& vtx2 = (*output_list)[i+2]; |
| 157 | 160 | ||
| 158 | InitScreenCoordinates(vtx2); | 161 | InitScreenCoordinates(vtx2); |
| 159 | 162 | ||
| 160 | LOG_TRACE(Render_Software, | 163 | LOG_TRACE(Render_Software, |
| 161 | "Triangle %lu/%lu (%lu buffer vertices) at position (%.3f, %.3f, %.3f, %.3f), " | 164 | "Triangle %lu/%lu at position (%.3f, %.3f, %.3f, %.3f), " |
| 162 | "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " | 165 | "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " |
| 163 | "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", | 166 | "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", |
| 164 | i,output_list.size(), buffer_vertices.size(), | 167 | i, output_list->size(), |
| 165 | vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), | 168 | vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), |
| 166 | vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), | 169 | vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), |
| 167 | vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(), | 170 | vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(), |
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 328386b7e..5921185a6 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp | |||
| @@ -304,7 +304,6 @@ std::unique_ptr<PicaTrace> FinishPicaTracing() | |||
| 304 | } | 304 | } |
| 305 | 305 | ||
| 306 | const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) { | 306 | const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) { |
| 307 | |||
| 308 | // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each | 307 | // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each |
| 309 | // of which is composed of four 2x2 subtiles each of which is composed of four texels. | 308 | // of which is composed of four 2x2 subtiles each of which is composed of four texels. |
| 310 | // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. | 309 | // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. |
| @@ -323,41 +322,39 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 323 | // 02 03 06 07 18 19 22 23 | 322 | // 02 03 06 07 18 19 22 23 |
| 324 | // 00 01 04 05 16 17 20 21 | 323 | // 00 01 04 05 16 17 20 21 |
| 325 | 324 | ||
| 326 | // TODO(neobrain): Not sure if this swizzling pattern is used for all textures. | 325 | const unsigned int block_width = 8; |
| 327 | // To be flexible in case different but similar patterns are used, we keep this | 326 | const unsigned int block_height = 8; |
| 328 | // somewhat inefficient code around for now. | 327 | |
| 329 | int texel_index_within_tile = 0; | 328 | const unsigned int coarse_x = x & ~7; |
| 330 | for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { | 329 | const unsigned int coarse_y = y & ~7; |
| 331 | int sub_tile_width = 1 << block_size_index; | ||
| 332 | int sub_tile_height = 1 << block_size_index; | ||
| 333 | |||
| 334 | int sub_tile_index = (x & sub_tile_width) << block_size_index; | ||
| 335 | sub_tile_index += 2 * ((y & sub_tile_height) << block_size_index); | ||
| 336 | texel_index_within_tile += sub_tile_index; | ||
| 337 | } | ||
| 338 | 330 | ||
| 339 | const int block_width = 8; | 331 | // Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are |
| 340 | const int block_height = 8; | 332 | // arranged in a Z-order curve. More details on the bit manipulation at: |
| 333 | // https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ | ||
| 334 | unsigned int i = (x | (y << 8)) & 0x0707; // ---- -210 | ||
| 335 | i = (i ^ (i << 2)) & 0x1313; // ---2 --10 | ||
| 336 | i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0 | ||
| 337 | i = (i | (i >> 7)) & 0x3F; | ||
| 341 | 338 | ||
| 342 | int coarse_x = (x / block_width) * block_width; | 339 | source += coarse_y * info.stride; |
| 343 | int coarse_y = (y / block_height) * block_height; | 340 | const unsigned int offset = coarse_x * block_height + i; |
| 344 | 341 | ||
| 345 | switch (info.format) { | 342 | switch (info.format) { |
| 346 | case Regs::TextureFormat::RGBA8: | 343 | case Regs::TextureFormat::RGBA8: |
| 347 | { | 344 | { |
| 348 | const u8* source_ptr = source + coarse_x * block_height * 4 + coarse_y * info.stride + texel_index_within_tile * 4; | 345 | const u8* source_ptr = source + offset * 4; |
| 349 | return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] }; | 346 | return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] }; |
| 350 | } | 347 | } |
| 351 | 348 | ||
| 352 | case Regs::TextureFormat::RGB8: | 349 | case Regs::TextureFormat::RGB8: |
| 353 | { | 350 | { |
| 354 | const u8* source_ptr = source + coarse_x * block_height * 3 + coarse_y * info.stride + texel_index_within_tile * 3; | 351 | const u8* source_ptr = source + offset * 3; |
| 355 | return { source_ptr[2], source_ptr[1], source_ptr[0], 255 }; | 352 | return { source_ptr[2], source_ptr[1], source_ptr[0], 255 }; |
| 356 | } | 353 | } |
| 357 | 354 | ||
| 358 | case Regs::TextureFormat::RGBA5551: | 355 | case Regs::TextureFormat::RGBA5551: |
| 359 | { | 356 | { |
| 360 | const u16 source_ptr = *(const u16*)(source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2); | 357 | const u16 source_ptr = *(const u16*)(source + offset * 2); |
| 361 | u8 r = (source_ptr >> 11) & 0x1F; | 358 | u8 r = (source_ptr >> 11) & 0x1F; |
| 362 | u8 g = ((source_ptr) >> 6) & 0x1F; | 359 | u8 g = ((source_ptr) >> 6) & 0x1F; |
| 363 | u8 b = (source_ptr >> 1) & 0x1F; | 360 | u8 b = (source_ptr >> 1) & 0x1F; |
| @@ -367,7 +364,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 367 | 364 | ||
| 368 | case Regs::TextureFormat::RGB565: | 365 | case Regs::TextureFormat::RGB565: |
| 369 | { | 366 | { |
| 370 | const u16 source_ptr = *(const u16*)(source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2); | 367 | const u16 source_ptr = *(const u16*)(source + offset * 2); |
| 371 | u8 r = (source_ptr >> 11) & 0x1F; | 368 | u8 r = (source_ptr >> 11) & 0x1F; |
| 372 | u8 g = ((source_ptr) >> 5) & 0x3F; | 369 | u8 g = ((source_ptr) >> 5) & 0x3F; |
| 373 | u8 b = (source_ptr) & 0x1F; | 370 | u8 b = (source_ptr) & 0x1F; |
| @@ -376,7 +373,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 376 | 373 | ||
| 377 | case Regs::TextureFormat::RGBA4: | 374 | case Regs::TextureFormat::RGBA4: |
| 378 | { | 375 | { |
| 379 | const u8* source_ptr = source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2; | 376 | const u8* source_ptr = source + offset * 2; |
| 380 | u8 r = source_ptr[1] >> 4; | 377 | u8 r = source_ptr[1] >> 4; |
| 381 | u8 g = source_ptr[1] & 0xFF; | 378 | u8 g = source_ptr[1] & 0xFF; |
| 382 | u8 b = source_ptr[0] >> 4; | 379 | u8 b = source_ptr[0] >> 4; |
| @@ -390,7 +387,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 390 | 387 | ||
| 391 | case Regs::TextureFormat::IA8: | 388 | case Regs::TextureFormat::IA8: |
| 392 | { | 389 | { |
| 393 | const u8* source_ptr = source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2; | 390 | const u8* source_ptr = source + offset * 2; |
| 394 | 391 | ||
| 395 | // TODO: component order not verified | 392 | // TODO: component order not verified |
| 396 | 393 | ||
| @@ -404,13 +401,13 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 404 | 401 | ||
| 405 | case Regs::TextureFormat::I8: | 402 | case Regs::TextureFormat::I8: |
| 406 | { | 403 | { |
| 407 | const u8* source_ptr = source + coarse_x * block_height + coarse_y * info.stride + texel_index_within_tile; | 404 | const u8* source_ptr = source + offset; |
| 408 | return { *source_ptr, *source_ptr, *source_ptr, 255 }; | 405 | return { *source_ptr, *source_ptr, *source_ptr, 255 }; |
| 409 | } | 406 | } |
| 410 | 407 | ||
| 411 | case Regs::TextureFormat::A8: | 408 | case Regs::TextureFormat::A8: |
| 412 | { | 409 | { |
| 413 | const u8* source_ptr = source + coarse_x * block_height + coarse_y * info.stride + texel_index_within_tile; | 410 | const u8* source_ptr = source + offset; |
| 414 | 411 | ||
| 415 | if (disable_alpha) { | 412 | if (disable_alpha) { |
| 416 | return { *source_ptr, *source_ptr, *source_ptr, 255 }; | 413 | return { *source_ptr, *source_ptr, *source_ptr, 255 }; |
| @@ -421,7 +418,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 421 | 418 | ||
| 422 | case Regs::TextureFormat::IA4: | 419 | case Regs::TextureFormat::IA4: |
| 423 | { | 420 | { |
| 424 | const u8* source_ptr = source + coarse_x * block_height / 2 + coarse_y * info.stride + texel_index_within_tile / 2; | 421 | const u8* source_ptr = source + offset / 2; |
| 425 | 422 | ||
| 426 | // TODO: component order not verified | 423 | // TODO: component order not verified |
| 427 | 424 | ||
| @@ -440,7 +437,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 440 | 437 | ||
| 441 | case Regs::TextureFormat::A4: | 438 | case Regs::TextureFormat::A4: |
| 442 | { | 439 | { |
| 443 | const u8* source_ptr = source + coarse_x * block_height / 2 + coarse_y * info.stride + texel_index_within_tile / 2; | 440 | const u8* source_ptr = source + offset / 2; |
| 444 | 441 | ||
| 445 | // TODO: component order not verified | 442 | // TODO: component order not verified |
| 446 | 443 | ||
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 89d97e4e9..38bac748c 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h | |||
| @@ -757,6 +757,26 @@ struct float24 { | |||
| 757 | return float24::FromFloat32(ToFloat32() - flt.ToFloat32()); | 757 | return float24::FromFloat32(ToFloat32() - flt.ToFloat32()); |
| 758 | } | 758 | } |
| 759 | 759 | ||
| 760 | float24& operator *= (const float24& flt) { | ||
| 761 | value *= flt.ToFloat32(); | ||
| 762 | return *this; | ||
| 763 | } | ||
| 764 | |||
| 765 | float24& operator /= (const float24& flt) { | ||
| 766 | value /= flt.ToFloat32(); | ||
| 767 | return *this; | ||
| 768 | } | ||
| 769 | |||
| 770 | float24& operator += (const float24& flt) { | ||
| 771 | value += flt.ToFloat32(); | ||
| 772 | return *this; | ||
| 773 | } | ||
| 774 | |||
| 775 | float24& operator -= (const float24& flt) { | ||
| 776 | value -= flt.ToFloat32(); | ||
| 777 | return *this; | ||
| 778 | } | ||
| 779 | |||
| 760 | float24 operator - () const { | 780 | float24 operator - () const { |
| 761 | return float24::FromFloat32(-ToFloat32()); | 781 | return float24::FromFloat32(-ToFloat32()); |
| 762 | } | 782 | } |
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index df1f88c79..a80148872 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -106,6 +106,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 106 | int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; | 106 | int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; |
| 107 | int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; | 107 | int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; |
| 108 | 108 | ||
| 109 | auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); | ||
| 110 | |||
| 111 | auto textures = registers.GetTextures(); | ||
| 112 | auto tev_stages = registers.GetTevStages(); | ||
| 113 | |||
| 109 | // TODO: Not sure if looping through x first might be faster | 114 | // TODO: Not sure if looping through x first might be faster |
| 110 | for (u16 y = min_y; y < max_y; y += 0x10) { | 115 | for (u16 y = min_y; y < max_y; y += 0x10) { |
| 111 | for (u16 x = min_x; x < max_x; x += 0x10) { | 116 | for (u16 x = min_x; x < max_x; x += 0x10) { |
| @@ -129,6 +134,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 129 | if (w0 < 0 || w1 < 0 || w2 < 0) | 134 | if (w0 < 0 || w1 < 0 || w2 < 0) |
| 130 | continue; | 135 | continue; |
| 131 | 136 | ||
| 137 | auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), | ||
| 138 | float24::FromFloat32(static_cast<float>(w1)), | ||
| 139 | float24::FromFloat32(static_cast<float>(w2))); | ||
| 140 | float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates); | ||
| 141 | |||
| 132 | // Perspective correct attribute interpolation: | 142 | // Perspective correct attribute interpolation: |
| 133 | // Attribute values cannot be calculated by simple linear interpolation since | 143 | // Attribute values cannot be calculated by simple linear interpolation since |
| 134 | // they are not linear in screen space. For example, when interpolating a | 144 | // they are not linear in screen space. For example, when interpolating a |
| @@ -145,19 +155,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 145 | // | 155 | // |
| 146 | // The generalization to three vertices is straightforward in baricentric coordinates. | 156 | // The generalization to three vertices is straightforward in baricentric coordinates. |
| 147 | auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { | 157 | auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { |
| 148 | auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w, | 158 | auto attr_over_w = Math::MakeVec(attr0, attr1, attr2); |
| 149 | attr1 / v1.pos.w, | ||
| 150 | attr2 / v2.pos.w); | ||
| 151 | auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w, | ||
| 152 | float24::FromFloat32(1.f) / v1.pos.w, | ||
| 153 | float24::FromFloat32(1.f) / v2.pos.w); | ||
| 154 | auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), | ||
| 155 | float24::FromFloat32(static_cast<float>(w1)), | ||
| 156 | float24::FromFloat32(static_cast<float>(w2))); | ||
| 157 | |||
| 158 | float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); | 159 | float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); |
| 159 | float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); | 160 | return interpolated_attr_over_w * interpolated_w_inverse; |
| 160 | return interpolated_attr_over_w / interpolated_w_inverse; | ||
| 161 | }; | 161 | }; |
| 162 | 162 | ||
| 163 | Math::Vec4<u8> primary_color{ | 163 | Math::Vec4<u8> primary_color{ |
| @@ -177,7 +177,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 177 | 177 | ||
| 178 | Math::Vec4<u8> texture_color[3]{}; | 178 | Math::Vec4<u8> texture_color[3]{}; |
| 179 | for (int i = 0; i < 3; ++i) { | 179 | for (int i = 0; i < 3; ++i) { |
| 180 | auto texture = registers.GetTextures()[i]; | 180 | const auto& texture = textures[i]; |
| 181 | if (!texture.enabled) | 181 | if (!texture.enabled) |
| 182 | continue; | 182 | continue; |
| 183 | 183 | ||
| @@ -219,7 +219,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 219 | // with some basic arithmetic. Alpha combiners can be configured separately but work | 219 | // with some basic arithmetic. Alpha combiners can be configured separately but work |
| 220 | // analogously. | 220 | // analogously. |
| 221 | Math::Vec4<u8> combiner_output; | 221 | Math::Vec4<u8> combiner_output; |
| 222 | for (auto tev_stage : registers.GetTevStages()) { | 222 | for (const auto& tev_stage : tev_stages) { |
| 223 | using Source = Regs::TevStageConfig::Source; | 223 | using Source = Regs::TevStageConfig::Source; |
| 224 | using ColorModifier = Regs::TevStageConfig::ColorModifier; | 224 | using ColorModifier = Regs::TevStageConfig::ColorModifier; |
| 225 | using AlphaModifier = Regs::TevStageConfig::AlphaModifier; | 225 | using AlphaModifier = Regs::TevStageConfig::AlphaModifier; |
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index e31bc3bc7..bed5081a0 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp | |||
| @@ -469,6 +469,10 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) | |||
| 469 | 469 | ||
| 470 | // Setup output register table | 470 | // Setup output register table |
| 471 | OutputVertex ret; | 471 | OutputVertex ret; |
| 472 | // Zero output so that attributes which aren't output won't have denormals in them, which will | ||
| 473 | // slow us down later. | ||
| 474 | memset(&ret, 0, sizeof(ret)); | ||
| 475 | |||
| 472 | for (int i = 0; i < 7; ++i) { | 476 | for (int i = 0; i < 7; ++i) { |
| 473 | const auto& output_register_map = registers.vs_output_attributes[i]; | 477 | const auto& output_register_map = registers.vs_output_attributes[i]; |
| 474 | 478 | ||