diff options
| author | 2019-06-13 16:41:16 -0400 | |
|---|---|---|
| committer | 2019-06-20 21:38:34 -0300 | |
| commit | 7232a1ed16e46715c29d781fb143bdf799090bec (patch) | |
| tree | b3be910260ee9c0e3eb67fa007f81c9296a60d72 /src | |
| parent | texture_cache: Use siblings textures on Rebuild and fix possible error on bli... (diff) | |
| download | yuzu-7232a1ed16e46715c29d781fb143bdf799090bec.tar.gz yuzu-7232a1ed16e46715c29d781fb143bdf799090bec.tar.xz yuzu-7232a1ed16e46715c29d781fb143bdf799090bec.zip | |
decoders: correct block calculation
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/engine_upload.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/engine_upload.h | 6 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.h | 6 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 4 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 16 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 34 |
7 files changed, 41 insertions, 29 deletions
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index c776b9a56..d44ad0cd8 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp | |||
| @@ -39,7 +39,7 @@ void State::ProcessData(const u32 data, const bool is_last_call) { | |||
| 39 | UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0); | 39 | UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0); |
| 40 | UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0); | 40 | UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0); |
| 41 | const std::size_t dst_size = Tegra::Texture::CalculateSize( | 41 | const std::size_t dst_size = Tegra::Texture::CalculateSize( |
| 42 | true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); | 42 | true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 0); |
| 43 | tmp_buffer.resize(dst_size); | 43 | tmp_buffer.resize(dst_size); |
| 44 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); | 44 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); |
| 45 | Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, | 45 | Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, |
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index cb294aec3..462da419e 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h | |||
| @@ -39,15 +39,15 @@ struct Registers { | |||
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | u32 BlockWidth() const { | 41 | u32 BlockWidth() const { |
| 42 | return block_width; | 42 | return block_width.Value(); |
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | u32 BlockHeight() const { | 45 | u32 BlockHeight() const { |
| 46 | return block_height; | 46 | return block_height.Value(); |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | u32 BlockDepth() const { | 49 | u32 BlockDepth() const { |
| 50 | return block_depth; | 50 | return block_depth.Value(); |
| 51 | } | 51 | } |
| 52 | } dest; | 52 | } dest; |
| 53 | }; | 53 | }; |
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 0a4c7c5ad..05421d185 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -84,15 +84,15 @@ public: | |||
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | u32 BlockWidth() const { | 86 | u32 BlockWidth() const { |
| 87 | return block_width; | 87 | return block_width.Value(); |
| 88 | } | 88 | } |
| 89 | 89 | ||
| 90 | u32 BlockHeight() const { | 90 | u32 BlockHeight() const { |
| 91 | return block_height; | 91 | return block_height.Value(); |
| 92 | } | 92 | } |
| 93 | 93 | ||
| 94 | u32 BlockDepth() const { | 94 | u32 BlockDepth() const { |
| 95 | return block_depth; | 95 | return block_depth.Value(); |
| 96 | } | 96 | } |
| 97 | }; | 97 | }; |
| 98 | static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); | 98 | static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index afb9578d0..3a5dfef0c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -111,7 +111,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 111 | 111 | ||
| 112 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | 112 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); |
| 113 | } else { | 113 | } else { |
| 114 | ASSERT(regs.dst_params.BlockDepth() == 0); | 114 | ASSERT(regs.dst_params.BlockDepth() == 1); |
| 115 | 115 | ||
| 116 | const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; | 116 | const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; |
| 117 | 117 | ||
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 522fa97dc..17b015ca7 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -59,11 +59,11 @@ public: | |||
| 59 | }; | 59 | }; |
| 60 | 60 | ||
| 61 | u32 BlockHeight() const { | 61 | u32 BlockHeight() const { |
| 62 | return block_height; | 62 | return block_height.Value(); |
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | u32 BlockDepth() const { | 65 | u32 BlockDepth() const { |
| 66 | return block_depth; | 66 | return block_depth.Value(); |
| 67 | } | 67 | } |
| 68 | }; | 68 | }; |
| 69 | 69 | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 201c4d42e..7a9b4c27d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -335,6 +335,9 @@ private: | |||
| 335 | if (untopological == MatchTopologyResult::CompressUnmatch) { | 335 | if (untopological == MatchTopologyResult::CompressUnmatch) { |
| 336 | return RecycleStrategy::Flush; | 336 | return RecycleStrategy::Flush; |
| 337 | } | 337 | } |
| 338 | if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) { | ||
| 339 | return RecycleStrategy::Flush; | ||
| 340 | } | ||
| 338 | return RecycleStrategy::Ignore; | 341 | return RecycleStrategy::Ignore; |
| 339 | } | 342 | } |
| 340 | 343 | ||
| @@ -372,6 +375,11 @@ private: | |||
| 372 | } | 375 | } |
| 373 | return InitializeSurface(gpu_addr, params, preserve_contents); | 376 | return InitializeSurface(gpu_addr, params, preserve_contents); |
| 374 | } | 377 | } |
| 378 | case RecycleStrategy::BufferCopy: { | ||
| 379 | auto new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 380 | BufferCopy(overlaps[0], new_surface); | ||
| 381 | return {new_surface, new_surface->GetMainView()}; | ||
| 382 | } | ||
| 375 | default: { | 383 | default: { |
| 376 | UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); | 384 | UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); |
| 377 | return InitializeSurface(gpu_addr, params, do_load); | 385 | return InitializeSurface(gpu_addr, params, do_load); |
| @@ -520,6 +528,10 @@ private: | |||
| 520 | const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; | 528 | const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; |
| 521 | const auto cache_addr{ToCacheAddr(host_ptr)}; | 529 | const auto cache_addr{ToCacheAddr(host_ptr)}; |
| 522 | 530 | ||
| 531 | if (gpu_addr == 0x00000001682F0000ULL) { | ||
| 532 | LOG_CRITICAL(HW_GPU, "Here's the texture!"); | ||
| 533 | } | ||
| 534 | |||
| 523 | // Step 0: guarantee a valid surface | 535 | // Step 0: guarantee a valid surface |
| 524 | if (!cache_addr) { | 536 | if (!cache_addr) { |
| 525 | // Return a null surface if it's invalid | 537 | // Return a null surface if it's invalid |
| @@ -566,6 +578,10 @@ private: | |||
| 566 | return InitializeSurface(gpu_addr, params, preserve_contents); | 578 | return InitializeSurface(gpu_addr, params, preserve_contents); |
| 567 | } | 579 | } |
| 568 | 580 | ||
| 581 | if (!params.is_tiled) { | ||
| 582 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 583 | MatchTopologyResult::FullMatch); | ||
| 584 | } | ||
| 569 | // Step 3 | 585 | // Step 3 |
| 570 | // Now we need to figure the relationship between the texture and its overlaps | 586 | // Now we need to figure the relationship between the texture and its overlaps |
| 571 | // we do a topological test to ensure we can find some relationship. If it fails | 587 | // we do a topological test to ensure we can find some relationship. If it fails |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index f45fd175a..9a2f4198a 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -256,19 +256,18 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, | |||
| 256 | } | 256 | } |
| 257 | 257 | ||
| 258 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 258 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 259 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) { | 259 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height_bit) { |
| 260 | const u32 block_height_size{1U << block_height}; | 260 | const u32 block_height = 1U << block_height_bit; |
| 261 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / | 261 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / |
| 262 | gob_size_x}; | 262 | gob_size_x}; |
| 263 | for (u32 line = 0; line < subrect_height; ++line) { | 263 | for (u32 line = 0; line < subrect_height; ++line) { |
| 264 | const u32 gob_address_y = | 264 | const u32 gob_address_y = |
| 265 | (line / (gob_size_y * block_height_size)) * gob_size * block_height_size * | 265 | (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + |
| 266 | image_width_in_gobs + | 266 | ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size; |
| 267 | ((line % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; | ||
| 268 | const auto& table = legacy_swizzle_table[line % gob_size_y]; | 267 | const auto& table = legacy_swizzle_table[line % gob_size_y]; |
| 269 | for (u32 x = 0; x < subrect_width; ++x) { | 268 | for (u32 x = 0; x < subrect_width; ++x) { |
| 270 | const u32 gob_address = | 269 | const u32 gob_address = |
| 271 | gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height_size; | 270 | gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; |
| 272 | const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; | 271 | const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; |
| 273 | u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; | 272 | u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; |
| 274 | u8* dest_addr = swizzled_data + swizzled_offset; | 273 | u8* dest_addr = swizzled_data + swizzled_offset; |
| @@ -279,19 +278,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 | |||
| 279 | } | 278 | } |
| 280 | 279 | ||
| 281 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | 280 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, |
| 282 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, | 281 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height_bit, |
| 283 | u32 offset_x, u32 offset_y) { | 282 | u32 offset_x, u32 offset_y) { |
| 284 | const u32 block_height_size{1U << block_height}; | 283 | const u32 block_height = 1U << block_height_bit; |
| 285 | for (u32 line = 0; line < subrect_height; ++line) { | 284 | for (u32 line = 0; line < subrect_height; ++line) { |
| 286 | const u32 y2 = line + offset_y; | 285 | const u32 y2 = line + offset_y; |
| 287 | const u32 gob_address_y = | 286 | const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + |
| 288 | (y2 / (gob_size_y * block_height_size)) * gob_size * block_height_size + | 287 | ((y2 % (gob_size_y * block_height)) / gob_size_y) * gob_size; |
| 289 | ((y2 % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; | ||
| 290 | const auto& table = legacy_swizzle_table[y2 % gob_size_y]; | 288 | const auto& table = legacy_swizzle_table[y2 % gob_size_y]; |
| 291 | for (u32 x = 0; x < subrect_width; ++x) { | 289 | for (u32 x = 0; x < subrect_width; ++x) { |
| 292 | const u32 x2 = (x + offset_x) * bytes_per_pixel; | 290 | const u32 x2 = (x + offset_x) * bytes_per_pixel; |
| 293 | const u32 gob_address = | 291 | const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; |
| 294 | gob_address_y + (x2 / gob_size_x) * gob_size * block_height_size; | ||
| 295 | const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; | 292 | const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; |
| 296 | u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; | 293 | u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; |
| 297 | u8* source_addr = swizzled_data + swizzled_offset; | 294 | u8* source_addr = swizzled_data + swizzled_offset; |
| @@ -302,20 +299,19 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 302 | } | 299 | } |
| 303 | 300 | ||
| 304 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | 301 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, |
| 305 | const u32 block_height, const std::size_t copy_size, const u8* source_data, | 302 | const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, |
| 306 | u8* swizzle_data) { | 303 | u8* swizzle_data) { |
| 307 | const u32 block_height_size{1U << block_height}; | 304 | const u32 block_height = 1U << block_height_bit; |
| 308 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; | 305 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; |
| 309 | std::size_t count = 0; | 306 | std::size_t count = 0; |
| 310 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { | 307 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { |
| 311 | const std::size_t gob_address_y = | 308 | const std::size_t gob_address_y = |
| 312 | (y / (gob_size_y * block_height_size)) * gob_size * block_height_size * | 309 | (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + |
| 313 | image_width_in_gobs + | 310 | ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; |
| 314 | ((y % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; | ||
| 315 | const auto& table = legacy_swizzle_table[y % gob_size_y]; | 311 | const auto& table = legacy_swizzle_table[y % gob_size_y]; |
| 316 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { | 312 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { |
| 317 | const std::size_t gob_address = | 313 | const std::size_t gob_address = |
| 318 | gob_address_y + (x / gob_size_x) * gob_size * block_height_size; | 314 | gob_address_y + (x / gob_size_x) * gob_size * block_height; |
| 319 | const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; | 315 | const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; |
| 320 | const u8* source_line = source_data + count; | 316 | const u8* source_line = source_data + count; |
| 321 | u8* dest_addr = swizzle_data + swizzled_offset; | 317 | u8* dest_addr = swizzle_data + swizzled_offset; |