summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: bunnei, 2019-09-01 13:13:05 -0400
committer: GitHub, 2019-09-01 13:13:05 -0400
commit 50b5bb44a009dc899d0d7722e50d8976ce4e34f1 (patch)
tree e2f5723d977a86ca5c5dba40a649971dfb494531
parent: video_core: Silent miscellaneous warnings (#2820) (diff)
parent: MaxwellDMA: Fixes, corrections and relaxations. (diff)
download: yuzu-50b5bb44a009dc899d0d7722e50d8976ce4e34f1.tar.gz
yuzu-50b5bb44a009dc899d0d7722e50d8976ce4e34f1.tar.xz
yuzu-50b5bb44a009dc899d0d7722e50d8976ce4e34f1.zip
Merge pull request #2765 from FernandoS27/dma-fix
MaxwellDMA: Fixes, corrections and relaxations.
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp 42
-rw-r--r-- src/video_core/textures/decoders.cpp 14
-rw-r--r-- src/video_core/textures/decoders.h 3
3 files changed, 36 insertions, 23 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 351cc7cb3..ad8453c5f 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -5,6 +5,7 @@
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/settings.h"
8#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
9#include "video_core/engines/maxwell_dma.h" 10#include "video_core/engines/maxwell_dma.h"
10#include "video_core/memory_manager.h" 11#include "video_core/memory_manager.h"
@@ -82,13 +83,17 @@ void MaxwellDMA::HandleCopy() {
82 ASSERT(regs.exec.enable_2d == 1); 83 ASSERT(regs.exec.enable_2d == 1);
83 84
84 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 85 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
85 ASSERT(regs.src_params.size_z == 1); 86 ASSERT(regs.src_params.BlockDepth() == 0);
86 // If the input is tiled and the output is linear, deswizzle the input and copy it over. 87 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
87 const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; 88 const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
88 const std::size_t src_size = Texture::CalculateSize( 89 const std::size_t src_size = Texture::CalculateSize(
89 true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 90 true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
90 regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); 91 regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
91 92
93 const std::size_t src_layer_size = Texture::CalculateSize(
94 true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 1,
95 regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
96
92 const std::size_t dst_size = regs.dst_pitch * regs.y_count; 97 const std::size_t dst_size = regs.dst_pitch * regs.y_count;
93 98
94 if (read_buffer.size() < src_size) { 99 if (read_buffer.size() < src_size) {
@@ -102,23 +107,23 @@ void MaxwellDMA::HandleCopy() {
102 memory_manager.ReadBlock(source, read_buffer.data(), src_size); 107 memory_manager.ReadBlock(source, read_buffer.data(), src_size);
103 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); 108 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
104 109
105 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, 110 Texture::UnswizzleSubrect(
106 regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(), 111 regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
107 write_buffer.data(), regs.src_params.BlockHeight(), 112 read_buffer.data() + src_layer_size * regs.src_params.pos_z, write_buffer.data(),
108 regs.src_params.pos_x, regs.src_params.pos_y); 113 regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y);
109 114
110 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); 115 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
111 } else { 116 } else {
112 ASSERT(regs.dst_params.BlockDepth() == 0); 117 ASSERT(regs.dst_params.BlockDepth() == 0);
113 118
114 const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; 119 const u32 bytes_per_pixel = regs.src_pitch / regs.x_count;
115 120
116 const std::size_t dst_size = Texture::CalculateSize( 121 const std::size_t dst_size = Texture::CalculateSize(
117 true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 122 true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
118 regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); 123 regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
119 124
120 const std::size_t dst_layer_size = Texture::CalculateSize( 125 const std::size_t dst_layer_size = Texture::CalculateSize(
121 true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, 126 true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
122 regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); 127 regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
123 128
124 const std::size_t src_size = regs.src_pitch * regs.y_count; 129 const std::size_t src_size = regs.src_pitch * regs.y_count;
@@ -131,14 +136,19 @@ void MaxwellDMA::HandleCopy() {
131 write_buffer.resize(dst_size); 136 write_buffer.resize(dst_size);
132 } 137 }
133 138
134 memory_manager.ReadBlock(source, read_buffer.data(), src_size); 139 if (Settings::values.use_accurate_gpu_emulation) {
135 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); 140 memory_manager.ReadBlock(source, read_buffer.data(), src_size);
141 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
142 } else {
143 memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
144 memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
145 }
136 146
137 // If the input is linear and the output is tiled, swizzle the input and copy it over. 147 // If the input is linear and the output is tiled, swizzle the input and copy it over.
138 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, 148 Texture::SwizzleSubrect(
139 src_bytes_per_pixel, 149 regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, bytes_per_pixel,
140 write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, 150 write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, read_buffer.data(),
141 read_buffer.data(), regs.dst_params.BlockHeight()); 151 regs.dst_params.BlockHeight(), regs.dst_params.pos_x, regs.dst_params.pos_y);
142 152
143 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); 153 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
144 } 154 }
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 7e8295944..7df5f1452 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -257,19 +257,21 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y,
257 257
258void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 258void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
259 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, 259 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
260 u32 block_height_bit) { 260 u32 block_height_bit, u32 offset_x, u32 offset_y) {
261 const u32 block_height = 1U << block_height_bit; 261 const u32 block_height = 1U << block_height_bit;
262 const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / 262 const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
263 gob_size_x}; 263 gob_size_x};
264 for (u32 line = 0; line < subrect_height; ++line) { 264 for (u32 line = 0; line < subrect_height; ++line) {
265 const u32 dst_y = line + offset_y;
265 const u32 gob_address_y = 266 const u32 gob_address_y =
266 (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + 267 (dst_y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
267 ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size; 268 ((dst_y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
268 const auto& table = legacy_swizzle_table[line % gob_size_y]; 269 const auto& table = legacy_swizzle_table[dst_y % gob_size_y];
269 for (u32 x = 0; x < subrect_width; ++x) { 270 for (u32 x = 0; x < subrect_width; ++x) {
271 const u32 dst_x = x + offset_x;
270 const u32 gob_address = 272 const u32 gob_address =
271 gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; 273 gob_address_y + (dst_x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
272 const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; 274 const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % gob_size_x];
273 u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; 275 u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
274 u8* dest_addr = swizzled_data + swizzled_offset; 276 u8* dest_addr = swizzled_data + swizzled_offset;
275 277
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index eaec9b5a5..f1e3952bc 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -44,7 +44,8 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
44 44
45/// Copies an untiled subrectangle into a tiled surface. 45/// Copies an untiled subrectangle into a tiled surface.
46void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 46void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
47 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height); 47 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
48 u32 offset_x, u32 offset_y);
48 49
49/// Copies a tiled subrectangle into a linear surface. 50/// Copies a tiled subrectangle into a linear surface.
50void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 51void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,