summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author FernandoS27 2018-10-13 20:42:38 -0400
committer FernandoS27 2018-10-13 20:58:00 -0400
commit 331ce2942c4906945b4d42f1ebe8b9b6e453c6ee (patch)
tree 0745b67b973803fcdd7e09494c2845b20f485be8 /src
parent Fix a Crash on Zelda BotW and Splatoon 2, and simplified LoadGLBuffer (diff)
download yuzu-331ce2942c4906945b4d42f1ebe8b9b6e453c6ee.tar.gz
yuzu-331ce2942c4906945b4d42f1ebe8b9b6e453c6ee.tar.xz
yuzu-331ce2942c4906945b4d42f1ebe8b9b6e453c6ee.zip
Shorten the implementation of 3D swizzle to only 3 functions
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/textures/decoders.cpp 97
1 files changed, 27 insertions, 70 deletions
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 5dab47886..0fe91622e 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -45,11 +45,11 @@ constexpr auto fast_swizzle_table = SwizzleTable<8, 4, 16>();
45 * Instead of going gob by gob, we map the coordinates inside a block and manage from 45 * Instead of going gob by gob, we map the coordinates inside a block and manage from
46 * those. Block_Width is assumed to be 1. 46 * those. Block_Width is assumed to be 1.
47 */ 47 */
48void Precise3DProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle, 48void PreciseProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
49 const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, 49 const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
50 const u32 y_end, const u32 z_end, const u32 tile_offset, 50 const u32 y_end, const u32 z_end, const u32 tile_offset,
51 const u32 xy_block_size, const u32 layer_z, const u32 stride_x, 51 const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
52 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { 52 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
53 std::array<u8*, 2> data_ptrs; 53 std::array<u8*, 2> data_ptrs;
54 u32 z_address = tile_offset; 54 u32 z_address = tile_offset;
55 const u32 gob_size_x = 64; 55 const u32 gob_size_x = 64;
@@ -77,64 +77,15 @@ void Precise3DProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool un
77} 77}
78 78
79/** 79/**
80 * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue.
81 * The body of this function takes care of splitting the swizzled texture into blocks,
82 * and managing the extents of it. Once all the parameters of a single block are obtained,
83 * the function calls '3DProcessBlock' to process that particular Block.
84 *
85 * Documentation for the memory layout and decoding can be found at:
86 * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces
87 */
88void Precise3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
89 const u32 width, const u32 height, const u32 depth,
90 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel,
91 const u32 block_height, const u32 block_depth) {
92 auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
93 const u32 stride_x = width * out_bytes_per_pixel;
94 const u32 layer_z = height * stride_x;
95 const u32 gob_x_bytes = 64;
96 const u32 gob_elements_x = gob_x_bytes / bytes_per_pixel;
97 const u32 gob_elements_y = 8;
98 const u32 gob_elements_z = 1;
99 const u32 block_x_elements = gob_elements_x;
100 const u32 block_y_elements = gob_elements_y * block_height;
101 const u32 block_z_elements = gob_elements_z * block_depth;
102 const u32 blocks_on_x = div_ceil(width, block_x_elements);
103 const u32 blocks_on_y = div_ceil(height, block_y_elements);
104 const u32 blocks_on_z = div_ceil(depth, block_z_elements);
105 const u32 blocks = blocks_on_x * blocks_on_y * blocks_on_z;
106 const u32 gob_size = gob_x_bytes * gob_elements_y * gob_elements_z;
107 const u32 xy_block_size = gob_size * block_height;
108 const u32 block_size = xy_block_size * block_depth;
109 u32 tile_offset = 0;
110 for (u32 zb = 0; zb < blocks_on_z; zb++) {
111 const u32 z_start = zb * block_z_elements;
112 const u32 z_end = std::min(depth, z_start + block_z_elements);
113 for (u32 yb = 0; yb < blocks_on_y; yb++) {
114 const u32 y_start = yb * block_y_elements;
115 const u32 y_end = std::min(height, y_start + block_y_elements);
116 for (u32 xb = 0; xb < blocks_on_x; xb++) {
117 const u32 x_start = xb * block_x_elements;
118 const u32 x_end = std::min(width, x_start + block_x_elements);
119 Precise3DProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
120 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
121 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
122 tile_offset += block_size;
123 }
124 }
125 }
126}
127
128/**
129 * This function manages ALL the GOBs(Group of Bytes) Inside a single block. 80 * This function manages ALL the GOBs(Group of Bytes) Inside a single block.
130 * Instead of going gob by gob, we map the coordinates inside a block and manage from 81 * Instead of going gob by gob, we map the coordinates inside a block and manage from
131 * those. Block_Width is assumed to be 1. 82 * those. Block_Width is assumed to be 1.
132 */ 83 */
133void Fast3DProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle, 84void FastProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
134 const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, 85 const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
135 const u32 y_end, const u32 z_end, const u32 tile_offset, 86 const u32 y_end, const u32 z_end, const u32 tile_offset,
136 const u32 xy_block_size, const u32 layer_z, const u32 stride_x, 87 const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
137 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { 88 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
138 std::array<u8*, 2> data_ptrs; 89 std::array<u8*, 2> data_ptrs;
139 u32 z_address = tile_offset; 90 u32 z_address = tile_offset;
140 const u32 x_startb = x_start * bytes_per_pixel; 91 const u32 x_startb = x_start * bytes_per_pixel;
@@ -169,15 +120,15 @@ void Fast3DProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswi
169 * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue. 120 * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue.
170 * The body of this function takes care of splitting the swizzled texture into blocks, 121 * The body of this function takes care of splitting the swizzled texture into blocks,
171 * and managing the extents of it. Once all the parameters of a single block are obtained, 122 * and managing the extents of it. Once all the parameters of a single block are obtained,
172 * the function calls '3DProcessBlock' to process that particular Block. 123 * the function calls 'ProcessBlock' to process that particular Block.
173 * 124 *
174 * Documentation for the memory layout and decoding can be found at: 125 * Documentation for the memory layout and decoding can be found at:
175 * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces 126 * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces
176 */ 127 */
177void Fast3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle, 128template <bool fast>
178 const u32 width, const u32 height, const u32 depth, 129void SwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle, const u32 width,
179 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel, 130 const u32 height, const u32 depth, const u32 bytes_per_pixel,
180 const u32 block_height, const u32 block_depth) { 131 const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth) {
181 auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; 132 auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
182 const u32 stride_x = width * out_bytes_per_pixel; 133 const u32 stride_x = width * out_bytes_per_pixel;
183 const u32 layer_z = height * stride_x; 134 const u32 layer_z = height * stride_x;
@@ -205,9 +156,15 @@ void Fast3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswi
205 for (u32 xb = 0; xb < blocks_on_x; xb++) { 156 for (u32 xb = 0; xb < blocks_on_x; xb++) {
206 const u32 x_start = xb * block_x_elements; 157 const u32 x_start = xb * block_x_elements;
207 const u32 x_end = std::min(width, x_start + block_x_elements); 158 const u32 x_end = std::min(width, x_start + block_x_elements);
208 Fast3DProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, 159 if (fast) {
209 z_start, x_end, y_end, z_end, tile_offset, xy_block_size, 160 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
210 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); 161 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
162 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
163 } else {
164 PreciseProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
165 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
166 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
167 }
211 tile_offset += block_size; 168 tile_offset += block_size;
212 } 169 }
213 } 170 }
@@ -218,11 +175,11 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
218 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, 175 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
219 bool unswizzle, u32 block_height, u32 block_depth) { 176 bool unswizzle, u32 block_height, u32 block_depth) {
220 if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) { 177 if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) {
221 Fast3DSwizzledData(swizzled_data, unswizzled_data, unswizzle, width, height, depth, 178 SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
222 bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth); 179 bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth);
223 } else { 180 } else {
224 Precise3DSwizzledData(swizzled_data, unswizzled_data, unswizzle, width, height, depth, 181 SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
225 bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth); 182 bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth);
226 } 183 }
227} 184}
228 185