summary | refs | log | tree | commit | diff
path: root/src/video_core/rasterizer.cpp
diff options
context:
space:
mode:
author: darkf, 2014-12-29 19:47:41 -0800
committer: darkf, 2014-12-29 19:47:41 -0800
commit: 8ba9ac0f74abb0408a26207a76a0c1808bad8de0 (patch)
tree: f1c7c3393fa726435b5b90bf335567c93e528ef1 /src/video_core/rasterizer.cpp
parent: Add comment regarding __WIN32__ in SkyEye code (diff)
parent: Merge pull request #367 from bunnei/usat_ssat (diff)
download: yuzu-8ba9ac0f74abb0408a26207a76a0c1808bad8de0.tar.gz
yuzu-8ba9ac0f74abb0408a26207a76a0c1808bad8de0.tar.xz
yuzu-8ba9ac0f74abb0408a26207a76a0c1808bad8de0.zip
Fix merge conflicts
Diffstat (limited to 'src/video_core/rasterizer.cpp')
-rw-r--r-- src/video_core/rasterizer.cpp 202
1 files changed, 117 insertions, 85 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index a35f0c0d8..a80148872 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -1,5 +1,5 @@
1// Copyright 2014 Citra Emulator Project 1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
@@ -18,7 +18,7 @@ namespace Pica {
18namespace Rasterizer { 18namespace Rasterizer {
19 19
20static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { 20static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
21 u32* color_buffer = (u32*)Memory::GetPointer(registers.framebuffer.GetColorBufferAddress()); 21 u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress())));
22 u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); 22 u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();
23 23
24 // Assuming RGBA8 format until actual framebuffer format handling is implemented 24 // Assuming RGBA8 format until actual framebuffer format handling is implemented
@@ -26,14 +26,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
26} 26}
27 27
28static u32 GetDepth(int x, int y) { 28static u32 GetDepth(int x, int y) {
29 u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress()); 29 u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
30 30
31 // Assuming 16-bit depth buffer format until actual format handling is implemented 31 // Assuming 16-bit depth buffer format until actual format handling is implemented
32 return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); 32 return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
33} 33}
34 34
35static void SetDepth(int x, int y, u16 value) { 35static void SetDepth(int x, int y, u16 value) {
36 u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress()); 36 u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
37 37
38 // Assuming 16-bit depth buffer format until actual format handling is implemented 38 // Assuming 16-bit depth buffer format until actual format handling is implemented
39 *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; 39 *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
@@ -106,6 +106,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
106 int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; 106 int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0;
107 int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; 107 int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0;
108 108
109 auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
110
111 auto textures = registers.GetTextures();
112 auto tev_stages = registers.GetTevStages();
113
109 // TODO: Not sure if looping through x first might be faster 114 // TODO: Not sure if looping through x first might be faster
110 for (u16 y = min_y; y < max_y; y += 0x10) { 115 for (u16 y = min_y; y < max_y; y += 0x10) {
111 for (u16 x = min_x; x < max_x; x += 0x10) { 116 for (u16 x = min_x; x < max_x; x += 0x10) {
@@ -129,6 +134,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
129 if (w0 < 0 || w1 < 0 || w2 < 0) 134 if (w0 < 0 || w1 < 0 || w2 < 0)
130 continue; 135 continue;
131 136
137 auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
138 float24::FromFloat32(static_cast<float>(w1)),
139 float24::FromFloat32(static_cast<float>(w2)));
140 float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates);
141
132 // Perspective correct attribute interpolation: 142 // Perspective correct attribute interpolation:
133 // Attribute values cannot be calculated by simple linear interpolation since 143 // Attribute values cannot be calculated by simple linear interpolation since
134 // they are not linear in screen space. For example, when interpolating a 144 // they are not linear in screen space. For example, when interpolating a
@@ -145,19 +155,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
145 // 155 //
146 // The generalization to three vertices is straightforward in baricentric coordinates. 156 // The generalization to three vertices is straightforward in baricentric coordinates.
147 auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { 157 auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {
148 auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w, 158 auto attr_over_w = Math::MakeVec(attr0, attr1, attr2);
149 attr1 / v1.pos.w,
150 attr2 / v2.pos.w);
151 auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w,
152 float24::FromFloat32(1.f) / v1.pos.w,
153 float24::FromFloat32(1.f) / v2.pos.w);
154 auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
155 float24::FromFloat32(static_cast<float>(w1)),
156 float24::FromFloat32(static_cast<float>(w2)));
157
158 float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); 159 float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates);
159 float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); 160 return interpolated_attr_over_w * interpolated_w_inverse;
160 return interpolated_attr_over_w / interpolated_w_inverse;
161 }; 161 };
162 162
163 Math::Vec4<u8> primary_color{ 163 Math::Vec4<u8> primary_color{
@@ -167,60 +167,48 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
167 (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) 167 (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)
168 }; 168 };
169 169
170 Math::Vec4<u8> texture_color{}; 170 Math::Vec2<float24> uv[3];
171 float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); 171 uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
172 float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); 172 uv[0].v() = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
173 if (registers.texturing_enable) { 173 uv[1].u() = GetInterpolatedAttribute(v0.tc1.u(), v1.tc1.u(), v2.tc1.u());
174 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each 174 uv[1].v() = GetInterpolatedAttribute(v0.tc1.v(), v1.tc1.v(), v2.tc1.v());
175 // of which is composed of four 2x2 subtiles each of which is composed of four texels. 175 uv[2].u() = GetInterpolatedAttribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u());
176 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. 176 uv[2].v() = GetInterpolatedAttribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v());
177 // texels are laid out in a 2x2 subtile like this: 177
178 // 2 3 178 Math::Vec4<u8> texture_color[3]{};
179 // 0 1 179 for (int i = 0; i < 3; ++i) {
180 // 180 const auto& texture = textures[i];
181 // The full 8x8 tile has the texels arranged like this: 181 if (!texture.enabled)
182 // 182 continue;
183 // 42 43 46 47 58 59 62 63 183
184 // 40 41 44 45 56 57 60 61 184 _dbg_assert_(HW_GPU, 0 != texture.config.address);
185 // 34 35 38 39 50 51 54 55 185
186 // 32 33 36 37 48 49 52 53 186 int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32();
187 // 10 11 14 15 26 27 30 31 187 int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32();
188 // 08 09 12 13 24 25 28 29 188 auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) {
189 // 02 03 06 07 18 19 22 23 189 switch (mode) {
190 // 00 01 04 05 16 17 20 21 190 case Regs::TextureConfig::ClampToEdge:
191 191 val = std::max(val, 0);
192 // TODO: This is currently hardcoded for RGB8 192 val = std::min(val, (int)size - 1);
193 u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress()); 193 return val;
194 194
195 // TODO(neobrain): Not sure if this swizzling pattern is used for all textures. 195 case Regs::TextureConfig::Repeat:
196 // To be flexible in case different but similar patterns are used, we keep this 196 return (int)(((unsigned)val) % size);
197 // somewhat inefficient code around for now. 197
198 int s = (int)(u * float24::FromFloat32(static_cast<float>(registers.texture0.width))).ToFloat32(); 198 default:
199 int t = (int)(v * float24::FromFloat32(static_cast<float>(registers.texture0.height))).ToFloat32(); 199 LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x\n", (int)mode);
200 int texel_index_within_tile = 0; 200 _dbg_assert_(HW_GPU, 0);
201 for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { 201 return 0;
202 int sub_tile_width = 1 << block_size_index; 202 }
203 int sub_tile_height = 1 << block_size_index; 203 };
204 204 s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width);
205 int sub_tile_index = (s & sub_tile_width) << block_size_index; 205 t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height);
206 sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index); 206
207 texel_index_within_tile += sub_tile_index; 207 u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress()));
208 } 208 auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
209 209
210 const int block_width = 8; 210 texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info);
211 const int block_height = 8; 211 DebugUtils::DumpTexture(texture.config, texture_data);
212
213 int coarse_s = (s / block_width) * block_width;
214 int coarse_t = (t / block_height) * block_height;
215
216 const int row_stride = registers.texture0.width * 3;
217 u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3;
218 texture_color.r() = source_ptr[2];
219 texture_color.g() = source_ptr[1];
220 texture_color.b() = source_ptr[0];
221 texture_color.a() = 0xFF;
222
223 DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data);
224 } 212 }
225 213
226 // Texture environment - consists of 6 stages of color and alpha combining. 214 // Texture environment - consists of 6 stages of color and alpha combining.
@@ -231,28 +219,35 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
231 // with some basic arithmetic. Alpha combiners can be configured separately but work 219 // with some basic arithmetic. Alpha combiners can be configured separately but work
232 // analogously. 220 // analogously.
233 Math::Vec4<u8> combiner_output; 221 Math::Vec4<u8> combiner_output;
234 for (auto tev_stage : registers.GetTevStages()) { 222 for (const auto& tev_stage : tev_stages) {
235 using Source = Regs::TevStageConfig::Source; 223 using Source = Regs::TevStageConfig::Source;
236 using ColorModifier = Regs::TevStageConfig::ColorModifier; 224 using ColorModifier = Regs::TevStageConfig::ColorModifier;
237 using AlphaModifier = Regs::TevStageConfig::AlphaModifier; 225 using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
238 using Operation = Regs::TevStageConfig::Operation; 226 using Operation = Regs::TevStageConfig::Operation;
239 227
240 auto GetColorSource = [&](Source source) -> Math::Vec3<u8> { 228 auto GetColorSource = [&](Source source) -> Math::Vec4<u8> {
241 switch (source) { 229 switch (source) {
242 case Source::PrimaryColor: 230 case Source::PrimaryColor:
243 return primary_color.rgb(); 231 return primary_color;
244 232
245 case Source::Texture0: 233 case Source::Texture0:
246 return texture_color.rgb(); 234 return texture_color[0];
235
236 case Source::Texture1:
237 return texture_color[1];
238
239 case Source::Texture2:
240 return texture_color[2];
247 241
248 case Source::Constant: 242 case Source::Constant:
249 return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b}; 243 return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a};
250 244
251 case Source::Previous: 245 case Source::Previous:
252 return combiner_output.rgb(); 246 return combiner_output;
253 247
254 default: 248 default:
255 ERROR_LOG(GPU, "Unknown color combiner source %d\n", (int)source); 249 LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source);
250 _dbg_assert_(HW_GPU, 0);
256 return {}; 251 return {};
257 } 252 }
258 }; 253 };
@@ -263,7 +258,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
263 return primary_color.a(); 258 return primary_color.a();
264 259
265 case Source::Texture0: 260 case Source::Texture0:
266 return texture_color.a(); 261 return texture_color[0].a();
262
263 case Source::Texture1:
264 return texture_color[1].a();
265
266 case Source::Texture2:
267 return texture_color[2].a();
267 268
268 case Source::Constant: 269 case Source::Constant:
269 return tev_stage.const_a; 270 return tev_stage.const_a;
@@ -272,18 +273,24 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
272 return combiner_output.a(); 273 return combiner_output.a();
273 274
274 default: 275 default:
275 ERROR_LOG(GPU, "Unknown alpha combiner source %d\n", (int)source); 276 LOG_ERROR(HW_GPU, "Unknown alpha combiner source %d\n", (int)source);
277 _dbg_assert_(HW_GPU, 0);
276 return 0; 278 return 0;
277 } 279 }
278 }; 280 };
279 281
280 auto GetColorModifier = [](ColorModifier factor, const Math::Vec3<u8>& values) -> Math::Vec3<u8> { 282 auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
281 switch (factor) 283 switch (factor)
282 { 284 {
283 case ColorModifier::SourceColor: 285 case ColorModifier::SourceColor:
284 return values; 286 return values.rgb();
287
288 case ColorModifier::SourceAlpha:
289 return { values.a(), values.a(), values.a() };
290
285 default: 291 default:
286 ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor); 292 LOG_ERROR(HW_GPU, "Unknown color factor %d\n", (int)factor);
293 _dbg_assert_(HW_GPU, 0);
287 return {}; 294 return {};
288 } 295 }
289 }; 296 };
@@ -292,8 +299,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
292 switch (factor) { 299 switch (factor) {
293 case AlphaModifier::SourceAlpha: 300 case AlphaModifier::SourceAlpha:
294 return value; 301 return value;
302
303 case AlphaModifier::OneMinusSourceAlpha:
304 return 255 - value;
305
295 default: 306 default:
296 ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor); 307 LOG_ERROR(HW_GPU, "Unknown alpha factor %d\n", (int)factor);
308 _dbg_assert_(HW_GPU, 0);
297 return 0; 309 return 0;
298 } 310 }
299 }; 311 };
@@ -306,8 +318,21 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
306 case Operation::Modulate: 318 case Operation::Modulate:
307 return ((input[0] * input[1]) / 255).Cast<u8>(); 319 return ((input[0] * input[1]) / 255).Cast<u8>();
308 320
321 case Operation::Add:
322 {
323 auto result = input[0] + input[1];
324 result.r() = std::min(255, result.r());
325 result.g() = std::min(255, result.g());
326 result.b() = std::min(255, result.b());
327 return result.Cast<u8>();
328 }
329
330 case Operation::Lerp:
331 return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
332
309 default: 333 default:
310 ERROR_LOG(GPU, "Unknown color combiner operation %d\n", (int)op); 334 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
335 _dbg_assert_(HW_GPU, 0);
311 return {}; 336 return {};
312 } 337 }
313 }; 338 };
@@ -320,8 +345,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
320 case Operation::Modulate: 345 case Operation::Modulate:
321 return input[0] * input[1] / 255; 346 return input[0] * input[1] / 255;
322 347
348 case Operation::Add:
349 return std::min(255, input[0] + input[1]);
350
351 case Operation::Lerp:
352 return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
353
323 default: 354 default:
324 ERROR_LOG(GPU, "Unknown alpha combiner operation %d\n", (int)op); 355 LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op);
356 _dbg_assert_(HW_GPU, 0);
325 return 0; 357 return 0;
326 } 358 }
327 }; 359 };