summary | refs | log | tree | commit | diff
path: root/src/video_core/rasterizer.cpp
diff options
context:
space:
mode:
author: bunnei, 2014-08-25 16:12:10 -0400
committer: bunnei, 2014-08-25 16:12:10 -0400
commit: 97fd8fc38d4f9c288779cddb06538860124c6263 (patch)
tree: bc99e0fceaae732f9c8d4831fcdb8f661b49ccb8 /src/video_core/rasterizer.cpp
parent: Merge pull request #75 from xsacha/qt5 (diff)
parent: Pica/Rasterizer: Clarify a TODO. (diff)
download: yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.tar.gz
          yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.tar.xz
          yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.zip
Merge pull request #50 from neobrain/pica
Further work on Pica emulation
Diffstat (limited to 'src/video_core/rasterizer.cpp')
-rw-r--r-- src/video_core/rasterizer.cpp | 222
1 files changed, 204 insertions, 18 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index a7c1bab3e..cdfdb6215 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -11,6 +11,8 @@
11#include "rasterizer.h" 11#include "rasterizer.h"
12#include "vertex_shader.h" 12#include "vertex_shader.h"
13 13
14#include "debug_utils/debug_utils.h"
15
14namespace Pica { 16namespace Pica {
15 17
16namespace Rasterizer { 18namespace Rasterizer {
@@ -78,10 +80,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
78 u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); 80 u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
79 u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); 81 u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
80 82
81 min_x = min_x & Fix12P4::IntMask(); 83 min_x &= Fix12P4::IntMask();
82 min_y = min_y & Fix12P4::IntMask(); 84 min_y &= Fix12P4::IntMask();
83 max_x = (max_x + Fix12P4::FracMask()) & Fix12P4::IntMask(); 85 max_x = ((max_x + Fix12P4::FracMask()) & Fix12P4::IntMask());
84 max_y = (max_y + Fix12P4::FracMask()) & Fix12P4::IntMask(); 86 max_y = ((max_y + Fix12P4::FracMask()) & Fix12P4::IntMask());
85 87
86 // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not 88 // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not
87 // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias 89 // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias
@@ -112,10 +114,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
112 auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1, 114 auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,
113 const Math::Vec2<Fix12P4>& vtx2, 115 const Math::Vec2<Fix12P4>& vtx2,
114 const Math::Vec2<Fix12P4>& vtx3) { 116 const Math::Vec2<Fix12P4>& vtx3) {
115 const auto vec1 = (vtx2.Cast<int>() - vtx1.Cast<int>()).Append(0); 117 const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
116 const auto vec2 = (vtx3.Cast<int>() - vtx1.Cast<int>()).Append(0); 118 const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
117 // TODO: There is a very small chance this will overflow for sizeof(int) == 4 119 // TODO: There is a very small chance this will overflow for sizeof(int) == 4
118 return Cross(vec1, vec2).z; 120 return Math::Cross(vec1, vec2).z;
119 }; 121 };
120 122
121 int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); 123 int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
@@ -143,15 +145,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
143 // 145 //
144 // The generalization to three vertices is straightforward in baricentric coordinates. 146 // The generalization to three vertices is straightforward in baricentric coordinates.
145 auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { 147 auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {
146 auto attr_over_w = Math::MakeVec3(attr0 / v0.pos.w, 148 auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w,
147 attr1 / v1.pos.w, 149 attr1 / v1.pos.w,
148 attr2 / v2.pos.w); 150 attr2 / v2.pos.w);
149 auto w_inverse = Math::MakeVec3(float24::FromFloat32(1.f) / v0.pos.w, 151 auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w,
150 float24::FromFloat32(1.f) / v1.pos.w, 152 float24::FromFloat32(1.f) / v1.pos.w,
151 float24::FromFloat32(1.f) / v2.pos.w); 153 float24::FromFloat32(1.f) / v2.pos.w);
152 auto baricentric_coordinates = Math::MakeVec3(float24::FromFloat32(w0), 154 auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(w0),
153 float24::FromFloat32(w1), 155 float24::FromFloat32(w1),
154 float24::FromFloat32(w2)); 156 float24::FromFloat32(w2));
155 157
156 float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); 158 float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates);
157 float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); 159 float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates);
@@ -165,12 +167,196 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
165 (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) 167 (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)
166 }; 168 };
167 169
170 Math::Vec4<u8> texture_color{};
171 float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
172 float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
173 if (registers.texturing_enable) {
174 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
175 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
176 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
177 // texels are laid out in a 2x2 subtile like this:
178 // 2 3
179 // 0 1
180 //
181 // The full 8x8 tile has the texels arranged like this:
182 //
183 // 42 43 46 47 58 59 62 63
184 // 40 41 44 45 56 57 60 61
185 // 34 35 38 39 50 51 54 55
186 // 32 33 36 37 48 49 52 53
187 // 10 11 14 15 26 27 30 31
188 // 08 09 12 13 24 25 28 29
189 // 02 03 06 07 18 19 22 23
190 // 00 01 04 05 16 17 20 21
191
192 // TODO: This is currently hardcoded for RGB8
193 u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress());
194
195 // TODO(neobrain): Not sure if this swizzling pattern is used for all textures.
196 // To be flexible in case different but similar patterns are used, we keep this
197 // somewhat inefficient code around for now.
198 int s = (int)(u * float24::FromFloat32(registers.texture0.width)).ToFloat32();
199 int t = (int)(v * float24::FromFloat32(registers.texture0.height)).ToFloat32();
200 int texel_index_within_tile = 0;
201 for (int block_size_index = 0; block_size_index < 3; ++block_size_index) {
202 int sub_tile_width = 1 << block_size_index;
203 int sub_tile_height = 1 << block_size_index;
204
205 int sub_tile_index = (s & sub_tile_width) << block_size_index;
206 sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index);
207 texel_index_within_tile += sub_tile_index;
208 }
209
210 const int block_width = 8;
211 const int block_height = 8;
212
213 int coarse_s = (s / block_width) * block_width;
214 int coarse_t = (t / block_height) * block_height;
215
216 const int row_stride = registers.texture0.width * 3;
217 u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3;
218 texture_color.r() = source_ptr[2];
219 texture_color.g() = source_ptr[1];
220 texture_color.b() = source_ptr[0];
221 texture_color.a() = 0xFF;
222
223 DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data);
224 }
225
226 // Texture environment - consists of 6 stages of color and alpha combining.
227 //
228 // Color combiners take three input color values from some source (e.g. interpolated
229 // vertex color, texture color, previous stage, etc), perform some very simple
230 // operations on each of them (e.g. inversion) and then calculate the output color
231 // with some basic arithmetic. Alpha combiners can be configured separately but work
232 // analogously.
233 Math::Vec4<u8> combiner_output;
234 for (auto tev_stage : registers.GetTevStages()) {
235 using Source = Regs::TevStageConfig::Source;
236 using ColorModifier = Regs::TevStageConfig::ColorModifier;
237 using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
238 using Operation = Regs::TevStageConfig::Operation;
239
240 auto GetColorSource = [&](Source source) -> Math::Vec3<u8> {
241 switch (source) {
242 case Source::PrimaryColor:
243 return primary_color.rgb();
244
245 case Source::Texture0:
246 return texture_color.rgb();
247
248 case Source::Constant:
249 return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b};
250
251 case Source::Previous:
252 return combiner_output.rgb();
253
254 default:
255 ERROR_LOG(GPU, "Unknown color combiner source %d\n", (int)source);
256 return {};
257 }
258 };
259
260 auto GetAlphaSource = [&](Source source) -> u8 {
261 switch (source) {
262 case Source::PrimaryColor:
263 return primary_color.a();
264
265 case Source::Texture0:
266 return texture_color.a();
267
268 case Source::Constant:
269 return tev_stage.const_a;
270
271 case Source::Previous:
272 return combiner_output.a();
273
274 default:
275 ERROR_LOG(GPU, "Unknown alpha combiner source %d\n", (int)source);
276 return 0;
277 }
278 };
279
280 auto GetColorModifier = [](ColorModifier factor, const Math::Vec3<u8>& values) -> Math::Vec3<u8> {
281 switch (factor)
282 {
283 case ColorModifier::SourceColor:
284 return values;
285 default:
286 ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);
287 return {};
288 }
289 };
290
291 auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 {
292 switch (factor) {
293 case AlphaModifier::SourceAlpha:
294 return value;
295 default:
296 ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);
297 return 0;
298 }
299 };
300
301 auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
302 switch (op) {
303 case Operation::Replace:
304 return input[0];
305
306 case Operation::Modulate:
307 return ((input[0] * input[1]) / 255).Cast<u8>();
308
309 default:
310 ERROR_LOG(GPU, "Unknown color combiner operation %d\n", (int)op);
311 return {};
312 }
313 };
314
315 auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 {
316 switch (op) {
317 case Operation::Replace:
318 return input[0];
319
320 case Operation::Modulate:
321 return input[0] * input[1] / 255;
322
323 default:
324 ERROR_LOG(GPU, "Unknown alpha combiner operation %d\n", (int)op);
325 return 0;
326 }
327 };
328
329 // color combiner
330 // NOTE: Not sure if the alpha combiner might use the color output of the previous
331 // stage as input. Hence, we currently don't directly write the result to
332 // combiner_output.rgb(), but instead store it in a temporary variable until
333 // alpha combining has been done.
334 Math::Vec3<u8> color_result[3] = {
335 GetColorModifier(tev_stage.color_modifier1, GetColorSource(tev_stage.color_source1)),
336 GetColorModifier(tev_stage.color_modifier2, GetColorSource(tev_stage.color_source2)),
337 GetColorModifier(tev_stage.color_modifier3, GetColorSource(tev_stage.color_source3))
338 };
339 auto color_output = ColorCombine(tev_stage.color_op, color_result);
340
341 // alpha combiner
342 std::array<u8,3> alpha_result = {
343 GetAlphaModifier(tev_stage.alpha_modifier1, GetAlphaSource(tev_stage.alpha_source1)),
344 GetAlphaModifier(tev_stage.alpha_modifier2, GetAlphaSource(tev_stage.alpha_source2)),
345 GetAlphaModifier(tev_stage.alpha_modifier3, GetAlphaSource(tev_stage.alpha_source3))
346 };
347 auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
348
349 combiner_output = Math::MakeVec(color_output, alpha_output);
350 }
351
352 // TODO: Not sure if the multiplication by 65535 has already been taken care
353 // of when transforming to screen coordinates or not.
168 u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + 354 u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 +
169 (float)v1.screenpos[2].ToFloat32() * w1 + 355 (float)v1.screenpos[2].ToFloat32() * w1 +
170 (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536? 356 (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
171 SetDepth(x >> 4, y >> 4, z); 357 SetDepth(x >> 4, y >> 4, z);
172 358
173 DrawPixel(x >> 4, y >> 4, primary_color); 359 DrawPixel(x >> 4, y >> 4, combiner_output);
174 } 360 }
175 } 361 }
176} 362}