summary | refs | log | tree | commit | diff
path: root/src/video_core
diff options
context:
space:
mode:
author: Emmanuel Gil Peyrot, 2016-09-18 09:38:01 +0900
committer: Emmanuel Gil Peyrot, 2016-09-18 09:38:01 +0900
commit: dc8479928c5aee4c6ad6fe4f59006fb604cee701 (patch)
tree: 569a7f13128450bbab973236615587ff00bced5f /src/video_core
parent: Travis: Import Dolphin’s clang-format hook. (diff)
download: yuzu-dc8479928c5aee4c6ad6fe4f59006fb604cee701.tar.gz
          yuzu-dc8479928c5aee4c6ad6fe4f59006fb604cee701.tar.xz
          yuzu-dc8479928c5aee4c6ad6fe4f59006fb604cee701.zip
Sources: Run clang-format on everything.
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/clipper.cpp 68
-rw-r--r-- src/video_core/clipper.h 2
-rw-r--r-- src/video_core/command_processor.cpp 649
-rw-r--r-- src/video_core/command_processor.h 6
-rw-r--r-- src/video_core/debug_utils/debug_utils.cpp 430
-rw-r--r-- src/video_core/debug_utils/debug_utils.h 31
-rw-r--r-- src/video_core/gpu_debugger.h 41
-rw-r--r-- src/video_core/pica.cpp 921
-rw-r--r-- src/video_core/pica.h 651
-rw-r--r-- src/video_core/pica_state.h 6
-rw-r--r-- src/video_core/pica_types.h 35
-rw-r--r-- src/video_core/primitive_assembly.cpp 63
-rw-r--r-- src/video_core/primitive_assembly.h 7
-rw-r--r-- src/video_core/rasterizer.cpp 457
-rw-r--r-- src/video_core/rasterizer.h 5
-rw-r--r-- src/video_core/rasterizer_interface.h 21
-rw-r--r-- src/video_core/renderer_base.cpp 4
-rw-r--r-- src/video_core/renderer_base.h 11
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 234
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 50
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 285
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h 78
-rw-r--r-- src/video_core/renderer_opengl/gl_resource_manager.h 114
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.cpp 213
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_util.cpp 3
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp 49
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h 40
-rw-r--r-- src/video_core/renderer_opengl/pica_to_gl.h 72
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp 122
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.h 15
-rw-r--r-- src/video_core/shader/shader.cpp 39
-rw-r--r-- src/video_core/shader/shader.h 92
-rw-r--r-- src/video_core/shader/shader_interpreter.cpp 290
-rw-r--r-- src/video_core/shader/shader_interpreter.h 5
-rw-r--r-- src/video_core/shader/shader_jit_x64.cpp 212
-rw-r--r-- src/video_core/shader/shader_jit_x64.h 8
-rw-r--r-- src/video_core/swrasterizer.cpp 7
-rw-r--r-- src/video_core/swrasterizer.h 21
-rw-r--r-- src/video_core/vertex_loader.cpp 88
-rw-r--r-- src/video_core/vertex_loader.h 9
-rw-r--r-- src/video_core/video_core.cpp 6
-rw-r--r-- src/video_core/video_core.h 15
42 files changed, 2943 insertions, 2532 deletions
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index db99ce666..747285866 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -27,14 +27,10 @@ namespace Clipper {
27 27
28struct ClippingEdge { 28struct ClippingEdge {
29public: 29public:
30 ClippingEdge(Math::Vec4<float24> coeffs, 30 ClippingEdge(Math::Vec4<float24> coeffs, Math::Vec4<float24> bias = Math::Vec4<float24>(
31 Math::Vec4<float24> bias = Math::Vec4<float24>(float24::FromFloat32(0), 31 float24::FromFloat32(0), float24::FromFloat32(0),
32 float24::FromFloat32(0), 32 float24::FromFloat32(0), float24::FromFloat32(0)))
33 float24::FromFloat32(0), 33 : coeffs(coeffs), bias(bias) {
34 float24::FromFloat32(0)))
35 : coeffs(coeffs),
36 bias(bias)
37 {
38 } 34 }
39 35
40 bool IsInside(const OutputVertex& vertex) const { 36 bool IsInside(const OutputVertex& vertex) const {
@@ -59,8 +55,7 @@ private:
59 Math::Vec4<float24> bias; 55 Math::Vec4<float24> bias;
60}; 56};
61 57
62static void InitScreenCoordinates(OutputVertex& vtx) 58static void InitScreenCoordinates(OutputVertex& vtx) {
63{
64 struct { 59 struct {
65 float24 halfsize_x; 60 float24 halfsize_x;
66 float24 offset_x; 61 float24 offset_x;
@@ -73,8 +68,8 @@ static void InitScreenCoordinates(OutputVertex& vtx)
73 const auto& regs = g_state.regs; 68 const auto& regs = g_state.regs;
74 viewport.halfsize_x = float24::FromRaw(regs.viewport_size_x); 69 viewport.halfsize_x = float24::FromRaw(regs.viewport_size_x);
75 viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); 70 viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y);
76 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); 71 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x));
77 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); 72 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y));
78 73
79 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; 74 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w;
80 vtx.color *= inv_w; 75 vtx.color *= inv_w;
@@ -85,12 +80,14 @@ static void InitScreenCoordinates(OutputVertex& vtx)
85 vtx.tc2 *= inv_w; 80 vtx.tc2 *= inv_w;
86 vtx.pos.w = inv_w; 81 vtx.pos.w = inv_w;
87 82
88 vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; 83 vtx.screenpos[0] =
89 vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; 84 (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x;
85 vtx.screenpos[1] =
86 (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;
90 vtx.screenpos[2] = vtx.pos.z * inv_w; 87 vtx.screenpos[2] = vtx.pos.z * inv_w;
91} 88}
92 89
93void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) { 90void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) {
94 using boost::container::static_vector; 91 using boost::container::static_vector;
95 92
96 // Clipping a planar n-gon against a plane will remove at least 1 vertex and introduces 2 at 93 // Clipping a planar n-gon against a plane will remove at least 1 vertex and introduces 2 at
@@ -98,10 +95,10 @@ void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const Outpu
98 // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a 95 // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a
99 // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9. 96 // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9.
100 static const size_t MAX_VERTICES = 9; 97 static const size_t MAX_VERTICES = 9;
101 static_vector<OutputVertex, MAX_VERTICES> buffer_a = { v0, v1, v2 }; 98 static_vector<OutputVertex, MAX_VERTICES> buffer_a = {v0, v1, v2};
102 static_vector<OutputVertex, MAX_VERTICES> buffer_b; 99 static_vector<OutputVertex, MAX_VERTICES> buffer_b;
103 auto* output_list = &buffer_a; 100 auto* output_list = &buffer_a;
104 auto* input_list = &buffer_b; 101 auto* input_list = &buffer_b;
105 102
106 // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value. 103 // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value.
107 // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest 104 // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest
@@ -110,13 +107,13 @@ void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const Outpu
110 static const float24 f0 = float24::FromFloat32(0.0); 107 static const float24 f0 = float24::FromFloat32(0.0);
111 static const float24 f1 = float24::FromFloat32(1.0); 108 static const float24 f1 = float24::FromFloat32(1.0);
112 static const std::array<ClippingEdge, 7> clipping_edges = {{ 109 static const std::array<ClippingEdge, 7> clipping_edges = {{
113 { Math::MakeVec( f1, f0, f0, -f1) }, // x = +w 110 {Math::MakeVec(f1, f0, f0, -f1)}, // x = +w
114 { Math::MakeVec(-f1, f0, f0, -f1) }, // x = -w 111 {Math::MakeVec(-f1, f0, f0, -f1)}, // x = -w
115 { Math::MakeVec( f0, f1, f0, -f1) }, // y = +w 112 {Math::MakeVec(f0, f1, f0, -f1)}, // y = +w
116 { Math::MakeVec( f0, -f1, f0, -f1) }, // y = -w 113 {Math::MakeVec(f0, -f1, f0, -f1)}, // y = -w
117 { Math::MakeVec( f0, f0, f1, f0) }, // z = 0 114 {Math::MakeVec(f0, f0, f1, f0)}, // z = 0
118 { Math::MakeVec( f0, f0, -f1, -f1) }, // z = -w 115 {Math::MakeVec(f0, f0, -f1, -f1)}, // z = -w
119 { Math::MakeVec( f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON) }, // w = EPSILON 116 {Math::MakeVec(f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON)}, // w = EPSILON
120 }}; 117 }};
121 118
122 // TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii) 119 // TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii)
@@ -154,10 +151,10 @@ void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const Outpu
154 InitScreenCoordinates((*output_list)[0]); 151 InitScreenCoordinates((*output_list)[0]);
155 InitScreenCoordinates((*output_list)[1]); 152 InitScreenCoordinates((*output_list)[1]);
156 153
157 for (size_t i = 0; i < output_list->size() - 2; i ++) { 154 for (size_t i = 0; i < output_list->size() - 2; i++) {
158 OutputVertex& vtx0 = (*output_list)[0]; 155 OutputVertex& vtx0 = (*output_list)[0];
159 OutputVertex& vtx1 = (*output_list)[i+1]; 156 OutputVertex& vtx1 = (*output_list)[i + 1];
160 OutputVertex& vtx2 = (*output_list)[i+2]; 157 OutputVertex& vtx2 = (*output_list)[i + 2];
161 158
162 InitScreenCoordinates(vtx2); 159 InitScreenCoordinates(vtx2);
163 160
@@ -165,19 +162,20 @@ void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const Outpu
165 "Triangle %lu/%lu at position (%.3f, %.3f, %.3f, %.3f), " 162 "Triangle %lu/%lu at position (%.3f, %.3f, %.3f, %.3f), "
166 "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " 163 "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and "
167 "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", 164 "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)",
168 i + 1, output_list->size() - 2, 165 i + 1, output_list->size() - 2, vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(),
169 vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), 166 vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), vtx1.pos.x.ToFloat32(),
170 vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), 167 vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(),
171 vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(), 168 vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(),
172 vtx0.screenpos.x.ToFloat32(), vtx0.screenpos.y.ToFloat32(), vtx0.screenpos.z.ToFloat32(), 169 vtx2.pos.w.ToFloat32(), vtx0.screenpos.x.ToFloat32(),
173 vtx1.screenpos.x.ToFloat32(), vtx1.screenpos.y.ToFloat32(), vtx1.screenpos.z.ToFloat32(), 170 vtx0.screenpos.y.ToFloat32(), vtx0.screenpos.z.ToFloat32(),
174 vtx2.screenpos.x.ToFloat32(), vtx2.screenpos.y.ToFloat32(), vtx2.screenpos.z.ToFloat32()); 171 vtx1.screenpos.x.ToFloat32(), vtx1.screenpos.y.ToFloat32(),
172 vtx1.screenpos.z.ToFloat32(), vtx2.screenpos.x.ToFloat32(),
173 vtx2.screenpos.y.ToFloat32(), vtx2.screenpos.z.ToFloat32());
175 174
176 Rasterizer::ProcessTriangle(vtx0, vtx1, vtx2); 175 Rasterizer::ProcessTriangle(vtx0, vtx1, vtx2);
177 } 176 }
178} 177}
179 178
180
181} // namespace 179} // namespace
182 180
183} // namespace 181} // namespace
diff --git a/src/video_core/clipper.h b/src/video_core/clipper.h
index f85d8d4c9..b51af0af9 100644
--- a/src/video_core/clipper.h
+++ b/src/video_core/clipper.h
@@ -7,7 +7,7 @@
7namespace Pica { 7namespace Pica {
8 8
9namespace Shader { 9namespace Shader {
10 struct OutputVertex; 10struct OutputVertex;
11} 11}
12 12
13namespace Clipper { 13namespace Clipper {
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 689859049..415b5f74c 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -43,11 +43,8 @@ static u32 default_attr_write_buffer[3];
43 43
44// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF 44// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
45static const u32 expand_bits_to_bytes[] = { 45static const u32 expand_bits_to_bytes[] = {
46 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 46 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff,
47 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff, 47 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff};
48 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff,
49 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff
50};
51 48
52MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240)); 49MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240));
53 50
@@ -68,383 +65,393 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
68 65
69 regs[id] = (old_value & ~write_mask) | (value & write_mask); 66 regs[id] = (old_value & ~write_mask) | (value & write_mask);
70 67
71 DebugUtils::OnPicaRegWrite({ (u16)id, (u16)mask, regs[id] }); 68 DebugUtils::OnPicaRegWrite({(u16)id, (u16)mask, regs[id]});
72 69
73 if (g_debug_context) 70 if (g_debug_context)
74 g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded, reinterpret_cast<void*>(&id)); 71 g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded,
75 72 reinterpret_cast<void*>(&id));
76 switch(id) { 73
77 // Trigger IRQ 74 switch (id) {
78 case PICA_REG_INDEX(trigger_irq): 75 // Trigger IRQ
79 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D); 76 case PICA_REG_INDEX(trigger_irq):
80 break; 77 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D);
81 78 break;
82 case PICA_REG_INDEX_WORKAROUND(triangle_topology, 0x25E): 79
83 g_state.primitive_assembler.Reconfigure(regs.triangle_topology); 80 case PICA_REG_INDEX_WORKAROUND(triangle_topology, 0x25E):
84 break; 81 g_state.primitive_assembler.Reconfigure(regs.triangle_topology);
85 82 break;
86 case PICA_REG_INDEX_WORKAROUND(restart_primitive, 0x25F): 83
87 g_state.primitive_assembler.Reset(); 84 case PICA_REG_INDEX_WORKAROUND(restart_primitive, 0x25F):
88 break; 85 g_state.primitive_assembler.Reset();
89 86 break;
90 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.index, 0x232): 87
91 g_state.immediate.current_attribute = 0; 88 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.index, 0x232):
89 g_state.immediate.current_attribute = 0;
90 default_attr_counter = 0;
91 break;
92
93 // Load default vertex input attributes
94 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233):
95 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234):
96 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235): {
97 // TODO: Does actual hardware indeed keep an intermediate buffer or does
98 // it directly write the values?
99 default_attr_write_buffer[default_attr_counter++] = value;
100
101 // Default attributes are written in a packed format such that four float24 values are
102 // encoded in
103 // three 32-bit numbers. We write to internal memory once a full such vector is
104 // written.
105 if (default_attr_counter >= 3) {
92 default_attr_counter = 0; 106 default_attr_counter = 0;
93 break;
94
95 // Load default vertex input attributes
96 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233):
97 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234):
98 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235):
99 {
100 // TODO: Does actual hardware indeed keep an intermediate buffer or does
101 // it directly write the values?
102 default_attr_write_buffer[default_attr_counter++] = value;
103
104 // Default attributes are written in a packed format such that four float24 values are encoded in
105 // three 32-bit numbers. We write to internal memory once a full such vector is
106 // written.
107 if (default_attr_counter >= 3) {
108 default_attr_counter = 0;
109
110 auto& setup = regs.vs_default_attributes_setup;
111
112 if (setup.index >= 16) {
113 LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
114 break;
115 }
116 107
117 Math::Vec4<float24> attribute; 108 auto& setup = regs.vs_default_attributes_setup;
118 109
119 // NOTE: The destination component order indeed is "backwards" 110 if (setup.index >= 16) {
120 attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8); 111 LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
121 attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); 112 break;
122 attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); 113 }
123 attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
124 114
125 LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, 115 Math::Vec4<float24> attribute;
126 attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
127 attribute.w.ToFloat32());
128 116
129 // TODO: Verify that this actually modifies the register! 117 // NOTE: The destination component order indeed is "backwards"
130 if (setup.index < 15) { 118 attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8);
131 g_state.vs_default_attributes[setup.index] = attribute; 119 attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) |
132 setup.index++; 120 ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
133 } else { 121 attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) |
134 // Put each attribute into an immediate input buffer. 122 ((default_attr_write_buffer[2] >> 24) & 0xFF));
135 // When all specified immediate attributes are present, the Vertex Shader is invoked and everything is 123 attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
136 // sent to the primitive assembler.
137 124
138 auto& immediate_input = g_state.immediate.input_vertex; 125 LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
139 auto& immediate_attribute_id = g_state.immediate.current_attribute; 126 attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
127 attribute.w.ToFloat32());
140 128
141 immediate_input.attr[immediate_attribute_id++] = attribute; 129 // TODO: Verify that this actually modifies the register!
130 if (setup.index < 15) {
131 g_state.vs_default_attributes[setup.index] = attribute;
132 setup.index++;
133 } else {
134 // Put each attribute into an immediate input buffer.
135 // When all specified immediate attributes are present, the Vertex Shader is invoked
136 // and everything is
137 // sent to the primitive assembler.
142 138
143 if (immediate_attribute_id >= regs.vs.num_input_attributes+1) { 139 auto& immediate_input = g_state.immediate.input_vertex;
144 immediate_attribute_id = 0; 140 auto& immediate_attribute_id = g_state.immediate.current_attribute;
145 141
146 Shader::UnitState<false> shader_unit; 142 immediate_input.attr[immediate_attribute_id++] = attribute;
147 g_state.vs.Setup();
148 143
149 // Send to vertex shader 144 if (immediate_attribute_id >= regs.vs.num_input_attributes + 1) {
150 if (g_debug_context) 145 immediate_attribute_id = 0;
151 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast<void*>(&immediate_input));
152 g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1);
153 Shader::OutputVertex output_vertex = shader_unit.output_registers.ToVertex(regs.vs);
154 146
155 // Send to renderer 147 Shader::UnitState<false> shader_unit;
156 using Pica::Shader::OutputVertex; 148 g_state.vs.Setup();
157 auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) {
158 VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
159 };
160 149
161 g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle); 150 // Send to vertex shader
162 } 151 if (g_debug_context)
152 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
153 static_cast<void*>(&immediate_input));
154 g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes + 1);
155 Shader::OutputVertex output_vertex =
156 shader_unit.output_registers.ToVertex(regs.vs);
157
158 // Send to renderer
159 using Pica::Shader::OutputVertex;
160 auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
161 const OutputVertex& v2) {
162 VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
163 };
164
165 g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle);
163 } 166 }
164 } 167 }
165 break;
166 } 168 }
169 break;
170 }
167 171
168 case PICA_REG_INDEX(gpu_mode): 172 case PICA_REG_INDEX(gpu_mode):
169 if (regs.gpu_mode == Regs::GPUMode::Configuring) { 173 if (regs.gpu_mode == Regs::GPUMode::Configuring) {
170 // Draw immediate mode triangles when GPU Mode is set to GPUMode::Configuring 174 // Draw immediate mode triangles when GPU Mode is set to GPUMode::Configuring
171 VideoCore::g_renderer->Rasterizer()->DrawTriangles(); 175 VideoCore::g_renderer->Rasterizer()->DrawTriangles();
172 176
173 if (g_debug_context) { 177 if (g_debug_context) {
174 g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); 178 g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
175 }
176 } 179 }
177 break;
178
179 case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c):
180 case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d):
181 {
182 unsigned index = static_cast<unsigned>(id - PICA_REG_INDEX(command_buffer.trigger[0]));
183 u32* head_ptr = (u32*)Memory::GetPhysicalPointer(regs.command_buffer.GetPhysicalAddress(index));
184 g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr;
185 g_state.cmd_list.length = regs.command_buffer.GetSize(index) / sizeof(u32);
186 break;
187 } 180 }
181 break;
182
183 case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c):
184 case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d): {
185 unsigned index = static_cast<unsigned>(id - PICA_REG_INDEX(command_buffer.trigger[0]));
186 u32* head_ptr =
187 (u32*)Memory::GetPhysicalPointer(regs.command_buffer.GetPhysicalAddress(index));
188 g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr;
189 g_state.cmd_list.length = regs.command_buffer.GetSize(index) / sizeof(u32);
190 break;
191 }
188 192
189 // It seems like these trigger vertex rendering 193 // It seems like these trigger vertex rendering
190 case PICA_REG_INDEX(trigger_draw): 194 case PICA_REG_INDEX(trigger_draw):
191 case PICA_REG_INDEX(trigger_draw_indexed): 195 case PICA_REG_INDEX(trigger_draw_indexed): {
192 { 196 MICROPROFILE_SCOPE(GPU_Drawing);
193 MICROPROFILE_SCOPE(GPU_Drawing);
194 197
195#if PICA_LOG_TEV 198#if PICA_LOG_TEV
196 DebugUtils::DumpTevStageConfig(regs.GetTevStages()); 199 DebugUtils::DumpTevStageConfig(regs.GetTevStages());
197#endif 200#endif
198 if (g_debug_context) 201 if (g_debug_context)
199 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); 202 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
200 203
201 // Processes information about internal vertex attributes to figure out how a vertex is loaded. 204 // Processes information about internal vertex attributes to figure out how a vertex is
202 // Later, these can be compiled and cached. 205 // loaded.
203 const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress(); 206 // Later, these can be compiled and cached.
204 VertexLoader loader(regs); 207 const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress();
205 208 VertexLoader loader(regs);
206 // Load vertices 209
207 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); 210 // Load vertices
208 211 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
209 const auto& index_info = regs.index_array; 212
210 const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); 213 const auto& index_info = regs.index_array;
211 const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); 214 const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
212 bool index_u16 = index_info.format != 0; 215 const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
213 216 bool index_u16 = index_info.format != 0;
214 PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler; 217
215 218 PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler;
216 if (g_debug_context) { 219
217 for (int i = 0; i < 3; ++i) { 220 if (g_debug_context) {
218 const auto texture = regs.GetTextures()[i]; 221 for (int i = 0; i < 3; ++i) {
219 if (!texture.enabled) 222 const auto texture = regs.GetTextures()[i];
220 continue; 223 if (!texture.enabled)
221 224 continue;
222 u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); 225
223 if (g_debug_context && Pica::g_debug_context->recorder) 226 u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
224 g_debug_context->recorder->MemoryAccessed(texture_data, Pica::Regs::NibblesPerPixel(texture.format) * texture.config.width / 2 * texture.config.height, texture.config.GetPhysicalAddress()); 227 if (g_debug_context && Pica::g_debug_context->recorder)
225 } 228 g_debug_context->recorder->MemoryAccessed(
229 texture_data, Pica::Regs::NibblesPerPixel(texture.format) *
230 texture.config.width / 2 * texture.config.height,
231 texture.config.GetPhysicalAddress());
226 } 232 }
233 }
227 234
228 DebugUtils::MemoryAccessTracker memory_accesses; 235 DebugUtils::MemoryAccessTracker memory_accesses;
229
230 // Simple circular-replacement vertex cache
231 // The size has been tuned for optimal balance between hit-rate and the cost of lookup
232 const size_t VERTEX_CACHE_SIZE = 32;
233 std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
234 std::array<Shader::OutputRegisters, VERTEX_CACHE_SIZE> vertex_cache;
235 236
236 unsigned int vertex_cache_pos = 0; 237 // Simple circular-replacement vertex cache
237 vertex_cache_ids.fill(-1); 238 // The size has been tuned for optimal balance between hit-rate and the cost of lookup
239 const size_t VERTEX_CACHE_SIZE = 32;
240 std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
241 std::array<Shader::OutputRegisters, VERTEX_CACHE_SIZE> vertex_cache;
238 242
239 Shader::UnitState<false> shader_unit; 243 unsigned int vertex_cache_pos = 0;
240 g_state.vs.Setup(); 244 vertex_cache_ids.fill(-1);
241 245
242 for (unsigned int index = 0; index < regs.num_vertices; ++index) 246 Shader::UnitState<false> shader_unit;
243 { 247 g_state.vs.Setup();
244 // Indexed rendering doesn't use the start offset
245 unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : (index + regs.vertex_offset);
246 248
247 // -1 is a common special value used for primitive restart. Since it's unknown if 249 for (unsigned int index = 0; index < regs.num_vertices; ++index) {
248 // the PICA supports it, and it would mess up the caching, guard against it here. 250 // Indexed rendering doesn't use the start offset
249 ASSERT(vertex != -1); 251 unsigned int vertex =
252 is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index])
253 : (index + regs.vertex_offset);
250 254
251 bool vertex_cache_hit = false; 255 // -1 is a common special value used for primitive restart. Since it's unknown if
252 Shader::OutputRegisters output_registers; 256 // the PICA supports it, and it would mess up the caching, guard against it here.
257 ASSERT(vertex != -1);
253 258
254 if (is_indexed) { 259 bool vertex_cache_hit = false;
255 if (g_debug_context && Pica::g_debug_context->recorder) { 260 Shader::OutputRegisters output_registers;
256 int size = index_u16 ? 2 : 1;
257 memory_accesses.AddAccess(base_address + index_info.offset + size * index, size);
258 }
259 261
260 for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { 262 if (is_indexed) {
261 if (vertex == vertex_cache_ids[i]) { 263 if (g_debug_context && Pica::g_debug_context->recorder) {
262 output_registers = vertex_cache[i]; 264 int size = index_u16 ? 2 : 1;
263 vertex_cache_hit = true; 265 memory_accesses.AddAccess(base_address + index_info.offset + size * index,
264 break; 266 size);
265 }
266 }
267 } 267 }
268 268
269 if (!vertex_cache_hit) { 269 for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
270 // Initialize data for the current vertex 270 if (vertex == vertex_cache_ids[i]) {
271 Shader::InputVertex input; 271 output_registers = vertex_cache[i];
272 loader.LoadVertex(base_address, index, vertex, input, memory_accesses); 272 vertex_cache_hit = true;
273 273 break;
274 // Send to vertex shader
275 if (g_debug_context)
276 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input);
277 g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes());
278 output_registers = shader_unit.output_registers;
279
280 if (is_indexed) {
281 vertex_cache[vertex_cache_pos] = output_registers;
282 vertex_cache_ids[vertex_cache_pos] = vertex;
283 vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
284 } 274 }
285 } 275 }
276 }
286 277
287 // Retreive vertex from register data 278 if (!vertex_cache_hit) {
288 Shader::OutputVertex output_vertex = output_registers.ToVertex(regs.vs); 279 // Initialize data for the current vertex
280 Shader::InputVertex input;
281 loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
289 282
290 // Send to renderer 283 // Send to vertex shader
291 using Pica::Shader::OutputVertex; 284 if (g_debug_context)
292 auto AddTriangle = []( 285 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
293 const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) { 286 (void*)&input);
294 VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); 287 g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes());
295 }; 288 output_registers = shader_unit.output_registers;
296 289
297 primitive_assembler.SubmitVertex(output_vertex, AddTriangle); 290 if (is_indexed) {
291 vertex_cache[vertex_cache_pos] = output_registers;
292 vertex_cache_ids[vertex_cache_pos] = vertex;
293 vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
294 }
298 } 295 }
299 296
300 for (auto& range : memory_accesses.ranges) { 297 // Retreive vertex from register data
301 g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first), 298 Shader::OutputVertex output_vertex = output_registers.ToVertex(regs.vs);
302 range.second, range.first);
303 }
304 299
305 break; 300 // Send to renderer
301 using Pica::Shader::OutputVertex;
302 auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
303 const OutputVertex& v2) {
304 VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
305 };
306
307 primitive_assembler.SubmitVertex(output_vertex, AddTriangle);
306 } 308 }
307 309
308 case PICA_REG_INDEX(vs.bool_uniforms): 310 for (auto& range : memory_accesses.ranges) {
309 for (unsigned i = 0; i < 16; ++i) 311 g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first),
310 g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0; 312 range.second, range.first);
311
312 break;
313
314 case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1):
315 case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2):
316 case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3):
317 case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4):
318 {
319 int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1));
320 auto values = regs.vs.int_uniforms[index];
321 g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w);
322 LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x",
323 index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value());
324 break;
325 } 313 }
326 314
327 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[0], 0x2c1): 315 break;
328 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[1], 0x2c2): 316 }
329 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[2], 0x2c3):
330 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[3], 0x2c4):
331 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[4], 0x2c5):
332 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6):
333 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7):
334 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8):
335 {
336 auto& uniform_setup = regs.vs.uniform_setup;
337
338 // TODO: Does actual hardware indeed keep an intermediate buffer or does
339 // it directly write the values?
340 uniform_write_buffer[float_regs_counter++] = value;
341
342 // Uniforms are written in a packed format such that four float24 values are encoded in
343 // three 32-bit numbers. We write to internal memory once a full such vector is
344 // written.
345 if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) ||
346 (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) {
347 float_regs_counter = 0;
348
349 auto& uniform = g_state.vs.uniforms.f[uniform_setup.index];
350
351 if (uniform_setup.index > 95) {
352 LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index);
353 break;
354 }
355 317
356 // NOTE: The destination component order indeed is "backwards" 318 case PICA_REG_INDEX(vs.bool_uniforms):
357 if (uniform_setup.IsFloat32()) { 319 for (unsigned i = 0; i < 16; ++i)
358 for (auto i : {0,1,2,3}) 320 g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0;
359 uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); 321
360 } else { 322 break;
361 // TODO: Untested 323
362 uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); 324 case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1):
363 uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); 325 case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2):
364 uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); 326 case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3):
365 uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); 327 case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): {
366 } 328 int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1));
329 auto values = regs.vs.int_uniforms[index];
330 g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w);
331 LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", index, values.x.Value(),
332 values.y.Value(), values.z.Value(), values.w.Value());
333 break;
334 }
367 335
368 LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, 336 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[0], 0x2c1):
369 uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(), 337 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[1], 0x2c2):
370 uniform.w.ToFloat32()); 338 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[2], 0x2c3):
339 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[3], 0x2c4):
340 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[4], 0x2c5):
341 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6):
342 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7):
343 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): {
344 auto& uniform_setup = regs.vs.uniform_setup;
345
346 // TODO: Does actual hardware indeed keep an intermediate buffer or does
347 // it directly write the values?
348 uniform_write_buffer[float_regs_counter++] = value;
349
350 // Uniforms are written in a packed format such that four float24 values are encoded in
351 // three 32-bit numbers. We write to internal memory once a full such vector is
352 // written.
353 if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) ||
354 (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) {
355 float_regs_counter = 0;
356
357 auto& uniform = g_state.vs.uniforms.f[uniform_setup.index];
358
359 if (uniform_setup.index > 95) {
360 LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index);
361 break;
362 }
371 363
372 // TODO: Verify that this actually modifies the register! 364 // NOTE: The destination component order indeed is "backwards"
373 uniform_setup.index.Assign(uniform_setup.index + 1); 365 if (uniform_setup.IsFloat32()) {
366 for (auto i : {0, 1, 2, 3})
367 uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i]));
368 } else {
369 // TODO: Untested
370 uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8);
371 uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) |
372 ((uniform_write_buffer[1] >> 16) & 0xFFFF));
373 uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) |
374 ((uniform_write_buffer[2] >> 24) & 0xFF));
375 uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF);
374 } 376 }
375 break;
376 }
377 377
378 // Load shader program code 378 LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index,
379 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc): 379 uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(),
380 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd): 380 uniform.w.ToFloat32());
381 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce):
382 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[3], 0x2cf):
383 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[4], 0x2d0):
384 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1):
385 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2):
386 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3):
387 {
388 g_state.vs.program_code[regs.vs.program.offset] = value;
389 regs.vs.program.offset++;
390 break;
391 }
392 381
393 // Load swizzle pattern data 382 // TODO: Verify that this actually modifies the register!
394 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6): 383 uniform_setup.index.Assign(uniform_setup.index + 1);
395 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7):
396 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8):
397 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[3], 0x2d9):
398 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[4], 0x2da):
399 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db):
400 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc):
401 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd):
402 {
403 g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value;
404 regs.vs.swizzle_patterns.offset++;
405 break;
406 } 384 }
385 break;
386 }
407 387
408 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8): 388 // Load shader program code
409 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9): 389 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc):
410 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca): 390 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd):
411 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb): 391 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce):
412 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): 392 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[3], 0x2cf):
413 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): 393 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[4], 0x2d0):
414 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): 394 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1):
415 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): 395 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2):
416 { 396 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): {
417 auto& lut_config = regs.lighting.lut_config; 397 g_state.vs.program_code[regs.vs.program.offset] = value;
418 398 regs.vs.program.offset++;
419 ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!"); 399 break;
420 400 }
421 g_state.lighting.luts[lut_config.type][lut_config.index].raw = value;
422 lut_config.index.Assign(lut_config.index + 1);
423 break;
424 }
425 401
426 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[0], 0xe8): 402 // Load swizzle pattern data
427 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[1], 0xe9): 403 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6):
428 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[2], 0xea): 404 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7):
429 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[3], 0xeb): 405 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8):
430 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[4], 0xec): 406 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[3], 0x2d9):
431 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[5], 0xed): 407 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[4], 0x2da):
432 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[6], 0xee): 408 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db):
433 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[7], 0xef): 409 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc):
434 { 410 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): {
435 g_state.fog.lut[regs.fog_lut_offset % 128].raw = value; 411 g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value;
436 regs.fog_lut_offset.Assign(regs.fog_lut_offset + 1); 412 regs.vs.swizzle_patterns.offset++;
437 break; 413 break;
438 } 414 }
415
416 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8):
417 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9):
418 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca):
419 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb):
420 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc):
421 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd):
422 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce):
423 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): {
424 auto& lut_config = regs.lighting.lut_config;
425
426 ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!");
427
428 g_state.lighting.luts[lut_config.type][lut_config.index].raw = value;
429 lut_config.index.Assign(lut_config.index + 1);
430 break;
431 }
439 432
440 default: 433 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[0], 0xe8):
441 break; 434 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[1], 0xe9):
435 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[2], 0xea):
436 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[3], 0xeb):
437 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[4], 0xec):
438 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[5], 0xed):
439 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[6], 0xee):
440 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[7], 0xef): {
441 g_state.fog.lut[regs.fog_lut_offset % 128].raw = value;
442 regs.fog_lut_offset.Assign(regs.fog_lut_offset + 1);
443 break;
444 }
445
446 default:
447 break;
442 } 448 }
443 449
444 VideoCore::g_renderer->Rasterizer()->NotifyPicaRegisterChanged(id); 450 VideoCore::g_renderer->Rasterizer()->NotifyPicaRegisterChanged(id);
445 451
446 if (g_debug_context) 452 if (g_debug_context)
447 g_debug_context->OnEvent(DebugContext::Event::PicaCommandProcessed, reinterpret_cast<void*>(&id)); 453 g_debug_context->OnEvent(DebugContext::Event::PicaCommandProcessed,
454 reinterpret_cast<void*>(&id));
448} 455}
449 456
450void ProcessCommandList(const u32* list, u32 size) { 457void ProcessCommandList(const u32* list, u32 size) {
@@ -458,14 +465,14 @@ void ProcessCommandList(const u32* list, u32 size) {
458 ++g_state.cmd_list.current_ptr; 465 ++g_state.cmd_list.current_ptr;
459 466
460 u32 value = *g_state.cmd_list.current_ptr++; 467 u32 value = *g_state.cmd_list.current_ptr++;
461 const CommandHeader header = { *g_state.cmd_list.current_ptr++ }; 468 const CommandHeader header = {*g_state.cmd_list.current_ptr++};
462 469
463 WritePicaReg(header.cmd_id, value, header.parameter_mask); 470 WritePicaReg(header.cmd_id, value, header.parameter_mask);
464 471
465 for (unsigned i = 0; i < header.extra_data_length; ++i) { 472 for (unsigned i = 0; i < header.extra_data_length; ++i) {
466 u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0); 473 u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0);
467 WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, header.parameter_mask); 474 WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, header.parameter_mask);
468 } 475 }
469 } 476 }
470} 477}
471 478
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h
index 022a71f5e..b8dad8e7b 100644
--- a/src/video_core/command_processor.h
+++ b/src/video_core/command_processor.h
@@ -16,7 +16,7 @@ namespace CommandProcessor {
16union CommandHeader { 16union CommandHeader {
17 u32 hex; 17 u32 hex;
18 18
19 BitField< 0, 16, u32> cmd_id; 19 BitField<0, 16, u32> cmd_id;
20 20
21 // parameter_mask: 21 // parameter_mask:
22 // Mask applied to the input value to make it possible to update 22 // Mask applied to the input value to make it possible to update
@@ -25,11 +25,11 @@ union CommandHeader {
25 // second bit: 0x0000FF00 25 // second bit: 0x0000FF00
26 // third bit: 0x00FF0000 26 // third bit: 0x00FF0000
27 // fourth bit: 0xFF000000 27 // fourth bit: 0xFF000000
28 BitField<16, 4, u32> parameter_mask; 28 BitField<16, 4, u32> parameter_mask;
29 29
30 BitField<20, 11, u32> extra_data_length; 30 BitField<20, 11, u32> extra_data_length;
31 31
32 BitField<31, 1, u32> group_commands; 32 BitField<31, 1, u32> group_commands;
33}; 33};
34static_assert(std::is_standard_layout<CommandHeader>::value == true, 34static_assert(std::is_standard_layout<CommandHeader>::value == true,
35 "CommandHeader does not use standard layout"); 35 "CommandHeader does not use standard layout");
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index bfa686380..1cb868ead 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -50,7 +50,8 @@ void DebugContext::DoOnEvent(Event event, void* data) {
50 { 50 {
51 std::unique_lock<std::mutex> lock(breakpoint_mutex); 51 std::unique_lock<std::mutex> lock(breakpoint_mutex);
52 52
53 // Commit the rasterizer's caches so framebuffers, render targets, etc. will show on debug widgets 53 // Commit the rasterizer's caches so framebuffers, render targets, etc. will show on debug
54 // widgets
54 VideoCore::g_renderer->Rasterizer()->FlushAll(); 55 VideoCore::g_renderer->Rasterizer()->FlushAll();
55 56
56 // TODO: Should stop the CPU thread here once we multithread emulation. 57 // TODO: Should stop the CPU thread here once we multithread emulation.
@@ -64,7 +65,7 @@ void DebugContext::DoOnEvent(Event event, void* data) {
64 } 65 }
65 66
66 // Wait until another thread tells us to Resume() 67 // Wait until another thread tells us to Resume()
67 resume_from_breakpoint.wait(lock, [&]{ return !at_breakpoint; }); 68 resume_from_breakpoint.wait(lock, [&] { return !at_breakpoint; });
68 } 69 }
69} 70}
70 71
@@ -88,8 +89,9 @@ std::shared_ptr<DebugContext> g_debug_context; // TODO: Get rid of this global
88 89
89namespace DebugUtils { 90namespace DebugUtils {
90 91
91void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes) 92void DumpShader(const std::string& filename, const Regs::ShaderConfig& config,
92{ 93 const Shader::ShaderSetup& setup,
94 const Regs::VSOutputAttributes* output_attributes) {
93 struct StuffToWrite { 95 struct StuffToWrite {
94 const u8* pointer; 96 const u8* pointer;
95 u32 size; 97 u32 size;
@@ -97,7 +99,7 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
97 std::vector<StuffToWrite> writing_queue; 99 std::vector<StuffToWrite> writing_queue;
98 u32 write_offset = 0; 100 u32 write_offset = 0;
99 101
100 auto QueueForWriting = [&writing_queue,&write_offset](const u8* pointer, u32 size) { 102 auto QueueForWriting = [&writing_queue, &write_offset](const u8* pointer, u32 size) {
101 writing_queue.push_back({pointer, size}); 103 writing_queue.push_back({pointer, size});
102 u32 old_write_offset = write_offset; 104 u32 old_write_offset = write_offset;
103 write_offset += size; 105 write_offset += size;
@@ -108,99 +110,94 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
108 // into shbin format (separate type and component mask). 110 // into shbin format (separate type and component mask).
109 union OutputRegisterInfo { 111 union OutputRegisterInfo {
110 enum Type : u64 { 112 enum Type : u64 {
111 POSITION = 0, 113 POSITION = 0,
112 QUATERNION = 1, 114 QUATERNION = 1,
113 COLOR = 2, 115 COLOR = 2,
114 TEXCOORD0 = 3, 116 TEXCOORD0 = 3,
115 TEXCOORD1 = 5, 117 TEXCOORD1 = 5,
116 TEXCOORD2 = 6, 118 TEXCOORD2 = 6,
117 119
118 VIEW = 8, 120 VIEW = 8,
119 }; 121 };
120 122
121 BitField< 0, 64, u64> hex; 123 BitField<0, 64, u64> hex;
122 124
123 BitField< 0, 16, Type> type; 125 BitField<0, 16, Type> type;
124 BitField<16, 16, u64> id; 126 BitField<16, 16, u64> id;
125 BitField<32, 4, u64> component_mask; 127 BitField<32, 4, u64> component_mask;
126 }; 128 };
127 129
128 // This is put into a try-catch block to make sure we notice unknown configurations. 130 // This is put into a try-catch block to make sure we notice unknown configurations.
129 std::vector<OutputRegisterInfo> output_info_table; 131 std::vector<OutputRegisterInfo> output_info_table;
130 for (unsigned i = 0; i < 7; ++i) { 132 for (unsigned i = 0; i < 7; ++i) {
131 using OutputAttributes = Pica::Regs::VSOutputAttributes; 133 using OutputAttributes = Pica::Regs::VSOutputAttributes;
132 134
133 // TODO: It's still unclear how the attribute components map to the register! 135 // TODO: It's still unclear how the attribute components map to the register!
134 // Once we know that, this code probably will not make much sense anymore. 136 // Once we know that, this code probably will not make much sense anymore.
135 std::map<OutputAttributes::Semantic, std::pair<OutputRegisterInfo::Type, u32> > map = { 137 std::map<OutputAttributes::Semantic, std::pair<OutputRegisterInfo::Type, u32>> map = {
136 { OutputAttributes::POSITION_X, { OutputRegisterInfo::POSITION, 1} }, 138 {OutputAttributes::POSITION_X, {OutputRegisterInfo::POSITION, 1}},
137 { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} }, 139 {OutputAttributes::POSITION_Y, {OutputRegisterInfo::POSITION, 2}},
138 { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} }, 140 {OutputAttributes::POSITION_Z, {OutputRegisterInfo::POSITION, 4}},
139 { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} }, 141 {OutputAttributes::POSITION_W, {OutputRegisterInfo::POSITION, 8}},
140 { OutputAttributes::QUATERNION_X, { OutputRegisterInfo::QUATERNION, 1} }, 142 {OutputAttributes::QUATERNION_X, {OutputRegisterInfo::QUATERNION, 1}},
141 { OutputAttributes::QUATERNION_Y, { OutputRegisterInfo::QUATERNION, 2} }, 143 {OutputAttributes::QUATERNION_Y, {OutputRegisterInfo::QUATERNION, 2}},
142 { OutputAttributes::QUATERNION_Z, { OutputRegisterInfo::QUATERNION, 4} }, 144 {OutputAttributes::QUATERNION_Z, {OutputRegisterInfo::QUATERNION, 4}},
143 { OutputAttributes::QUATERNION_W, { OutputRegisterInfo::QUATERNION, 8} }, 145 {OutputAttributes::QUATERNION_W, {OutputRegisterInfo::QUATERNION, 8}},
144 { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} }, 146 {OutputAttributes::COLOR_R, {OutputRegisterInfo::COLOR, 1}},
145 { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} }, 147 {OutputAttributes::COLOR_G, {OutputRegisterInfo::COLOR, 2}},
146 { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} }, 148 {OutputAttributes::COLOR_B, {OutputRegisterInfo::COLOR, 4}},
147 { OutputAttributes::COLOR_A, { OutputRegisterInfo::COLOR, 8} }, 149 {OutputAttributes::COLOR_A, {OutputRegisterInfo::COLOR, 8}},
148 { OutputAttributes::TEXCOORD0_U, { OutputRegisterInfo::TEXCOORD0, 1} }, 150 {OutputAttributes::TEXCOORD0_U, {OutputRegisterInfo::TEXCOORD0, 1}},
149 { OutputAttributes::TEXCOORD0_V, { OutputRegisterInfo::TEXCOORD0, 2} }, 151 {OutputAttributes::TEXCOORD0_V, {OutputRegisterInfo::TEXCOORD0, 2}},
150 { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} }, 152 {OutputAttributes::TEXCOORD1_U, {OutputRegisterInfo::TEXCOORD1, 1}},
151 { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} }, 153 {OutputAttributes::TEXCOORD1_V, {OutputRegisterInfo::TEXCOORD1, 2}},
152 { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} }, 154 {OutputAttributes::TEXCOORD2_U, {OutputRegisterInfo::TEXCOORD2, 1}},
153 { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} }, 155 {OutputAttributes::TEXCOORD2_V, {OutputRegisterInfo::TEXCOORD2, 2}},
154 { OutputAttributes::VIEW_X, { OutputRegisterInfo::VIEW, 1} }, 156 {OutputAttributes::VIEW_X, {OutputRegisterInfo::VIEW, 1}},
155 { OutputAttributes::VIEW_Y, { OutputRegisterInfo::VIEW, 2} }, 157 {OutputAttributes::VIEW_Y, {OutputRegisterInfo::VIEW, 2}},
156 { OutputAttributes::VIEW_Z, { OutputRegisterInfo::VIEW, 4} } 158 {OutputAttributes::VIEW_Z, {OutputRegisterInfo::VIEW, 4}}};
157 }; 159
158 160 for (const auto& semantic : std::vector<OutputAttributes::Semantic>{
159 for (const auto& semantic : std::vector<OutputAttributes::Semantic>{ 161 output_attributes[i].map_x, output_attributes[i].map_y, output_attributes[i].map_z,
160 output_attributes[i].map_x, 162 output_attributes[i].map_w}) {
161 output_attributes[i].map_y, 163 if (semantic == OutputAttributes::INVALID)
162 output_attributes[i].map_z, 164 continue;
163 output_attributes[i].map_w }) { 165
164 if (semantic == OutputAttributes::INVALID) 166 try {
165 continue; 167 OutputRegisterInfo::Type type = map.at(semantic).first;
166 168 u32 component_mask = map.at(semantic).second;
167 try { 169
168 OutputRegisterInfo::Type type = map.at(semantic).first; 170 auto it = std::find_if(output_info_table.begin(), output_info_table.end(),
169 u32 component_mask = map.at(semantic).second; 171 [&i, &type](const OutputRegisterInfo& info) {
170 172 return info.id == i && info.type == type;
171 auto it = std::find_if(output_info_table.begin(), output_info_table.end(), 173 });
172 [&i, &type](const OutputRegisterInfo& info) { 174
173 return info.id == i && info.type == type; 175 if (it == output_info_table.end()) {
174 } 176 output_info_table.emplace_back();
175 ); 177 output_info_table.back().type.Assign(type);
176 178 output_info_table.back().component_mask.Assign(component_mask);
177 if (it == output_info_table.end()) { 179 output_info_table.back().id.Assign(i);
178 output_info_table.emplace_back(); 180 } else {
179 output_info_table.back().type.Assign(type); 181 it->component_mask.Assign(it->component_mask | component_mask);
180 output_info_table.back().component_mask.Assign(component_mask);
181 output_info_table.back().id.Assign(i);
182 } else {
183 it->component_mask.Assign(it->component_mask | component_mask);
184 }
185 } catch (const std::out_of_range& ) {
186 DEBUG_ASSERT_MSG(false, "Unknown output attribute mapping");
187 LOG_ERROR(HW_GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x",
188 (int)output_attributes[i].map_x.Value(),
189 (int)output_attributes[i].map_y.Value(),
190 (int)output_attributes[i].map_z.Value(),
191 (int)output_attributes[i].map_w.Value());
192 } 182 }
183 } catch (const std::out_of_range&) {
184 DEBUG_ASSERT_MSG(false, "Unknown output attribute mapping");
185 LOG_ERROR(HW_GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x",
186 (int)output_attributes[i].map_x.Value(),
187 (int)output_attributes[i].map_y.Value(),
188 (int)output_attributes[i].map_z.Value(),
189 (int)output_attributes[i].map_w.Value());
193 } 190 }
194 } 191 }
195 192 }
196 193
197 struct { 194 struct {
198 DVLBHeader header; 195 DVLBHeader header;
199 u32 dvle_offset; 196 u32 dvle_offset;
200 } dvlb{ {DVLBHeader::MAGIC_WORD, 1 } }; // 1 DVLE 197 } dvlb{{DVLBHeader::MAGIC_WORD, 1}}; // 1 DVLE
201 198
202 DVLPHeader dvlp{ DVLPHeader::MAGIC_WORD }; 199 DVLPHeader dvlp{DVLPHeader::MAGIC_WORD};
203 DVLEHeader dvle{ DVLEHeader::MAGIC_WORD }; 200 DVLEHeader dvle{DVLEHeader::MAGIC_WORD};
204 201
205 QueueForWriting(reinterpret_cast<const u8*>(&dvlb), sizeof(dvlb)); 202 QueueForWriting(reinterpret_cast<const u8*>(&dvlb), sizeof(dvlb));
206 u32 dvlp_offset = QueueForWriting(reinterpret_cast<const u8*>(&dvlp), sizeof(dvlp)); 203 u32 dvlp_offset = QueueForWriting(reinterpret_cast<const u8*>(&dvlp), sizeof(dvlp));
@@ -216,14 +213,16 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
216 dvlp.swizzle_info_num_entries = static_cast<uint32_t>(setup.swizzle_data.size()); 213 dvlp.swizzle_info_num_entries = static_cast<uint32_t>(setup.swizzle_data.size());
217 u32 dummy = 0; 214 u32 dummy = 0;
218 for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) { 215 for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) {
219 QueueForWriting(reinterpret_cast<const u8*>(&setup.swizzle_data[i]), sizeof(setup.swizzle_data[i])); 216 QueueForWriting(reinterpret_cast<const u8*>(&setup.swizzle_data[i]),
217 sizeof(setup.swizzle_data[i]));
220 QueueForWriting(reinterpret_cast<const u8*>(&dummy), sizeof(dummy)); 218 QueueForWriting(reinterpret_cast<const u8*>(&dummy), sizeof(dummy));
221 } 219 }
222 220
223 dvle.main_offset_words = config.main_offset; 221 dvle.main_offset_words = config.main_offset;
224 dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; 222 dvle.output_register_table_offset = write_offset - dvlb.dvle_offset;
225 dvle.output_register_table_size = static_cast<u32>(output_info_table.size()); 223 dvle.output_register_table_size = static_cast<u32>(output_info_table.size());
226 QueueForWriting(reinterpret_cast<const u8*>(output_info_table.data()), static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo))); 224 QueueForWriting(reinterpret_cast<const u8*>(output_info_table.data()),
225 static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo)));
227 226
228 // TODO: Create a label table for "main" 227 // TODO: Create a label table for "main"
229 228
@@ -258,10 +257,8 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
258 constant.f.w = nihstro::to_float24(setup.uniforms.f[i].w.ToFloat32()); 257 constant.f.w = nihstro::to_float24(setup.uniforms.f[i].w.ToFloat32());
259 258
260 // Store constant if it's different from zero.. 259 // Store constant if it's different from zero..
261 if (setup.uniforms.f[i].x.ToFloat32() != 0.0 || 260 if (setup.uniforms.f[i].x.ToFloat32() != 0.0 || setup.uniforms.f[i].y.ToFloat32() != 0.0 ||
262 setup.uniforms.f[i].y.ToFloat32() != 0.0 || 261 setup.uniforms.f[i].z.ToFloat32() != 0.0 || setup.uniforms.f[i].w.ToFloat32() != 0.0)
263 setup.uniforms.f[i].z.ToFloat32() != 0.0 ||
264 setup.uniforms.f[i].w.ToFloat32() != 0.0)
265 constant_table.emplace_back(constant); 262 constant_table.emplace_back(constant);
266 } 263 }
267 dvle.constant_table_offset = write_offset - dvlb.dvle_offset; 264 dvle.constant_table_offset = write_offset - dvlb.dvle_offset;
@@ -282,8 +279,7 @@ static std::unique_ptr<PicaTrace> pica_trace;
282static std::mutex pica_trace_mutex; 279static std::mutex pica_trace_mutex;
283static int is_pica_tracing = false; 280static int is_pica_tracing = false;
284 281
285void StartPicaTracing() 282void StartPicaTracing() {
286{
287 if (is_pica_tracing) { 283 if (is_pica_tracing) {
288 LOG_WARNING(HW_GPU, "StartPicaTracing called even though tracing already running!"); 284 LOG_WARNING(HW_GPU, "StartPicaTracing called even though tracing already running!");
289 return; 285 return;
@@ -295,13 +291,11 @@ void StartPicaTracing()
295 is_pica_tracing = true; 291 is_pica_tracing = true;
296} 292}
297 293
298bool IsPicaTracing() 294bool IsPicaTracing() {
299{
300 return is_pica_tracing != 0; 295 return is_pica_tracing != 0;
301} 296}
302 297
303void OnPicaRegWrite(PicaTrace::Write write) 298void OnPicaRegWrite(PicaTrace::Write write) {
304{
305 // Double check for is_pica_tracing to avoid pointless locking overhead 299 // Double check for is_pica_tracing to avoid pointless locking overhead
306 if (!is_pica_tracing) 300 if (!is_pica_tracing)
307 return; 301 return;
@@ -314,8 +308,7 @@ void OnPicaRegWrite(PicaTrace::Write write)
314 pica_trace->writes.push_back(write); 308 pica_trace->writes.push_back(write);
315} 309}
316 310
317std::unique_ptr<PicaTrace> FinishPicaTracing() 311std::unique_ptr<PicaTrace> FinishPicaTracing() {
318{
319 if (!is_pica_tracing) { 312 if (!is_pica_tracing) {
320 LOG_WARNING(HW_GPU, "FinishPicaTracing called even though tracing isn't running!"); 313 LOG_WARNING(HW_GPU, "FinishPicaTracing called even though tracing isn't running!");
321 return {}; 314 return {};
@@ -331,12 +324,12 @@ std::unique_ptr<PicaTrace> FinishPicaTracing()
331 return ret; 324 return ret;
332} 325}
333 326
334const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) { 327const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info,
328 bool disable_alpha) {
335 const unsigned int coarse_x = x & ~7; 329 const unsigned int coarse_x = x & ~7;
336 const unsigned int coarse_y = y & ~7; 330 const unsigned int coarse_y = y & ~7;
337 331
338 if (info.format != Regs::TextureFormat::ETC1 && 332 if (info.format != Regs::TextureFormat::ETC1 && info.format != Regs::TextureFormat::ETC1A4) {
339 info.format != Regs::TextureFormat::ETC1A4) {
340 // TODO(neobrain): Fix code design to unify vertical block offsets! 333 // TODO(neobrain): Fix code design to unify vertical block offsets!
341 source += coarse_y * info.stride; 334 source += coarse_y * info.stride;
342 } 335 }
@@ -344,73 +337,63 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
344 // TODO: Assert that width/height are multiples of block dimensions 337 // TODO: Assert that width/height are multiples of block dimensions
345 338
346 switch (info.format) { 339 switch (info.format) {
347 case Regs::TextureFormat::RGBA8: 340 case Regs::TextureFormat::RGBA8: {
348 {
349 auto res = Color::DecodeRGBA8(source + VideoCore::GetMortonOffset(x, y, 4)); 341 auto res = Color::DecodeRGBA8(source + VideoCore::GetMortonOffset(x, y, 4));
350 return { res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a()) }; 342 return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
351 } 343 }
352 344
353 case Regs::TextureFormat::RGB8: 345 case Regs::TextureFormat::RGB8: {
354 {
355 auto res = Color::DecodeRGB8(source + VideoCore::GetMortonOffset(x, y, 3)); 346 auto res = Color::DecodeRGB8(source + VideoCore::GetMortonOffset(x, y, 3));
356 return { res.r(), res.g(), res.b(), 255 }; 347 return {res.r(), res.g(), res.b(), 255};
357 } 348 }
358 349
359 case Regs::TextureFormat::RGB5A1: 350 case Regs::TextureFormat::RGB5A1: {
360 {
361 auto res = Color::DecodeRGB5A1(source + VideoCore::GetMortonOffset(x, y, 2)); 351 auto res = Color::DecodeRGB5A1(source + VideoCore::GetMortonOffset(x, y, 2));
362 return { res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a()) }; 352 return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
363 } 353 }
364 354
365 case Regs::TextureFormat::RGB565: 355 case Regs::TextureFormat::RGB565: {
366 {
367 auto res = Color::DecodeRGB565(source + VideoCore::GetMortonOffset(x, y, 2)); 356 auto res = Color::DecodeRGB565(source + VideoCore::GetMortonOffset(x, y, 2));
368 return { res.r(), res.g(), res.b(), 255 }; 357 return {res.r(), res.g(), res.b(), 255};
369 } 358 }
370 359
371 case Regs::TextureFormat::RGBA4: 360 case Regs::TextureFormat::RGBA4: {
372 {
373 auto res = Color::DecodeRGBA4(source + VideoCore::GetMortonOffset(x, y, 2)); 361 auto res = Color::DecodeRGBA4(source + VideoCore::GetMortonOffset(x, y, 2));
374 return { res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a()) }; 362 return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
375 } 363 }
376 364
377 case Regs::TextureFormat::IA8: 365 case Regs::TextureFormat::IA8: {
378 {
379 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2); 366 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2);
380 367
381 if (disable_alpha) { 368 if (disable_alpha) {
382 // Show intensity as red, alpha as green 369 // Show intensity as red, alpha as green
383 return { source_ptr[1], source_ptr[0], 0, 255 }; 370 return {source_ptr[1], source_ptr[0], 0, 255};
384 } else { 371 } else {
385 return { source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0] }; 372 return {source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]};
386 } 373 }
387 } 374 }
388 375
389 case Regs::TextureFormat::RG8: 376 case Regs::TextureFormat::RG8: {
390 {
391 auto res = Color::DecodeRG8(source + VideoCore::GetMortonOffset(x, y, 2)); 377 auto res = Color::DecodeRG8(source + VideoCore::GetMortonOffset(x, y, 2));
392 return { res.r(), res.g(), 0, 255 }; 378 return {res.r(), res.g(), 0, 255};
393 } 379 }
394 380
395 case Regs::TextureFormat::I8: 381 case Regs::TextureFormat::I8: {
396 {
397 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); 382 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
398 return { *source_ptr, *source_ptr, *source_ptr, 255 }; 383 return {*source_ptr, *source_ptr, *source_ptr, 255};
399 } 384 }
400 385
401 case Regs::TextureFormat::A8: 386 case Regs::TextureFormat::A8: {
402 {
403 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); 387 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
404 388
405 if (disable_alpha) { 389 if (disable_alpha) {
406 return { *source_ptr, *source_ptr, *source_ptr, 255 }; 390 return {*source_ptr, *source_ptr, *source_ptr, 255};
407 } else { 391 } else {
408 return { 0, 0, 0, *source_ptr }; 392 return {0, 0, 0, *source_ptr};
409 } 393 }
410 } 394 }
411 395
412 case Regs::TextureFormat::IA4: 396 case Regs::TextureFormat::IA4: {
413 {
414 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); 397 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
415 398
416 u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); 399 u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4);
@@ -418,25 +401,23 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
418 401
419 if (disable_alpha) { 402 if (disable_alpha) {
420 // Show intensity as red, alpha as green 403 // Show intensity as red, alpha as green
421 return { i, a, 0, 255 }; 404 return {i, a, 0, 255};
422 } else { 405 } else {
423 return { i, i, i, a }; 406 return {i, i, i, a};
424 } 407 }
425 } 408 }
426 409
427 case Regs::TextureFormat::I4: 410 case Regs::TextureFormat::I4: {
428 {
429 u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1); 411 u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1);
430 const u8* source_ptr = source + morton_offset / 2; 412 const u8* source_ptr = source + morton_offset / 2;
431 413
432 u8 i = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF); 414 u8 i = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF);
433 i = Color::Convert4To8(i); 415 i = Color::Convert4To8(i);
434 416
435 return { i, i, i, 255 }; 417 return {i, i, i, 255};
436 } 418 }
437 419
438 case Regs::TextureFormat::A4: 420 case Regs::TextureFormat::A4: {
439 {
440 u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1); 421 u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1);
441 const u8* source_ptr = source + morton_offset / 2; 422 const u8* source_ptr = source + morton_offset / 2;
442 423
@@ -444,15 +425,14 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
444 a = Color::Convert4To8(a); 425 a = Color::Convert4To8(a);
445 426
446 if (disable_alpha) { 427 if (disable_alpha) {
447 return { a, a, a, 255 }; 428 return {a, a, a, 255};
448 } else { 429 } else {
449 return { 0, 0, 0, a }; 430 return {0, 0, 0, a};
450 } 431 }
451 } 432 }
452 433
453 case Regs::TextureFormat::ETC1: 434 case Regs::TextureFormat::ETC1:
454 case Regs::TextureFormat::ETC1A4: 435 case Regs::TextureFormat::ETC1A4: {
455 {
456 bool has_alpha = (info.format == Regs::TextureFormat::ETC1A4); 436 bool has_alpha = (info.format == Regs::TextureFormat::ETC1A4);
457 437
458 // ETC1 further subdivides each 8x8 tile into four 4x4 subtiles 438 // ETC1 further subdivides each 8x8 tile into four 4x4 subtiles
@@ -462,10 +442,9 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
462 int subtile_index = ((x / subtile_width) & 1) + 2 * ((y / subtile_height) & 1); 442 int subtile_index = ((x / subtile_width) & 1) + 2 * ((y / subtile_height) & 1);
463 unsigned subtile_bytes = has_alpha ? 2 : 1; // TODO: Name... 443 unsigned subtile_bytes = has_alpha ? 2 : 1; // TODO: Name...
464 444
465 const u64* source_ptr = (const u64*)(source 445 const u64* source_ptr = (const u64*)(source + coarse_x * subtile_bytes * 4 +
466 + coarse_x * subtile_bytes * 4 446 coarse_y * subtile_bytes * 4 * (info.width / 8) +
467 + coarse_y * subtile_bytes * 4 * (info.width / 8) 447 subtile_index * subtile_bytes * 8);
468 + subtile_index * subtile_bytes * 8);
469 u64 alpha = 0xFFFFFFFFFFFFFFFF; 448 u64 alpha = 0xFFFFFFFFFFFFFFFF;
470 if (has_alpha) { 449 if (has_alpha) {
471 alpha = *source_ptr; 450 alpha = *source_ptr;
@@ -474,7 +453,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
474 453
475 union ETC1Tile { 454 union ETC1Tile {
476 // Each of these two is a collection of 16 bits (one per lookup value) 455 // Each of these two is a collection of 16 bits (one per lookup value)
477 BitField< 0, 16, u64> table_subindexes; 456 BitField<0, 16, u64> table_subindexes;
478 BitField<16, 16, u64> negation_flags; 457 BitField<16, 16, u64> negation_flags;
479 458
480 unsigned GetTableSubIndex(unsigned index) const { 459 unsigned GetTableSubIndex(unsigned index) const {
@@ -547,12 +526,17 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
547 } 526 }
548 527
549 // Add modifier 528 // Add modifier
550 unsigned table_index = static_cast<int>((x < 2) ? table_index_1.Value() : table_index_2.Value()); 529 unsigned table_index =
551 530 static_cast<int>((x < 2) ? table_index_1.Value() : table_index_2.Value());
552 static const std::array<std::array<u8, 2>, 8> etc1_modifier_table = {{ 531
553 {{ 2, 8 }}, {{ 5, 17 }}, {{ 9, 29 }}, {{ 13, 42 }}, 532 static const std::array<std::array<u8, 2>, 8> etc1_modifier_table = {{{{2, 8}},
554 {{ 18, 60 }}, {{ 24, 80 }}, {{ 33, 106 }}, {{ 47, 183 }} 533 {{5, 17}},
555 }}; 534 {{9, 29}},
535 {{13, 42}},
536 {{18, 60}},
537 {{24, 80}},
538 {{33, 106}},
539 {{47, 183}}}};
556 540
557 int modifier = etc1_modifier_table.at(table_index).at(GetTableSubIndex(texel)); 541 int modifier = etc1_modifier_table.at(table_index).at(GetTableSubIndex(texel));
558 if (GetNegationFlag(texel)) 542 if (GetNegationFlag(texel))
@@ -564,7 +548,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
564 548
565 return ret.Cast<u8>(); 549 return ret.Cast<u8>();
566 } 550 }
567 } const *etc1_tile = reinterpret_cast<const ETC1Tile*>(source_ptr); 551 } const* etc1_tile = reinterpret_cast<const ETC1Tile*>(source_ptr);
568 552
569 alpha >>= 4 * ((x & 3) * 4 + (y & 3)); 553 alpha >>= 4 * ((x & 3) * 4 + (y & 3));
570 return Math::MakeVec(etc1_tile->GetRGB(x & 3, y & 3), 554 return Math::MakeVec(etc1_tile->GetRGB(x & 3, y & 3),
@@ -579,8 +563,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
579} 563}
580 564
581TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config, 565TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config,
582 const Regs::TextureFormat& format) 566 const Regs::TextureFormat& format) {
583{
584 TextureInfo info; 567 TextureInfo info;
585 info.physical_address = config.GetPhysicalAddress(); 568 info.physical_address = config.GetPhysicalAddress();
586 info.width = config.width; 569 info.width = config.width;
@@ -595,13 +578,13 @@ TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config,
595static void WriteIOFile(png_structp png_ptr, png_bytep data, png_size_t length) { 578static void WriteIOFile(png_structp png_ptr, png_bytep data, png_size_t length) {
596 auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr)); 579 auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr));
597 if (!fp->WriteBytes(data, length)) 580 if (!fp->WriteBytes(data, length))
598 png_error(png_ptr, "Failed to write to output PNG file."); 581 png_error(png_ptr, "Failed to write to output PNG file.");
599} 582}
600 583
601static void FlushIOFile(png_structp png_ptr) { 584static void FlushIOFile(png_structp png_ptr) {
602 auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr)); 585 auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr));
603 if (!fp->Flush()) 586 if (!fp->Flush())
604 png_error(png_ptr, "Failed to flush to output PNG file."); 587 png_error(png_ptr, "Failed to flush to output PNG file.");
605} 588}
606#endif 589#endif
607 590
@@ -614,7 +597,8 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
614 597
615 // Write data to file 598 // Write data to file
616 static int dump_index = 0; 599 static int dump_index = 0;
617 std::string filename = std::string("texture_dump") + std::to_string(++dump_index) + std::string(".png"); 600 std::string filename =
601 std::string("texture_dump") + std::to_string(++dump_index) + std::string(".png");
618 u32 row_stride = texture_config.width * 3; 602 u32 row_stride = texture_config.width * 3;
619 603
620 u8* buf; 604 u8* buf;
@@ -632,7 +616,6 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
632 if (png_ptr == nullptr) { 616 if (png_ptr == nullptr) {
633 LOG_ERROR(Debug_GPU, "Could not allocate write struct"); 617 LOG_ERROR(Debug_GPU, "Could not allocate write struct");
634 goto finalise; 618 goto finalise;
635
636 } 619 }
637 620
638 // Initialize info structure 621 // Initialize info structure
@@ -651,9 +634,9 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
651 png_set_write_fn(png_ptr, static_cast<void*>(&fp), WriteIOFile, FlushIOFile); 634 png_set_write_fn(png_ptr, static_cast<void*>(&fp), WriteIOFile, FlushIOFile);
652 635
653 // Write header (8 bit color depth) 636 // Write header (8 bit color depth)
654 png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height, 637 png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height, 8,
655 8, PNG_COLOR_TYPE_RGB /*_ALPHA*/, PNG_INTERLACE_NONE, 638 PNG_COLOR_TYPE_RGB /*_ALPHA*/, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE,
656 PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); 639 PNG_FILTER_TYPE_BASE);
657 640
658 png_text title_text; 641 png_text title_text;
659 title_text.compression = PNG_TEXT_COMPRESSION_NONE; 642 title_text.compression = PNG_TEXT_COMPRESSION_NONE;
@@ -672,15 +655,14 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
672 info.stride = row_stride; 655 info.stride = row_stride;
673 info.format = g_state.regs.texture0_format; 656 info.format = g_state.regs.texture0_format;
674 Math::Vec4<u8> texture_color = LookupTexture(data, x, y, info); 657 Math::Vec4<u8> texture_color = LookupTexture(data, x, y, info);
675 buf[3 * x + y * row_stride ] = texture_color.r(); 658 buf[3 * x + y * row_stride] = texture_color.r();
676 buf[3 * x + y * row_stride + 1] = texture_color.g(); 659 buf[3 * x + y * row_stride + 1] = texture_color.g();
677 buf[3 * x + y * row_stride + 2] = texture_color.b(); 660 buf[3 * x + y * row_stride + 2] = texture_color.b();
678 } 661 }
679 } 662 }
680 663
681 // Write image data 664 // Write image data
682 for (unsigned y = 0; y < texture_config.height; ++y) 665 for (unsigned y = 0; y < texture_config.height; ++y) {
683 {
684 u8* row_ptr = (u8*)buf + y * row_stride; 666 u8* row_ptr = (u8*)buf + y * row_stride;
685 png_write_row(png_ptr, row_ptr); 667 png_write_row(png_ptr, row_ptr);
686 } 668 }
@@ -691,12 +673,15 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
691 png_write_end(png_ptr, nullptr); 673 png_write_end(png_ptr, nullptr);
692 674
693finalise: 675finalise:
694 if (info_ptr != nullptr) png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1); 676 if (info_ptr != nullptr)
695 if (png_ptr != nullptr) png_destroy_write_struct(&png_ptr, (png_infopp)nullptr); 677 png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1);
678 if (png_ptr != nullptr)
679 png_destroy_write_struct(&png_ptr, (png_infopp) nullptr);
696#endif 680#endif
697} 681}
698 682
699static std::string ReplacePattern(const std::string& input, const std::string& pattern, const std::string& replacement) { 683static std::string ReplacePattern(const std::string& input, const std::string& pattern,
684 const std::string& replacement) {
700 size_t start = input.find(pattern); 685 size_t start = input.find(pattern);
701 if (start == std::string::npos) 686 if (start == std::string::npos)
702 return input; 687 return input;
@@ -709,16 +694,16 @@ static std::string ReplacePattern(const std::string& input, const std::string& p
709static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfig::Source& source) { 694static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfig::Source& source) {
710 using Source = Pica::Regs::TevStageConfig::Source; 695 using Source = Pica::Regs::TevStageConfig::Source;
711 static const std::map<Source, std::string> source_map = { 696 static const std::map<Source, std::string> source_map = {
712 { Source::PrimaryColor, "PrimaryColor" }, 697 {Source::PrimaryColor, "PrimaryColor"},
713 { Source::PrimaryFragmentColor, "PrimaryFragmentColor" }, 698 {Source::PrimaryFragmentColor, "PrimaryFragmentColor"},
714 { Source::SecondaryFragmentColor, "SecondaryFragmentColor" }, 699 {Source::SecondaryFragmentColor, "SecondaryFragmentColor"},
715 { Source::Texture0, "Texture0" }, 700 {Source::Texture0, "Texture0"},
716 { Source::Texture1, "Texture1" }, 701 {Source::Texture1, "Texture1"},
717 { Source::Texture2, "Texture2" }, 702 {Source::Texture2, "Texture2"},
718 { Source::Texture3, "Texture3" }, 703 {Source::Texture3, "Texture3"},
719 { Source::PreviousBuffer, "PreviousBuffer" }, 704 {Source::PreviousBuffer, "PreviousBuffer"},
720 { Source::Constant, "Constant" }, 705 {Source::Constant, "Constant"},
721 { Source::Previous, "Previous" }, 706 {Source::Previous, "Previous"},
722 }; 707 };
723 708
724 const auto src_it = source_map.find(source); 709 const auto src_it = source_map.find(source);
@@ -728,19 +713,21 @@ static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfi
728 return src_it->second; 713 return src_it->second;
729} 714}
730 715
731static std::string GetTevStageConfigColorSourceString(const Pica::Regs::TevStageConfig::Source& source, const Pica::Regs::TevStageConfig::ColorModifier modifier) { 716static std::string
717GetTevStageConfigColorSourceString(const Pica::Regs::TevStageConfig::Source& source,
718 const Pica::Regs::TevStageConfig::ColorModifier modifier) {
732 using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier; 719 using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier;
733 static const std::map<ColorModifier, std::string> color_modifier_map = { 720 static const std::map<ColorModifier, std::string> color_modifier_map = {
734 { ColorModifier::SourceColor, "%source.rgb" }, 721 {ColorModifier::SourceColor, "%source.rgb"},
735 { ColorModifier::OneMinusSourceColor, "(1.0 - %source.rgb)" }, 722 {ColorModifier::OneMinusSourceColor, "(1.0 - %source.rgb)"},
736 { ColorModifier::SourceAlpha, "%source.aaa" }, 723 {ColorModifier::SourceAlpha, "%source.aaa"},
737 { ColorModifier::OneMinusSourceAlpha, "(1.0 - %source.aaa)" }, 724 {ColorModifier::OneMinusSourceAlpha, "(1.0 - %source.aaa)"},
738 { ColorModifier::SourceRed, "%source.rrr" }, 725 {ColorModifier::SourceRed, "%source.rrr"},
739 { ColorModifier::OneMinusSourceRed, "(1.0 - %source.rrr)" }, 726 {ColorModifier::OneMinusSourceRed, "(1.0 - %source.rrr)"},
740 { ColorModifier::SourceGreen, "%source.ggg" }, 727 {ColorModifier::SourceGreen, "%source.ggg"},
741 { ColorModifier::OneMinusSourceGreen, "(1.0 - %source.ggg)" }, 728 {ColorModifier::OneMinusSourceGreen, "(1.0 - %source.ggg)"},
742 { ColorModifier::SourceBlue, "%source.bbb" }, 729 {ColorModifier::SourceBlue, "%source.bbb"},
743 { ColorModifier::OneMinusSourceBlue, "(1.0 - %source.bbb)" }, 730 {ColorModifier::OneMinusSourceBlue, "(1.0 - %source.bbb)"},
744 }; 731 };
745 732
746 auto src_str = GetTevStageConfigSourceString(source); 733 auto src_str = GetTevStageConfigSourceString(source);
@@ -752,17 +739,19 @@ static std::string GetTevStageConfigColorSourceString(const Pica::Regs::TevStage
752 return ReplacePattern(modifier_str, "%source", src_str); 739 return ReplacePattern(modifier_str, "%source", src_str);
753} 740}
754 741
755static std::string GetTevStageConfigAlphaSourceString(const Pica::Regs::TevStageConfig::Source& source, const Pica::Regs::TevStageConfig::AlphaModifier modifier) { 742static std::string
743GetTevStageConfigAlphaSourceString(const Pica::Regs::TevStageConfig::Source& source,
744 const Pica::Regs::TevStageConfig::AlphaModifier modifier) {
756 using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier; 745 using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier;
757 static const std::map<AlphaModifier, std::string> alpha_modifier_map = { 746 static const std::map<AlphaModifier, std::string> alpha_modifier_map = {
758 { AlphaModifier::SourceAlpha, "%source.a" }, 747 {AlphaModifier::SourceAlpha, "%source.a"},
759 { AlphaModifier::OneMinusSourceAlpha, "(1.0 - %source.a)" }, 748 {AlphaModifier::OneMinusSourceAlpha, "(1.0 - %source.a)"},
760 { AlphaModifier::SourceRed, "%source.r" }, 749 {AlphaModifier::SourceRed, "%source.r"},
761 { AlphaModifier::OneMinusSourceRed, "(1.0 - %source.r)" }, 750 {AlphaModifier::OneMinusSourceRed, "(1.0 - %source.r)"},
762 { AlphaModifier::SourceGreen, "%source.g" }, 751 {AlphaModifier::SourceGreen, "%source.g"},
763 { AlphaModifier::OneMinusSourceGreen, "(1.0 - %source.g)" }, 752 {AlphaModifier::OneMinusSourceGreen, "(1.0 - %source.g)"},
764 { AlphaModifier::SourceBlue, "%source.b" }, 753 {AlphaModifier::SourceBlue, "%source.b"},
765 { AlphaModifier::OneMinusSourceBlue, "(1.0 - %source.b)" }, 754 {AlphaModifier::OneMinusSourceBlue, "(1.0 - %source.b)"},
766 }; 755 };
767 756
768 auto src_str = GetTevStageConfigSourceString(source); 757 auto src_str = GetTevStageConfigSourceString(source);
@@ -774,18 +763,19 @@ static std::string GetTevStageConfigAlphaSourceString(const Pica::Regs::TevStage
774 return ReplacePattern(modifier_str, "%source", src_str); 763 return ReplacePattern(modifier_str, "%source", src_str);
775} 764}
776 765
777static std::string GetTevStageConfigOperationString(const Pica::Regs::TevStageConfig::Operation& operation) { 766static std::string
767GetTevStageConfigOperationString(const Pica::Regs::TevStageConfig::Operation& operation) {
778 using Operation = Pica::Regs::TevStageConfig::Operation; 768 using Operation = Pica::Regs::TevStageConfig::Operation;
779 static const std::map<Operation, std::string> combiner_map = { 769 static const std::map<Operation, std::string> combiner_map = {
780 { Operation::Replace, "%source1" }, 770 {Operation::Replace, "%source1"},
781 { Operation::Modulate, "(%source1 * %source2)" }, 771 {Operation::Modulate, "(%source1 * %source2)"},
782 { Operation::Add, "(%source1 + %source2)" }, 772 {Operation::Add, "(%source1 + %source2)"},
783 { Operation::AddSigned, "(%source1 + %source2) - 0.5" }, 773 {Operation::AddSigned, "(%source1 + %source2) - 0.5"},
784 { Operation::Lerp, "lerp(%source1, %source2, %source3)" }, 774 {Operation::Lerp, "lerp(%source1, %source2, %source3)"},
785 { Operation::Subtract, "(%source1 - %source2)" }, 775 {Operation::Subtract, "(%source1 - %source2)"},
786 { Operation::Dot3_RGB, "dot(%source1, %source2)" }, 776 {Operation::Dot3_RGB, "dot(%source1, %source2)"},
787 { Operation::MultiplyThenAdd, "((%source1 * %source2) + %source3)" }, 777 {Operation::MultiplyThenAdd, "((%source1 * %source2) + %source3)"},
788 { Operation::AddThenMultiply, "((%source1 + %source2) * %source3)" }, 778 {Operation::AddThenMultiply, "((%source1 + %source2) * %source3)"},
789 }; 779 };
790 780
791 const auto op_it = combiner_map.find(operation); 781 const auto op_it = combiner_map.find(operation);
@@ -797,23 +787,37 @@ static std::string GetTevStageConfigOperationString(const Pica::Regs::TevStageCo
797 787
798std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage) { 788std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage) {
799 auto op_str = GetTevStageConfigOperationString(tev_stage.color_op); 789 auto op_str = GetTevStageConfigOperationString(tev_stage.color_op);
800 op_str = ReplacePattern(op_str, "%source1", GetTevStageConfigColorSourceString(tev_stage.color_source1, tev_stage.color_modifier1)); 790 op_str = ReplacePattern(
801 op_str = ReplacePattern(op_str, "%source2", GetTevStageConfigColorSourceString(tev_stage.color_source2, tev_stage.color_modifier2)); 791 op_str, "%source1",
802 return ReplacePattern(op_str, "%source3", GetTevStageConfigColorSourceString(tev_stage.color_source3, tev_stage.color_modifier3)); 792 GetTevStageConfigColorSourceString(tev_stage.color_source1, tev_stage.color_modifier1));
793 op_str = ReplacePattern(
794 op_str, "%source2",
795 GetTevStageConfigColorSourceString(tev_stage.color_source2, tev_stage.color_modifier2));
796 return ReplacePattern(
797 op_str, "%source3",
798 GetTevStageConfigColorSourceString(tev_stage.color_source3, tev_stage.color_modifier3));
803} 799}
804 800
805std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage) { 801std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage) {
806 auto op_str = GetTevStageConfigOperationString(tev_stage.alpha_op); 802 auto op_str = GetTevStageConfigOperationString(tev_stage.alpha_op);
807 op_str = ReplacePattern(op_str, "%source1", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source1, tev_stage.alpha_modifier1)); 803 op_str = ReplacePattern(
808 op_str = ReplacePattern(op_str, "%source2", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source2, tev_stage.alpha_modifier2)); 804 op_str, "%source1",
809 return ReplacePattern(op_str, "%source3", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source3, tev_stage.alpha_modifier3)); 805 GetTevStageConfigAlphaSourceString(tev_stage.alpha_source1, tev_stage.alpha_modifier1));
806 op_str = ReplacePattern(
807 op_str, "%source2",
808 GetTevStageConfigAlphaSourceString(tev_stage.alpha_source2, tev_stage.alpha_modifier2));
809 return ReplacePattern(
810 op_str, "%source3",
811 GetTevStageConfigAlphaSourceString(tev_stage.alpha_source3, tev_stage.alpha_modifier3));
810} 812}
811 813
812void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages) { 814void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages) {
813 std::string stage_info = "Tev setup:\n"; 815 std::string stage_info = "Tev setup:\n";
814 for (size_t index = 0; index < stages.size(); ++index) { 816 for (size_t index = 0; index < stages.size(); ++index) {
815 const auto& tev_stage = stages[index]; 817 const auto& tev_stage = stages[index];
816 stage_info += "Stage " + std::to_string(index) + ": " + GetTevStageConfigColorCombinerString(tev_stage) + " " + GetTevStageConfigAlphaCombinerString(tev_stage) + "\n"; 818 stage_info += "Stage " + std::to_string(index) + ": " +
819 GetTevStageConfigColorCombinerString(tev_stage) + " " +
820 GetTevStageConfigAlphaCombinerString(tev_stage) + "\n";
817 } 821 }
818 LOG_TRACE(HW_GPU, "%s", stage_info.c_str()); 822 LOG_TRACE(HW_GPU, "%s", stage_info.c_str());
819} 823}
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index 92e9734ae..1a58f40ff 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -53,13 +53,16 @@ public:
53 * Most importantly this is used for our debugger GUI. 53 * Most importantly this is used for our debugger GUI.
54 * 54 *
55 * To implement event handling, override the OnPicaBreakPointHit and OnPicaResume methods. 55 * To implement event handling, override the OnPicaBreakPointHit and OnPicaResume methods.
56 * @warning All BreakPointObservers need to be on the same thread to guarantee thread-safe state access 56 * @warning All BreakPointObservers need to be on the same thread to guarantee thread-safe state
57 * @todo Evaluate an alternative interface, in which there is only one managing observer and multiple child observers running (by design) on the same thread. 57 * access
58 * @todo Evaluate an alternative interface, in which there is only one managing observer and
59 * multiple child observers running (by design) on the same thread.
58 */ 60 */
59 class BreakPointObserver { 61 class BreakPointObserver {
60 public: 62 public:
61 /// Constructs the object such that it observes events of the given DebugContext. 63 /// Constructs the object such that it observes events of the given DebugContext.
62 BreakPointObserver(std::shared_ptr<DebugContext> debug_context) : context_weak(debug_context) { 64 BreakPointObserver(std::shared_ptr<DebugContext> debug_context)
65 : context_weak(debug_context) {
63 std::unique_lock<std::mutex> lock(debug_context->breakpoint_mutex); 66 std::unique_lock<std::mutex> lock(debug_context->breakpoint_mutex);
64 debug_context->breakpoint_observers.push_back(this); 67 debug_context->breakpoint_observers.push_back(this);
65 } 68 }
@@ -122,7 +125,8 @@ public:
122 * The current thread then is halted until Resume() is called from another thread (or until 125 * The current thread then is halted until Resume() is called from another thread (or until
123 * emulation is stopped). 126 * emulation is stopped).
124 * @param event Event which has happened 127 * @param event Event which has happened
125 * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until Resume() is called. 128 * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until
129 * Resume() is called.
126 */ 130 */
127 void OnEvent(Event event, void* data) { 131 void OnEvent(Event event, void* data) {
128 // This check is left in the header to allow the compiler to inline it. 132 // This check is left in the header to allow the compiler to inline it.
@@ -132,11 +136,12 @@ public:
132 DoOnEvent(event, data); 136 DoOnEvent(event, data);
133 } 137 }
134 138
135 void DoOnEvent(Event event, void *data); 139 void DoOnEvent(Event event, void* data);
136 140
137 /** 141 /**
138 * Resume from the current breakpoint. 142 * Resume from the current breakpoint.
139 * @warning Calling this from the same thread that OnEvent was called in will cause a deadlock. Calling from any other thread is safe. 143 * @warning Calling this from the same thread that OnEvent was called in will cause a deadlock.
144 * Calling from any other thread is safe.
140 */ 145 */
141 void Resume(); 146 void Resume();
142 147
@@ -144,7 +149,7 @@ public:
144 * Delete all set breakpoints and resume emulation. 149 * Delete all set breakpoints and resume emulation.
145 */ 150 */
146 void ClearBreakpoints() { 151 void ClearBreakpoints() {
147 for (auto &bp : breakpoints) { 152 for (auto& bp : breakpoints) {
148 bp.enabled = false; 153 bp.enabled = false;
149 } 154 }
150 Resume(); 155 Resume();
@@ -182,8 +187,8 @@ namespace DebugUtils {
182#define PICA_LOG_TEV 0 187#define PICA_LOG_TEV 0
183 188
184void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, 189void DumpShader(const std::string& filename, const Regs::ShaderConfig& config,
185 const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes); 190 const Shader::ShaderSetup& setup,
186 191 const Regs::VSOutputAttributes* output_attributes);
187 192
188// Utility class to log Pica commands. 193// Utility class to log Pica commands.
189struct PicaTrace { 194struct PicaTrace {
@@ -216,7 +221,10 @@ struct TextureInfo {
216 * @param source Source pointer to read data from 221 * @param source Source pointer to read data from
217 * @param s,t Texture coordinates to read from 222 * @param s,t Texture coordinates to read from
218 * @param info TextureInfo object describing the texture setup 223 * @param info TextureInfo object describing the texture setup
219 * @param disable_alpha This is used for debug widgets which use this method to display textures without providing a good way to visualize alpha by themselves. If true, this will return 255 for the alpha component, and either drop the information entirely or store it in an "unused" color channel. 224 * @param disable_alpha This is used for debug widgets which use this method to display textures
225 * without providing a good way to visualize alpha by themselves. If true, this will return 255 for
226 * the alpha component, and either drop the information entirely or store it in an "unused" color
227 * channel.
220 * @todo Eventually we should get rid of the disable_alpha parameter. 228 * @todo Eventually we should get rid of the disable_alpha parameter.
221 */ 229 */
222const Math::Vec4<u8> LookupTexture(const u8* source, int s, int t, const TextureInfo& info, 230const Math::Vec4<u8> LookupTexture(const u8* source, int s, int t, const TextureInfo& info,
@@ -237,7 +245,8 @@ class MemoryAccessTracker {
237 /// Combine overlapping and close ranges 245 /// Combine overlapping and close ranges
238 void SimplifyRanges() { 246 void SimplifyRanges() {
239 for (auto it = ranges.begin(); it != ranges.end(); ++it) { 247 for (auto it = ranges.begin(); it != ranges.end(); ++it) {
240 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too 248 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined,
249 // too
241 auto it2 = std::next(it); 250 auto it2 = std::next(it);
242 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) { 251 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
243 it->second = std::max(it->second, it2->first + it2->second - it->first); 252 it->second = std::max(it->second, it2->first + it2->second - it->first);
diff --git a/src/video_core/gpu_debugger.h b/src/video_core/gpu_debugger.h
index a3aab216c..e3ba80d8f 100644
--- a/src/video_core/gpu_debugger.h
+++ b/src/video_core/gpu_debugger.h
@@ -10,17 +10,15 @@
10 10
11#include "core/hle/service/gsp_gpu.h" 11#include "core/hle/service/gsp_gpu.h"
12 12
13class GraphicsDebugger 13class GraphicsDebugger {
14{
15public: 14public:
16 // Base class for all objects which need to be notified about GPU events 15 // Base class for all objects which need to be notified about GPU events
17 class DebuggerObserver 16 class DebuggerObserver {
18 {
19 public: 17 public:
20 DebuggerObserver() : observed(nullptr) { } 18 DebuggerObserver() : observed(nullptr) {
19 }
21 20
22 virtual ~DebuggerObserver() 21 virtual ~DebuggerObserver() {
23 {
24 if (observed) 22 if (observed)
25 observed->UnregisterObserver(this); 23 observed->UnregisterObserver(this);
26 } 24 }
@@ -31,15 +29,13 @@ public:
31 * @param total_command_count Total number of commands in the GX history 29 * @param total_command_count Total number of commands in the GX history
32 * @note All methods in this class are called from the GSP thread 30 * @note All methods in this class are called from the GSP thread
33 */ 31 */
34 virtual void GXCommandProcessed(int total_command_count) 32 virtual void GXCommandProcessed(int total_command_count) {
35 { 33 const GSP_GPU::Command& cmd = observed->ReadGXCommandHistory(total_command_count - 1);
36 const GSP_GPU::Command& cmd = observed->ReadGXCommandHistory(total_command_count-1);
37 LOG_TRACE(Debug_GPU, "Received command: id=%x", (int)cmd.id.Value()); 34 LOG_TRACE(Debug_GPU, "Received command: id=%x", (int)cmd.id.Value());
38 } 35 }
39 36
40 protected: 37 protected:
41 const GraphicsDebugger* GetDebugger() const 38 const GraphicsDebugger* GetDebugger() const {
42 {
43 return observed; 39 return observed;
44 } 40 }
45 41
@@ -49,8 +45,7 @@ public:
49 friend class GraphicsDebugger; 45 friend class GraphicsDebugger;
50 }; 46 };
51 47
52 void GXCommandProcessed(u8* command_data) 48 void GXCommandProcessed(u8* command_data) {
53 {
54 if (observers.empty()) 49 if (observers.empty())
55 return; 50 return;
56 51
@@ -60,33 +55,29 @@ public:
60 memcpy(&cmd, command_data, sizeof(GSP_GPU::Command)); 55 memcpy(&cmd, command_data, sizeof(GSP_GPU::Command));
61 56
62 ForEachObserver([this](DebuggerObserver* observer) { 57 ForEachObserver([this](DebuggerObserver* observer) {
63 observer->GXCommandProcessed(static_cast<int>(this->gx_command_history.size())); 58 observer->GXCommandProcessed(static_cast<int>(this->gx_command_history.size()));
64 } ); 59 });
65 } 60 }
66 61
67 const GSP_GPU::Command& ReadGXCommandHistory(int index) const 62 const GSP_GPU::Command& ReadGXCommandHistory(int index) const {
68 {
69 // TODO: Is this thread-safe? 63 // TODO: Is this thread-safe?
70 return gx_command_history[index]; 64 return gx_command_history[index];
71 } 65 }
72 66
73 void RegisterObserver(DebuggerObserver* observer) 67 void RegisterObserver(DebuggerObserver* observer) {
74 {
75 // TODO: Check for duplicates 68 // TODO: Check for duplicates
76 observers.push_back(observer); 69 observers.push_back(observer);
77 observer->observed = this; 70 observer->observed = this;
78 } 71 }
79 72
80 void UnregisterObserver(DebuggerObserver* observer) 73 void UnregisterObserver(DebuggerObserver* observer) {
81 {
82 observers.erase(std::remove(observers.begin(), observers.end(), observer), observers.end()); 74 observers.erase(std::remove(observers.begin(), observers.end(), observer), observers.end());
83 observer->observed = nullptr; 75 observer->observed = nullptr;
84 } 76 }
85 77
86private: 78private:
87 void ForEachObserver(std::function<void (DebuggerObserver*)> func) 79 void ForEachObserver(std::function<void(DebuggerObserver*)> func) {
88 { 80 std::for_each(observers.begin(), observers.end(), func);
89 std::for_each(observers.begin(),observers.end(), func);
90 } 81 }
91 82
92 std::vector<DebuggerObserver*> observers; 83 std::vector<DebuggerObserver*> observers;
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp
index ec78f9593..ffd13e717 100644
--- a/src/video_core/pica.cpp
+++ b/src/video_core/pica.cpp
@@ -17,466 +17,466 @@ namespace Pica {
17State g_state; 17State g_state;
18 18
19static const std::pair<u16, const char*> register_names[] = { 19static const std::pair<u16, const char*> register_names[] = {
20 { 0x010, "GPUREG_FINALIZE" }, 20 {0x010, "GPUREG_FINALIZE"},
21 21
22 { 0x040, "GPUREG_FACECULLING_CONFIG" }, 22 {0x040, "GPUREG_FACECULLING_CONFIG"},
23 { 0x041, "GPUREG_VIEWPORT_WIDTH" }, 23 {0x041, "GPUREG_VIEWPORT_WIDTH"},
24 { 0x042, "GPUREG_VIEWPORT_INVW" }, 24 {0x042, "GPUREG_VIEWPORT_INVW"},
25 { 0x043, "GPUREG_VIEWPORT_HEIGHT" }, 25 {0x043, "GPUREG_VIEWPORT_HEIGHT"},
26 { 0x044, "GPUREG_VIEWPORT_INVH" }, 26 {0x044, "GPUREG_VIEWPORT_INVH"},
27 27
28 { 0x047, "GPUREG_FRAGOP_CLIP" }, 28 {0x047, "GPUREG_FRAGOP_CLIP"},
29 { 0x048, "GPUREG_FRAGOP_CLIP_DATA0" }, 29 {0x048, "GPUREG_FRAGOP_CLIP_DATA0"},
30 { 0x049, "GPUREG_FRAGOP_CLIP_DATA1" }, 30 {0x049, "GPUREG_FRAGOP_CLIP_DATA1"},
31 { 0x04A, "GPUREG_FRAGOP_CLIP_DATA2" }, 31 {0x04A, "GPUREG_FRAGOP_CLIP_DATA2"},
32 { 0x04B, "GPUREG_FRAGOP_CLIP_DATA3" }, 32 {0x04B, "GPUREG_FRAGOP_CLIP_DATA3"},
33 33
34 { 0x04D, "GPUREG_DEPTHMAP_SCALE" }, 34 {0x04D, "GPUREG_DEPTHMAP_SCALE"},
35 { 0x04E, "GPUREG_DEPTHMAP_OFFSET" }, 35 {0x04E, "GPUREG_DEPTHMAP_OFFSET"},
36 { 0x04F, "GPUREG_SH_OUTMAP_TOTAL" }, 36 {0x04F, "GPUREG_SH_OUTMAP_TOTAL"},
37 { 0x050, "GPUREG_SH_OUTMAP_O0" }, 37 {0x050, "GPUREG_SH_OUTMAP_O0"},
38 { 0x051, "GPUREG_SH_OUTMAP_O1" }, 38 {0x051, "GPUREG_SH_OUTMAP_O1"},
39 { 0x052, "GPUREG_SH_OUTMAP_O2" }, 39 {0x052, "GPUREG_SH_OUTMAP_O2"},
40 { 0x053, "GPUREG_SH_OUTMAP_O3" }, 40 {0x053, "GPUREG_SH_OUTMAP_O3"},
41 { 0x054, "GPUREG_SH_OUTMAP_O4" }, 41 {0x054, "GPUREG_SH_OUTMAP_O4"},
42 { 0x055, "GPUREG_SH_OUTMAP_O5" }, 42 {0x055, "GPUREG_SH_OUTMAP_O5"},
43 { 0x056, "GPUREG_SH_OUTMAP_O6" }, 43 {0x056, "GPUREG_SH_OUTMAP_O6"},
44 44
45 { 0x061, "GPUREG_EARLYDEPTH_FUNC" }, 45 {0x061, "GPUREG_EARLYDEPTH_FUNC"},
46 { 0x062, "GPUREG_EARLYDEPTH_TEST1" }, 46 {0x062, "GPUREG_EARLYDEPTH_TEST1"},
47 { 0x063, "GPUREG_EARLYDEPTH_CLEAR" }, 47 {0x063, "GPUREG_EARLYDEPTH_CLEAR"},
48 { 0x064, "GPUREG_SH_OUTATTR_MODE" }, 48 {0x064, "GPUREG_SH_OUTATTR_MODE"},
49 { 0x065, "GPUREG_SCISSORTEST_MODE" }, 49 {0x065, "GPUREG_SCISSORTEST_MODE"},
50 { 0x066, "GPUREG_SCISSORTEST_POS" }, 50 {0x066, "GPUREG_SCISSORTEST_POS"},
51 { 0x067, "GPUREG_SCISSORTEST_DIM" }, 51 {0x067, "GPUREG_SCISSORTEST_DIM"},
52 { 0x068, "GPUREG_VIEWPORT_XY" }, 52 {0x068, "GPUREG_VIEWPORT_XY"},
53 53
54 { 0x06A, "GPUREG_EARLYDEPTH_DATA" }, 54 {0x06A, "GPUREG_EARLYDEPTH_DATA"},
55 55
56 { 0x06D, "GPUREG_DEPTHMAP_ENABLE" }, 56 {0x06D, "GPUREG_DEPTHMAP_ENABLE"},
57 { 0x06E, "GPUREG_RENDERBUF_DIM" }, 57 {0x06E, "GPUREG_RENDERBUF_DIM"},
58 { 0x06F, "GPUREG_SH_OUTATTR_CLOCK" }, 58 {0x06F, "GPUREG_SH_OUTATTR_CLOCK"},
59 59
60 { 0x080, "GPUREG_TEXUNIT_CONFIG" }, 60 {0x080, "GPUREG_TEXUNIT_CONFIG"},
61 { 0x081, "GPUREG_TEXUNIT0_BORDER_COLOR" }, 61 {0x081, "GPUREG_TEXUNIT0_BORDER_COLOR"},
62 { 0x082, "GPUREG_TEXUNIT0_DIM" }, 62 {0x082, "GPUREG_TEXUNIT0_DIM"},
63 { 0x083, "GPUREG_TEXUNIT0_PARAM" }, 63 {0x083, "GPUREG_TEXUNIT0_PARAM"},
64 { 0x084, "GPUREG_TEXUNIT0_LOD" }, 64 {0x084, "GPUREG_TEXUNIT0_LOD"},
65 { 0x085, "GPUREG_TEXUNIT0_ADDR1" }, 65 {0x085, "GPUREG_TEXUNIT0_ADDR1"},
66 { 0x086, "GPUREG_TEXUNIT0_ADDR2" }, 66 {0x086, "GPUREG_TEXUNIT0_ADDR2"},
67 { 0x087, "GPUREG_TEXUNIT0_ADDR3" }, 67 {0x087, "GPUREG_TEXUNIT0_ADDR3"},
68 { 0x088, "GPUREG_TEXUNIT0_ADDR4" }, 68 {0x088, "GPUREG_TEXUNIT0_ADDR4"},
69 { 0x089, "GPUREG_TEXUNIT0_ADDR5" }, 69 {0x089, "GPUREG_TEXUNIT0_ADDR5"},
70 { 0x08A, "GPUREG_TEXUNIT0_ADDR6" }, 70 {0x08A, "GPUREG_TEXUNIT0_ADDR6"},
71 { 0x08B, "GPUREG_TEXUNIT0_SHADOW" }, 71 {0x08B, "GPUREG_TEXUNIT0_SHADOW"},
72 72
73 { 0x08E, "GPUREG_TEXUNIT0_TYPE" }, 73 {0x08E, "GPUREG_TEXUNIT0_TYPE"},
74 { 0x08F, "GPUREG_LIGHTING_ENABLE0" }, 74 {0x08F, "GPUREG_LIGHTING_ENABLE0"},
75 75
76 { 0x091, "GPUREG_TEXUNIT1_BORDER_COLOR" }, 76 {0x091, "GPUREG_TEXUNIT1_BORDER_COLOR"},
77 { 0x092, "GPUREG_TEXUNIT1_DIM" }, 77 {0x092, "GPUREG_TEXUNIT1_DIM"},
78 { 0x093, "GPUREG_TEXUNIT1_PARAM" }, 78 {0x093, "GPUREG_TEXUNIT1_PARAM"},
79 { 0x094, "GPUREG_TEXUNIT1_LOD" }, 79 {0x094, "GPUREG_TEXUNIT1_LOD"},
80 { 0x095, "GPUREG_TEXUNIT1_ADDR" }, 80 {0x095, "GPUREG_TEXUNIT1_ADDR"},
81 { 0x096, "GPUREG_TEXUNIT1_TYPE" }, 81 {0x096, "GPUREG_TEXUNIT1_TYPE"},
82 82
83 { 0x099, "GPUREG_TEXUNIT2_BORDER_COLOR" }, 83 {0x099, "GPUREG_TEXUNIT2_BORDER_COLOR"},
84 { 0x09A, "GPUREG_TEXUNIT2_DIM" }, 84 {0x09A, "GPUREG_TEXUNIT2_DIM"},
85 { 0x09B, "GPUREG_TEXUNIT2_PARAM" }, 85 {0x09B, "GPUREG_TEXUNIT2_PARAM"},
86 { 0x09C, "GPUREG_TEXUNIT2_LOD" }, 86 {0x09C, "GPUREG_TEXUNIT2_LOD"},
87 { 0x09D, "GPUREG_TEXUNIT2_ADDR" }, 87 {0x09D, "GPUREG_TEXUNIT2_ADDR"},
88 { 0x09E, "GPUREG_TEXUNIT2_TYPE" }, 88 {0x09E, "GPUREG_TEXUNIT2_TYPE"},
89 89
90 { 0x0A8, "GPUREG_TEXUNIT3_PROCTEX0" }, 90 {0x0A8, "GPUREG_TEXUNIT3_PROCTEX0"},
91 { 0x0A9, "GPUREG_TEXUNIT3_PROCTEX1" }, 91 {0x0A9, "GPUREG_TEXUNIT3_PROCTEX1"},
92 { 0x0AA, "GPUREG_TEXUNIT3_PROCTEX2" }, 92 {0x0AA, "GPUREG_TEXUNIT3_PROCTEX2"},
93 { 0x0AB, "GPUREG_TEXUNIT3_PROCTEX3" }, 93 {0x0AB, "GPUREG_TEXUNIT3_PROCTEX3"},
94 { 0x0AC, "GPUREG_TEXUNIT3_PROCTEX4" }, 94 {0x0AC, "GPUREG_TEXUNIT3_PROCTEX4"},
95 { 0x0AD, "GPUREG_TEXUNIT3_PROCTEX5" }, 95 {0x0AD, "GPUREG_TEXUNIT3_PROCTEX5"},
96 96
97 { 0x0AF, "GPUREG_PROCTEX_LUT" }, 97 {0x0AF, "GPUREG_PROCTEX_LUT"},
98 { 0x0B0, "GPUREG_PROCTEX_LUT_DATA0" }, 98 {0x0B0, "GPUREG_PROCTEX_LUT_DATA0"},
99 { 0x0B1, "GPUREG_PROCTEX_LUT_DATA1" }, 99 {0x0B1, "GPUREG_PROCTEX_LUT_DATA1"},
100 { 0x0B2, "GPUREG_PROCTEX_LUT_DATA2" }, 100 {0x0B2, "GPUREG_PROCTEX_LUT_DATA2"},
101 { 0x0B3, "GPUREG_PROCTEX_LUT_DATA3" }, 101 {0x0B3, "GPUREG_PROCTEX_LUT_DATA3"},
102 { 0x0B4, "GPUREG_PROCTEX_LUT_DATA4" }, 102 {0x0B4, "GPUREG_PROCTEX_LUT_DATA4"},
103 { 0x0B5, "GPUREG_PROCTEX_LUT_DATA5" }, 103 {0x0B5, "GPUREG_PROCTEX_LUT_DATA5"},
104 { 0x0B6, "GPUREG_PROCTEX_LUT_DATA6" }, 104 {0x0B6, "GPUREG_PROCTEX_LUT_DATA6"},
105 { 0x0B7, "GPUREG_PROCTEX_LUT_DATA7" }, 105 {0x0B7, "GPUREG_PROCTEX_LUT_DATA7"},
106 106
107 { 0x0C0, "GPUREG_TEXENV0_SOURCE" }, 107 {0x0C0, "GPUREG_TEXENV0_SOURCE"},
108 { 0x0C1, "GPUREG_TEXENV0_OPERAND" }, 108 {0x0C1, "GPUREG_TEXENV0_OPERAND"},
109 { 0x0C2, "GPUREG_TEXENV0_COMBINER" }, 109 {0x0C2, "GPUREG_TEXENV0_COMBINER"},
110 { 0x0C3, "GPUREG_TEXENV0_COLOR" }, 110 {0x0C3, "GPUREG_TEXENV0_COLOR"},
111 { 0x0C4, "GPUREG_TEXENV0_SCALE" }, 111 {0x0C4, "GPUREG_TEXENV0_SCALE"},
112 112
113 { 0x0C8, "GPUREG_TEXENV1_SOURCE" }, 113 {0x0C8, "GPUREG_TEXENV1_SOURCE"},
114 { 0x0C9, "GPUREG_TEXENV1_OPERAND" }, 114 {0x0C9, "GPUREG_TEXENV1_OPERAND"},
115 { 0x0CA, "GPUREG_TEXENV1_COMBINER" }, 115 {0x0CA, "GPUREG_TEXENV1_COMBINER"},
116 { 0x0CB, "GPUREG_TEXENV1_COLOR" }, 116 {0x0CB, "GPUREG_TEXENV1_COLOR"},
117 { 0x0CC, "GPUREG_TEXENV1_SCALE" }, 117 {0x0CC, "GPUREG_TEXENV1_SCALE"},
118 118
119 { 0x0D0, "GPUREG_TEXENV2_SOURCE" }, 119 {0x0D0, "GPUREG_TEXENV2_SOURCE"},
120 { 0x0D1, "GPUREG_TEXENV2_OPERAND" }, 120 {0x0D1, "GPUREG_TEXENV2_OPERAND"},
121 { 0x0D2, "GPUREG_TEXENV2_COMBINER" }, 121 {0x0D2, "GPUREG_TEXENV2_COMBINER"},
122 { 0x0D3, "GPUREG_TEXENV2_COLOR" }, 122 {0x0D3, "GPUREG_TEXENV2_COLOR"},
123 { 0x0D4, "GPUREG_TEXENV2_SCALE" }, 123 {0x0D4, "GPUREG_TEXENV2_SCALE"},
124 124
125 { 0x0D8, "GPUREG_TEXENV3_SOURCE" }, 125 {0x0D8, "GPUREG_TEXENV3_SOURCE"},
126 { 0x0D9, "GPUREG_TEXENV3_OPERAND" }, 126 {0x0D9, "GPUREG_TEXENV3_OPERAND"},
127 { 0x0DA, "GPUREG_TEXENV3_COMBINER" }, 127 {0x0DA, "GPUREG_TEXENV3_COMBINER"},
128 { 0x0DB, "GPUREG_TEXENV3_COLOR" }, 128 {0x0DB, "GPUREG_TEXENV3_COLOR"},
129 { 0x0DC, "GPUREG_TEXENV3_SCALE" }, 129 {0x0DC, "GPUREG_TEXENV3_SCALE"},
130 130
131 { 0x0E0, "GPUREG_TEXENV_UPDATE_BUFFER" }, 131 {0x0E0, "GPUREG_TEXENV_UPDATE_BUFFER"},
132 { 0x0E1, "GPUREG_FOG_COLOR" }, 132 {0x0E1, "GPUREG_FOG_COLOR"},
133 133
134 { 0x0E4, "GPUREG_GAS_ATTENUATION" }, 134 {0x0E4, "GPUREG_GAS_ATTENUATION"},
135 { 0x0E5, "GPUREG_GAS_ACCMAX" }, 135 {0x0E5, "GPUREG_GAS_ACCMAX"},
136 { 0x0E6, "GPUREG_FOG_LUT_INDEX" }, 136 {0x0E6, "GPUREG_FOG_LUT_INDEX"},
137 137
138 { 0x0E8, "GPUREG_FOG_LUT_DATA0" }, 138 {0x0E8, "GPUREG_FOG_LUT_DATA0"},
139 { 0x0E9, "GPUREG_FOG_LUT_DATA1" }, 139 {0x0E9, "GPUREG_FOG_LUT_DATA1"},
140 { 0x0EA, "GPUREG_FOG_LUT_DATA2" }, 140 {0x0EA, "GPUREG_FOG_LUT_DATA2"},
141 { 0x0EB, "GPUREG_FOG_LUT_DATA3" }, 141 {0x0EB, "GPUREG_FOG_LUT_DATA3"},
142 { 0x0EC, "GPUREG_FOG_LUT_DATA4" }, 142 {0x0EC, "GPUREG_FOG_LUT_DATA4"},
143 { 0x0ED, "GPUREG_FOG_LUT_DATA5" }, 143 {0x0ED, "GPUREG_FOG_LUT_DATA5"},
144 { 0x0EE, "GPUREG_FOG_LUT_DATA6" }, 144 {0x0EE, "GPUREG_FOG_LUT_DATA6"},
145 { 0x0EF, "GPUREG_FOG_LUT_DATA7" }, 145 {0x0EF, "GPUREG_FOG_LUT_DATA7"},
146 { 0x0F0, "GPUREG_TEXENV4_SOURCE" }, 146 {0x0F0, "GPUREG_TEXENV4_SOURCE"},
147 { 0x0F1, "GPUREG_TEXENV4_OPERAND" }, 147 {0x0F1, "GPUREG_TEXENV4_OPERAND"},
148 { 0x0F2, "GPUREG_TEXENV4_COMBINER" }, 148 {0x0F2, "GPUREG_TEXENV4_COMBINER"},
149 { 0x0F3, "GPUREG_TEXENV4_COLOR" }, 149 {0x0F3, "GPUREG_TEXENV4_COLOR"},
150 { 0x0F4, "GPUREG_TEXENV4_SCALE" }, 150 {0x0F4, "GPUREG_TEXENV4_SCALE"},
151 151
152 { 0x0F8, "GPUREG_TEXENV5_SOURCE" }, 152 {0x0F8, "GPUREG_TEXENV5_SOURCE"},
153 { 0x0F9, "GPUREG_TEXENV5_OPERAND" }, 153 {0x0F9, "GPUREG_TEXENV5_OPERAND"},
154 { 0x0FA, "GPUREG_TEXENV5_COMBINER" }, 154 {0x0FA, "GPUREG_TEXENV5_COMBINER"},
155 { 0x0FB, "GPUREG_TEXENV5_COLOR" }, 155 {0x0FB, "GPUREG_TEXENV5_COLOR"},
156 { 0x0FC, "GPUREG_TEXENV5_SCALE" }, 156 {0x0FC, "GPUREG_TEXENV5_SCALE"},
157 { 0x0FD, "GPUREG_TEXENV_BUFFER_COLOR" }, 157 {0x0FD, "GPUREG_TEXENV_BUFFER_COLOR"},
158 158
159 { 0x100, "GPUREG_COLOR_OPERATION" }, 159 {0x100, "GPUREG_COLOR_OPERATION"},
160 { 0x101, "GPUREG_BLEND_FUNC" }, 160 {0x101, "GPUREG_BLEND_FUNC"},
161 { 0x102, "GPUREG_LOGIC_OP" }, 161 {0x102, "GPUREG_LOGIC_OP"},
162 { 0x103, "GPUREG_BLEND_COLOR" }, 162 {0x103, "GPUREG_BLEND_COLOR"},
163 { 0x104, "GPUREG_FRAGOP_ALPHA_TEST" }, 163 {0x104, "GPUREG_FRAGOP_ALPHA_TEST"},
164 { 0x105, "GPUREG_STENCIL_TEST" }, 164 {0x105, "GPUREG_STENCIL_TEST"},
165 { 0x106, "GPUREG_STENCIL_OP" }, 165 {0x106, "GPUREG_STENCIL_OP"},
166 { 0x107, "GPUREG_DEPTH_COLOR_MASK" }, 166 {0x107, "GPUREG_DEPTH_COLOR_MASK"},
167 167
168 { 0x110, "GPUREG_FRAMEBUFFER_INVALIDATE" }, 168 {0x110, "GPUREG_FRAMEBUFFER_INVALIDATE"},
169 { 0x111, "GPUREG_FRAMEBUFFER_FLUSH" }, 169 {0x111, "GPUREG_FRAMEBUFFER_FLUSH"},
170 { 0x112, "GPUREG_COLORBUFFER_READ" }, 170 {0x112, "GPUREG_COLORBUFFER_READ"},
171 { 0x113, "GPUREG_COLORBUFFER_WRITE" }, 171 {0x113, "GPUREG_COLORBUFFER_WRITE"},
172 { 0x114, "GPUREG_DEPTHBUFFER_READ" }, 172 {0x114, "GPUREG_DEPTHBUFFER_READ"},
173 { 0x115, "GPUREG_DEPTHBUFFER_WRITE" }, 173 {0x115, "GPUREG_DEPTHBUFFER_WRITE"},
174 { 0x116, "GPUREG_DEPTHBUFFER_FORMAT" }, 174 {0x116, "GPUREG_DEPTHBUFFER_FORMAT"},
175 { 0x117, "GPUREG_COLORBUFFER_FORMAT" }, 175 {0x117, "GPUREG_COLORBUFFER_FORMAT"},
176 { 0x118, "GPUREG_EARLYDEPTH_TEST2" }, 176 {0x118, "GPUREG_EARLYDEPTH_TEST2"},
177 177
178 { 0x11B, "GPUREG_FRAMEBUFFER_BLOCK32" }, 178 {0x11B, "GPUREG_FRAMEBUFFER_BLOCK32"},
179 { 0x11C, "GPUREG_DEPTHBUFFER_LOC" }, 179 {0x11C, "GPUREG_DEPTHBUFFER_LOC"},
180 { 0x11D, "GPUREG_COLORBUFFER_LOC" }, 180 {0x11D, "GPUREG_COLORBUFFER_LOC"},
181 { 0x11E, "GPUREG_FRAMEBUFFER_DIM" }, 181 {0x11E, "GPUREG_FRAMEBUFFER_DIM"},
182 182
183 { 0x120, "GPUREG_GAS_LIGHT_XY" }, 183 {0x120, "GPUREG_GAS_LIGHT_XY"},
184 { 0x121, "GPUREG_GAS_LIGHT_Z" }, 184 {0x121, "GPUREG_GAS_LIGHT_Z"},
185 { 0x122, "GPUREG_GAS_LIGHT_Z_COLOR" }, 185 {0x122, "GPUREG_GAS_LIGHT_Z_COLOR"},
186 { 0x123, "GPUREG_GAS_LUT_INDEX" }, 186 {0x123, "GPUREG_GAS_LUT_INDEX"},
187 { 0x124, "GPUREG_GAS_LUT_DATA" }, 187 {0x124, "GPUREG_GAS_LUT_DATA"},
188 188
189 { 0x126, "GPUREG_GAS_DELTAZ_DEPTH" }, 189 {0x126, "GPUREG_GAS_DELTAZ_DEPTH"},
190 190
191 { 0x130, "GPUREG_FRAGOP_SHADOW" }, 191 {0x130, "GPUREG_FRAGOP_SHADOW"},
192 192
193 { 0x140, "GPUREG_LIGHT0_SPECULAR0" }, 193 {0x140, "GPUREG_LIGHT0_SPECULAR0"},
194 { 0x141, "GPUREG_LIGHT0_SPECULAR1" }, 194 {0x141, "GPUREG_LIGHT0_SPECULAR1"},
195 { 0x142, "GPUREG_LIGHT0_DIFFUSE" }, 195 {0x142, "GPUREG_LIGHT0_DIFFUSE"},
196 { 0x143, "GPUREG_LIGHT0_AMBIENT" }, 196 {0x143, "GPUREG_LIGHT0_AMBIENT"},
197 { 0x144, "GPUREG_LIGHT0_XY" }, 197 {0x144, "GPUREG_LIGHT0_XY"},
198 { 0x145, "GPUREG_LIGHT0_Z" }, 198 {0x145, "GPUREG_LIGHT0_Z"},
199 { 0x146, "GPUREG_LIGHT0_SPOTDIR_XY" }, 199 {0x146, "GPUREG_LIGHT0_SPOTDIR_XY"},
200 { 0x147, "GPUREG_LIGHT0_SPOTDIR_Z" }, 200 {0x147, "GPUREG_LIGHT0_SPOTDIR_Z"},
201 201
202 { 0x149, "GPUREG_LIGHT0_CONFIG" }, 202 {0x149, "GPUREG_LIGHT0_CONFIG"},
203 { 0x14A, "GPUREG_LIGHT0_ATTENUATION_BIAS" }, 203 {0x14A, "GPUREG_LIGHT0_ATTENUATION_BIAS"},
204 { 0x14B, "GPUREG_LIGHT0_ATTENUATION_SCALE" }, 204 {0x14B, "GPUREG_LIGHT0_ATTENUATION_SCALE"},
205 205
206 { 0x150, "GPUREG_LIGHT1_SPECULAR0" }, 206 {0x150, "GPUREG_LIGHT1_SPECULAR0"},
207 { 0x151, "GPUREG_LIGHT1_SPECULAR1" }, 207 {0x151, "GPUREG_LIGHT1_SPECULAR1"},
208 { 0x152, "GPUREG_LIGHT1_DIFFUSE" }, 208 {0x152, "GPUREG_LIGHT1_DIFFUSE"},
209 { 0x153, "GPUREG_LIGHT1_AMBIENT" }, 209 {0x153, "GPUREG_LIGHT1_AMBIENT"},
210 { 0x154, "GPUREG_LIGHT1_XY" }, 210 {0x154, "GPUREG_LIGHT1_XY"},
211 { 0x155, "GPUREG_LIGHT1_Z" }, 211 {0x155, "GPUREG_LIGHT1_Z"},
212 { 0x156, "GPUREG_LIGHT1_SPOTDIR_XY" }, 212 {0x156, "GPUREG_LIGHT1_SPOTDIR_XY"},
213 { 0x157, "GPUREG_LIGHT1_SPOTDIR_Z" }, 213 {0x157, "GPUREG_LIGHT1_SPOTDIR_Z"},
214 214
215 { 0x159, "GPUREG_LIGHT1_CONFIG" }, 215 {0x159, "GPUREG_LIGHT1_CONFIG"},
216 { 0x15A, "GPUREG_LIGHT1_ATTENUATION_BIAS" }, 216 {0x15A, "GPUREG_LIGHT1_ATTENUATION_BIAS"},
217 { 0x15B, "GPUREG_LIGHT1_ATTENUATION_SCALE" }, 217 {0x15B, "GPUREG_LIGHT1_ATTENUATION_SCALE"},
218 218
219 { 0x160, "GPUREG_LIGHT2_SPECULAR0" }, 219 {0x160, "GPUREG_LIGHT2_SPECULAR0"},
220 { 0x161, "GPUREG_LIGHT2_SPECULAR1" }, 220 {0x161, "GPUREG_LIGHT2_SPECULAR1"},
221 { 0x162, "GPUREG_LIGHT2_DIFFUSE" }, 221 {0x162, "GPUREG_LIGHT2_DIFFUSE"},
222 { 0x163, "GPUREG_LIGHT2_AMBIENT" }, 222 {0x163, "GPUREG_LIGHT2_AMBIENT"},
223 { 0x164, "GPUREG_LIGHT2_XY" }, 223 {0x164, "GPUREG_LIGHT2_XY"},
224 { 0x165, "GPUREG_LIGHT2_Z" }, 224 {0x165, "GPUREG_LIGHT2_Z"},
225 { 0x166, "GPUREG_LIGHT2_SPOTDIR_XY" }, 225 {0x166, "GPUREG_LIGHT2_SPOTDIR_XY"},
226 { 0x167, "GPUREG_LIGHT2_SPOTDIR_Z" }, 226 {0x167, "GPUREG_LIGHT2_SPOTDIR_Z"},
227 227
228 { 0x169, "GPUREG_LIGHT2_CONFIG" }, 228 {0x169, "GPUREG_LIGHT2_CONFIG"},
229 { 0x16A, "GPUREG_LIGHT2_ATTENUATION_BIAS" }, 229 {0x16A, "GPUREG_LIGHT2_ATTENUATION_BIAS"},
230 { 0x16B, "GPUREG_LIGHT2_ATTENUATION_SCALE" }, 230 {0x16B, "GPUREG_LIGHT2_ATTENUATION_SCALE"},
231 231
232 { 0x170, "GPUREG_LIGHT3_SPECULAR0" }, 232 {0x170, "GPUREG_LIGHT3_SPECULAR0"},
233 { 0x171, "GPUREG_LIGHT3_SPECULAR1" }, 233 {0x171, "GPUREG_LIGHT3_SPECULAR1"},
234 { 0x172, "GPUREG_LIGHT3_DIFFUSE" }, 234 {0x172, "GPUREG_LIGHT3_DIFFUSE"},
235 { 0x173, "GPUREG_LIGHT3_AMBIENT" }, 235 {0x173, "GPUREG_LIGHT3_AMBIENT"},
236 { 0x174, "GPUREG_LIGHT3_XY" }, 236 {0x174, "GPUREG_LIGHT3_XY"},
237 { 0x175, "GPUREG_LIGHT3_Z" }, 237 {0x175, "GPUREG_LIGHT3_Z"},
238 { 0x176, "GPUREG_LIGHT3_SPOTDIR_XY" }, 238 {0x176, "GPUREG_LIGHT3_SPOTDIR_XY"},
239 { 0x177, "GPUREG_LIGHT3_SPOTDIR_Z" }, 239 {0x177, "GPUREG_LIGHT3_SPOTDIR_Z"},
240 240
241 { 0x179, "GPUREG_LIGHT3_CONFIG" }, 241 {0x179, "GPUREG_LIGHT3_CONFIG"},
242 { 0x17A, "GPUREG_LIGHT3_ATTENUATION_BIAS" }, 242 {0x17A, "GPUREG_LIGHT3_ATTENUATION_BIAS"},
243 { 0x17B, "GPUREG_LIGHT3_ATTENUATION_SCALE" }, 243 {0x17B, "GPUREG_LIGHT3_ATTENUATION_SCALE"},
244 244
245 { 0x180, "GPUREG_LIGHT4_SPECULAR0" }, 245 {0x180, "GPUREG_LIGHT4_SPECULAR0"},
246 { 0x181, "GPUREG_LIGHT4_SPECULAR1" }, 246 {0x181, "GPUREG_LIGHT4_SPECULAR1"},
247 { 0x182, "GPUREG_LIGHT4_DIFFUSE" }, 247 {0x182, "GPUREG_LIGHT4_DIFFUSE"},
248 { 0x183, "GPUREG_LIGHT4_AMBIENT" }, 248 {0x183, "GPUREG_LIGHT4_AMBIENT"},
249 { 0x184, "GPUREG_LIGHT4_XY" }, 249 {0x184, "GPUREG_LIGHT4_XY"},
250 { 0x185, "GPUREG_LIGHT4_Z" }, 250 {0x185, "GPUREG_LIGHT4_Z"},
251 { 0x186, "GPUREG_LIGHT4_SPOTDIR_XY" }, 251 {0x186, "GPUREG_LIGHT4_SPOTDIR_XY"},
252 { 0x187, "GPUREG_LIGHT4_SPOTDIR_Z" }, 252 {0x187, "GPUREG_LIGHT4_SPOTDIR_Z"},
253 253
254 { 0x189, "GPUREG_LIGHT4_CONFIG" }, 254 {0x189, "GPUREG_LIGHT4_CONFIG"},
255 { 0x18A, "GPUREG_LIGHT4_ATTENUATION_BIAS" }, 255 {0x18A, "GPUREG_LIGHT4_ATTENUATION_BIAS"},
256 { 0x18B, "GPUREG_LIGHT4_ATTENUATION_SCALE" }, 256 {0x18B, "GPUREG_LIGHT4_ATTENUATION_SCALE"},
257 257
258 { 0x190, "GPUREG_LIGHT5_SPECULAR0" }, 258 {0x190, "GPUREG_LIGHT5_SPECULAR0"},
259 { 0x191, "GPUREG_LIGHT5_SPECULAR1" }, 259 {0x191, "GPUREG_LIGHT5_SPECULAR1"},
260 { 0x192, "GPUREG_LIGHT5_DIFFUSE" }, 260 {0x192, "GPUREG_LIGHT5_DIFFUSE"},
261 { 0x193, "GPUREG_LIGHT5_AMBIENT" }, 261 {0x193, "GPUREG_LIGHT5_AMBIENT"},
262 { 0x194, "GPUREG_LIGHT5_XY" }, 262 {0x194, "GPUREG_LIGHT5_XY"},
263 { 0x195, "GPUREG_LIGHT5_Z" }, 263 {0x195, "GPUREG_LIGHT5_Z"},
264 { 0x196, "GPUREG_LIGHT5_SPOTDIR_XY" }, 264 {0x196, "GPUREG_LIGHT5_SPOTDIR_XY"},
265 { 0x197, "GPUREG_LIGHT5_SPOTDIR_Z" }, 265 {0x197, "GPUREG_LIGHT5_SPOTDIR_Z"},
266 266
267 { 0x199, "GPUREG_LIGHT5_CONFIG" }, 267 {0x199, "GPUREG_LIGHT5_CONFIG"},
268 { 0x19A, "GPUREG_LIGHT5_ATTENUATION_BIAS" }, 268 {0x19A, "GPUREG_LIGHT5_ATTENUATION_BIAS"},
269 { 0x19B, "GPUREG_LIGHT5_ATTENUATION_SCALE" }, 269 {0x19B, "GPUREG_LIGHT5_ATTENUATION_SCALE"},
270 270
271 { 0x1A0, "GPUREG_LIGHT6_SPECULAR0" }, 271 {0x1A0, "GPUREG_LIGHT6_SPECULAR0"},
272 { 0x1A1, "GPUREG_LIGHT6_SPECULAR1" }, 272 {0x1A1, "GPUREG_LIGHT6_SPECULAR1"},
273 { 0x1A2, "GPUREG_LIGHT6_DIFFUSE" }, 273 {0x1A2, "GPUREG_LIGHT6_DIFFUSE"},
274 { 0x1A3, "GPUREG_LIGHT6_AMBIENT" }, 274 {0x1A3, "GPUREG_LIGHT6_AMBIENT"},
275 { 0x1A4, "GPUREG_LIGHT6_XY" }, 275 {0x1A4, "GPUREG_LIGHT6_XY"},
276 { 0x1A5, "GPUREG_LIGHT6_Z" }, 276 {0x1A5, "GPUREG_LIGHT6_Z"},
277 { 0x1A6, "GPUREG_LIGHT6_SPOTDIR_XY" }, 277 {0x1A6, "GPUREG_LIGHT6_SPOTDIR_XY"},
278 { 0x1A7, "GPUREG_LIGHT6_SPOTDIR_Z" }, 278 {0x1A7, "GPUREG_LIGHT6_SPOTDIR_Z"},
279 279
280 { 0x1A9, "GPUREG_LIGHT6_CONFIG" }, 280 {0x1A9, "GPUREG_LIGHT6_CONFIG"},
281 { 0x1AA, "GPUREG_LIGHT6_ATTENUATION_BIAS" }, 281 {0x1AA, "GPUREG_LIGHT6_ATTENUATION_BIAS"},
282 { 0x1AB, "GPUREG_LIGHT6_ATTENUATION_SCALE" }, 282 {0x1AB, "GPUREG_LIGHT6_ATTENUATION_SCALE"},
283 283
284 { 0x1B0, "GPUREG_LIGHT7_SPECULAR0" }, 284 {0x1B0, "GPUREG_LIGHT7_SPECULAR0"},
285 { 0x1B1, "GPUREG_LIGHT7_SPECULAR1" }, 285 {0x1B1, "GPUREG_LIGHT7_SPECULAR1"},
286 { 0x1B2, "GPUREG_LIGHT7_DIFFUSE" }, 286 {0x1B2, "GPUREG_LIGHT7_DIFFUSE"},
287 { 0x1B3, "GPUREG_LIGHT7_AMBIENT" }, 287 {0x1B3, "GPUREG_LIGHT7_AMBIENT"},
288 { 0x1B4, "GPUREG_LIGHT7_XY" }, 288 {0x1B4, "GPUREG_LIGHT7_XY"},
289 { 0x1B5, "GPUREG_LIGHT7_Z" }, 289 {0x1B5, "GPUREG_LIGHT7_Z"},
290 { 0x1B6, "GPUREG_LIGHT7_SPOTDIR_XY" }, 290 {0x1B6, "GPUREG_LIGHT7_SPOTDIR_XY"},
291 { 0x1B7, "GPUREG_LIGHT7_SPOTDIR_Z" }, 291 {0x1B7, "GPUREG_LIGHT7_SPOTDIR_Z"},
292 292
293 { 0x1B9, "GPUREG_LIGHT7_CONFIG" }, 293 {0x1B9, "GPUREG_LIGHT7_CONFIG"},
294 { 0x1BA, "GPUREG_LIGHT7_ATTENUATION_BIAS" }, 294 {0x1BA, "GPUREG_LIGHT7_ATTENUATION_BIAS"},
295 { 0x1BB, "GPUREG_LIGHT7_ATTENUATION_SCALE" }, 295 {0x1BB, "GPUREG_LIGHT7_ATTENUATION_SCALE"},
296 296
297 { 0x1C0, "GPUREG_LIGHTING_AMBIENT" }, 297 {0x1C0, "GPUREG_LIGHTING_AMBIENT"},
298 298
299 { 0x1C2, "GPUREG_LIGHTING_NUM_LIGHTS" }, 299 {0x1C2, "GPUREG_LIGHTING_NUM_LIGHTS"},
300 { 0x1C3, "GPUREG_LIGHTING_CONFIG0" }, 300 {0x1C3, "GPUREG_LIGHTING_CONFIG0"},
301 { 0x1C4, "GPUREG_LIGHTING_CONFIG1" }, 301 {0x1C4, "GPUREG_LIGHTING_CONFIG1"},
302 { 0x1C5, "GPUREG_LIGHTING_LUT_INDEX" }, 302 {0x1C5, "GPUREG_LIGHTING_LUT_INDEX"},
303 { 0x1C6, "GPUREG_LIGHTING_ENABLE1" }, 303 {0x1C6, "GPUREG_LIGHTING_ENABLE1"},
304 304
305 { 0x1C8, "GPUREG_LIGHTING_LUT_DATA0" }, 305 {0x1C8, "GPUREG_LIGHTING_LUT_DATA0"},
306 { 0x1C9, "GPUREG_LIGHTING_LUT_DATA1" }, 306 {0x1C9, "GPUREG_LIGHTING_LUT_DATA1"},
307 { 0x1CA, "GPUREG_LIGHTING_LUT_DATA2" }, 307 {0x1CA, "GPUREG_LIGHTING_LUT_DATA2"},
308 { 0x1CB, "GPUREG_LIGHTING_LUT_DATA3" }, 308 {0x1CB, "GPUREG_LIGHTING_LUT_DATA3"},
309 { 0x1CC, "GPUREG_LIGHTING_LUT_DATA4" }, 309 {0x1CC, "GPUREG_LIGHTING_LUT_DATA4"},
310 { 0x1CD, "GPUREG_LIGHTING_LUT_DATA5" }, 310 {0x1CD, "GPUREG_LIGHTING_LUT_DATA5"},
311 { 0x1CE, "GPUREG_LIGHTING_LUT_DATA6" }, 311 {0x1CE, "GPUREG_LIGHTING_LUT_DATA6"},
312 { 0x1CF, "GPUREG_LIGHTING_LUT_DATA7" }, 312 {0x1CF, "GPUREG_LIGHTING_LUT_DATA7"},
313 { 0x1D0, "GPUREG_LIGHTING_LUTINPUT_ABS" }, 313 {0x1D0, "GPUREG_LIGHTING_LUTINPUT_ABS"},
314 { 0x1D1, "GPUREG_LIGHTING_LUTINPUT_SELECT" }, 314 {0x1D1, "GPUREG_LIGHTING_LUTINPUT_SELECT"},
315 { 0x1D2, "GPUREG_LIGHTING_LUTINPUT_SCALE" }, 315 {0x1D2, "GPUREG_LIGHTING_LUTINPUT_SCALE"},
316 316
317 { 0x1D9, "GPUREG_LIGHTING_LIGHT_PERMUTATION" }, 317 {0x1D9, "GPUREG_LIGHTING_LIGHT_PERMUTATION"},
318 318
319 { 0x200, "GPUREG_ATTRIBBUFFERS_LOC" }, 319 {0x200, "GPUREG_ATTRIBBUFFERS_LOC"},
320 { 0x201, "GPUREG_ATTRIBBUFFERS_FORMAT_LOW" }, 320 {0x201, "GPUREG_ATTRIBBUFFERS_FORMAT_LOW"},
321 { 0x202, "GPUREG_ATTRIBBUFFERS_FORMAT_HIGH" }, 321 {0x202, "GPUREG_ATTRIBBUFFERS_FORMAT_HIGH"},
322 { 0x203, "GPUREG_ATTRIBBUFFER0_OFFSET" }, 322 {0x203, "GPUREG_ATTRIBBUFFER0_OFFSET"},
323 { 0x204, "GPUREG_ATTRIBBUFFER0_CONFIG1" }, 323 {0x204, "GPUREG_ATTRIBBUFFER0_CONFIG1"},
324 { 0x205, "GPUREG_ATTRIBBUFFER0_CONFIG2" }, 324 {0x205, "GPUREG_ATTRIBBUFFER0_CONFIG2"},
325 { 0x206, "GPUREG_ATTRIBBUFFER1_OFFSET" }, 325 {0x206, "GPUREG_ATTRIBBUFFER1_OFFSET"},
326 { 0x207, "GPUREG_ATTRIBBUFFER1_CONFIG1" }, 326 {0x207, "GPUREG_ATTRIBBUFFER1_CONFIG1"},
327 { 0x208, "GPUREG_ATTRIBBUFFER1_CONFIG2" }, 327 {0x208, "GPUREG_ATTRIBBUFFER1_CONFIG2"},
328 { 0x209, "GPUREG_ATTRIBBUFFER2_OFFSET" }, 328 {0x209, "GPUREG_ATTRIBBUFFER2_OFFSET"},
329 { 0x20A, "GPUREG_ATTRIBBUFFER2_CONFIG1" }, 329 {0x20A, "GPUREG_ATTRIBBUFFER2_CONFIG1"},
330 { 0x20B, "GPUREG_ATTRIBBUFFER2_CONFIG2" }, 330 {0x20B, "GPUREG_ATTRIBBUFFER2_CONFIG2"},
331 { 0x20C, "GPUREG_ATTRIBBUFFER3_OFFSET" }, 331 {0x20C, "GPUREG_ATTRIBBUFFER3_OFFSET"},
332 { 0x20D, "GPUREG_ATTRIBBUFFER3_CONFIG1" }, 332 {0x20D, "GPUREG_ATTRIBBUFFER3_CONFIG1"},
333 { 0x20E, "GPUREG_ATTRIBBUFFER3_CONFIG2" }, 333 {0x20E, "GPUREG_ATTRIBBUFFER3_CONFIG2"},
334 { 0x20F, "GPUREG_ATTRIBBUFFER4_OFFSET" }, 334 {0x20F, "GPUREG_ATTRIBBUFFER4_OFFSET"},
335 { 0x210, "GPUREG_ATTRIBBUFFER4_CONFIG1" }, 335 {0x210, "GPUREG_ATTRIBBUFFER4_CONFIG1"},
336 { 0x211, "GPUREG_ATTRIBBUFFER4_CONFIG2" }, 336 {0x211, "GPUREG_ATTRIBBUFFER4_CONFIG2"},
337 { 0x212, "GPUREG_ATTRIBBUFFER5_OFFSET" }, 337 {0x212, "GPUREG_ATTRIBBUFFER5_OFFSET"},
338 { 0x213, "GPUREG_ATTRIBBUFFER5_CONFIG1" }, 338 {0x213, "GPUREG_ATTRIBBUFFER5_CONFIG1"},
339 { 0x214, "GPUREG_ATTRIBBUFFER5_CONFIG2" }, 339 {0x214, "GPUREG_ATTRIBBUFFER5_CONFIG2"},
340 { 0x215, "GPUREG_ATTRIBBUFFER6_OFFSET" }, 340 {0x215, "GPUREG_ATTRIBBUFFER6_OFFSET"},
341 { 0x216, "GPUREG_ATTRIBBUFFER6_CONFIG1" }, 341 {0x216, "GPUREG_ATTRIBBUFFER6_CONFIG1"},
342 { 0x217, "GPUREG_ATTRIBBUFFER6_CONFIG2" }, 342 {0x217, "GPUREG_ATTRIBBUFFER6_CONFIG2"},
343 { 0x218, "GPUREG_ATTRIBBUFFER7_OFFSET" }, 343 {0x218, "GPUREG_ATTRIBBUFFER7_OFFSET"},
344 { 0x219, "GPUREG_ATTRIBBUFFER7_CONFIG1" }, 344 {0x219, "GPUREG_ATTRIBBUFFER7_CONFIG1"},
345 { 0x21A, "GPUREG_ATTRIBBUFFER7_CONFIG2" }, 345 {0x21A, "GPUREG_ATTRIBBUFFER7_CONFIG2"},
346 { 0x21B, "GPUREG_ATTRIBBUFFER8_OFFSET" }, 346 {0x21B, "GPUREG_ATTRIBBUFFER8_OFFSET"},
347 { 0x21C, "GPUREG_ATTRIBBUFFER8_CONFIG1" }, 347 {0x21C, "GPUREG_ATTRIBBUFFER8_CONFIG1"},
348 { 0x21D, "GPUREG_ATTRIBBUFFER8_CONFIG2" }, 348 {0x21D, "GPUREG_ATTRIBBUFFER8_CONFIG2"},
349 { 0x21E, "GPUREG_ATTRIBBUFFER9_OFFSET" }, 349 {0x21E, "GPUREG_ATTRIBBUFFER9_OFFSET"},
350 { 0x21F, "GPUREG_ATTRIBBUFFER9_CONFIG1" }, 350 {0x21F, "GPUREG_ATTRIBBUFFER9_CONFIG1"},
351 { 0x220, "GPUREG_ATTRIBBUFFER9_CONFIG2" }, 351 {0x220, "GPUREG_ATTRIBBUFFER9_CONFIG2"},
352 { 0x221, "GPUREG_ATTRIBBUFFER10_OFFSET" }, 352 {0x221, "GPUREG_ATTRIBBUFFER10_OFFSET"},
353 { 0x222, "GPUREG_ATTRIBBUFFER10_CONFIG1" }, 353 {0x222, "GPUREG_ATTRIBBUFFER10_CONFIG1"},
354 { 0x223, "GPUREG_ATTRIBBUFFER10_CONFIG2" }, 354 {0x223, "GPUREG_ATTRIBBUFFER10_CONFIG2"},
355 { 0x224, "GPUREG_ATTRIBBUFFER11_OFFSET" }, 355 {0x224, "GPUREG_ATTRIBBUFFER11_OFFSET"},
356 { 0x225, "GPUREG_ATTRIBBUFFER11_CONFIG1" }, 356 {0x225, "GPUREG_ATTRIBBUFFER11_CONFIG1"},
357 { 0x226, "GPUREG_ATTRIBBUFFER11_CONFIG2" }, 357 {0x226, "GPUREG_ATTRIBBUFFER11_CONFIG2"},
358 { 0x227, "GPUREG_INDEXBUFFER_CONFIG" }, 358 {0x227, "GPUREG_INDEXBUFFER_CONFIG"},
359 { 0x228, "GPUREG_NUMVERTICES" }, 359 {0x228, "GPUREG_NUMVERTICES"},
360 { 0x229, "GPUREG_GEOSTAGE_CONFIG" }, 360 {0x229, "GPUREG_GEOSTAGE_CONFIG"},
361 { 0x22A, "GPUREG_VERTEX_OFFSET" }, 361 {0x22A, "GPUREG_VERTEX_OFFSET"},
362 362
363 { 0x22D, "GPUREG_POST_VERTEX_CACHE_NUM" }, 363 {0x22D, "GPUREG_POST_VERTEX_CACHE_NUM"},
364 { 0x22E, "GPUREG_DRAWARRAYS" }, 364 {0x22E, "GPUREG_DRAWARRAYS"},
365 { 0x22F, "GPUREG_DRAWELEMENTS" }, 365 {0x22F, "GPUREG_DRAWELEMENTS"},
366 366
367 { 0x231, "GPUREG_VTX_FUNC" }, 367 {0x231, "GPUREG_VTX_FUNC"},
368 { 0x232, "GPUREG_FIXEDATTRIB_INDEX" }, 368 {0x232, "GPUREG_FIXEDATTRIB_INDEX"},
369 { 0x233, "GPUREG_FIXEDATTRIB_DATA0" }, 369 {0x233, "GPUREG_FIXEDATTRIB_DATA0"},
370 { 0x234, "GPUREG_FIXEDATTRIB_DATA1" }, 370 {0x234, "GPUREG_FIXEDATTRIB_DATA1"},
371 { 0x235, "GPUREG_FIXEDATTRIB_DATA2" }, 371 {0x235, "GPUREG_FIXEDATTRIB_DATA2"},
372 372
373 { 0x238, "GPUREG_CMDBUF_SIZE0" }, 373 {0x238, "GPUREG_CMDBUF_SIZE0"},
374 { 0x239, "GPUREG_CMDBUF_SIZE1" }, 374 {0x239, "GPUREG_CMDBUF_SIZE1"},
375 { 0x23A, "GPUREG_CMDBUF_ADDR0" }, 375 {0x23A, "GPUREG_CMDBUF_ADDR0"},
376 { 0x23B, "GPUREG_CMDBUF_ADDR1" }, 376 {0x23B, "GPUREG_CMDBUF_ADDR1"},
377 { 0x23C, "GPUREG_CMDBUF_JUMP0" }, 377 {0x23C, "GPUREG_CMDBUF_JUMP0"},
378 { 0x23D, "GPUREG_CMDBUF_JUMP1" }, 378 {0x23D, "GPUREG_CMDBUF_JUMP1"},
379 379
380 { 0x242, "GPUREG_VSH_NUM_ATTR" }, 380 {0x242, "GPUREG_VSH_NUM_ATTR"},
381 381
382 { 0x244, "GPUREG_VSH_COM_MODE" }, 382 {0x244, "GPUREG_VSH_COM_MODE"},
383 { 0x245, "GPUREG_START_DRAW_FUNC0" }, 383 {0x245, "GPUREG_START_DRAW_FUNC0"},
384 384
385 { 0x24A, "GPUREG_VSH_OUTMAP_TOTAL1" }, 385 {0x24A, "GPUREG_VSH_OUTMAP_TOTAL1"},
386 386
387 { 0x251, "GPUREG_VSH_OUTMAP_TOTAL2" }, 387 {0x251, "GPUREG_VSH_OUTMAP_TOTAL2"},
388 { 0x252, "GPUREG_GSH_MISC0" }, 388 {0x252, "GPUREG_GSH_MISC0"},
389 { 0x253, "GPUREG_GEOSTAGE_CONFIG2" }, 389 {0x253, "GPUREG_GEOSTAGE_CONFIG2"},
390 { 0x254, "GPUREG_GSH_MISC1" }, 390 {0x254, "GPUREG_GSH_MISC1"},
391 391
392 { 0x25E, "GPUREG_PRIMITIVE_CONFIG" }, 392 {0x25E, "GPUREG_PRIMITIVE_CONFIG"},
393 { 0x25F, "GPUREG_RESTART_PRIMITIVE" }, 393 {0x25F, "GPUREG_RESTART_PRIMITIVE"},
394 394
395 { 0x280, "GPUREG_GSH_BOOLUNIFORM" }, 395 {0x280, "GPUREG_GSH_BOOLUNIFORM"},
396 { 0x281, "GPUREG_GSH_INTUNIFORM_I0" }, 396 {0x281, "GPUREG_GSH_INTUNIFORM_I0"},
397 { 0x282, "GPUREG_GSH_INTUNIFORM_I1" }, 397 {0x282, "GPUREG_GSH_INTUNIFORM_I1"},
398 { 0x283, "GPUREG_GSH_INTUNIFORM_I2" }, 398 {0x283, "GPUREG_GSH_INTUNIFORM_I2"},
399 { 0x284, "GPUREG_GSH_INTUNIFORM_I3" }, 399 {0x284, "GPUREG_GSH_INTUNIFORM_I3"},
400 400
401 { 0x289, "GPUREG_GSH_INPUTBUFFER_CONFIG" }, 401 {0x289, "GPUREG_GSH_INPUTBUFFER_CONFIG"},
402 { 0x28A, "GPUREG_GSH_ENTRYPOINT" }, 402 {0x28A, "GPUREG_GSH_ENTRYPOINT"},
403 { 0x28B, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW" }, 403 {0x28B, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW"},
404 { 0x28C, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_HIGH" }, 404 {0x28C, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_HIGH"},
405 { 0x28D, "GPUREG_GSH_OUTMAP_MASK" }, 405 {0x28D, "GPUREG_GSH_OUTMAP_MASK"},
406 406
407 { 0x28F, "GPUREG_GSH_CODETRANSFER_END" }, 407 {0x28F, "GPUREG_GSH_CODETRANSFER_END"},
408 { 0x290, "GPUREG_GSH_FLOATUNIFORM_INDEX" }, 408 {0x290, "GPUREG_GSH_FLOATUNIFORM_INDEX"},
409 { 0x291, "GPUREG_GSH_FLOATUNIFORM_DATA0" }, 409 {0x291, "GPUREG_GSH_FLOATUNIFORM_DATA0"},
410 { 0x292, "GPUREG_GSH_FLOATUNIFORM_DATA1" }, 410 {0x292, "GPUREG_GSH_FLOATUNIFORM_DATA1"},
411 { 0x293, "GPUREG_GSH_FLOATUNIFORM_DATA2" }, 411 {0x293, "GPUREG_GSH_FLOATUNIFORM_DATA2"},
412 { 0x294, "GPUREG_GSH_FLOATUNIFORM_DATA3" }, 412 {0x294, "GPUREG_GSH_FLOATUNIFORM_DATA3"},
413 { 0x295, "GPUREG_GSH_FLOATUNIFORM_DATA4" }, 413 {0x295, "GPUREG_GSH_FLOATUNIFORM_DATA4"},
414 { 0x296, "GPUREG_GSH_FLOATUNIFORM_DATA5" }, 414 {0x296, "GPUREG_GSH_FLOATUNIFORM_DATA5"},
415 { 0x297, "GPUREG_GSH_FLOATUNIFORM_DATA6" }, 415 {0x297, "GPUREG_GSH_FLOATUNIFORM_DATA6"},
416 { 0x298, "GPUREG_GSH_FLOATUNIFORM_DATA7" }, 416 {0x298, "GPUREG_GSH_FLOATUNIFORM_DATA7"},
417 417
418 { 0x29B, "GPUREG_GSH_CODETRANSFER_INDEX" }, 418 {0x29B, "GPUREG_GSH_CODETRANSFER_INDEX"},
419 { 0x29C, "GPUREG_GSH_CODETRANSFER_DATA0" }, 419 {0x29C, "GPUREG_GSH_CODETRANSFER_DATA0"},
420 { 0x29D, "GPUREG_GSH_CODETRANSFER_DATA1" }, 420 {0x29D, "GPUREG_GSH_CODETRANSFER_DATA1"},
421 { 0x29E, "GPUREG_GSH_CODETRANSFER_DATA2" }, 421 {0x29E, "GPUREG_GSH_CODETRANSFER_DATA2"},
422 { 0x29F, "GPUREG_GSH_CODETRANSFER_DATA3" }, 422 {0x29F, "GPUREG_GSH_CODETRANSFER_DATA3"},
423 { 0x2A0, "GPUREG_GSH_CODETRANSFER_DATA4" }, 423 {0x2A0, "GPUREG_GSH_CODETRANSFER_DATA4"},
424 { 0x2A1, "GPUREG_GSH_CODETRANSFER_DATA5" }, 424 {0x2A1, "GPUREG_GSH_CODETRANSFER_DATA5"},
425 { 0x2A2, "GPUREG_GSH_CODETRANSFER_DATA6" }, 425 {0x2A2, "GPUREG_GSH_CODETRANSFER_DATA6"},
426 { 0x2A3, "GPUREG_GSH_CODETRANSFER_DATA7" }, 426 {0x2A3, "GPUREG_GSH_CODETRANSFER_DATA7"},
427 427
428 { 0x2A5, "GPUREG_GSH_OPDESCS_INDEX" }, 428 {0x2A5, "GPUREG_GSH_OPDESCS_INDEX"},
429 { 0x2A6, "GPUREG_GSH_OPDESCS_DATA0" }, 429 {0x2A6, "GPUREG_GSH_OPDESCS_DATA0"},
430 { 0x2A7, "GPUREG_GSH_OPDESCS_DATA1" }, 430 {0x2A7, "GPUREG_GSH_OPDESCS_DATA1"},
431 { 0x2A8, "GPUREG_GSH_OPDESCS_DATA2" }, 431 {0x2A8, "GPUREG_GSH_OPDESCS_DATA2"},
432 { 0x2A9, "GPUREG_GSH_OPDESCS_DATA3" }, 432 {0x2A9, "GPUREG_GSH_OPDESCS_DATA3"},
433 { 0x2AA, "GPUREG_GSH_OPDESCS_DATA4" }, 433 {0x2AA, "GPUREG_GSH_OPDESCS_DATA4"},
434 { 0x2AB, "GPUREG_GSH_OPDESCS_DATA5" }, 434 {0x2AB, "GPUREG_GSH_OPDESCS_DATA5"},
435 { 0x2AC, "GPUREG_GSH_OPDESCS_DATA6" }, 435 {0x2AC, "GPUREG_GSH_OPDESCS_DATA6"},
436 { 0x2AD, "GPUREG_GSH_OPDESCS_DATA7" }, 436 {0x2AD, "GPUREG_GSH_OPDESCS_DATA7"},
437 437
438 { 0x2B0, "GPUREG_VSH_BOOLUNIFORM" }, 438 {0x2B0, "GPUREG_VSH_BOOLUNIFORM"},
439 { 0x2B1, "GPUREG_VSH_INTUNIFORM_I0" }, 439 {0x2B1, "GPUREG_VSH_INTUNIFORM_I0"},
440 { 0x2B2, "GPUREG_VSH_INTUNIFORM_I1" }, 440 {0x2B2, "GPUREG_VSH_INTUNIFORM_I1"},
441 { 0x2B3, "GPUREG_VSH_INTUNIFORM_I2" }, 441 {0x2B3, "GPUREG_VSH_INTUNIFORM_I2"},
442 { 0x2B4, "GPUREG_VSH_INTUNIFORM_I3" }, 442 {0x2B4, "GPUREG_VSH_INTUNIFORM_I3"},
443 443
444 { 0x2B9, "GPUREG_VSH_INPUTBUFFER_CONFIG" }, 444 {0x2B9, "GPUREG_VSH_INPUTBUFFER_CONFIG"},
445 { 0x2BA, "GPUREG_VSH_ENTRYPOINT" }, 445 {0x2BA, "GPUREG_VSH_ENTRYPOINT"},
446 { 0x2BB, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW" }, 446 {0x2BB, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW"},
447 { 0x2BC, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_HIGH" }, 447 {0x2BC, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_HIGH"},
448 { 0x2BD, "GPUREG_VSH_OUTMAP_MASK" }, 448 {0x2BD, "GPUREG_VSH_OUTMAP_MASK"},
449 449
450 { 0x2BF, "GPUREG_VSH_CODETRANSFER_END" }, 450 {0x2BF, "GPUREG_VSH_CODETRANSFER_END"},
451 { 0x2C0, "GPUREG_VSH_FLOATUNIFORM_INDEX" }, 451 {0x2C0, "GPUREG_VSH_FLOATUNIFORM_INDEX"},
452 { 0x2C1, "GPUREG_VSH_FLOATUNIFORM_DATA0" }, 452 {0x2C1, "GPUREG_VSH_FLOATUNIFORM_DATA0"},
453 { 0x2C2, "GPUREG_VSH_FLOATUNIFORM_DATA1" }, 453 {0x2C2, "GPUREG_VSH_FLOATUNIFORM_DATA1"},
454 { 0x2C3, "GPUREG_VSH_FLOATUNIFORM_DATA2" }, 454 {0x2C3, "GPUREG_VSH_FLOATUNIFORM_DATA2"},
455 { 0x2C4, "GPUREG_VSH_FLOATUNIFORM_DATA3" }, 455 {0x2C4, "GPUREG_VSH_FLOATUNIFORM_DATA3"},
456 { 0x2C5, "GPUREG_VSH_FLOATUNIFORM_DATA4" }, 456 {0x2C5, "GPUREG_VSH_FLOATUNIFORM_DATA4"},
457 { 0x2C6, "GPUREG_VSH_FLOATUNIFORM_DATA5" }, 457 {0x2C6, "GPUREG_VSH_FLOATUNIFORM_DATA5"},
458 { 0x2C7, "GPUREG_VSH_FLOATUNIFORM_DATA6" }, 458 {0x2C7, "GPUREG_VSH_FLOATUNIFORM_DATA6"},
459 { 0x2C8, "GPUREG_VSH_FLOATUNIFORM_DATA7" }, 459 {0x2C8, "GPUREG_VSH_FLOATUNIFORM_DATA7"},
460 460
461 { 0x2CB, "GPUREG_VSH_CODETRANSFER_INDEX" }, 461 {0x2CB, "GPUREG_VSH_CODETRANSFER_INDEX"},
462 { 0x2CC, "GPUREG_VSH_CODETRANSFER_DATA0" }, 462 {0x2CC, "GPUREG_VSH_CODETRANSFER_DATA0"},
463 { 0x2CD, "GPUREG_VSH_CODETRANSFER_DATA1" }, 463 {0x2CD, "GPUREG_VSH_CODETRANSFER_DATA1"},
464 { 0x2CE, "GPUREG_VSH_CODETRANSFER_DATA2" }, 464 {0x2CE, "GPUREG_VSH_CODETRANSFER_DATA2"},
465 { 0x2CF, "GPUREG_VSH_CODETRANSFER_DATA3" }, 465 {0x2CF, "GPUREG_VSH_CODETRANSFER_DATA3"},
466 { 0x2D0, "GPUREG_VSH_CODETRANSFER_DATA4" }, 466 {0x2D0, "GPUREG_VSH_CODETRANSFER_DATA4"},
467 { 0x2D1, "GPUREG_VSH_CODETRANSFER_DATA5" }, 467 {0x2D1, "GPUREG_VSH_CODETRANSFER_DATA5"},
468 { 0x2D2, "GPUREG_VSH_CODETRANSFER_DATA6" }, 468 {0x2D2, "GPUREG_VSH_CODETRANSFER_DATA6"},
469 { 0x2D3, "GPUREG_VSH_CODETRANSFER_DATA7" }, 469 {0x2D3, "GPUREG_VSH_CODETRANSFER_DATA7"},
470 470
471 { 0x2D5, "GPUREG_VSH_OPDESCS_INDEX" }, 471 {0x2D5, "GPUREG_VSH_OPDESCS_INDEX"},
472 { 0x2D6, "GPUREG_VSH_OPDESCS_DATA0" }, 472 {0x2D6, "GPUREG_VSH_OPDESCS_DATA0"},
473 { 0x2D7, "GPUREG_VSH_OPDESCS_DATA1" }, 473 {0x2D7, "GPUREG_VSH_OPDESCS_DATA1"},
474 { 0x2D8, "GPUREG_VSH_OPDESCS_DATA2" }, 474 {0x2D8, "GPUREG_VSH_OPDESCS_DATA2"},
475 { 0x2D9, "GPUREG_VSH_OPDESCS_DATA3" }, 475 {0x2D9, "GPUREG_VSH_OPDESCS_DATA3"},
476 { 0x2DA, "GPUREG_VSH_OPDESCS_DATA4" }, 476 {0x2DA, "GPUREG_VSH_OPDESCS_DATA4"},
477 { 0x2DB, "GPUREG_VSH_OPDESCS_DATA5" }, 477 {0x2DB, "GPUREG_VSH_OPDESCS_DATA5"},
478 { 0x2DC, "GPUREG_VSH_OPDESCS_DATA6" }, 478 {0x2DC, "GPUREG_VSH_OPDESCS_DATA6"},
479 { 0x2DD, "GPUREG_VSH_OPDESCS_DATA7" }, 479 {0x2DD, "GPUREG_VSH_OPDESCS_DATA7"},
480}; 480};
481 481
482std::string Regs::GetCommandName(int index) { 482std::string Regs::GetCommandName(int index) {
@@ -516,5 +516,4 @@ void State::Reset() {
516 Zero(immediate); 516 Zero(immediate);
517 primitive_assembler.Reconfigure(Regs::TriangleTopology::List); 517 primitive_assembler.Reconfigure(Regs::TriangleTopology::List);
518} 518}
519
520} 519}
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 7099c31a0..1d1a686e0 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -16,15 +16,16 @@
16#include "common/bit_field.h" 16#include "common/bit_field.h"
17#include "common/common_funcs.h" 17#include "common/common_funcs.h"
18#include "common/common_types.h" 18#include "common/common_types.h"
19#include "common/vector_math.h"
20#include "common/logging/log.h" 19#include "common/logging/log.h"
20#include "common/vector_math.h"
21 21
22namespace Pica { 22namespace Pica {
23 23
24// Returns index corresponding to the Regs member labeled by field_name 24// Returns index corresponding to the Regs member labeled by field_name
25// TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions 25// TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions
26// when used with array elements (e.g. PICA_REG_INDEX(vs_uniform_setup.set_value[1])). 26// when used with array elements (e.g. PICA_REG_INDEX(vs_uniform_setup.set_value[1])).
27// For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members 27// For details cf.
28// https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
28// Hopefully, this will be fixed sometime in the future. 29// Hopefully, this will be fixed sometime in the future.
29// For lack of better alternatives, we currently hardcode the offsets when constant 30// For lack of better alternatives, we currently hardcode the offsets when constant
30// expressions are needed via PICA_REG_INDEX_WORKAROUND (on sane compilers, static_asserts 31// expressions are needed via PICA_REG_INDEX_WORKAROUND (on sane compilers, static_asserts
@@ -37,8 +38,9 @@ namespace Pica {
37// really is this annoying. This macro just forwards its first argument to PICA_REG_INDEX 38// really is this annoying. This macro just forwards its first argument to PICA_REG_INDEX
38// and then performs a (no-op) cast to size_t iff the second argument matches the expected 39// and then performs a (no-op) cast to size_t iff the second argument matches the expected
39// field offset. Otherwise, the compiler will fail to compile this code. 40// field offset. Otherwise, the compiler will fail to compile this code.
40#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ 41#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \
41 ((typename std::enable_if<backup_workaround_index == PICA_REG_INDEX(field_name), size_t>::type)PICA_REG_INDEX(field_name)) 42 ((typename std::enable_if<backup_workaround_index == PICA_REG_INDEX(field_name), \
43 size_t>::type)PICA_REG_INDEX(field_name))
42#endif // _MSC_VER 44#endif // _MSC_VER
43 45
44struct Regs { 46struct Regs {
@@ -51,8 +53,8 @@ struct Regs {
51 53
52 enum class CullMode : u32 { 54 enum class CullMode : u32 {
53 // Select which polygons are considered to be "frontfacing". 55 // Select which polygons are considered to be "frontfacing".
54 KeepAll = 0, 56 KeepAll = 0,
55 KeepClockWise = 1, 57 KeepClockWise = 1,
56 KeepCounterClockWise = 2, 58 KeepCounterClockWise = 2,
57 // TODO: What does the third value imply? 59 // TODO: What does the third value imply?
58 }; 60 };
@@ -69,48 +71,47 @@ struct Regs {
69 71
70 INSERT_PADDING_WORDS(0x9); 72 INSERT_PADDING_WORDS(0x9);
71 73
72 BitField<0, 24, u32> viewport_depth_range; // float24 74 BitField<0, 24, u32> viewport_depth_range; // float24
73 BitField<0, 24, u32> viewport_depth_near_plane; // float24 75 BitField<0, 24, u32> viewport_depth_near_plane; // float24
74 76
75 BitField<0, 3, u32> vs_output_total; 77 BitField<0, 3, u32> vs_output_total;
76 78
77 union VSOutputAttributes { 79 union VSOutputAttributes {
78 // Maps components of output vertex attributes to semantics 80 // Maps components of output vertex attributes to semantics
79 enum Semantic : u32 81 enum Semantic : u32 {
80 { 82 POSITION_X = 0,
81 POSITION_X = 0, 83 POSITION_Y = 1,
82 POSITION_Y = 1, 84 POSITION_Z = 2,
83 POSITION_Z = 2, 85 POSITION_W = 3,
84 POSITION_W = 3, 86
85 87 QUATERNION_X = 4,
86 QUATERNION_X = 4, 88 QUATERNION_Y = 5,
87 QUATERNION_Y = 5, 89 QUATERNION_Z = 6,
88 QUATERNION_Z = 6, 90 QUATERNION_W = 7,
89 QUATERNION_W = 7, 91
90 92 COLOR_R = 8,
91 COLOR_R = 8, 93 COLOR_G = 9,
92 COLOR_G = 9, 94 COLOR_B = 10,
93 COLOR_B = 10, 95 COLOR_A = 11,
94 COLOR_A = 11, 96
95 97 TEXCOORD0_U = 12,
96 TEXCOORD0_U = 12, 98 TEXCOORD0_V = 13,
97 TEXCOORD0_V = 13, 99 TEXCOORD1_U = 14,
98 TEXCOORD1_U = 14, 100 TEXCOORD1_V = 15,
99 TEXCOORD1_V = 15,
100 101
101 // TODO: Not verified 102 // TODO: Not verified
102 VIEW_X = 18, 103 VIEW_X = 18,
103 VIEW_Y = 19, 104 VIEW_Y = 19,
104 VIEW_Z = 20, 105 VIEW_Z = 20,
105 106
106 TEXCOORD2_U = 22, 107 TEXCOORD2_U = 22,
107 TEXCOORD2_V = 23, 108 TEXCOORD2_V = 23,
108 109
109 INVALID = 31, 110 INVALID = 31,
110 }; 111 };
111 112
112 BitField< 0, 5, Semantic> map_x; 113 BitField<0, 5, Semantic> map_x;
113 BitField< 8, 5, Semantic> map_y; 114 BitField<8, 5, Semantic> map_y;
114 BitField<16, 5, Semantic> map_z; 115 BitField<16, 5, Semantic> map_z;
115 BitField<24, 5, Semantic> map_w; 116 BitField<24, 5, Semantic> map_w;
116 } vs_output_attributes[7]; 117 } vs_output_attributes[7];
@@ -128,77 +129,75 @@ struct Regs {
128 BitField<0, 2, ScissorMode> mode; 129 BitField<0, 2, ScissorMode> mode;
129 130
130 union { 131 union {
131 BitField< 0, 16, u32> x1; 132 BitField<0, 16, u32> x1;
132 BitField<16, 16, u32> y1; 133 BitField<16, 16, u32> y1;
133 }; 134 };
134 135
135 union { 136 union {
136 BitField< 0, 16, u32> x2; 137 BitField<0, 16, u32> x2;
137 BitField<16, 16, u32> y2; 138 BitField<16, 16, u32> y2;
138 }; 139 };
139 } scissor_test; 140 } scissor_test;
140 141
141 union { 142 union {
142 BitField< 0, 10, s32> x; 143 BitField<0, 10, s32> x;
143 BitField<16, 10, s32> y; 144 BitField<16, 10, s32> y;
144 } viewport_corner; 145 } viewport_corner;
145 146
146 INSERT_PADDING_WORDS(0x1); 147 INSERT_PADDING_WORDS(0x1);
147 148
148 //TODO: early depth 149 // TODO: early depth
149 INSERT_PADDING_WORDS(0x1); 150 INSERT_PADDING_WORDS(0x1);
150 151
151 INSERT_PADDING_WORDS(0x2); 152 INSERT_PADDING_WORDS(0x2);
152 153
153 enum DepthBuffering : u32 { 154 enum DepthBuffering : u32 {
154 WBuffering = 0, 155 WBuffering = 0,
155 ZBuffering = 1, 156 ZBuffering = 1,
156 }; 157 };
157 BitField< 0, 1, DepthBuffering> depthmap_enable; 158 BitField<0, 1, DepthBuffering> depthmap_enable;
158 159
159 INSERT_PADDING_WORDS(0x12); 160 INSERT_PADDING_WORDS(0x12);
160 161
161 struct TextureConfig { 162 struct TextureConfig {
162 enum TextureType : u32 { 163 enum TextureType : u32 {
163 Texture2D = 0, 164 Texture2D = 0,
164 TextureCube = 1, 165 TextureCube = 1,
165 Shadow2D = 2, 166 Shadow2D = 2,
166 Projection2D = 3, 167 Projection2D = 3,
167 ShadowCube = 4, 168 ShadowCube = 4,
168 Disabled = 5, 169 Disabled = 5,
169 }; 170 };
170 171
171 enum WrapMode : u32 { 172 enum WrapMode : u32 {
172 ClampToEdge = 0, 173 ClampToEdge = 0,
173 ClampToBorder = 1, 174 ClampToBorder = 1,
174 Repeat = 2, 175 Repeat = 2,
175 MirroredRepeat = 3, 176 MirroredRepeat = 3,
176 }; 177 };
177 178
178 enum TextureFilter : u32 { 179 enum TextureFilter : u32 { Nearest = 0, Linear = 1 };
179 Nearest = 0,
180 Linear = 1
181 };
182 180
183 union { 181 union {
184 u32 raw; 182 u32 raw;
185 BitField< 0, 8, u32> r; 183 BitField<0, 8, u32> r;
186 BitField< 8, 8, u32> g; 184 BitField<8, 8, u32> g;
187 BitField<16, 8, u32> b; 185 BitField<16, 8, u32> b;
188 BitField<24, 8, u32> a; 186 BitField<24, 8, u32> a;
189 } border_color; 187 } border_color;
190 188
191 union { 189 union {
192 BitField< 0, 16, u32> height; 190 BitField<0, 16, u32> height;
193 BitField<16, 16, u32> width; 191 BitField<16, 16, u32> width;
194 }; 192 };
195 193
196 union { 194 union {
197 BitField< 1, 1, TextureFilter> mag_filter; 195 BitField<1, 1, TextureFilter> mag_filter;
198 BitField< 2, 1, TextureFilter> min_filter; 196 BitField<2, 1, TextureFilter> min_filter;
199 BitField< 8, 2, WrapMode> wrap_t; 197 BitField<8, 2, WrapMode> wrap_t;
200 BitField<12, 2, WrapMode> wrap_s; 198 BitField<12, 2, WrapMode> wrap_s;
201 BitField<28, 2, TextureType> type; ///< @note Only valid for texture 0 according to 3DBrew. 199 BitField<28, 2, TextureType>
200 type; ///< @note Only valid for texture 0 according to 3DBrew.
202 }; 201 };
203 202
204 INSERT_PADDING_WORDS(0x1); 203 INSERT_PADDING_WORDS(0x1);
@@ -216,39 +215,39 @@ struct Regs {
216 }; 215 };
217 216
218 enum class TextureFormat : u32 { 217 enum class TextureFormat : u32 {
219 RGBA8 = 0, 218 RGBA8 = 0,
220 RGB8 = 1, 219 RGB8 = 1,
221 RGB5A1 = 2, 220 RGB5A1 = 2,
222 RGB565 = 3, 221 RGB565 = 3,
223 RGBA4 = 4, 222 RGBA4 = 4,
224 IA8 = 5, 223 IA8 = 5,
225 RG8 = 6, ///< @note Also called HILO8 in 3DBrew. 224 RG8 = 6, ///< @note Also called HILO8 in 3DBrew.
226 I8 = 7, 225 I8 = 7,
227 A8 = 8, 226 A8 = 8,
228 IA4 = 9, 227 IA4 = 9,
229 I4 = 10, 228 I4 = 10,
230 A4 = 11, 229 A4 = 11,
231 ETC1 = 12, // compressed 230 ETC1 = 12, // compressed
232 ETC1A4 = 13, // compressed 231 ETC1A4 = 13, // compressed
233 }; 232 };
234 233
235 enum class LogicOp : u32 { 234 enum class LogicOp : u32 {
236 Clear = 0, 235 Clear = 0,
237 And = 1, 236 And = 1,
238 AndReverse = 2, 237 AndReverse = 2,
239 Copy = 3, 238 Copy = 3,
240 Set = 4, 239 Set = 4,
241 CopyInverted = 5, 240 CopyInverted = 5,
242 NoOp = 6, 241 NoOp = 6,
243 Invert = 7, 242 Invert = 7,
244 Nand = 8, 243 Nand = 8,
245 Or = 9, 244 Or = 9,
246 Nor = 10, 245 Nor = 10,
247 Xor = 11, 246 Xor = 11,
248 Equiv = 12, 247 Equiv = 12,
249 AndInverted = 13, 248 AndInverted = 13,
250 OrReverse = 14, 249 OrReverse = 14,
251 OrInverted = 15, 250 OrInverted = 15,
252 }; 251 };
253 252
254 static unsigned NibblesPerPixel(TextureFormat format) { 253 static unsigned NibblesPerPixel(TextureFormat format) {
@@ -273,15 +272,15 @@ struct Regs {
273 case TextureFormat::I8: 272 case TextureFormat::I8:
274 case TextureFormat::A8: 273 case TextureFormat::A8:
275 case TextureFormat::IA4: 274 case TextureFormat::IA4:
276 default: // placeholder for yet unknown formats 275 default: // placeholder for yet unknown formats
277 return 2; 276 return 2;
278 } 277 }
279 } 278 }
280 279
281 union { 280 union {
282 BitField< 0, 1, u32> texture0_enable; 281 BitField<0, 1, u32> texture0_enable;
283 BitField< 1, 1, u32> texture1_enable; 282 BitField<1, 1, u32> texture1_enable;
284 BitField< 2, 1, u32> texture2_enable; 283 BitField<2, 1, u32> texture2_enable;
285 }; 284 };
286 TextureConfig texture0; 285 TextureConfig texture0;
287 INSERT_PADDING_WORDS(0x8); 286 INSERT_PADDING_WORDS(0x8);
@@ -301,64 +300,62 @@ struct Regs {
301 const TextureFormat format; 300 const TextureFormat format;
302 }; 301 };
303 const std::array<FullTextureConfig, 3> GetTextures() const { 302 const std::array<FullTextureConfig, 3> GetTextures() const {
304 return {{ 303 return {{{texture0_enable.ToBool(), texture0, texture0_format},
305 { texture0_enable.ToBool(), texture0, texture0_format }, 304 {texture1_enable.ToBool(), texture1, texture1_format},
306 { texture1_enable.ToBool(), texture1, texture1_format }, 305 {texture2_enable.ToBool(), texture2, texture2_format}}};
307 { texture2_enable.ToBool(), texture2, texture2_format }
308 }};
309 } 306 }
310 307
311 // 0xc0-0xff: Texture Combiner (akin to glTexEnv) 308 // 0xc0-0xff: Texture Combiner (akin to glTexEnv)
312 struct TevStageConfig { 309 struct TevStageConfig {
313 enum class Source : u32 { 310 enum class Source : u32 {
314 PrimaryColor = 0x0, 311 PrimaryColor = 0x0,
315 PrimaryFragmentColor = 0x1, 312 PrimaryFragmentColor = 0x1,
316 SecondaryFragmentColor = 0x2, 313 SecondaryFragmentColor = 0x2,
317 314
318 Texture0 = 0x3, 315 Texture0 = 0x3,
319 Texture1 = 0x4, 316 Texture1 = 0x4,
320 Texture2 = 0x5, 317 Texture2 = 0x5,
321 Texture3 = 0x6, 318 Texture3 = 0x6,
322 319
323 PreviousBuffer = 0xd, 320 PreviousBuffer = 0xd,
324 Constant = 0xe, 321 Constant = 0xe,
325 Previous = 0xf, 322 Previous = 0xf,
326 }; 323 };
327 324
328 enum class ColorModifier : u32 { 325 enum class ColorModifier : u32 {
329 SourceColor = 0x0, 326 SourceColor = 0x0,
330 OneMinusSourceColor = 0x1, 327 OneMinusSourceColor = 0x1,
331 SourceAlpha = 0x2, 328 SourceAlpha = 0x2,
332 OneMinusSourceAlpha = 0x3, 329 OneMinusSourceAlpha = 0x3,
333 SourceRed = 0x4, 330 SourceRed = 0x4,
334 OneMinusSourceRed = 0x5, 331 OneMinusSourceRed = 0x5,
335 332
336 SourceGreen = 0x8, 333 SourceGreen = 0x8,
337 OneMinusSourceGreen = 0x9, 334 OneMinusSourceGreen = 0x9,
338 335
339 SourceBlue = 0xc, 336 SourceBlue = 0xc,
340 OneMinusSourceBlue = 0xd, 337 OneMinusSourceBlue = 0xd,
341 }; 338 };
342 339
343 enum class AlphaModifier : u32 { 340 enum class AlphaModifier : u32 {
344 SourceAlpha = 0x0, 341 SourceAlpha = 0x0,
345 OneMinusSourceAlpha = 0x1, 342 OneMinusSourceAlpha = 0x1,
346 SourceRed = 0x2, 343 SourceRed = 0x2,
347 OneMinusSourceRed = 0x3, 344 OneMinusSourceRed = 0x3,
348 SourceGreen = 0x4, 345 SourceGreen = 0x4,
349 OneMinusSourceGreen = 0x5, 346 OneMinusSourceGreen = 0x5,
350 SourceBlue = 0x6, 347 SourceBlue = 0x6,
351 OneMinusSourceBlue = 0x7, 348 OneMinusSourceBlue = 0x7,
352 }; 349 };
353 350
354 enum class Operation : u32 { 351 enum class Operation : u32 {
355 Replace = 0, 352 Replace = 0,
356 Modulate = 1, 353 Modulate = 1,
357 Add = 2, 354 Add = 2,
358 AddSigned = 3, 355 AddSigned = 3,
359 Lerp = 4, 356 Lerp = 4,
360 Subtract = 5, 357 Subtract = 5,
361 Dot3_RGB = 6, 358 Dot3_RGB = 6,
362 359
363 MultiplyThenAdd = 8, 360 MultiplyThenAdd = 8,
364 AddThenMultiply = 9, 361 AddThenMultiply = 9,
@@ -366,9 +363,9 @@ struct Regs {
366 363
367 union { 364 union {
368 u32 sources_raw; 365 u32 sources_raw;
369 BitField< 0, 4, Source> color_source1; 366 BitField<0, 4, Source> color_source1;
370 BitField< 4, 4, Source> color_source2; 367 BitField<4, 4, Source> color_source2;
371 BitField< 8, 4, Source> color_source3; 368 BitField<8, 4, Source> color_source3;
372 BitField<16, 4, Source> alpha_source1; 369 BitField<16, 4, Source> alpha_source1;
373 BitField<20, 4, Source> alpha_source2; 370 BitField<20, 4, Source> alpha_source2;
374 BitField<24, 4, Source> alpha_source3; 371 BitField<24, 4, Source> alpha_source3;
@@ -376,9 +373,9 @@ struct Regs {
376 373
377 union { 374 union {
378 u32 modifiers_raw; 375 u32 modifiers_raw;
379 BitField< 0, 4, ColorModifier> color_modifier1; 376 BitField<0, 4, ColorModifier> color_modifier1;
380 BitField< 4, 4, ColorModifier> color_modifier2; 377 BitField<4, 4, ColorModifier> color_modifier2;
381 BitField< 8, 4, ColorModifier> color_modifier3; 378 BitField<8, 4, ColorModifier> color_modifier3;
382 BitField<12, 3, AlphaModifier> alpha_modifier1; 379 BitField<12, 3, AlphaModifier> alpha_modifier1;
383 BitField<16, 3, AlphaModifier> alpha_modifier2; 380 BitField<16, 3, AlphaModifier> alpha_modifier2;
384 BitField<20, 3, AlphaModifier> alpha_modifier3; 381 BitField<20, 3, AlphaModifier> alpha_modifier3;
@@ -386,21 +383,21 @@ struct Regs {
386 383
387 union { 384 union {
388 u32 ops_raw; 385 u32 ops_raw;
389 BitField< 0, 4, Operation> color_op; 386 BitField<0, 4, Operation> color_op;
390 BitField<16, 4, Operation> alpha_op; 387 BitField<16, 4, Operation> alpha_op;
391 }; 388 };
392 389
393 union { 390 union {
394 u32 const_color; 391 u32 const_color;
395 BitField< 0, 8, u32> const_r; 392 BitField<0, 8, u32> const_r;
396 BitField< 8, 8, u32> const_g; 393 BitField<8, 8, u32> const_g;
397 BitField<16, 8, u32> const_b; 394 BitField<16, 8, u32> const_b;
398 BitField<24, 8, u32> const_a; 395 BitField<24, 8, u32> const_a;
399 }; 396 };
400 397
401 union { 398 union {
402 u32 scales_raw; 399 u32 scales_raw;
403 BitField< 0, 2, u32> color_scale; 400 BitField<0, 2, u32> color_scale;
404 BitField<16, 2, u32> alpha_scale; 401 BitField<16, 2, u32> alpha_scale;
405 }; 402 };
406 403
@@ -424,8 +421,8 @@ struct Regs {
424 421
425 enum class FogMode : u32 { 422 enum class FogMode : u32 {
426 None = 0, 423 None = 0,
427 Fog = 5, 424 Fog = 5,
428 Gas = 7, 425 Gas = 7,
429 }; 426 };
430 427
431 union { 428 union {
@@ -435,7 +432,7 @@ struct Regs {
435 union { 432 union {
436 // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in 433 // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in
437 // these masks are set 434 // these masks are set
438 BitField< 8, 4, u32> update_mask_rgb; 435 BitField<8, 4, u32> update_mask_rgb;
439 BitField<12, 4, u32> update_mask_a; 436 BitField<12, 4, u32> update_mask_a;
440 437
441 bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { 438 bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
@@ -450,8 +447,8 @@ struct Regs {
450 447
451 union { 448 union {
452 u32 raw; 449 u32 raw;
453 BitField< 0, 8, u32> r; 450 BitField<0, 8, u32> r;
454 BitField< 8, 8, u32> g; 451 BitField<8, 8, u32> g;
455 BitField<16, 8, u32> b; 452 BitField<16, 8, u32> b;
456 } fog_color; 453 } fog_color;
457 454
@@ -469,66 +466,64 @@ struct Regs {
469 466
470 union { 467 union {
471 u32 raw; 468 u32 raw;
472 BitField< 0, 8, u32> r; 469 BitField<0, 8, u32> r;
473 BitField< 8, 8, u32> g; 470 BitField<8, 8, u32> g;
474 BitField<16, 8, u32> b; 471 BitField<16, 8, u32> b;
475 BitField<24, 8, u32> a; 472 BitField<24, 8, u32> a;
476 } tev_combiner_buffer_color; 473 } tev_combiner_buffer_color;
477 474
478 INSERT_PADDING_WORDS(0x2); 475 INSERT_PADDING_WORDS(0x2);
479 476
480 const std::array<Regs::TevStageConfig,6> GetTevStages() const { 477 const std::array<Regs::TevStageConfig, 6> GetTevStages() const {
481 return {{ tev_stage0, tev_stage1, 478 return {{tev_stage0, tev_stage1, tev_stage2, tev_stage3, tev_stage4, tev_stage5}};
482 tev_stage2, tev_stage3,
483 tev_stage4, tev_stage5 }};
484 }; 479 };
485 480
486 enum class BlendEquation : u32 { 481 enum class BlendEquation : u32 {
487 Add = 0, 482 Add = 0,
488 Subtract = 1, 483 Subtract = 1,
489 ReverseSubtract = 2, 484 ReverseSubtract = 2,
490 Min = 3, 485 Min = 3,
491 Max = 4, 486 Max = 4,
492 }; 487 };
493 488
494 enum class BlendFactor : u32 { 489 enum class BlendFactor : u32 {
495 Zero = 0, 490 Zero = 0,
496 One = 1, 491 One = 1,
497 SourceColor = 2, 492 SourceColor = 2,
498 OneMinusSourceColor = 3, 493 OneMinusSourceColor = 3,
499 DestColor = 4, 494 DestColor = 4,
500 OneMinusDestColor = 5, 495 OneMinusDestColor = 5,
501 SourceAlpha = 6, 496 SourceAlpha = 6,
502 OneMinusSourceAlpha = 7, 497 OneMinusSourceAlpha = 7,
503 DestAlpha = 8, 498 DestAlpha = 8,
504 OneMinusDestAlpha = 9, 499 OneMinusDestAlpha = 9,
505 ConstantColor = 10, 500 ConstantColor = 10,
506 OneMinusConstantColor = 11, 501 OneMinusConstantColor = 11,
507 ConstantAlpha = 12, 502 ConstantAlpha = 12,
508 OneMinusConstantAlpha = 13, 503 OneMinusConstantAlpha = 13,
509 SourceAlphaSaturate = 14, 504 SourceAlphaSaturate = 14,
510 }; 505 };
511 506
512 enum class CompareFunc : u32 { 507 enum class CompareFunc : u32 {
513 Never = 0, 508 Never = 0,
514 Always = 1, 509 Always = 1,
515 Equal = 2, 510 Equal = 2,
516 NotEqual = 3, 511 NotEqual = 3,
517 LessThan = 4, 512 LessThan = 4,
518 LessThanOrEqual = 5, 513 LessThanOrEqual = 5,
519 GreaterThan = 6, 514 GreaterThan = 6,
520 GreaterThanOrEqual = 7, 515 GreaterThanOrEqual = 7,
521 }; 516 };
522 517
523 enum class StencilAction : u32 { 518 enum class StencilAction : u32 {
524 Keep = 0, 519 Keep = 0,
525 Zero = 1, 520 Zero = 1,
526 Replace = 2, 521 Replace = 2,
527 Increment = 3, 522 Increment = 3,
528 Decrement = 4, 523 Decrement = 4,
529 Invert = 5, 524 Invert = 5,
530 IncrementWrap = 6, 525 IncrementWrap = 6,
531 DecrementWrap = 7 526 DecrementWrap = 7
532 }; 527 };
533 528
534 struct { 529 struct {
@@ -538,8 +533,8 @@ struct Regs {
538 }; 533 };
539 534
540 union { 535 union {
541 BitField< 0, 8, BlendEquation> blend_equation_rgb; 536 BitField<0, 8, BlendEquation> blend_equation_rgb;
542 BitField< 8, 8, BlendEquation> blend_equation_a; 537 BitField<8, 8, BlendEquation> blend_equation_a;
543 538
544 BitField<16, 4, BlendFactor> factor_source_rgb; 539 BitField<16, 4, BlendFactor> factor_source_rgb;
545 BitField<20, 4, BlendFactor> factor_dest_rgb; 540 BitField<20, 4, BlendFactor> factor_dest_rgb;
@@ -554,16 +549,16 @@ struct Regs {
554 549
555 union { 550 union {
556 u32 raw; 551 u32 raw;
557 BitField< 0, 8, u32> r; 552 BitField<0, 8, u32> r;
558 BitField< 8, 8, u32> g; 553 BitField<8, 8, u32> g;
559 BitField<16, 8, u32> b; 554 BitField<16, 8, u32> b;
560 BitField<24, 8, u32> a; 555 BitField<24, 8, u32> a;
561 } blend_const; 556 } blend_const;
562 557
563 union { 558 union {
564 BitField< 0, 1, u32> enable; 559 BitField<0, 1, u32> enable;
565 BitField< 4, 3, CompareFunc> func; 560 BitField<4, 3, CompareFunc> func;
566 BitField< 8, 8, u32> ref; 561 BitField<8, 8, u32> ref;
567 } alpha_test; 562 } alpha_test;
568 563
569 struct { 564 struct {
@@ -572,13 +567,13 @@ struct Regs {
572 u32 raw_func; 567 u32 raw_func;
573 568
574 // If true, enable stencil testing 569 // If true, enable stencil testing
575 BitField< 0, 1, u32> enable; 570 BitField<0, 1, u32> enable;
576 571
577 // Comparison operation for stencil testing 572 // Comparison operation for stencil testing
578 BitField< 4, 3, CompareFunc> func; 573 BitField<4, 3, CompareFunc> func;
579 574
580 // Mask used to control writing to the stencil buffer 575 // Mask used to control writing to the stencil buffer
581 BitField< 8, 8, u32> write_mask; 576 BitField<8, 8, u32> write_mask;
582 577
583 // Value to compare against for stencil testing 578 // Value to compare against for stencil testing
584 BitField<16, 8, u32> reference_value; 579 BitField<16, 8, u32> reference_value;
@@ -592,21 +587,21 @@ struct Regs {
592 u32 raw_op; 587 u32 raw_op;
593 588
594 // Action to perform when the stencil test fails 589 // Action to perform when the stencil test fails
595 BitField< 0, 3, StencilAction> action_stencil_fail; 590 BitField<0, 3, StencilAction> action_stencil_fail;
596 591
597 // Action to perform when stencil testing passed but depth testing fails 592 // Action to perform when stencil testing passed but depth testing fails
598 BitField< 4, 3, StencilAction> action_depth_fail; 593 BitField<4, 3, StencilAction> action_depth_fail;
599 594
600 // Action to perform when both stencil and depth testing pass 595 // Action to perform when both stencil and depth testing pass
601 BitField< 8, 3, StencilAction> action_depth_pass; 596 BitField<8, 3, StencilAction> action_depth_pass;
602 }; 597 };
603 } stencil_test; 598 } stencil_test;
604 599
605 union { 600 union {
606 BitField< 0, 1, u32> depth_test_enable; 601 BitField<0, 1, u32> depth_test_enable;
607 BitField< 4, 3, CompareFunc> depth_test_func; 602 BitField<4, 3, CompareFunc> depth_test_func;
608 BitField< 8, 1, u32> red_enable; 603 BitField<8, 1, u32> red_enable;
609 BitField< 9, 1, u32> green_enable; 604 BitField<9, 1, u32> green_enable;
610 BitField<10, 1, u32> blue_enable; 605 BitField<10, 1, u32> blue_enable;
611 BitField<11, 1, u32> alpha_enable; 606 BitField<11, 1, u32> alpha_enable;
612 BitField<12, 1, u32> depth_write_enable; 607 BitField<12, 1, u32> depth_write_enable;
@@ -617,16 +612,16 @@ struct Regs {
617 612
618 // Components are laid out in reverse byte order, most significant bits first. 613 // Components are laid out in reverse byte order, most significant bits first.
619 enum class ColorFormat : u32 { 614 enum class ColorFormat : u32 {
620 RGBA8 = 0, 615 RGBA8 = 0,
621 RGB8 = 1, 616 RGB8 = 1,
622 RGB5A1 = 2, 617 RGB5A1 = 2,
623 RGB565 = 3, 618 RGB565 = 3,
624 RGBA4 = 4, 619 RGBA4 = 4,
625 }; 620 };
626 621
627 enum class DepthFormat : u32 { 622 enum class DepthFormat : u32 {
628 D16 = 0, 623 D16 = 0,
629 D24 = 2, 624 D24 = 2,
630 D24S8 = 3, 625 D24S8 = 3,
631 }; 626 };
632 627
@@ -673,7 +668,7 @@ struct Regs {
673 // while the height is stored as the actual height minus one. 668 // while the height is stored as the actual height minus one.
674 // Hence, don't access these fields directly but use the accessors 669 // Hence, don't access these fields directly but use the accessors
675 // GetWidth() and GetHeight() instead. 670 // GetWidth() and GetHeight() instead.
676 BitField< 0, 11, u32> width; 671 BitField<0, 11, u32> width;
677 BitField<12, 10, u32> height; 672 BitField<12, 10, u32> height;
678 }; 673 };
679 674
@@ -759,10 +754,12 @@ struct Regs {
759 754
760 /// Selects which lighting components are affected by fresnel 755 /// Selects which lighting components are affected by fresnel
761 enum class LightingFresnelSelector { 756 enum class LightingFresnelSelector {
762 None = 0, ///< Fresnel is disabled 757 None = 0, ///< Fresnel is disabled
763 PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel 758 PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel
764 SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel 759 SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel
765 Both = PrimaryAlpha | SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel 760 Both =
761 PrimaryAlpha |
762 SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel
766 }; 763 };
767 764
768 /// Factor used to scale the output of a lighting LUT 765 /// Factor used to scale the output of a lighting LUT
@@ -789,57 +786,63 @@ struct Regs {
789 }; 786 };
790 787
791 union LightColor { 788 union LightColor {
792 BitField< 0, 10, u32> b; 789 BitField<0, 10, u32> b;
793 BitField<10, 10, u32> g; 790 BitField<10, 10, u32> g;
794 BitField<20, 10, u32> r; 791 BitField<20, 10, u32> r;
795 792
796 Math::Vec3f ToVec3f() const { 793 Math::Vec3f ToVec3f() const {
797 // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color component 794 // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color
795 // component
798 return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f); 796 return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f);
799 } 797 }
800 }; 798 };
801 799
802 /// Returns true if the specified lighting sampler is supported by the current Pica lighting configuration 800 /// Returns true if the specified lighting sampler is supported by the current Pica lighting
801 /// configuration
803 static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) { 802 static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) {
804 switch (sampler) { 803 switch (sampler) {
805 case LightingSampler::Distribution0: 804 case LightingSampler::Distribution0:
806 return (config != LightingConfig::Config1); 805 return (config != LightingConfig::Config1);
807 806
808 case LightingSampler::Distribution1: 807 case LightingSampler::Distribution1:
809 return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5); 808 return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) &&
809 (config != LightingConfig::Config5);
810 810
811 case LightingSampler::Fresnel: 811 case LightingSampler::Fresnel:
812 return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && (config != LightingConfig::Config4); 812 return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) &&
813 (config != LightingConfig::Config4);
813 814
814 case LightingSampler::ReflectRed: 815 case LightingSampler::ReflectRed:
815 return (config != LightingConfig::Config3); 816 return (config != LightingConfig::Config3);
816 817
817 case LightingSampler::ReflectGreen: 818 case LightingSampler::ReflectGreen:
818 case LightingSampler::ReflectBlue: 819 case LightingSampler::ReflectBlue:
819 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7); 820 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) ||
821 (config == LightingConfig::Config7);
820 default: 822 default:
821 UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached " 823 UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached "
822 "unreachable section, sampler should be one " 824 "unreachable section, sampler should be one "
823 "of Distribution0, Distribution1, Fresnel, " 825 "of Distribution0, Distribution1, Fresnel, "
824 "ReflectRed, ReflectGreen or ReflectBlue, instead " 826 "ReflectRed, ReflectGreen or ReflectBlue, instead "
825 "got %i", static_cast<int>(config)); 827 "got %i",
828 static_cast<int>(config));
826 } 829 }
827 } 830 }
828 831
829 struct { 832 struct {
830 struct LightSrc { 833 struct LightSrc {
831 LightColor specular_0; // material.specular_0 * light.specular_0 834 LightColor specular_0; // material.specular_0 * light.specular_0
832 LightColor specular_1; // material.specular_1 * light.specular_1 835 LightColor specular_1; // material.specular_1 * light.specular_1
833 LightColor diffuse; // material.diffuse * light.diffuse 836 LightColor diffuse; // material.diffuse * light.diffuse
834 LightColor ambient; // material.ambient * light.ambient 837 LightColor ambient; // material.ambient * light.ambient
835 838
836 // Encoded as 16-bit floating point 839 // Encoded as 16-bit floating point
837 union { 840 union {
838 BitField< 0, 16, u32> x; 841 BitField<0, 16, u32> x;
839 BitField<16, 16, u32> y; 842 BitField<16, 16, u32> y;
840 }; 843 };
841 union { 844 union {
842 BitField< 0, 16, u32> z; 845 BitField<0, 16, u32> z;
843 }; 846 };
844 847
845 INSERT_PADDING_WORDS(0x3); 848 INSERT_PADDING_WORDS(0x3);
@@ -854,7 +857,8 @@ struct Regs {
854 857
855 INSERT_PADDING_WORDS(0x4); 858 INSERT_PADDING_WORDS(0x4);
856 }; 859 };
857 static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words"); 860 static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32),
861 "LightSrc structure must be 0x10 words");
858 862
859 LightSrc light[8]; 863 LightSrc light[8];
860 LightColor global_ambient; // Emission + (material.ambient * lighting.ambient) 864 LightColor global_ambient; // Emission + (material.ambient * lighting.ambient)
@@ -862,8 +866,8 @@ struct Regs {
862 BitField<0, 3, u32> num_lights; // Number of enabled lights - 1 866 BitField<0, 3, u32> num_lights; // Number of enabled lights - 1
863 867
864 union { 868 union {
865 BitField< 2, 2, LightingFresnelSelector> fresnel_selector; 869 BitField<2, 2, LightingFresnelSelector> fresnel_selector;
866 BitField< 4, 4, LightingConfig> config; 870 BitField<4, 4, LightingConfig> config;
867 BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2 871 BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2
868 BitField<27, 1, u32> clamp_highlights; 872 BitField<27, 1, u32> clamp_highlights;
869 BitField<28, 2, LightingBumpMode> bump_mode; 873 BitField<28, 2, LightingBumpMode> bump_mode;
@@ -892,16 +896,17 @@ struct Regs {
892 } config1; 896 } config1;
893 897
894 bool IsDistAttenDisabled(unsigned index) const { 898 bool IsDistAttenDisabled(unsigned index) const {
895 const unsigned disable[] = { config1.disable_dist_atten_light_0, config1.disable_dist_atten_light_1, 899 const unsigned disable[] = {
896 config1.disable_dist_atten_light_2, config1.disable_dist_atten_light_3, 900 config1.disable_dist_atten_light_0, config1.disable_dist_atten_light_1,
897 config1.disable_dist_atten_light_4, config1.disable_dist_atten_light_5, 901 config1.disable_dist_atten_light_2, config1.disable_dist_atten_light_3,
898 config1.disable_dist_atten_light_6, config1.disable_dist_atten_light_7 }; 902 config1.disable_dist_atten_light_4, config1.disable_dist_atten_light_5,
903 config1.disable_dist_atten_light_6, config1.disable_dist_atten_light_7};
899 return disable[index] != 0; 904 return disable[index] != 0;
900 } 905 }
901 906
902 union { 907 union {
903 BitField<0, 8, u32> index; ///< Index at which to set data in the LUT 908 BitField<0, 8, u32> index; ///< Index at which to set data in the LUT
904 BitField<8, 5, u32> type; ///< Type of LUT for which to set data 909 BitField<8, 5, u32> type; ///< Type of LUT for which to set data
905 } lut_config; 910 } lut_config;
906 911
907 BitField<0, 1, u32> disable; 912 BitField<0, 1, u32> disable;
@@ -917,9 +922,9 @@ struct Regs {
917 // abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in 922 // abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in
918 // the range of (0.0, 1.0). 923 // the range of (0.0, 1.0).
919 union { 924 union {
920 BitField< 1, 1, u32> disable_d0; 925 BitField<1, 1, u32> disable_d0;
921 BitField< 5, 1, u32> disable_d1; 926 BitField<5, 1, u32> disable_d1;
922 BitField< 9, 1, u32> disable_sp; 927 BitField<9, 1, u32> disable_sp;
923 BitField<13, 1, u32> disable_fr; 928 BitField<13, 1, u32> disable_fr;
924 BitField<17, 1, u32> disable_rb; 929 BitField<17, 1, u32> disable_rb;
925 BitField<21, 1, u32> disable_rg; 930 BitField<21, 1, u32> disable_rg;
@@ -927,9 +932,9 @@ struct Regs {
927 } abs_lut_input; 932 } abs_lut_input;
928 933
929 union { 934 union {
930 BitField< 0, 3, LightingLutInput> d0; 935 BitField<0, 3, LightingLutInput> d0;
931 BitField< 4, 3, LightingLutInput> d1; 936 BitField<4, 3, LightingLutInput> d1;
932 BitField< 8, 3, LightingLutInput> sp; 937 BitField<8, 3, LightingLutInput> sp;
933 BitField<12, 3, LightingLutInput> fr; 938 BitField<12, 3, LightingLutInput> fr;
934 BitField<16, 3, LightingLutInput> rb; 939 BitField<16, 3, LightingLutInput> rb;
935 BitField<20, 3, LightingLutInput> rg; 940 BitField<20, 3, LightingLutInput> rg;
@@ -937,9 +942,9 @@ struct Regs {
937 } lut_input; 942 } lut_input;
938 943
939 union { 944 union {
940 BitField< 0, 3, LightingScale> d0; 945 BitField<0, 3, LightingScale> d0;
941 BitField< 4, 3, LightingScale> d1; 946 BitField<4, 3, LightingScale> d1;
942 BitField< 8, 3, LightingScale> sp; 947 BitField<8, 3, LightingScale> sp;
943 BitField<12, 3, LightingScale> fr; 948 BitField<12, 3, LightingScale> fr;
944 BitField<16, 3, LightingScale> rb; 949 BitField<16, 3, LightingScale> rb;
945 BitField<20, 3, LightingScale> rg; 950 BitField<20, 3, LightingScale> rg;
@@ -972,9 +977,9 @@ struct Regs {
972 // above), the first N slots below will be set to integers within the range of 0-7, 977 // above), the first N slots below will be set to integers within the range of 0-7,
973 // corresponding to the actual light that is enabled for each slot. 978 // corresponding to the actual light that is enabled for each slot.
974 979
975 BitField< 0, 3, u32> slot_0; 980 BitField<0, 3, u32> slot_0;
976 BitField< 4, 3, u32> slot_1; 981 BitField<4, 3, u32> slot_1;
977 BitField< 8, 3, u32> slot_2; 982 BitField<8, 3, u32> slot_2;
978 BitField<12, 3, u32> slot_3; 983 BitField<12, 3, u32> slot_3;
979 BitField<16, 3, u32> slot_4; 984 BitField<16, 3, u32> slot_4;
980 BitField<20, 3, u32> slot_5; 985 BitField<20, 3, u32> slot_5;
@@ -982,7 +987,8 @@ struct Regs {
982 BitField<28, 3, u32> slot_7; 987 BitField<28, 3, u32> slot_7;
983 988
984 unsigned GetNum(unsigned index) const { 989 unsigned GetNum(unsigned index) const {
985 const unsigned enable_slots[] = { slot_0, slot_1, slot_2, slot_3, slot_4, slot_5, slot_6, slot_7 }; 990 const unsigned enable_slots[] = {slot_0, slot_1, slot_2, slot_3,
991 slot_4, slot_5, slot_6, slot_7};
986 return enable_slots[index]; 992 return enable_slots[index];
987 } 993 }
988 } light_enable; 994 } light_enable;
@@ -1006,58 +1012,54 @@ struct Regs {
1006 1012
1007 // Descriptor for internal vertex attributes 1013 // Descriptor for internal vertex attributes
1008 union { 1014 union {
1009 BitField< 0, 2, VertexAttributeFormat> format0; // size of one element 1015 BitField<0, 2, VertexAttributeFormat> format0; // size of one element
1010 BitField< 2, 2, u64> size0; // number of elements minus 1 1016 BitField<2, 2, u64> size0; // number of elements minus 1
1011 BitField< 4, 2, VertexAttributeFormat> format1; 1017 BitField<4, 2, VertexAttributeFormat> format1;
1012 BitField< 6, 2, u64> size1; 1018 BitField<6, 2, u64> size1;
1013 BitField< 8, 2, VertexAttributeFormat> format2; 1019 BitField<8, 2, VertexAttributeFormat> format2;
1014 BitField<10, 2, u64> size2; 1020 BitField<10, 2, u64> size2;
1015 BitField<12, 2, VertexAttributeFormat> format3; 1021 BitField<12, 2, VertexAttributeFormat> format3;
1016 BitField<14, 2, u64> size3; 1022 BitField<14, 2, u64> size3;
1017 BitField<16, 2, VertexAttributeFormat> format4; 1023 BitField<16, 2, VertexAttributeFormat> format4;
1018 BitField<18, 2, u64> size4; 1024 BitField<18, 2, u64> size4;
1019 BitField<20, 2, VertexAttributeFormat> format5; 1025 BitField<20, 2, VertexAttributeFormat> format5;
1020 BitField<22, 2, u64> size5; 1026 BitField<22, 2, u64> size5;
1021 BitField<24, 2, VertexAttributeFormat> format6; 1027 BitField<24, 2, VertexAttributeFormat> format6;
1022 BitField<26, 2, u64> size6; 1028 BitField<26, 2, u64> size6;
1023 BitField<28, 2, VertexAttributeFormat> format7; 1029 BitField<28, 2, VertexAttributeFormat> format7;
1024 BitField<30, 2, u64> size7; 1030 BitField<30, 2, u64> size7;
1025 BitField<32, 2, VertexAttributeFormat> format8; 1031 BitField<32, 2, VertexAttributeFormat> format8;
1026 BitField<34, 2, u64> size8; 1032 BitField<34, 2, u64> size8;
1027 BitField<36, 2, VertexAttributeFormat> format9; 1033 BitField<36, 2, VertexAttributeFormat> format9;
1028 BitField<38, 2, u64> size9; 1034 BitField<38, 2, u64> size9;
1029 BitField<40, 2, VertexAttributeFormat> format10; 1035 BitField<40, 2, VertexAttributeFormat> format10;
1030 BitField<42, 2, u64> size10; 1036 BitField<42, 2, u64> size10;
1031 BitField<44, 2, VertexAttributeFormat> format11; 1037 BitField<44, 2, VertexAttributeFormat> format11;
1032 BitField<46, 2, u64> size11; 1038 BitField<46, 2, u64> size11;
1033 1039
1034 BitField<48, 12, u64> attribute_mask; 1040 BitField<48, 12, u64> attribute_mask;
1035 1041
1036 // number of total attributes minus 1 1042 // number of total attributes minus 1
1037 BitField<60, 4, u64> num_extra_attributes; 1043 BitField<60, 4, u64> num_extra_attributes;
1038 }; 1044 };
1039 1045
1040 inline VertexAttributeFormat GetFormat(int n) const { 1046 inline VertexAttributeFormat GetFormat(int n) const {
1041 VertexAttributeFormat formats[] = { 1047 VertexAttributeFormat formats[] = {format0, format1, format2, format3,
1042 format0, format1, format2, format3, 1048 format4, format5, format6, format7,
1043 format4, format5, format6, format7, 1049 format8, format9, format10, format11};
1044 format8, format9, format10, format11
1045 };
1046 return formats[n]; 1050 return formats[n];
1047 } 1051 }
1048 1052
1049 inline int GetNumElements(int n) const { 1053 inline int GetNumElements(int n) const {
1050 u64 sizes[] = { 1054 u64 sizes[] = {size0, size1, size2, size3, size4, size5,
1051 size0, size1, size2, size3, 1055 size6, size7, size8, size9, size10, size11};
1052 size4, size5, size6, size7, 1056 return (int)sizes[n] + 1;
1053 size8, size9, size10, size11
1054 };
1055 return (int)sizes[n]+1;
1056 } 1057 }
1057 1058
1058 inline int GetElementSizeInBytes(int n) const { 1059 inline int GetElementSizeInBytes(int n) const {
1059 return (GetFormat(n) == VertexAttributeFormat::FLOAT) ? 4 : 1060 return (GetFormat(n) == VertexAttributeFormat::FLOAT)
1060 (GetFormat(n) == VertexAttributeFormat::SHORT) ? 2 : 1; 1061 ? 4
1062 : (GetFormat(n) == VertexAttributeFormat::SHORT) ? 2 : 1;
1061 } 1063 }
1062 1064
1063 inline int GetStride(int n) const { 1065 inline int GetStride(int n) const {
@@ -1069,7 +1071,7 @@ struct Regs {
1069 } 1071 }
1070 1072
1071 inline int GetNumTotalAttributes() const { 1073 inline int GetNumTotalAttributes() const {
1072 return (int)num_extra_attributes+1; 1074 return (int)num_extra_attributes + 1;
1073 } 1075 }
1074 1076
1075 // Attribute loaders map the source vertex data to input attributes 1077 // Attribute loaders map the source vertex data to input attributes
@@ -1079,9 +1081,9 @@ struct Regs {
1079 u32 data_offset; 1081 u32 data_offset;
1080 1082
1081 union { 1083 union {
1082 BitField< 0, 4, u64> comp0; 1084 BitField<0, 4, u64> comp0;
1083 BitField< 4, 4, u64> comp1; 1085 BitField<4, 4, u64> comp1;
1084 BitField< 8, 4, u64> comp2; 1086 BitField<8, 4, u64> comp2;
1085 BitField<12, 4, u64> comp3; 1087 BitField<12, 4, u64> comp3;
1086 BitField<16, 4, u64> comp4; 1088 BitField<16, 4, u64> comp4;
1087 BitField<20, 4, u64> comp5; 1089 BitField<20, 4, u64> comp5;
@@ -1099,11 +1101,8 @@ struct Regs {
1099 }; 1101 };
1100 1102
1101 inline int GetComponent(int n) const { 1103 inline int GetComponent(int n) const {
1102 u64 components[] = { 1104 u64 components[] = {comp0, comp1, comp2, comp3, comp4, comp5,
1103 comp0, comp1, comp2, comp3, 1105 comp6, comp7, comp8, comp9, comp10, comp11};
1104 comp4, comp5, comp6, comp7,
1105 comp8, comp9, comp10, comp11
1106 };
1107 return (int)components[n]; 1106 return (int)components[n];
1108 } 1107 }
1109 } attribute_loaders[12]; 1108 } attribute_loaders[12];
@@ -1157,8 +1156,8 @@ struct Regs {
1157 // kicked off. 1156 // kicked off.
1158 // 2) Games can configure these registers to provide a command list subroutine mechanism. 1157 // 2) Games can configure these registers to provide a command list subroutine mechanism.
1159 1158
1160 BitField< 0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer 1159 BitField<0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer
1161 BitField< 0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer 1160 BitField<0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer
1162 u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to 1161 u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to
1163 1162
1164 unsigned GetSize(unsigned index) const { 1163 unsigned GetSize(unsigned index) const {
@@ -1174,19 +1173,16 @@ struct Regs {
1174 1173
1175 INSERT_PADDING_WORDS(0x07); 1174 INSERT_PADDING_WORDS(0x07);
1176 1175
1177 enum class GPUMode : u32 { 1176 enum class GPUMode : u32 { Drawing = 0, Configuring = 1 };
1178 Drawing = 0,
1179 Configuring = 1
1180 };
1181 1177
1182 GPUMode gpu_mode; 1178 GPUMode gpu_mode;
1183 1179
1184 INSERT_PADDING_WORDS(0x18); 1180 INSERT_PADDING_WORDS(0x18);
1185 1181
1186 enum class TriangleTopology : u32 { 1182 enum class TriangleTopology : u32 {
1187 List = 0, 1183 List = 0,
1188 Strip = 1, 1184 Strip = 1,
1189 Fan = 2, 1185 Fan = 2,
1190 Shader = 3, // Programmable setup unit implemented in a geometry shader 1186 Shader = 3, // Programmable setup unit implemented in a geometry shader
1191 }; 1187 };
1192 1188
@@ -1200,8 +1196,8 @@ struct Regs {
1200 BitField<0, 16, u32> bool_uniforms; 1196 BitField<0, 16, u32> bool_uniforms;
1201 1197
1202 union { 1198 union {
1203 BitField< 0, 8, u32> x; 1199 BitField<0, 8, u32> x;
1204 BitField< 8, 8, u32> y; 1200 BitField<8, 8, u32> y;
1205 BitField<16, 8, u32> z; 1201 BitField<16, 8, u32> z;
1206 BitField<24, 8, u32> w; 1202 BitField<24, 8, u32> w;
1207 } int_uniforms[4]; 1203 } int_uniforms[4];
@@ -1217,9 +1213,9 @@ struct Regs {
1217 BitField<0, 16, u32> main_offset; 1213 BitField<0, 16, u32> main_offset;
1218 1214
1219 union { 1215 union {
1220 BitField< 0, 4, u64> attribute0_register; 1216 BitField<0, 4, u64> attribute0_register;
1221 BitField< 4, 4, u64> attribute1_register; 1217 BitField<4, 4, u64> attribute1_register;
1222 BitField< 8, 4, u64> attribute2_register; 1218 BitField<8, 4, u64> attribute2_register;
1223 BitField<12, 4, u64> attribute3_register; 1219 BitField<12, 4, u64> attribute3_register;
1224 BitField<16, 4, u64> attribute4_register; 1220 BitField<16, 4, u64> attribute4_register;
1225 BitField<20, 4, u64> attribute5_register; 1221 BitField<20, 4, u64> attribute5_register;
@@ -1236,10 +1232,12 @@ struct Regs {
1236 1232
1237 int GetRegisterForAttribute(int attribute_index) const { 1233 int GetRegisterForAttribute(int attribute_index) const {
1238 u64 fields[] = { 1234 u64 fields[] = {
1239 attribute0_register, attribute1_register, attribute2_register, attribute3_register, 1235 attribute0_register, attribute1_register, attribute2_register,
1240 attribute4_register, attribute5_register, attribute6_register, attribute7_register, 1236 attribute3_register, attribute4_register, attribute5_register,
1241 attribute8_register, attribute9_register, attribute10_register, attribute11_register, 1237 attribute6_register, attribute7_register, attribute8_register,
1242 attribute12_register, attribute13_register, attribute14_register, attribute15_register, 1238 attribute9_register, attribute10_register, attribute11_register,
1239 attribute12_register, attribute13_register, attribute14_register,
1240 attribute15_register,
1243 }; 1241 };
1244 return (int)fields[attribute_index]; 1242 return (int)fields[attribute_index];
1245 } 1243 }
@@ -1251,11 +1249,7 @@ struct Regs {
1251 INSERT_PADDING_WORDS(0x2); 1249 INSERT_PADDING_WORDS(0x2);
1252 1250
1253 struct { 1251 struct {
1254 enum Format : u32 1252 enum Format : u32 { FLOAT24 = 0, FLOAT32 = 1 };
1255 {
1256 FLOAT24 = 0,
1257 FLOAT32 = 1
1258 };
1259 1253
1260 bool IsFloat32() const { 1254 bool IsFloat32() const {
1261 return format == FLOAT32; 1255 return format == FLOAT32;
@@ -1263,7 +1257,8 @@ struct Regs {
1263 1257
1264 union { 1258 union {
1265 // Index of the next uniform to write to 1259 // Index of the next uniform to write to
1266 // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices 1260 // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid
1261 // indices
1267 // TODO: Maybe the uppermost index is for the geometry shader? Investigate! 1262 // TODO: Maybe the uppermost index is for the geometry shader? Investigate!
1268 BitField<0, 7, u32> index; 1263 BitField<0, 7, u32> index;
1269 1264
@@ -1315,12 +1310,12 @@ struct Regs {
1315 return sizeof(Regs) / sizeof(u32); 1310 return sizeof(Regs) / sizeof(u32);
1316 } 1311 }
1317 1312
1318 const u32& operator [] (int index) const { 1313 const u32& operator[](int index) const {
1319 const u32* content = reinterpret_cast<const u32*>(this); 1314 const u32* content = reinterpret_cast<const u32*>(this);
1320 return content[index]; 1315 return content[index];
1321 } 1316 }
1322 1317
1323 u32& operator [] (int index) { 1318 u32& operator[](int index) {
1324 u32* content = reinterpret_cast<u32*>(this); 1319 u32* content = reinterpret_cast<u32*>(this);
1325 return content[index]; 1320 return content[index];
1326 } 1321 }
@@ -1339,7 +1334,9 @@ private:
1339// is technically allowed since C++11. This macro should be enabled once MSVC adds 1334// is technically allowed since C++11. This macro should be enabled once MSVC adds
1340// support for that. 1335// support for that.
1341#ifndef _MSC_VER 1336#ifndef _MSC_VER
1342#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position") 1337#define ASSERT_REG_POSITION(field_name, position) \
1338 static_assert(offsetof(Regs, field_name) == position * 4, \
1339 "Field " #field_name " has invalid position")
1343 1340
1344ASSERT_REG_POSITION(trigger_irq, 0x10); 1341ASSERT_REG_POSITION(trigger_irq, 0x10);
1345ASSERT_REG_POSITION(cull_mode, 0x40); 1342ASSERT_REG_POSITION(cull_mode, 0x40);
@@ -1392,11 +1389,15 @@ ASSERT_REG_POSITION(vs, 0x2b0);
1392#undef ASSERT_REG_POSITION 1389#undef ASSERT_REG_POSITION
1393#endif // !defined(_MSC_VER) 1390#endif // !defined(_MSC_VER)
1394 1391
1395static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig structure has incorrect size"); 1392static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32),
1393 "ShaderConfig structure has incorrect size");
1396 1394
1397// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. 1395// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value
1398static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); 1396// anyway.
1399static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); 1397static_assert(sizeof(Regs) <= 0x300 * sizeof(u32),
1398 "Register set structure larger than it should be");
1399static_assert(sizeof(Regs) >= 0x300 * sizeof(u32),
1400 "Register set structure smaller than it should be");
1400 1401
1401/// Initialize Pica state 1402/// Initialize Pica state
1402void Init(); 1403void Init();
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
index 01f4285a8..2dbd6413f 100644
--- a/src/video_core/pica_state.h
+++ b/src/video_core/pica_state.h
@@ -33,7 +33,7 @@ struct State {
33 u32 raw; 33 u32 raw;
34 34
35 // LUT value, encoded as 12-bit fixed point, with 12 fraction bits 35 // LUT value, encoded as 12-bit fixed point, with 12 fraction bits
36 BitField< 0, 12, u32> value; // 0.0.12 fixed point 36 BitField<0, 12, u32> value; // 0.0.12 fixed point
37 37
38 // Used by HW for efficient interpolation, Citra does not use these 38 // Used by HW for efficient interpolation, Citra does not use these
39 BitField<12, 12, s32> difference; // 1.0.11 fixed point 39 BitField<12, 12, s32> difference; // 1.0.11 fixed point
@@ -51,8 +51,8 @@ struct State {
51 // Used for raw access 51 // Used for raw access
52 u32 raw; 52 u32 raw;
53 53
54 BitField< 0, 13, s32> difference; // 1.1.11 fixed point 54 BitField<0, 13, s32> difference; // 1.1.11 fixed point
55 BitField<13, 11, u32> value; // 0.0.11 fixed point 55 BitField<13, 11, u32> value; // 0.0.11 fixed point
56 }; 56 };
57 57
58 std::array<LutEntry, 128> lut; 58 std::array<LutEntry, 128> lut;
diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h
index 3b7bfbdca..20f648b03 100644
--- a/src/video_core/pica_types.h
+++ b/src/video_core/pica_types.h
@@ -22,7 +22,7 @@ namespace Pica {
22 * 22 *
23 * @todo Verify on HW if this conversion is sufficiently accurate. 23 * @todo Verify on HW if this conversion is sufficiently accurate.
24 */ 24 */
25template<unsigned M, unsigned E> 25template <unsigned M, unsigned E>
26struct Float { 26struct Float {
27public: 27public:
28 static Float<M, E> FromFloat32(float val) { 28 static Float<M, E> FromFloat32(float val) {
@@ -58,7 +58,7 @@ public:
58 return value; 58 return value;
59 } 59 }
60 60
61 Float<M, E> operator * (const Float<M, E>& flt) const { 61 Float<M, E> operator*(const Float<M, E>& flt) const {
62 if ((this->value == 0.f && !std::isnan(flt.value)) || 62 if ((this->value == 0.f && !std::isnan(flt.value)) ||
63 (flt.value == 0.f && !std::isnan(this->value))) 63 (flt.value == 0.f && !std::isnan(this->value)))
64 // PICA gives 0 instead of NaN when multiplying by inf 64 // PICA gives 0 instead of NaN when multiplying by inf
@@ -66,67 +66,68 @@ public:
66 return Float<M, E>::FromFloat32(ToFloat32() * flt.ToFloat32()); 66 return Float<M, E>::FromFloat32(ToFloat32() * flt.ToFloat32());
67 } 67 }
68 68
69 Float<M, E> operator / (const Float<M, E>& flt) const { 69 Float<M, E> operator/(const Float<M, E>& flt) const {
70 return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32()); 70 return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32());
71 } 71 }
72 72
73 Float<M, E> operator + (const Float<M, E>& flt) const { 73 Float<M, E> operator+(const Float<M, E>& flt) const {
74 return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32()); 74 return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32());
75 } 75 }
76 76
77 Float<M, E> operator - (const Float<M, E>& flt) const { 77 Float<M, E> operator-(const Float<M, E>& flt) const {
78 return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32()); 78 return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32());
79 } 79 }
80 80
81 Float<M, E>& operator *= (const Float<M, E>& flt) { 81 Float<M, E>& operator*=(const Float<M, E>& flt) {
82 if ((this->value == 0.f && !std::isnan(flt.value)) || 82 if ((this->value == 0.f && !std::isnan(flt.value)) ||
83 (flt.value == 0.f && !std::isnan(this->value))) 83 (flt.value == 0.f && !std::isnan(this->value)))
84 // PICA gives 0 instead of NaN when multiplying by inf 84 // PICA gives 0 instead of NaN when multiplying by inf
85 *this = Zero(); 85 *this = Zero();
86 else value *= flt.ToFloat32(); 86 else
87 value *= flt.ToFloat32();
87 return *this; 88 return *this;
88 } 89 }
89 90
90 Float<M, E>& operator /= (const Float<M, E>& flt) { 91 Float<M, E>& operator/=(const Float<M, E>& flt) {
91 value /= flt.ToFloat32(); 92 value /= flt.ToFloat32();
92 return *this; 93 return *this;
93 } 94 }
94 95
95 Float<M, E>& operator += (const Float<M, E>& flt) { 96 Float<M, E>& operator+=(const Float<M, E>& flt) {
96 value += flt.ToFloat32(); 97 value += flt.ToFloat32();
97 return *this; 98 return *this;
98 } 99 }
99 100
100 Float<M, E>& operator -= (const Float<M, E>& flt) { 101 Float<M, E>& operator-=(const Float<M, E>& flt) {
101 value -= flt.ToFloat32(); 102 value -= flt.ToFloat32();
102 return *this; 103 return *this;
103 } 104 }
104 105
105 Float<M, E> operator - () const { 106 Float<M, E> operator-() const {
106 return Float<M, E>::FromFloat32(-ToFloat32()); 107 return Float<M, E>::FromFloat32(-ToFloat32());
107 } 108 }
108 109
109 bool operator < (const Float<M, E>& flt) const { 110 bool operator<(const Float<M, E>& flt) const {
110 return ToFloat32() < flt.ToFloat32(); 111 return ToFloat32() < flt.ToFloat32();
111 } 112 }
112 113
113 bool operator > (const Float<M, E>& flt) const { 114 bool operator>(const Float<M, E>& flt) const {
114 return ToFloat32() > flt.ToFloat32(); 115 return ToFloat32() > flt.ToFloat32();
115 } 116 }
116 117
117 bool operator >= (const Float<M, E>& flt) const { 118 bool operator>=(const Float<M, E>& flt) const {
118 return ToFloat32() >= flt.ToFloat32(); 119 return ToFloat32() >= flt.ToFloat32();
119 } 120 }
120 121
121 bool operator <= (const Float<M, E>& flt) const { 122 bool operator<=(const Float<M, E>& flt) const {
122 return ToFloat32() <= flt.ToFloat32(); 123 return ToFloat32() <= flt.ToFloat32();
123 } 124 }
124 125
125 bool operator == (const Float<M, E>& flt) const { 126 bool operator==(const Float<M, E>& flt) const {
126 return ToFloat32() == flt.ToFloat32(); 127 return ToFloat32() == flt.ToFloat32();
127 } 128 }
128 129
129 bool operator != (const Float<M, E>& flt) const { 130 bool operator!=(const Float<M, E>& flt) const {
130 return ToFloat32() != flt.ToFloat32(); 131 return ToFloat32() != flt.ToFloat32();
131 } 132 }
132 133
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp
index 68ea3c08a..343edb191 100644
--- a/src/video_core/primitive_assembly.cpp
+++ b/src/video_core/primitive_assembly.cpp
@@ -10,62 +10,61 @@
10 10
11namespace Pica { 11namespace Pica {
12 12
13template<typename VertexType> 13template <typename VertexType>
14PrimitiveAssembler<VertexType>::PrimitiveAssembler(Regs::TriangleTopology topology) 14PrimitiveAssembler<VertexType>::PrimitiveAssembler(Regs::TriangleTopology topology)
15 : topology(topology), buffer_index(0) { 15 : topology(topology), buffer_index(0) {
16} 16}
17 17
18template<typename VertexType> 18template <typename VertexType>
19void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler) 19void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx,
20{ 20 TriangleHandler triangle_handler) {
21 switch (topology) { 21 switch (topology) {
22 // TODO: Figure out what's different with TriangleTopology::Shader. 22 // TODO: Figure out what's different with TriangleTopology::Shader.
23 case Regs::TriangleTopology::List: 23 case Regs::TriangleTopology::List:
24 case Regs::TriangleTopology::Shader: 24 case Regs::TriangleTopology::Shader:
25 if (buffer_index < 2) { 25 if (buffer_index < 2) {
26 buffer[buffer_index++] = vtx; 26 buffer[buffer_index++] = vtx;
27 } else { 27 } else {
28 buffer_index = 0; 28 buffer_index = 0;
29 29
30 triangle_handler(buffer[0], buffer[1], vtx); 30 triangle_handler(buffer[0], buffer[1], vtx);
31 } 31 }
32 break; 32 break;
33 33
34 case Regs::TriangleTopology::Strip: 34 case Regs::TriangleTopology::Strip:
35 case Regs::TriangleTopology::Fan: 35 case Regs::TriangleTopology::Fan:
36 if (strip_ready) 36 if (strip_ready)
37 triangle_handler(buffer[0], buffer[1], vtx); 37 triangle_handler(buffer[0], buffer[1], vtx);
38 38
39 buffer[buffer_index] = vtx; 39 buffer[buffer_index] = vtx;
40 40
41 strip_ready |= (buffer_index == 1); 41 strip_ready |= (buffer_index == 1);
42 42
43 if (topology == Regs::TriangleTopology::Strip) 43 if (topology == Regs::TriangleTopology::Strip)
44 buffer_index = !buffer_index; 44 buffer_index = !buffer_index;
45 else if (topology == Regs::TriangleTopology::Fan) 45 else if (topology == Regs::TriangleTopology::Fan)
46 buffer_index = 1; 46 buffer_index = 1;
47 break; 47 break;
48 48
49 default: 49 default:
50 LOG_ERROR(HW_GPU, "Unknown triangle topology %x:", (int)topology); 50 LOG_ERROR(HW_GPU, "Unknown triangle topology %x:", (int)topology);
51 break; 51 break;
52 } 52 }
53} 53}
54 54
55template<typename VertexType> 55template <typename VertexType>
56void PrimitiveAssembler<VertexType>::Reset() { 56void PrimitiveAssembler<VertexType>::Reset() {
57 buffer_index = 0; 57 buffer_index = 0;
58 strip_ready = false; 58 strip_ready = false;
59} 59}
60 60
61template<typename VertexType> 61template <typename VertexType>
62void PrimitiveAssembler<VertexType>::Reconfigure(Regs::TriangleTopology topology) { 62void PrimitiveAssembler<VertexType>::Reconfigure(Regs::TriangleTopology topology) {
63 Reset(); 63 Reset();
64 this->topology = topology; 64 this->topology = topology;
65} 65}
66 66
67// explicitly instantiate use cases 67// explicitly instantiate use cases
68template 68template struct PrimitiveAssembler<Shader::OutputVertex>;
69struct PrimitiveAssembler<Shader::OutputVertex>;
70 69
71} // namespace 70} // namespace
diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h
index 9396b4c85..2ad15a858 100644
--- a/src/video_core/primitive_assembly.h
+++ b/src/video_core/primitive_assembly.h
@@ -14,11 +14,9 @@ namespace Pica {
14 * Utility class to build triangles from a series of vertices, 14 * Utility class to build triangles from a series of vertices,
15 * according to a given triangle topology. 15 * according to a given triangle topology.
16 */ 16 */
17template<typename VertexType> 17template <typename VertexType>
18struct PrimitiveAssembler { 18struct PrimitiveAssembler {
19 using TriangleHandler = std::function<void(VertexType& v0, 19 using TriangleHandler = std::function<void(VertexType& v0, VertexType& v1, VertexType& v2)>;
20 VertexType& v1,
21 VertexType& v2)>;
22 20
23 PrimitiveAssembler(Regs::TriangleTopology topology = Regs::TriangleTopology::List); 21 PrimitiveAssembler(Regs::TriangleTopology topology = Regs::TriangleTopology::List);
24 22
@@ -48,5 +46,4 @@ private:
48 bool strip_ready = false; 46 bool strip_ready = false;
49}; 47};
50 48
51
52} // namespace 49} // namespace
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 6f369a00e..dbdc37ce6 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -15,16 +15,16 @@
15#include "common/microprofile.h" 15#include "common/microprofile.h"
16#include "common/vector_math.h" 16#include "common/vector_math.h"
17 17
18#include "core/memory.h"
19#include "core/hw/gpu.h" 18#include "core/hw/gpu.h"
19#include "core/memory.h"
20 20
21#include "video_core/debug_utils/debug_utils.h" 21#include "video_core/debug_utils/debug_utils.h"
22#include "video_core/pica.h" 22#include "video_core/pica.h"
23#include "video_core/pica_state.h" 23#include "video_core/pica_state.h"
24#include "video_core/pica_types.h" 24#include "video_core/pica_types.h"
25#include "video_core/rasterizer.h" 25#include "video_core/rasterizer.h"
26#include "video_core/utils.h"
27#include "video_core/shader/shader.h" 26#include "video_core/shader/shader.h"
27#include "video_core/utils.h"
28 28
29namespace Pica { 29namespace Pica {
30 30
@@ -39,8 +39,10 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
39 y = framebuffer.height - y; 39 y = framebuffer.height - y;
40 40
41 const u32 coarse_y = y & ~7; 41 const u32 coarse_y = y & ~7;
42 u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); 42 u32 bytes_per_pixel =
43 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; 43 GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
44 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
45 coarse_y * framebuffer.width * bytes_per_pixel;
44 u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; 46 u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset;
45 47
46 switch (framebuffer.color_format) { 48 switch (framebuffer.color_format) {
@@ -65,7 +67,8 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
65 break; 67 break;
66 68
67 default: 69 default:
68 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); 70 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x",
71 framebuffer.color_format.Value());
69 UNIMPLEMENTED(); 72 UNIMPLEMENTED();
70 } 73 }
71} 74}
@@ -77,8 +80,10 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
77 y = framebuffer.height - y; 80 y = framebuffer.height - y;
78 81
79 const u32 coarse_y = y & ~7; 82 const u32 coarse_y = y & ~7;
80 u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); 83 u32 bytes_per_pixel =
81 u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; 84 GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
85 u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
86 coarse_y * framebuffer.width * bytes_per_pixel;
82 u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; 87 u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset;
83 88
84 switch (framebuffer.color_format) { 89 switch (framebuffer.color_format) {
@@ -98,7 +103,8 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
98 return Color::DecodeRGBA4(src_pixel); 103 return Color::DecodeRGBA4(src_pixel);
99 104
100 default: 105 default:
101 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); 106 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x",
107 framebuffer.color_format.Value());
102 UNIMPLEMENTED(); 108 UNIMPLEMENTED();
103 } 109 }
104 110
@@ -120,16 +126,16 @@ static u32 GetDepth(int x, int y) {
120 u8* src_pixel = depth_buffer + src_offset; 126 u8* src_pixel = depth_buffer + src_offset;
121 127
122 switch (framebuffer.depth_format) { 128 switch (framebuffer.depth_format) {
123 case Regs::DepthFormat::D16: 129 case Regs::DepthFormat::D16:
124 return Color::DecodeD16(src_pixel); 130 return Color::DecodeD16(src_pixel);
125 case Regs::DepthFormat::D24: 131 case Regs::DepthFormat::D24:
126 return Color::DecodeD24(src_pixel); 132 return Color::DecodeD24(src_pixel);
127 case Regs::DepthFormat::D24S8: 133 case Regs::DepthFormat::D24S8:
128 return Color::DecodeD24S8(src_pixel).x; 134 return Color::DecodeD24S8(src_pixel).x;
129 default: 135 default:
130 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); 136 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
131 UNIMPLEMENTED(); 137 UNIMPLEMENTED();
132 return 0; 138 return 0;
133 } 139 }
134} 140}
135 141
@@ -148,12 +154,15 @@ static u8 GetStencil(int x, int y) {
148 u8* src_pixel = depth_buffer + src_offset; 154 u8* src_pixel = depth_buffer + src_offset;
149 155
150 switch (framebuffer.depth_format) { 156 switch (framebuffer.depth_format) {
151 case Regs::DepthFormat::D24S8: 157 case Regs::DepthFormat::D24S8:
152 return Color::DecodeD24S8(src_pixel).y; 158 return Color::DecodeD24S8(src_pixel).y;
153 159
154 default: 160 default:
155 LOG_WARNING(HW_GPU, "GetStencil called for function which doesn't have a stencil component (format %u)", framebuffer.depth_format); 161 LOG_WARNING(
156 return 0; 162 HW_GPU,
163 "GetStencil called for function which doesn't have a stencil component (format %u)",
164 framebuffer.depth_format);
165 return 0;
157 } 166 }
158} 167}
159 168
@@ -172,22 +181,22 @@ static void SetDepth(int x, int y, u32 value) {
172 u8* dst_pixel = depth_buffer + dst_offset; 181 u8* dst_pixel = depth_buffer + dst_offset;
173 182
174 switch (framebuffer.depth_format) { 183 switch (framebuffer.depth_format) {
175 case Regs::DepthFormat::D16: 184 case Regs::DepthFormat::D16:
176 Color::EncodeD16(value, dst_pixel); 185 Color::EncodeD16(value, dst_pixel);
177 break; 186 break;
178 187
179 case Regs::DepthFormat::D24: 188 case Regs::DepthFormat::D24:
180 Color::EncodeD24(value, dst_pixel); 189 Color::EncodeD24(value, dst_pixel);
181 break; 190 break;
182 191
183 case Regs::DepthFormat::D24S8: 192 case Regs::DepthFormat::D24S8:
184 Color::EncodeD24X8(value, dst_pixel); 193 Color::EncodeD24X8(value, dst_pixel);
185 break; 194 break;
186 195
187 default: 196 default:
188 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); 197 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
189 UNIMPLEMENTED(); 198 UNIMPLEMENTED();
190 break; 199 break;
191 } 200 }
192} 201}
193 202
@@ -206,19 +215,19 @@ static void SetStencil(int x, int y, u8 value) {
206 u8* dst_pixel = depth_buffer + dst_offset; 215 u8* dst_pixel = depth_buffer + dst_offset;
207 216
208 switch (framebuffer.depth_format) { 217 switch (framebuffer.depth_format) {
209 case Pica::Regs::DepthFormat::D16: 218 case Pica::Regs::DepthFormat::D16:
210 case Pica::Regs::DepthFormat::D24: 219 case Pica::Regs::DepthFormat::D24:
211 // Nothing to do 220 // Nothing to do
212 break; 221 break;
213 222
214 case Pica::Regs::DepthFormat::D24S8: 223 case Pica::Regs::DepthFormat::D24S8:
215 Color::EncodeX24S8(value, dst_pixel); 224 Color::EncodeX24S8(value, dst_pixel);
216 break; 225 break;
217 226
218 default: 227 default:
219 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); 228 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
220 UNIMPLEMENTED(); 229 UNIMPLEMENTED();
221 break; 230 break;
222 } 231 }
223} 232}
224 233
@@ -259,18 +268,24 @@ static u8 PerformStencilAction(Regs::StencilAction action, u8 old_stencil, u8 re
259 268
260// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values 269// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
261struct Fix12P4 { 270struct Fix12P4 {
262 Fix12P4() {} 271 Fix12P4() {
263 Fix12P4(u16 val) : val(val) {} 272 }
273 Fix12P4(u16 val) : val(val) {
274 }
264 275
265 static u16 FracMask() { return 0xF; } 276 static u16 FracMask() {
266 static u16 IntMask() { return (u16)~0xF; } 277 return 0xF;
278 }
279 static u16 IntMask() {
280 return (u16)~0xF;
281 }
267 282
268 operator u16() const { 283 operator u16() const {
269 return val; 284 return val;
270 } 285 }
271 286
272 bool operator < (const Fix12P4& oth) const { 287 bool operator<(const Fix12P4& oth) const {
273 return (u16)*this < (u16)oth; 288 return (u16) * this < (u16)oth;
274 } 289 }
275 290
276private: 291private:
@@ -283,9 +298,8 @@ private:
283 * 298 *
284 * @todo define orientation concretely. 299 * @todo define orientation concretely.
285 */ 300 */
286static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, 301static int SignedArea(const Math::Vec2<Fix12P4>& vtx1, const Math::Vec2<Fix12P4>& vtx2,
287 const Math::Vec2<Fix12P4>& vtx2, 302 const Math::Vec2<Fix12P4>& vtx3) {
288 const Math::Vec2<Fix12P4>& vtx3) {
289 const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); 303 const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
290 const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); 304 const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
291 // TODO: There is a very small chance this will overflow for sizeof(int) == 4 305 // TODO: There is a very small chance this will overflow for sizeof(int) == 4
@@ -298,11 +312,8 @@ MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 24
298 * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing 312 * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing
299 * culling via recursion. 313 * culling via recursion.
300 */ 314 */
301static void ProcessTriangleInternal(const Shader::OutputVertex& v0, 315static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1,
302 const Shader::OutputVertex& v1, 316 const Shader::OutputVertex& v2, bool reversed = false) {
303 const Shader::OutputVertex& v2,
304 bool reversed = false)
305{
306 const auto& regs = g_state.regs; 317 const auto& regs = g_state.regs;
307 MICROPROFILE_SCOPE(GPU_Rasterization); 318 MICROPROFILE_SCOPE(GPU_Rasterization);
308 319
@@ -316,9 +327,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
316 return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; 327 return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)};
317 }; 328 };
318 329
319 Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), 330 Math::Vec3<Fix12P4> vtxpos[3]{ScreenToRasterizerCoordinates(v0.screenpos),
320 ScreenToRasterizerCoordinates(v1.screenpos), 331 ScreenToRasterizerCoordinates(v1.screenpos),
321 ScreenToRasterizerCoordinates(v2.screenpos) }; 332 ScreenToRasterizerCoordinates(v2.screenpos)};
322 333
323 if (regs.cull_mode == Regs::CullMode::KeepAll) { 334 if (regs.cull_mode == Regs::CullMode::KeepAll) {
324 // Make sure we always end up with a triangle wound counter-clockwise 335 // Make sure we always end up with a triangle wound counter-clockwise
@@ -344,8 +355,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
344 u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); 355 u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
345 356
346 // Convert the scissor box coordinates to 12.4 fixed point 357 // Convert the scissor box coordinates to 12.4 fixed point
347 u16 scissor_x1 = (u16)( regs.scissor_test.x1 << 4); 358 u16 scissor_x1 = (u16)(regs.scissor_test.x1 << 4);
348 u16 scissor_y1 = (u16)( regs.scissor_test.y1 << 4); 359 u16 scissor_y1 = (u16)(regs.scissor_test.y1 << 4);
349 // x2,y2 have +1 added to cover the entire sub-pixel area 360 // x2,y2 have +1 added to cover the entire sub-pixel area
350 u16 scissor_x2 = (u16)((regs.scissor_test.x2 + 1) << 4); 361 u16 scissor_x2 = (u16)((regs.scissor_test.x2 + 1) << 4);
351 u16 scissor_y2 = (u16)((regs.scissor_test.y2 + 1) << 4); 362 u16 scissor_y2 = (u16)((regs.scissor_test.y2 + 1) << 4);
@@ -369,27 +380,32 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
369 // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones... 380 // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones...
370 auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx, 381 auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx,
371 const Math::Vec2<Fix12P4>& line1, 382 const Math::Vec2<Fix12P4>& line1,
372 const Math::Vec2<Fix12P4>& line2) 383 const Math::Vec2<Fix12P4>& line2) {
373 {
374 if (line1.y == line2.y) { 384 if (line1.y == line2.y) {
375 // just check if vertex is above us => bottom line parallel to x-axis 385 // just check if vertex is above us => bottom line parallel to x-axis
376 return vtx.y < line1.y; 386 return vtx.y < line1.y;
377 } else { 387 } else {
378 // check if vertex is on our left => right side 388 // check if vertex is on our left => right side
379 // TODO: Not sure how likely this is to overflow 389 // TODO: Not sure how likely this is to overflow
380 return (int)vtx.x < (int)line1.x + ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) / ((int)line2.y - (int)line1.y); 390 return (int)vtx.x < (int)line1.x +
391 ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) /
392 ((int)line2.y - (int)line1.y);
381 } 393 }
382 }; 394 };
383 int bias0 = IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0; 395 int bias0 =
384 int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; 396 IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0;
385 int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; 397 int bias1 =
398 IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0;
399 int bias2 =
400 IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0;
386 401
387 auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); 402 auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
388 403
389 auto textures = regs.GetTextures(); 404 auto textures = regs.GetTextures();
390 auto tev_stages = regs.GetTevStages(); 405 auto tev_stages = regs.GetTevStages();
391 406
392 bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; 407 bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable &&
408 g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8;
393 const auto stencil_test = g_state.regs.output_merger.stencil_test; 409 const auto stencil_test = g_state.regs.output_merger.stencil_test;
394 410
395 // Enter rasterization loop, starting at the center of the topleft bounding box corner. 411 // Enter rasterization loop, starting at the center of the topleft bounding box corner.
@@ -397,10 +413,10 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
397 for (u16 y = min_y + 8; y < max_y; y += 0x10) { 413 for (u16 y = min_y + 8; y < max_y; y += 0x10) {
398 for (u16 x = min_x + 8; x < max_x; x += 0x10) { 414 for (u16 x = min_x + 8; x < max_x; x += 0x10) {
399 415
400 // Do not process the pixel if it's inside the scissor box and the scissor mode is set to Exclude 416 // Do not process the pixel if it's inside the scissor box and the scissor mode is set
417 // to Exclude
401 if (regs.scissor_test.mode == Regs::ScissorMode::Exclude) { 418 if (regs.scissor_test.mode == Regs::ScissorMode::Exclude) {
402 if (x >= scissor_x1 && x < scissor_x2 && 419 if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2)
403 y >= scissor_y1 && y < scissor_y2)
404 continue; 420 continue;
405 } 421 }
406 422
@@ -414,15 +430,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
414 if (w0 < 0 || w1 < 0 || w2 < 0) 430 if (w0 < 0 || w1 < 0 || w2 < 0)
415 continue; 431 continue;
416 432
417 auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), 433 auto baricentric_coordinates =
418 float24::FromFloat32(static_cast<float>(w1)), 434 Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
419 float24::FromFloat32(static_cast<float>(w2))); 435 float24::FromFloat32(static_cast<float>(w1)),
420 float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates); 436 float24::FromFloat32(static_cast<float>(w2)));
437 float24 interpolated_w_inverse =
438 float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates);
421 439
422 // interpolated_z = z / w 440 // interpolated_z = z / w
423 float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 + 441 float interpolated_z_over_w =
424 v1.screenpos[2].ToFloat32() * w1 + 442 (v0.screenpos[2].ToFloat32() * w0 + v1.screenpos[2].ToFloat32() * w1 +
425 v2.screenpos[2].ToFloat32() * w2) / wsum; 443 v2.screenpos[2].ToFloat32() * w2) /
444 wsum;
426 445
427 // Not fully accurate. About 3 bits in precision are missing. 446 // Not fully accurate. About 3 bits in precision are missing.
428 // Z-Buffer (z / w * scale + offset) 447 // Z-Buffer (z / w * scale + offset)
@@ -461,11 +480,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
461 }; 480 };
462 481
463 Math::Vec4<u8> primary_color{ 482 Math::Vec4<u8> primary_color{
464 (u8)(GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * 255), 483 (u8)(
465 (u8)(GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * 255), 484 GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() *
466 (u8)(GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * 255), 485 255),
467 (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) 486 (u8)(
468 }; 487 GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() *
488 255),
489 (u8)(
490 GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() *
491 255),
492 (u8)(
493 GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() *
494 255)};
469 495
470 Math::Vec2<float24> uv[3]; 496 Math::Vec2<float24> uv[3];
471 uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); 497 uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
@@ -489,7 +515,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
489 // Only unit 0 respects the texturing type (according to 3DBrew) 515 // Only unit 0 respects the texturing type (according to 3DBrew)
490 // TODO: Refactor so cubemaps and shadowmaps can be handled 516 // TODO: Refactor so cubemaps and shadowmaps can be handled
491 if (i == 0) { 517 if (i == 0) {
492 switch(texture.config.type) { 518 switch (texture.config.type) {
493 case Regs::TextureConfig::Texture2D: 519 case Regs::TextureConfig::Texture2D:
494 break; 520 break;
495 case Regs::TextureConfig::Projection2D: { 521 case Regs::TextureConfig::Projection2D: {
@@ -506,51 +532,58 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
506 } 532 }
507 } 533 }
508 534
509 int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); 535 int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width)))
510 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); 536 .ToFloat32();
511 537 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height)))
538 .ToFloat32();
512 539
513 static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { 540 static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val,
541 unsigned size) {
514 switch (mode) { 542 switch (mode) {
515 case Regs::TextureConfig::ClampToEdge: 543 case Regs::TextureConfig::ClampToEdge:
516 val = std::max(val, 0); 544 val = std::max(val, 0);
517 val = std::min(val, (int)size - 1); 545 val = std::min(val, (int)size - 1);
518 return val; 546 return val;
519 547
520 case Regs::TextureConfig::ClampToBorder: 548 case Regs::TextureConfig::ClampToBorder:
521 return val; 549 return val;
522 550
523 case Regs::TextureConfig::Repeat: 551 case Regs::TextureConfig::Repeat:
524 return (int)((unsigned)val % size); 552 return (int)((unsigned)val % size);
525 553
526 case Regs::TextureConfig::MirroredRepeat: 554 case Regs::TextureConfig::MirroredRepeat: {
527 { 555 unsigned int coord = ((unsigned)val % (2 * size));
528 unsigned int coord = ((unsigned)val % (2 * size)); 556 if (coord >= size)
529 if (coord >= size) 557 coord = 2 * size - 1 - coord;
530 coord = 2 * size - 1 - coord; 558 return (int)coord;
531 return (int)coord; 559 }
532 } 560
533 561 default:
534 default: 562 LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode);
535 LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); 563 UNIMPLEMENTED();
536 UNIMPLEMENTED(); 564 return 0;
537 return 0;
538 } 565 }
539 }; 566 };
540 567
541 if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder && (s < 0 || s >= texture.config.width)) 568 if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder &&
542 || (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder && (t < 0 || t >= texture.config.height))) { 569 (s < 0 || s >= texture.config.width)) ||
570 (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder &&
571 (t < 0 || t >= texture.config.height))) {
543 auto border_color = texture.config.border_color; 572 auto border_color = texture.config.border_color;
544 texture_color[i] = { border_color.r, border_color.g, border_color.b, border_color.a }; 573 texture_color[i] = {border_color.r, border_color.g, border_color.b,
574 border_color.a};
545 } else { 575 } else {
546 // Textures are laid out from bottom to top, hence we invert the t coordinate. 576 // Textures are laid out from bottom to top, hence we invert the t coordinate.
547 // NOTE: This may not be the right place for the inversion. 577 // NOTE: This may not be the right place for the inversion.
548 // TODO: Check if this applies to ETC textures, too. 578 // TODO: Check if this applies to ETC textures, too.
549 s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); 579 s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);
550 t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); 580 t = texture.config.height - 1 -
581 GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
551 582
552 u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); 583 u8* texture_data =
553 auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); 584 Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
585 auto info =
586 DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
554 587
555 // TODO: Apply the min and mag filters to the texture 588 // TODO: Apply the min and mag filters to the texture
556 texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); 589 texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info);
@@ -571,10 +604,10 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
571 Math::Vec4<u8> combiner_buffer = {0, 0, 0, 0}; 604 Math::Vec4<u8> combiner_buffer = {0, 0, 0, 0};
572 Math::Vec4<u8> next_combiner_buffer = { 605 Math::Vec4<u8> next_combiner_buffer = {
573 regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g, 606 regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g,
574 regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a 607 regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a};
575 };
576 608
577 for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { 609 for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size();
610 ++tev_stage_index) {
578 const auto& tev_stage = tev_stages[tev_stage_index]; 611 const auto& tev_stage = tev_stages[tev_stage_index];
579 using Source = Regs::TevStageConfig::Source; 612 using Source = Regs::TevStageConfig::Source;
580 using ColorModifier = Regs::TevStageConfig::ColorModifier; 613 using ColorModifier = Regs::TevStageConfig::ColorModifier;
@@ -606,7 +639,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
606 return combiner_buffer; 639 return combiner_buffer;
607 640
608 case Source::Constant: 641 case Source::Constant:
609 return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; 642 return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b,
643 tev_stage.const_a};
610 644
611 case Source::Previous: 645 case Source::Previous:
612 return combiner_output; 646 return combiner_output;
@@ -618,7 +652,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
618 } 652 }
619 }; 653 };
620 654
621 static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { 655 static auto GetColorModifier = [](ColorModifier factor,
656 const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
622 switch (factor) { 657 switch (factor) {
623 case ColorModifier::SourceColor: 658 case ColorModifier::SourceColor:
624 return values.rgb(); 659 return values.rgb();
@@ -652,7 +687,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
652 } 687 }
653 }; 688 };
654 689
655 static auto GetAlphaModifier = [](AlphaModifier factor, const Math::Vec4<u8>& values) -> u8 { 690 static auto GetAlphaModifier = [](AlphaModifier factor,
691 const Math::Vec4<u8>& values) -> u8 {
656 switch (factor) { 692 switch (factor) {
657 case AlphaModifier::SourceAlpha: 693 case AlphaModifier::SourceAlpha:
658 return values.a(); 694 return values.a();
@@ -680,7 +716,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
680 } 716 }
681 }; 717 };
682 718
683 static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { 719 static auto ColorCombine = [](Operation op,
720 const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
684 switch (op) { 721 switch (op) {
685 case Operation::Replace: 722 case Operation::Replace:
686 return input[0]; 723 return input[0];
@@ -688,8 +725,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
688 case Operation::Modulate: 725 case Operation::Modulate:
689 return ((input[0] * input[1]) / 255).Cast<u8>(); 726 return ((input[0] * input[1]) / 255).Cast<u8>();
690 727
691 case Operation::Add: 728 case Operation::Add: {
692 {
693 auto result = input[0] + input[1]; 729 auto result = input[0] + input[1];
694 result.r() = std::min(255, result.r()); 730 result.r() = std::min(255, result.r());
695 result.g() = std::min(255, result.g()); 731 result.g() = std::min(255, result.g());
@@ -697,10 +733,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
697 return result.Cast<u8>(); 733 return result.Cast<u8>();
698 } 734 }
699 735
700 case Operation::AddSigned: 736 case Operation::AddSigned: {
701 { 737 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
702 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct 738 // (byte) 128 is correct
703 auto result = input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128); 739 auto result = input[0].Cast<int>() + input[1].Cast<int>() -
740 Math::MakeVec<int>(128, 128, 128);
704 result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); 741 result.r() = MathUtil::Clamp<int>(result.r(), 0, 255);
705 result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); 742 result.g() = MathUtil::Clamp<int>(result.g(), 0, 255);
706 result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); 743 result.b() = MathUtil::Clamp<int>(result.b(), 0, 255);
@@ -708,10 +745,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
708 } 745 }
709 746
710 case Operation::Lerp: 747 case Operation::Lerp:
711 return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); 748 return ((input[0] * input[2] +
749 input[1] *
750 (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) /
751 255)
752 .Cast<u8>();
712 753
713 case Operation::Subtract: 754 case Operation::Subtract: {
714 {
715 auto result = input[0].Cast<int>() - input[1].Cast<int>(); 755 auto result = input[0].Cast<int>() - input[1].Cast<int>();
716 result.r() = std::max(0, result.r()); 756 result.r() = std::max(0, result.r());
717 result.g() = std::max(0, result.g()); 757 result.g() = std::max(0, result.g());
@@ -719,8 +759,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
719 return result.Cast<u8>(); 759 return result.Cast<u8>();
720 } 760 }
721 761
722 case Operation::MultiplyThenAdd: 762 case Operation::MultiplyThenAdd: {
723 {
724 auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; 763 auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255;
725 result.r() = std::min(255, result.r()); 764 result.r() = std::min(255, result.r());
726 result.g() = std::min(255, result.g()); 765 result.g() = std::min(255, result.g());
@@ -728,8 +767,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
728 return result.Cast<u8>(); 767 return result.Cast<u8>();
729 } 768 }
730 769
731 case Operation::AddThenMultiply: 770 case Operation::AddThenMultiply: {
732 {
733 auto result = input[0] + input[1]; 771 auto result = input[0] + input[1];
734 result.r() = std::min(255, result.r()); 772 result.r() = std::min(255, result.r());
735 result.g() = std::min(255, result.g()); 773 result.g() = std::min(255, result.g());
@@ -737,17 +775,19 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
737 result = (result * input[2].Cast<int>()) / 255; 775 result = (result * input[2].Cast<int>()) / 255;
738 return result.Cast<u8>(); 776 return result.Cast<u8>();
739 } 777 }
740 case Operation::Dot3_RGB: 778 case Operation::Dot3_RGB: {
741 {
742 // Not fully accurate. 779 // Not fully accurate.
743 // Worst case scenario seems to yield a +/-3 error 780 // Worst case scenario seems to yield a +/-3 error
744 // Some HW results indicate that the per-component computation can't have a higher precision than 1/256, 781 // Some HW results indicate that the per-component computation can't have a
745 // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( (0x80,g0,b0),(0x80,g1,b1) ) give different results 782 // higher precision than 1/256,
746 int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + 783 // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb(
747 ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + 784 // (0x80,g0,b0),(0x80,g1,b1) ) give different results
748 ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; 785 int result =
786 ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 +
787 ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 +
788 ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256;
749 result = std::max(0, std::min(255, result)); 789 result = std::max(0, std::min(255, result));
750 return { (u8)result, (u8)result, (u8)result }; 790 return {(u8)result, (u8)result, (u8)result};
751 } 791 }
752 default: 792 default:
753 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); 793 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op);
@@ -756,7 +796,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
756 } 796 }
757 }; 797 };
758 798
759 static auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { 799 static auto AlphaCombine = [](Operation op, const std::array<u8, 3>& input) -> u8 {
760 switch (op) { 800 switch (op) {
761 case Operation::Replace: 801 case Operation::Replace:
762 return input[0]; 802 return input[0];
@@ -767,9 +807,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
767 case Operation::Add: 807 case Operation::Add:
768 return std::min(255, input[0] + input[1]); 808 return std::min(255, input[0] + input[1]);
769 809
770 case Operation::AddSigned: 810 case Operation::AddSigned: {
771 { 811 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
772 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct 812 // (byte) 128 is correct
773 auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; 813 auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128;
774 return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); 814 return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255));
775 } 815 }
@@ -801,32 +841,40 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
801 Math::Vec3<u8> color_result[3] = { 841 Math::Vec3<u8> color_result[3] = {
802 GetColorModifier(tev_stage.color_modifier1, GetSource(tev_stage.color_source1)), 842 GetColorModifier(tev_stage.color_modifier1, GetSource(tev_stage.color_source1)),
803 GetColorModifier(tev_stage.color_modifier2, GetSource(tev_stage.color_source2)), 843 GetColorModifier(tev_stage.color_modifier2, GetSource(tev_stage.color_source2)),
804 GetColorModifier(tev_stage.color_modifier3, GetSource(tev_stage.color_source3)) 844 GetColorModifier(tev_stage.color_modifier3,
805 }; 845 GetSource(tev_stage.color_source3))};
806 auto color_output = ColorCombine(tev_stage.color_op, color_result); 846 auto color_output = ColorCombine(tev_stage.color_op, color_result);
807 847
808 // alpha combiner 848 // alpha combiner
809 std::array<u8,3> alpha_result = {{ 849 std::array<u8, 3> alpha_result = {
810 GetAlphaModifier(tev_stage.alpha_modifier1, GetSource(tev_stage.alpha_source1)), 850 {GetAlphaModifier(tev_stage.alpha_modifier1,
811 GetAlphaModifier(tev_stage.alpha_modifier2, GetSource(tev_stage.alpha_source2)), 851 GetSource(tev_stage.alpha_source1)),
812 GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)) 852 GetAlphaModifier(tev_stage.alpha_modifier2,
813 }}; 853 GetSource(tev_stage.alpha_source2)),
854 GetAlphaModifier(tev_stage.alpha_modifier3,
855 GetSource(tev_stage.alpha_source3))}};
814 auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); 856 auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
815 857
816 combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); 858 combiner_output[0] =
817 combiner_output[1] = std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); 859 std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier());
818 combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); 860 combiner_output[1] =
819 combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); 861 std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier());
862 combiner_output[2] =
863 std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier());
864 combiner_output[3] =
865 std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier());
820 866
821 combiner_buffer = next_combiner_buffer; 867 combiner_buffer = next_combiner_buffer;
822 868
823 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { 869 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(
870 tev_stage_index)) {
824 next_combiner_buffer.r() = combiner_output.r(); 871 next_combiner_buffer.r() = combiner_output.r();
825 next_combiner_buffer.g() = combiner_output.g(); 872 next_combiner_buffer.g() = combiner_output.g();
826 next_combiner_buffer.b() = combiner_output.b(); 873 next_combiner_buffer.b() = combiner_output.b();
827 } 874 }
828 875
829 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { 876 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(
877 tev_stage_index)) {
830 next_combiner_buffer.a() = combiner_output.a(); 878 next_combiner_buffer.a() = combiner_output.a();
831 } 879 }
832 } 880 }
@@ -897,21 +945,26 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
897 float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f); 945 float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f);
898 float fog_f = fog_index - fog_i; 946 float fog_f = fog_index - fog_i;
899 const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)]; 947 const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)];
900 float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) / 2047.0f; // This is signed fixed point 1.11 948 float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) /
949 2047.0f; // This is signed fixed point 1.11
901 fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f); 950 fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f);
902 951
903 // Blend the fog 952 // Blend the fog
904 for (unsigned i = 0; i < 3; i++) { 953 for (unsigned i = 0; i < 3; i++) {
905 combiner_output[i] = fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i]; 954 combiner_output[i] =
955 fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i];
906 } 956 }
907 } 957 }
908 958
909 u8 old_stencil = 0; 959 u8 old_stencil = 0;
910 960
911 auto UpdateStencil = [stencil_test, x, y, &old_stencil](Pica::Regs::StencilAction action) { 961 auto UpdateStencil = [stencil_test, x, y,
912 u8 new_stencil = PerformStencilAction(action, old_stencil, stencil_test.reference_value); 962 &old_stencil](Pica::Regs::StencilAction action) {
963 u8 new_stencil =
964 PerformStencilAction(action, old_stencil, stencil_test.reference_value);
913 if (g_state.regs.framebuffer.allow_depth_stencil_write != 0) 965 if (g_state.regs.framebuffer.allow_depth_stencil_write != 0)
914 SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); 966 SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) |
967 (old_stencil & ~stencil_test.write_mask));
915 }; 968 };
916 969
917 if (stencil_action_enable) { 970 if (stencil_action_enable) {
@@ -1030,8 +1083,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1030 static_cast<u8>(output_merger.blend_const.r), 1083 static_cast<u8>(output_merger.blend_const.r),
1031 static_cast<u8>(output_merger.blend_const.g), 1084 static_cast<u8>(output_merger.blend_const.g),
1032 static_cast<u8>(output_merger.blend_const.b), 1085 static_cast<u8>(output_merger.blend_const.b),
1033 static_cast<u8>(output_merger.blend_const.a) 1086 static_cast<u8>(output_merger.blend_const.a)};
1034 };
1035 1087
1036 switch (factor) { 1088 switch (factor) {
1037 case Regs::BlendFactor::Zero: 1089 case Regs::BlendFactor::Zero:
@@ -1091,12 +1143,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1091 return combiner_output[channel]; 1143 return combiner_output[channel];
1092 }; 1144 };
1093 1145
1094 static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, 1146 static auto EvaluateBlendEquation = [](
1095 const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, 1147 const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
1096 Regs::BlendEquation equation) { 1148 const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
1149 Regs::BlendEquation equation) {
1097 Math::Vec4<int> result; 1150 Math::Vec4<int> result;
1098 1151
1099 auto src_result = (src * srcfactor).Cast<int>(); 1152 auto src_result = (src * srcfactor).Cast<int>();
1100 auto dst_result = (dest * destfactor).Cast<int>(); 1153 auto dst_result = (dest * destfactor).Cast<int>();
1101 1154
1102 switch (equation) { 1155 switch (equation) {
@@ -1134,10 +1187,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1134 UNIMPLEMENTED(); 1187 UNIMPLEMENTED();
1135 } 1188 }
1136 1189
1137 return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), 1190 return Math::Vec4<u8>(
1138 MathUtil::Clamp(result.g(), 0, 255), 1191 MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255),
1139 MathUtil::Clamp(result.b(), 0, 255), 1192 MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255));
1140 MathUtil::Clamp(result.a(), 0, 255));
1141 }; 1193 };
1142 1194
1143 auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), 1195 auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb),
@@ -1150,8 +1202,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1150 LookupFactor(2, params.factor_dest_rgb), 1202 LookupFactor(2, params.factor_dest_rgb),
1151 LookupFactor(3, params.factor_dest_a)); 1203 LookupFactor(3, params.factor_dest_a));
1152 1204
1153 blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); 1205 blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor,
1154 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); 1206 params.blend_equation_rgb);
1207 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest,
1208 dstfactor, params.blend_equation_a)
1209 .a();
1155 } else { 1210 } else {
1156 static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 { 1211 static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 {
1157 switch (op) { 1212 switch (op) {
@@ -1205,19 +1260,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1205 } 1260 }
1206 }; 1261 };
1207 1262
1208 blend_output = Math::MakeVec( 1263 blend_output =
1209 LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), 1264 Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op),
1210 LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), 1265 LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op),
1211 LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op), 1266 LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op),
1212 LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op)); 1267 LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op));
1213 } 1268 }
1214 1269
1215 const Math::Vec4<u8> result = { 1270 const Math::Vec4<u8> result = {output_merger.red_enable ? blend_output.r() : dest.r(),
1216 output_merger.red_enable ? blend_output.r() : dest.r(), 1271 output_merger.green_enable ? blend_output.g() : dest.g(),
1217 output_merger.green_enable ? blend_output.g() : dest.g(), 1272 output_merger.blue_enable ? blend_output.b() : dest.b(),
1218 output_merger.blue_enable ? blend_output.b() : dest.b(), 1273 output_merger.alpha_enable ? blend_output.a()
1219 output_merger.alpha_enable ? blend_output.a() : dest.a() 1274 : dest.a()};
1220 };
1221 1275
1222 if (regs.framebuffer.allow_color_write != 0) 1276 if (regs.framebuffer.allow_color_write != 0)
1223 DrawPixel(x >> 4, y >> 4, result); 1277 DrawPixel(x >> 4, y >> 4, result);
@@ -1225,8 +1279,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1225 } 1279 }
1226} 1280}
1227 1281
1228void ProcessTriangle(const Shader::OutputVertex& v0, 1282void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1,
1229 const Shader::OutputVertex& v1,
1230 const Shader::OutputVertex& v2) { 1283 const Shader::OutputVertex& v2) {
1231 ProcessTriangleInternal(v0, v1, v2); 1284 ProcessTriangleInternal(v0, v1, v2);
1232} 1285}
diff --git a/src/video_core/rasterizer.h b/src/video_core/rasterizer.h
index a6a9634b4..6cbda3067 100644
--- a/src/video_core/rasterizer.h
+++ b/src/video_core/rasterizer.h
@@ -7,13 +7,12 @@
7namespace Pica { 7namespace Pica {
8 8
9namespace Shader { 9namespace Shader {
10 struct OutputVertex; 10struct OutputVertex;
11} 11}
12 12
13namespace Rasterizer { 13namespace Rasterizer {
14 14
15void ProcessTriangle(const Shader::OutputVertex& v0, 15void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1,
16 const Shader::OutputVertex& v1,
17 const Shader::OutputVertex& v2); 16 const Shader::OutputVertex& v2);
18 17
19} // namespace Rasterizer 18} // namespace Rasterizer
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index bf7101665..ce834bd30 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -20,7 +20,8 @@ namespace VideoCore {
20 20
21class RasterizerInterface { 21class RasterizerInterface {
22public: 22public:
23 virtual ~RasterizerInterface() {} 23 virtual ~RasterizerInterface() {
24 }
24 25
25 /// Queues the primitive formed by the given vertices for rendering 26 /// Queues the primitive formed by the given vertices for rendering
26 virtual void AddTriangle(const Pica::Shader::OutputVertex& v0, 27 virtual void AddTriangle(const Pica::Shader::OutputVertex& v0,
@@ -39,17 +40,25 @@ public:
39 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory 40 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
40 virtual void FlushRegion(PAddr addr, u32 size) = 0; 41 virtual void FlushRegion(PAddr addr, u32 size) = 0;
41 42
42 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory and invalidated 43 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
44 /// and invalidated
43 virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; 45 virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0;
44 46
45 /// Attempt to use a faster method to perform a display transfer 47 /// Attempt to use a faster method to perform a display transfer
46 virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { return false; } 48 virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
49 return false;
50 }
47 51
48 /// Attempt to use a faster method to fill a region 52 /// Attempt to use a faster method to fill a region
49 virtual bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { return false; } 53 virtual bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
54 return false;
55 }
50 56
51 /// Attempt to use a faster method to display the framebuffer to screen 57 /// Attempt to use a faster method to display the framebuffer to screen
52 virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { return false; } 58 virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config,
59 PAddr framebuffer_addr, u32 pixel_stride,
60 ScreenInfo& screen_info) {
61 return false;
62 }
53}; 63};
54
55} 64}
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 3f451e062..090683276 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -6,9 +6,9 @@
6#include <memory> 6#include <memory>
7 7
8#include "video_core/renderer_base.h" 8#include "video_core/renderer_base.h"
9#include "video_core/video_core.h"
10#include "video_core/swrasterizer.h"
11#include "video_core/renderer_opengl/gl_rasterizer.h" 9#include "video_core/renderer_opengl/gl_rasterizer.h"
10#include "video_core/swrasterizer.h"
11#include "video_core/video_core.h"
12 12
13void RendererBase::RefreshRasterizerSetting() { 13void RendererBase::RefreshRasterizerSetting() {
14 bool hw_renderer_enabled = VideoCore::g_hw_renderer_enabled; 14 bool hw_renderer_enabled = VideoCore::g_hw_renderer_enabled;
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index f68091cc8..22e2f9815 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -14,13 +14,8 @@ class EmuWindow;
14 14
15class RendererBase : NonCopyable { 15class RendererBase : NonCopyable {
16public: 16public:
17
18 /// Used to reference a framebuffer 17 /// Used to reference a framebuffer
19 enum kFramebuffer { 18 enum kFramebuffer { kFramebuffer_VirtualXFB = 0, kFramebuffer_EFB, kFramebuffer_Texture };
20 kFramebuffer_VirtualXFB = 0,
21 kFramebuffer_EFB,
22 kFramebuffer_Texture
23 };
24 19
25 virtual ~RendererBase() { 20 virtual ~RendererBase() {
26 } 21 }
@@ -59,8 +54,8 @@ public:
59 54
60protected: 55protected:
61 std::unique_ptr<VideoCore::RasterizerInterface> rasterizer; 56 std::unique_ptr<VideoCore::RasterizerInterface> rasterizer;
62 f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer 57 f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
63 int m_current_frame = 0; ///< Current frame, should be set by the renderer 58 int m_current_frame = 0; ///< Current frame, should be set by the renderer
64 59
65private: 60private:
66 bool opengl_rasterizer_active = false; 61 bool opengl_rasterizer_active = false;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f8393c618..5021f48bc 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -32,8 +32,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) {
32 stage.alpha_source1 == Pica::Regs::TevStageConfig::Source::Previous && 32 stage.alpha_source1 == Pica::Regs::TevStageConfig::Source::Previous &&
33 stage.color_modifier1 == Pica::Regs::TevStageConfig::ColorModifier::SourceColor && 33 stage.color_modifier1 == Pica::Regs::TevStageConfig::ColorModifier::SourceColor &&
34 stage.alpha_modifier1 == Pica::Regs::TevStageConfig::AlphaModifier::SourceAlpha && 34 stage.alpha_modifier1 == Pica::Regs::TevStageConfig::AlphaModifier::SourceAlpha &&
35 stage.GetColorMultiplier() == 1 && 35 stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
36 stage.GetAlphaMultiplier() == 1);
37} 36}
38 37
39RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { 38RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
@@ -65,26 +64,34 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
65 uniform_block_data.fog_lut_dirty = true; 64 uniform_block_data.fog_lut_dirty = true;
66 65
67 // Set vertex attributes 66 // Set vertex attributes
68 glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); 67 glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE,
68 sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
69 glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION); 69 glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION);
70 70
71 glVertexAttribPointer(GLShader::ATTRIBUTE_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, color)); 71 glVertexAttribPointer(GLShader::ATTRIBUTE_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
72 (GLvoid*)offsetof(HardwareVertex, color));
72 glEnableVertexAttribArray(GLShader::ATTRIBUTE_COLOR); 73 glEnableVertexAttribArray(GLShader::ATTRIBUTE_COLOR);
73 74
74 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0)); 75 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0, 2, GL_FLOAT, GL_FALSE,
75 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD1, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord1)); 76 sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0));
76 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD2, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord2)); 77 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD1, 2, GL_FLOAT, GL_FALSE,
78 sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord1));
79 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD2, 2, GL_FLOAT, GL_FALSE,
80 sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord2));
77 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0); 81 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0);
78 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); 82 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1);
79 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); 83 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2);
80 84
81 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w)); 85 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE,
86 sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w));
82 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0_W); 87 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0_W);
83 88
84 glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); 89 glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE,
90 sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat));
85 glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); 91 glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT);
86 92
87 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view)); 93 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
94 (GLvoid*)offsetof(HardwareVertex, view));
88 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); 95 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW);
89 96
90 // Create render framebuffer 97 // Create render framebuffer
@@ -130,7 +137,6 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
130} 137}
131 138
132RasterizerOpenGL::~RasterizerOpenGL() { 139RasterizerOpenGL::~RasterizerOpenGL() {
133
134} 140}
135 141
136/** 142/**
@@ -149,8 +155,8 @@ RasterizerOpenGL::~RasterizerOpenGL() {
149 * manually using two Lerps, and doing this correction before each Lerp. 155 * manually using two Lerps, and doing this correction before each Lerp.
150 */ 156 */
151static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) { 157static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) {
152 Math::Vec4f a{ qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32() }; 158 Math::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()};
153 Math::Vec4f b{ qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32() }; 159 Math::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()};
154 160
155 return (Math::Dot(a, b) < 0.f); 161 return (Math::Dot(a, b) < 0.f);
156} 162}
@@ -173,15 +179,20 @@ void RasterizerOpenGL::DrawTriangles() {
173 CachedSurface* color_surface; 179 CachedSurface* color_surface;
174 CachedSurface* depth_surface; 180 CachedSurface* depth_surface;
175 MathUtil::Rectangle<int> rect; 181 MathUtil::Rectangle<int> rect;
176 std::tie(color_surface, depth_surface, rect) = res_cache.GetFramebufferSurfaces(regs.framebuffer); 182 std::tie(color_surface, depth_surface, rect) =
183 res_cache.GetFramebufferSurfaces(regs.framebuffer);
177 184
178 state.draw.draw_framebuffer = framebuffer.handle; 185 state.draw.draw_framebuffer = framebuffer.handle;
179 state.Apply(); 186 state.Apply();
180 187
181 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? color_surface->texture.handle : 0, 0); 188 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
182 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_surface != nullptr ? depth_surface->texture.handle : 0, 0); 189 color_surface != nullptr ? color_surface->texture.handle : 0, 0);
190 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
191 depth_surface != nullptr ? depth_surface->texture.handle : 0, 0);
183 bool has_stencil = regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; 192 bool has_stencil = regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8;
184 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0); 193 glFramebufferTexture2D(
194 GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
195 (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0);
185 196
186 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { 197 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
187 return; 198 return;
@@ -194,7 +205,8 @@ void RasterizerOpenGL::DrawTriangles() {
194 205
195 glViewport((GLint)(rect.left + regs.viewport_corner.x * color_surface->res_scale_width), 206 glViewport((GLint)(rect.left + regs.viewport_corner.x * color_surface->res_scale_width),
196 (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height), 207 (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height),
197 (GLsizei)(viewport_width * color_surface->res_scale_width), (GLsizei)(viewport_height * color_surface->res_scale_height)); 208 (GLsizei)(viewport_width * color_surface->res_scale_width),
209 (GLsizei)(viewport_height * color_surface->res_scale_height));
198 210
199 if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width || 211 if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width ||
200 uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) { 212 uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) {
@@ -245,14 +257,16 @@ void RasterizerOpenGL::DrawTriangles() {
245 257
246 // Sync the uniform data 258 // Sync the uniform data
247 if (uniform_block_data.dirty) { 259 if (uniform_block_data.dirty) {
248 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); 260 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data,
261 GL_STATIC_DRAW);
249 uniform_block_data.dirty = false; 262 uniform_block_data.dirty = false;
250 } 263 }
251 264
252 state.Apply(); 265 state.Apply();
253 266
254 // Draw the vertex batch 267 // Draw the vertex batch
255 glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW); 268 glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(),
269 GL_STREAM_DRAW);
256 glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); 270 glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size());
257 271
258 // Mark framebuffer surfaces as dirty 272 // Mark framebuffer surfaces as dirty
@@ -278,7 +292,7 @@ void RasterizerOpenGL::DrawTriangles() {
278void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { 292void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
279 const auto& regs = Pica::g_state.regs; 293 const auto& regs = Pica::g_state.regs;
280 294
281 switch(id) { 295 switch (id) {
282 // Culling 296 // Culling
283 case PICA_REG_INDEX(cull_mode): 297 case PICA_REG_INDEX(cull_mode):
284 SyncCullMode(); 298 SyncCullMode();
@@ -548,7 +562,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
548 SyncLightAmbient(7); 562 SyncLightAmbient(7);
549 break; 563 break;
550 564
551 // Fragment lighting position 565 // Fragment lighting position
552 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].x, 0x144 + 0 * 0x10): 566 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].x, 0x144 + 0 * 0x10):
553 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].z, 0x145 + 0 * 0x10): 567 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].z, 0x145 + 0 * 0x10):
554 SyncLightPosition(0); 568 SyncLightPosition(0);
@@ -659,13 +673,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
659 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): 673 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc):
660 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): 674 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd):
661 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): 675 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce):
662 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): 676 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): {
663 {
664 auto& lut_config = regs.lighting.lut_config; 677 auto& lut_config = regs.lighting.lut_config;
665 uniform_block_data.lut_dirty[lut_config.type / 4] = true; 678 uniform_block_data.lut_dirty[lut_config.type / 4] = true;
666 break; 679 break;
667 } 680 }
668
669 } 681 }
670} 682}
671 683
@@ -699,8 +711,10 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe
699 711
700 CachedSurface dst_params; 712 CachedSurface dst_params;
701 dst_params.addr = config.GetPhysicalOutputAddress(); 713 dst_params.addr = config.GetPhysicalOutputAddress();
702 dst_params.width = config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value(); 714 dst_params.width =
703 dst_params.height = config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value(); 715 config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value();
716 dst_params.height =
717 config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value();
704 dst_params.is_tiled = config.input_linear != config.dont_swizzle; 718 dst_params.is_tiled = config.input_linear != config.dont_swizzle;
705 dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format); 719 dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format);
706 720
@@ -735,7 +749,8 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe
735 return false; 749 return false;
736 } 750 }
737 751
738 u32 dst_size = dst_params.width * dst_params.height * CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8; 752 u32 dst_size = dst_params.width * dst_params.height *
753 CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8;
739 dst_surface->dirty = true; 754 dst_surface->dirty = true;
740 res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true); 755 res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true);
741 return true; 756 return true;
@@ -757,12 +772,15 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
757 772
758 GLuint old_fb = cur_state.draw.draw_framebuffer; 773 GLuint old_fb = cur_state.draw.draw_framebuffer;
759 cur_state.draw.draw_framebuffer = framebuffer.handle; 774 cur_state.draw.draw_framebuffer = framebuffer.handle;
760 // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so Clear call isn't affected 775 // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so
776 // Clear call isn't affected
761 cur_state.Apply(); 777 cur_state.Apply();
762 778
763 if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) { 779 if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) {
764 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_surface->texture.handle, 0); 780 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
765 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); 781 dst_surface->texture.handle, 0);
782 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
783 0);
766 784
767 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { 785 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
768 return false; 786 return false;
@@ -770,8 +788,10 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
770 788
771 GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f}; 789 GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f};
772 790
773 // TODO: Handle additional pixel format and fill value size combinations to accelerate more cases 791 // TODO: Handle additional pixel format and fill value size combinations to accelerate more
774 // For instance, checking if fill value's bytes/bits repeat to allow filling I8/A8/I4/A4/... 792 // cases
793 // For instance, checking if fill value's bytes/bits repeat to allow filling
794 // I8/A8/I4/A4/...
775 // Currently only handles formats that are multiples of the fill value size 795 // Currently only handles formats that are multiples of the fill value size
776 796
777 if (config.fill_24bit) { 797 if (config.fill_24bit) {
@@ -846,7 +866,8 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
846 glClearBufferfv(GL_COLOR, 0, color_values); 866 glClearBufferfv(GL_COLOR, 0, color_values);
847 } else if (dst_type == SurfaceType::Depth) { 867 } else if (dst_type == SurfaceType::Depth) {
848 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); 868 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
849 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0); 869 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
870 dst_surface->texture.handle, 0);
850 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); 871 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
851 872
852 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { 873 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
@@ -865,7 +886,8 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
865 glClearBufferfv(GL_DEPTH, 0, &value_float); 886 glClearBufferfv(GL_DEPTH, 0, &value_float);
866 } else if (dst_type == SurfaceType::DepthStencil) { 887 } else if (dst_type == SurfaceType::DepthStencil) {
867 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); 888 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
868 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0); 889 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
890 dst_surface->texture.handle, 0);
869 891
870 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { 892 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
871 return false; 893 return false;
@@ -889,7 +911,9 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
889 return true; 911 return true;
890} 912}
891 913
892bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { 914bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config,
915 PAddr framebuffer_addr, u32 pixel_stride,
916 ScreenInfo& screen_info) {
893 if (framebuffer_addr == 0) { 917 if (framebuffer_addr == 0) {
894 return false; 918 return false;
895 } 919 }
@@ -912,10 +936,9 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
912 u32 scaled_width = src_surface->GetScaledWidth(); 936 u32 scaled_width = src_surface->GetScaledWidth();
913 u32 scaled_height = src_surface->GetScaledHeight(); 937 u32 scaled_height = src_surface->GetScaledHeight();
914 938
915 screen_info.display_texcoords = MathUtil::Rectangle<float>((float)src_rect.top / (float)scaled_height, 939 screen_info.display_texcoords = MathUtil::Rectangle<float>(
916 (float)src_rect.left / (float)scaled_width, 940 (float)src_rect.top / (float)scaled_height, (float)src_rect.left / (float)scaled_width,
917 (float)src_rect.bottom / (float)scaled_height, 941 (float)src_rect.bottom / (float)scaled_height, (float)src_rect.right / (float)scaled_width);
918 (float)src_rect.right / (float)scaled_width);
919 942
920 screen_info.display_texture = src_surface->texture.handle; 943 screen_info.display_texture = src_surface->texture.handle;
921 944
@@ -928,7 +951,8 @@ void RasterizerOpenGL::SamplerInfo::Create() {
928 wrap_s = wrap_t = TextureConfig::Repeat; 951 wrap_s = wrap_t = TextureConfig::Repeat;
929 border_color = 0; 952 border_color = 0;
930 953
931 glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); // default is GL_LINEAR_MIPMAP_LINEAR 954 glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER,
955 GL_LINEAR); // default is GL_LINEAR_MIPMAP_LINEAR
932 // Other attributes have correct defaults 956 // Other attributes have correct defaults
933} 957}
934 958
@@ -976,41 +1000,64 @@ void RasterizerOpenGL::SetShader() {
976 } else { 1000 } else {
977 LOG_DEBUG(Render_OpenGL, "Creating new shader"); 1001 LOG_DEBUG(Render_OpenGL, "Creating new shader");
978 1002
979 shader->shader.Create(GLShader::GenerateVertexShader().c_str(), GLShader::GenerateFragmentShader(config).c_str()); 1003 shader->shader.Create(GLShader::GenerateVertexShader().c_str(),
1004 GLShader::GenerateFragmentShader(config).c_str());
980 1005
981 state.draw.shader_program = shader->shader.handle; 1006 state.draw.shader_program = shader->shader.handle;
982 state.Apply(); 1007 state.Apply();
983 1008
984 // Set the texture samplers to correspond to different texture units 1009 // Set the texture samplers to correspond to different texture units
985 GLuint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]"); 1010 GLuint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]");
986 if (uniform_tex != -1) { glUniform1i(uniform_tex, 0); } 1011 if (uniform_tex != -1) {
1012 glUniform1i(uniform_tex, 0);
1013 }
987 uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[1]"); 1014 uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[1]");
988 if (uniform_tex != -1) { glUniform1i(uniform_tex, 1); } 1015 if (uniform_tex != -1) {
1016 glUniform1i(uniform_tex, 1);
1017 }
989 uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]"); 1018 uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]");
990 if (uniform_tex != -1) { glUniform1i(uniform_tex, 2); } 1019 if (uniform_tex != -1) {
1020 glUniform1i(uniform_tex, 2);
1021 }
991 1022
992 // Set the texture samplers to correspond to different lookup table texture units 1023 // Set the texture samplers to correspond to different lookup table texture units
993 GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]"); 1024 GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]");
994 if (uniform_lut != -1) { glUniform1i(uniform_lut, 3); } 1025 if (uniform_lut != -1) {
1026 glUniform1i(uniform_lut, 3);
1027 }
995 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]"); 1028 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]");
996 if (uniform_lut != -1) { glUniform1i(uniform_lut, 4); } 1029 if (uniform_lut != -1) {
1030 glUniform1i(uniform_lut, 4);
1031 }
997 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]"); 1032 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]");
998 if (uniform_lut != -1) { glUniform1i(uniform_lut, 5); } 1033 if (uniform_lut != -1) {
1034 glUniform1i(uniform_lut, 5);
1035 }
999 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]"); 1036 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]");
1000 if (uniform_lut != -1) { glUniform1i(uniform_lut, 6); } 1037 if (uniform_lut != -1) {
1038 glUniform1i(uniform_lut, 6);
1039 }
1001 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]"); 1040 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]");
1002 if (uniform_lut != -1) { glUniform1i(uniform_lut, 7); } 1041 if (uniform_lut != -1) {
1042 glUniform1i(uniform_lut, 7);
1043 }
1003 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]"); 1044 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]");
1004 if (uniform_lut != -1) { glUniform1i(uniform_lut, 8); } 1045 if (uniform_lut != -1) {
1046 glUniform1i(uniform_lut, 8);
1047 }
1005 1048
1006 GLuint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut"); 1049 GLuint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut");
1007 if (uniform_fog_lut != -1) { glUniform1i(uniform_fog_lut, 9); } 1050 if (uniform_fog_lut != -1) {
1051 glUniform1i(uniform_fog_lut, 9);
1052 }
1008 1053
1009 current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); 1054 current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get();
1010 1055
1011 unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); 1056 unsigned int block_index =
1057 glGetUniformBlockIndex(current_shader->shader.handle, "shader_data");
1012 GLint block_size; 1058 GLint block_size;
1013 glGetActiveUniformBlockiv(current_shader->shader.handle, block_index, GL_UNIFORM_BLOCK_DATA_SIZE, &block_size); 1059 glGetActiveUniformBlockiv(current_shader->shader.handle, block_index,
1060 GL_UNIFORM_BLOCK_DATA_SIZE, &block_size);
1014 ASSERT_MSG(block_size == sizeof(UniformData), "Uniform block size did not match!"); 1061 ASSERT_MSG(block_size == sizeof(UniformData), "Uniform block size did not match!");
1015 glUniformBlockBinding(current_shader->shader.handle, block_index, 0); 1062 glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
1016 1063
@@ -1073,7 +1120,8 @@ void RasterizerOpenGL::SyncDepthScale() {
1073} 1120}
1074 1121
1075void RasterizerOpenGL::SyncDepthOffset() { 1122void RasterizerOpenGL::SyncDepthOffset() {
1076 float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); 1123 float depth_offset =
1124 Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32();
1077 if (depth_offset != uniform_block_data.data.depth_offset) { 1125 if (depth_offset != uniform_block_data.data.depth_offset) {
1078 uniform_block_data.data.depth_offset = depth_offset; 1126 uniform_block_data.data.depth_offset = depth_offset;
1079 uniform_block_data.dirty = true; 1127 uniform_block_data.dirty = true;
@@ -1086,10 +1134,14 @@ void RasterizerOpenGL::SyncBlendEnabled() {
1086 1134
1087void RasterizerOpenGL::SyncBlendFuncs() { 1135void RasterizerOpenGL::SyncBlendFuncs() {
1088 const auto& regs = Pica::g_state.regs; 1136 const auto& regs = Pica::g_state.regs;
1089 state.blend.rgb_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_rgb); 1137 state.blend.rgb_equation =
1090 state.blend.a_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_a); 1138 PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_rgb);
1091 state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb); 1139 state.blend.a_equation =
1092 state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb); 1140 PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_a);
1141 state.blend.src_rgb_func =
1142 PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb);
1143 state.blend.dst_rgb_func =
1144 PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb);
1093 state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a); 1145 state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a);
1094 state.blend.dst_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_a); 1146 state.blend.dst_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_a);
1095} 1147}
@@ -1104,25 +1156,23 @@ void RasterizerOpenGL::SyncBlendColor() {
1104 1156
1105void RasterizerOpenGL::SyncFogColor() { 1157void RasterizerOpenGL::SyncFogColor() {
1106 const auto& regs = Pica::g_state.regs; 1158 const auto& regs = Pica::g_state.regs;
1107 uniform_block_data.data.fog_color = { 1159 uniform_block_data.data.fog_color = {regs.fog_color.r.Value() / 255.0f,
1108 regs.fog_color.r.Value() / 255.0f, 1160 regs.fog_color.g.Value() / 255.0f,
1109 regs.fog_color.g.Value() / 255.0f, 1161 regs.fog_color.b.Value() / 255.0f};
1110 regs.fog_color.b.Value() / 255.0f
1111 };
1112 uniform_block_data.dirty = true; 1162 uniform_block_data.dirty = true;
1113} 1163}
1114 1164
1115void RasterizerOpenGL::SyncFogLUT() { 1165void RasterizerOpenGL::SyncFogLUT() {
1116 std::array<GLuint, 128> new_data; 1166 std::array<GLuint, 128> new_data;
1117 1167
1118 std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), [](const auto& entry) { 1168 std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(),
1119 return entry.raw; 1169 [](const auto& entry) { return entry.raw; });
1120 });
1121 1170
1122 if (new_data != fog_lut_data) { 1171 if (new_data != fog_lut_data) {
1123 fog_lut_data = new_data; 1172 fog_lut_data = new_data;
1124 glActiveTexture(GL_TEXTURE9); 1173 glActiveTexture(GL_TEXTURE9);
1125 glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RED_INTEGER, GL_UNSIGNED_INT, fog_lut_data.data()); 1174 glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RED_INTEGER, GL_UNSIGNED_INT,
1175 fog_lut_data.data());
1126 } 1176 }
1127} 1177}
1128 1178
@@ -1154,34 +1204,40 @@ void RasterizerOpenGL::SyncColorWriteMask() {
1154void RasterizerOpenGL::SyncStencilWriteMask() { 1204void RasterizerOpenGL::SyncStencilWriteMask() {
1155 const auto& regs = Pica::g_state.regs; 1205 const auto& regs = Pica::g_state.regs;
1156 state.stencil.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0) 1206 state.stencil.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0)
1157 ? static_cast<GLuint>(regs.output_merger.stencil_test.write_mask) 1207 ? static_cast<GLuint>(regs.output_merger.stencil_test.write_mask)
1158 : 0; 1208 : 0;
1159} 1209}
1160 1210
1161void RasterizerOpenGL::SyncDepthWriteMask() { 1211void RasterizerOpenGL::SyncDepthWriteMask() {
1162 const auto& regs = Pica::g_state.regs; 1212 const auto& regs = Pica::g_state.regs;
1163 state.depth.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0 && regs.output_merger.depth_write_enable) 1213 state.depth.write_mask =
1164 ? GL_TRUE 1214 (regs.framebuffer.allow_depth_stencil_write != 0 && regs.output_merger.depth_write_enable)
1165 : GL_FALSE; 1215 ? GL_TRUE
1216 : GL_FALSE;
1166} 1217}
1167 1218
1168void RasterizerOpenGL::SyncStencilTest() { 1219void RasterizerOpenGL::SyncStencilTest() {
1169 const auto& regs = Pica::g_state.regs; 1220 const auto& regs = Pica::g_state.regs;
1170 state.stencil.test_enabled = regs.output_merger.stencil_test.enable && regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; 1221 state.stencil.test_enabled = regs.output_merger.stencil_test.enable &&
1222 regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8;
1171 state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func); 1223 state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func);
1172 state.stencil.test_ref = regs.output_merger.stencil_test.reference_value; 1224 state.stencil.test_ref = regs.output_merger.stencil_test.reference_value;
1173 state.stencil.test_mask = regs.output_merger.stencil_test.input_mask; 1225 state.stencil.test_mask = regs.output_merger.stencil_test.input_mask;
1174 state.stencil.action_stencil_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail); 1226 state.stencil.action_stencil_fail =
1175 state.stencil.action_depth_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail); 1227 PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail);
1176 state.stencil.action_depth_pass = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass); 1228 state.stencil.action_depth_fail =
1229 PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail);
1230 state.stencil.action_depth_pass =
1231 PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass);
1177} 1232}
1178 1233
1179void RasterizerOpenGL::SyncDepthTest() { 1234void RasterizerOpenGL::SyncDepthTest() {
1180 const auto& regs = Pica::g_state.regs; 1235 const auto& regs = Pica::g_state.regs;
1181 state.depth.test_enabled = regs.output_merger.depth_test_enable == 1 || 1236 state.depth.test_enabled =
1182 regs.output_merger.depth_write_enable == 1; 1237 regs.output_merger.depth_test_enable == 1 || regs.output_merger.depth_write_enable == 1;
1183 state.depth.test_func = regs.output_merger.depth_test_enable == 1 ? 1238 state.depth.test_func = regs.output_merger.depth_test_enable == 1
1184 PicaToGL::CompareFunc(regs.output_merger.depth_test_func) : GL_ALWAYS; 1239 ? PicaToGL::CompareFunc(regs.output_merger.depth_test_func)
1240 : GL_ALWAYS;
1185} 1241}
1186 1242
1187void RasterizerOpenGL::SyncScissorTest() { 1243void RasterizerOpenGL::SyncScissorTest() {
@@ -1208,7 +1264,8 @@ void RasterizerOpenGL::SyncCombinerColor() {
1208 } 1264 }
1209} 1265}
1210 1266
1211void RasterizerOpenGL::SyncTevConstColor(int stage_index, const Pica::Regs::TevStageConfig& tev_stage) { 1267void RasterizerOpenGL::SyncTevConstColor(int stage_index,
1268 const Pica::Regs::TevStageConfig& tev_stage) {
1212 auto const_color = PicaToGL::ColorRGBA8(tev_stage.const_color); 1269 auto const_color = PicaToGL::ColorRGBA8(tev_stage.const_color);
1213 if (const_color != uniform_block_data.data.const_color[stage_index]) { 1270 if (const_color != uniform_block_data.data.const_color[stage_index]) {
1214 uniform_block_data.data.const_color[stage_index] = const_color; 1271 uniform_block_data.data.const_color[stage_index] = const_color;
@@ -1237,7 +1294,8 @@ void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) {
1237 if (new_data != lighting_lut_data[lut_index]) { 1294 if (new_data != lighting_lut_data[lut_index]) {
1238 lighting_lut_data[lut_index] = new_data; 1295 lighting_lut_data[lut_index] = new_data;
1239 glActiveTexture(GL_TEXTURE3 + lut_index); 1296 glActiveTexture(GL_TEXTURE3 + lut_index);
1240 glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, lighting_lut_data[lut_index].data()); 1297 glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT,
1298 lighting_lut_data[lut_index].data());
1241 } 1299 }
1242} 1300}
1243 1301
@@ -1277,7 +1335,7 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) {
1277 GLvec3 position = { 1335 GLvec3 position = {
1278 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), 1336 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(),
1279 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), 1337 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(),
1280 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() }; 1338 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32()};
1281 1339
1282 if (position != uniform_block_data.data.light_src[light_index].position) { 1340 if (position != uniform_block_data.data.light_src[light_index].position) {
1283 uniform_block_data.data.light_src[light_index].position = position; 1341 uniform_block_data.data.light_src[light_index].position = position;
@@ -1286,7 +1344,9 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) {
1286} 1344}
1287 1345
1288void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) { 1346void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) {
1289 GLfloat dist_atten_bias = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias).ToFloat32(); 1347 GLfloat dist_atten_bias =
1348 Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias)
1349 .ToFloat32();
1290 1350
1291 if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) { 1351 if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) {
1292 uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias; 1352 uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias;
@@ -1295,7 +1355,9 @@ void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) {
1295} 1355}
1296 1356
1297void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) { 1357void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) {
1298 GLfloat dist_atten_scale = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale).ToFloat32(); 1358 GLfloat dist_atten_scale =
1359 Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale)
1360 .ToFloat32();
1299 1361
1300 if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) { 1362 if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) {
1301 uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale; 1363 uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index c5029432b..70e9e64ef 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -8,8 +8,8 @@
8#include <cstddef> 8#include <cstddef>
9#include <cstring> 9#include <cstring>
10#include <memory> 10#include <memory>
11#include <vector>
12#include <unordered_map> 11#include <unordered_map>
12#include <vector>
13 13
14#include <glad/glad.h> 14#include <glad/glad.h>
15 15
@@ -40,10 +40,13 @@ struct ScreenInfo;
40 * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) 40 * Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
41 * two separate shaders sharing the same key. 41 * two separate shaders sharing the same key.
42 * 42 *
43 * We use a union because "implicitly-defined copy/move constructor for a union X copies the object representation of X." 43 * We use a union because "implicitly-defined copy/move constructor for a union X copies the object
44 * and "implicitly-defined copy assignment operator for a union X copies the object representation (3.9) of X." 44 * representation of X."
45 * and "implicitly-defined copy assignment operator for a union X copies the object representation
46 * (3.9) of X."
45 * = Bytewise copy instead of memberwise copy. 47 * = Bytewise copy instead of memberwise copy.
46 * This is important because the padding bytes are included in the hash and comparison between objects. 48 * This is important because the padding bytes are included in the hash and comparison between
49 * objects.
47 */ 50 */
48union PicaShaderConfig { 51union PicaShaderConfig {
49 52
@@ -60,8 +63,9 @@ union PicaShaderConfig {
60 63
61 state.depthmap_enable = regs.depthmap_enable; 64 state.depthmap_enable = regs.depthmap_enable;
62 65
63 state.alpha_test_func = regs.output_merger.alpha_test.enable ? 66 state.alpha_test_func = regs.output_merger.alpha_test.enable
64 regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; 67 ? regs.output_merger.alpha_test.func.Value()
68 : Pica::Regs::CompareFunc::Always;
65 69
66 state.texture0_type = regs.texture0.type; 70 state.texture0_type = regs.texture0.type;
67 71
@@ -81,9 +85,8 @@ union PicaShaderConfig {
81 state.fog_mode = regs.fog_mode; 85 state.fog_mode = regs.fog_mode;
82 state.fog_flip = regs.fog_flip; 86 state.fog_flip = regs.fog_flip;
83 87
84 state.combiner_buffer_input = 88 state.combiner_buffer_input = regs.tev_combiner_buffer_input.update_mask_rgb.Value() |
85 regs.tev_combiner_buffer_input.update_mask_rgb.Value() | 89 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
86 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
87 90
88 // Fragment lighting 91 // Fragment lighting
89 92
@@ -95,8 +98,10 @@ union PicaShaderConfig {
95 const auto& light = regs.lighting.light[num]; 98 const auto& light = regs.lighting.light[num];
96 state.lighting.light[light_index].num = num; 99 state.lighting.light[light_index].num = num;
97 state.lighting.light[light_index].directional = light.config.directional != 0; 100 state.lighting.light[light_index].directional = light.config.directional != 0;
98 state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0; 101 state.lighting.light[light_index].two_sided_diffuse =
99 state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); 102 light.config.two_sided_diffuse != 0;
103 state.lighting.light[light_index].dist_atten_enable =
104 !regs.lighting.IsDistAttenDisabled(num);
100 } 105 }
101 106
102 state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0; 107 state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0;
@@ -147,7 +152,7 @@ union PicaShaderConfig {
147 return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); 152 return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));
148 } 153 }
149 154
150 bool operator ==(const PicaShaderConfig& o) const { 155 bool operator==(const PicaShaderConfig& o) const {
151 return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0; 156 return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0;
152 }; 157 };
153 158
@@ -212,7 +217,8 @@ union PicaShaderConfig {
212 } state; 217 } state;
213}; 218};
214#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) 219#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
215static_assert(std::is_trivially_copyable<PicaShaderConfig::State>::value, "PicaShaderConfig::State must be trivially copyable"); 220static_assert(std::is_trivially_copyable<PicaShaderConfig::State>::value,
221 "PicaShaderConfig::State must be trivially copyable");
216#endif 222#endif
217 223
218namespace std { 224namespace std {
@@ -228,12 +234,10 @@ struct hash<PicaShaderConfig> {
228 234
229class RasterizerOpenGL : public VideoCore::RasterizerInterface { 235class RasterizerOpenGL : public VideoCore::RasterizerInterface {
230public: 236public:
231
232 RasterizerOpenGL(); 237 RasterizerOpenGL();
233 ~RasterizerOpenGL() override; 238 ~RasterizerOpenGL() override;
234 239
235 void AddTriangle(const Pica::Shader::OutputVertex& v0, 240 void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
236 const Pica::Shader::OutputVertex& v1,
237 const Pica::Shader::OutputVertex& v2) override; 241 const Pica::Shader::OutputVertex& v2) override;
238 void DrawTriangles() override; 242 void DrawTriangles() override;
239 void NotifyPicaRegisterChanged(u32 id) override; 243 void NotifyPicaRegisterChanged(u32 id) override;
@@ -242,7 +246,8 @@ public:
242 void FlushAndInvalidateRegion(PAddr addr, u32 size) override; 246 void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
243 bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; 247 bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
244 bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; 248 bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
245 bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) override; 249 bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
250 u32 pixel_stride, ScreenInfo& screen_info) override;
246 251
247 /// OpenGL shader generated for a given Pica register state 252 /// OpenGL shader generated for a given Pica register state
248 struct PicaShader { 253 struct PicaShader {
@@ -251,13 +256,13 @@ public:
251 }; 256 };
252 257
253private: 258private:
254
255 struct SamplerInfo { 259 struct SamplerInfo {
256 using TextureConfig = Pica::Regs::TextureConfig; 260 using TextureConfig = Pica::Regs::TextureConfig;
257 261
258 OGLSampler sampler; 262 OGLSampler sampler;
259 263
260 /// Creates the sampler object, initializing its state so that it's in sync with the SamplerInfo struct. 264 /// Creates the sampler object, initializing its state so that it's in sync with the
265 /// SamplerInfo struct.
261 void Create(); 266 void Create();
262 /// Syncs the sampler object with the config, updating any necessary state. 267 /// Syncs the sampler object with the config, updating any necessary state.
263 void SyncWithConfig(const TextureConfig& config); 268 void SyncWithConfig(const TextureConfig& config);
@@ -343,8 +348,11 @@ private:
343 alignas(16) GLvec4 tev_combiner_buffer_color; 348 alignas(16) GLvec4 tev_combiner_buffer_color;
344 }; 349 };
345 350
346 static_assert(sizeof(UniformData) == 0x3C0, "The size of the UniformData structure has changed, update the structure in the shader"); 351 static_assert(
347 static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); 352 sizeof(UniformData) == 0x3C0,
353 "The size of the UniformData structure has changed, update the structure in the shader");
354 static_assert(sizeof(UniformData) < 16384,
355 "UniformData structure must be less than 16kb as per the OpenGL spec");
348 356
349 /// Sets the OpenGL shader in accordance with the current PICA register state 357 /// Sets the OpenGL shader in accordance with the current PICA register state
350 void SetShader(); 358 void SetShader();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 7efd0038a..8f1477bcd 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -35,18 +35,18 @@ struct FormatTuple {
35}; 35};
36 36
37static const std::array<FormatTuple, 5> fb_format_tuples = {{ 37static const std::array<FormatTuple, 5> fb_format_tuples = {{
38 { GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8 }, // RGBA8 38 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8
39 { GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE }, // RGB8 39 {GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8
40 { GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1 }, // RGB5A1 40 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1
41 { GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5 }, // RGB565 41 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565
42 { GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4 }, // RGBA4 42 {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4
43}}; 43}};
44 44
45static const std::array<FormatTuple, 4> depth_format_tuples = {{ 45static const std::array<FormatTuple, 4> depth_format_tuples = {{
46 { GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT }, // D16 46 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16
47 {}, 47 {},
48 { GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT }, // D24 48 {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24
49 { GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8 }, // D24S8 49 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8
50}}; 50}};
51 51
52RasterizerCacheOpenGL::RasterizerCacheOpenGL() { 52RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
@@ -58,7 +58,9 @@ RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
58 FlushAll(); 58 FlushAll();
59} 59}
60 60
61static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data, bool morton_to_gl) { 61static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height,
62 u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data,
63 u8* gl_data, bool morton_to_gl) {
62 using PixelFormat = CachedSurface::PixelFormat; 64 using PixelFormat = CachedSurface::PixelFormat;
63 65
64 u8* data_ptrs[2]; 66 u8* data_ptrs[2];
@@ -72,7 +74,8 @@ static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width,
72 for (unsigned y = 0; y < height; ++y) { 74 for (unsigned y = 0; y < height; ++y) {
73 for (unsigned x = 0; x < width; ++x) { 75 for (unsigned x = 0; x < width; ++x) {
74 const u32 coarse_y = y & ~7; 76 const u32 coarse_y = y & ~7;
75 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; 77 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
78 coarse_y * width * bytes_per_pixel;
76 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; 79 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
77 80
78 data_ptrs[morton_to_gl] = morton_data + morton_offset; 81 data_ptrs[morton_to_gl] = morton_data + morton_offset;
@@ -81,7 +84,8 @@ static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width,
81 // Swap depth and stencil value ordering since 3DS does not match OpenGL 84 // Swap depth and stencil value ordering since 3DS does not match OpenGL
82 u32 depth_stencil; 85 u32 depth_stencil;
83 memcpy(&depth_stencil, data_ptrs[1], sizeof(u32)); 86 memcpy(&depth_stencil, data_ptrs[1], sizeof(u32));
84 depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | (depth_stencil >> depth_stencil_shifts[1]); 87 depth_stencil = (depth_stencil << depth_stencil_shifts[0]) |
88 (depth_stencil >> depth_stencil_shifts[1]);
85 89
86 memcpy(data_ptrs[0], &depth_stencil, sizeof(u32)); 90 memcpy(data_ptrs[0], &depth_stencil, sizeof(u32));
87 } 91 }
@@ -90,7 +94,8 @@ static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width,
90 for (unsigned y = 0; y < height; ++y) { 94 for (unsigned y = 0; y < height; ++y) {
91 for (unsigned x = 0; x < width; ++x) { 95 for (unsigned x = 0; x < width; ++x) {
92 const u32 coarse_y = y & ~7; 96 const u32 coarse_y = y & ~7;
93 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; 97 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
98 coarse_y * width * bytes_per_pixel;
94 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; 99 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
95 100
96 data_ptrs[morton_to_gl] = morton_data + morton_offset; 101 data_ptrs[morton_to_gl] = morton_data + morton_offset;
@@ -102,17 +107,21 @@ static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width,
102 } 107 }
103} 108}
104 109
105bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect) { 110bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex,
111 CachedSurface::SurfaceType type,
112 const MathUtil::Rectangle<int>& src_rect,
113 const MathUtil::Rectangle<int>& dst_rect) {
106 using SurfaceType = CachedSurface::SurfaceType; 114 using SurfaceType = CachedSurface::SurfaceType;
107 115
108 OpenGLState cur_state = OpenGLState::GetCurState(); 116 OpenGLState cur_state = OpenGLState::GetCurState();
109 117
110 // Make sure textures aren't bound to texture units, since going to bind them to framebuffer components 118 // Make sure textures aren't bound to texture units, since going to bind them to framebuffer
119 // components
111 OpenGLState::ResetTexture(src_tex); 120 OpenGLState::ResetTexture(src_tex);
112 OpenGLState::ResetTexture(dst_tex); 121 OpenGLState::ResetTexture(dst_tex);
113 122
114 // Keep track of previous framebuffer bindings 123 // Keep track of previous framebuffer bindings
115 GLuint old_fbs[2] = { cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer }; 124 GLuint old_fbs[2] = {cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer};
116 cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle; 125 cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle;
117 cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle; 126 cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle;
118 cur_state.Apply(); 127 cur_state.Apply();
@@ -120,11 +129,15 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedS
120 u32 buffers = 0; 129 u32 buffers = 0;
121 130
122 if (type == SurfaceType::Color || type == SurfaceType::Texture) { 131 if (type == SurfaceType::Color || type == SurfaceType::Texture) {
123 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, 0); 132 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
124 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); 133 0);
134 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
135 0);
125 136
126 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0); 137 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
127 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); 138 0);
139 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
140 0);
128 141
129 buffers = GL_COLOR_BUFFER_BIT; 142 buffers = GL_COLOR_BUFFER_BIT;
130 } else if (type == SurfaceType::Depth) { 143 } else if (type == SurfaceType::Depth) {
@@ -139,10 +152,12 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedS
139 buffers = GL_DEPTH_BUFFER_BIT; 152 buffers = GL_DEPTH_BUFFER_BIT;
140 } else if (type == SurfaceType::DepthStencil) { 153 } else if (type == SurfaceType::DepthStencil) {
141 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); 154 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
142 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0); 155 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
156 src_tex, 0);
143 157
144 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); 158 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
145 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0); 159 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
160 dst_tex, 0);
146 161
147 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; 162 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
148 } 163 }
@@ -155,9 +170,9 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedS
155 return false; 170 return false;
156 } 171 }
157 172
158 glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, 173 glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
159 dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, 174 dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
160 buffers, buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); 175 buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
161 176
162 // Restore previous framebuffer bindings 177 // Restore previous framebuffer bindings
163 cur_state.draw.read_framebuffer = old_fbs[0]; 178 cur_state.draw.read_framebuffer = old_fbs[0];
@@ -167,17 +182,24 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedS
167 return true; 182 return true;
168} 183}
169 184
170bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect) { 185bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface,
186 const MathUtil::Rectangle<int>& src_rect,
187 CachedSurface* dst_surface,
188 const MathUtil::Rectangle<int>& dst_rect) {
171 using SurfaceType = CachedSurface::SurfaceType; 189 using SurfaceType = CachedSurface::SurfaceType;
172 190
173 if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) { 191 if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format,
192 dst_surface->pixel_format)) {
174 return false; 193 return false;
175 } 194 }
176 195
177 return BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, dst_rect); 196 return BlitTextures(src_surface->texture.handle, dst_surface->texture.handle,
197 CachedSurface::GetFormatType(src_surface->pixel_format), src_rect,
198 dst_rect);
178} 199}
179 200
180static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, u32 width, u32 height) { 201static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format,
202 u32 width, u32 height) {
181 // Allocate an uninitialized texture of appropriate size and format for the surface 203 // Allocate an uninitialized texture of appropriate size and format for the surface
182 using SurfaceType = CachedSurface::SurfaceType; 204 using SurfaceType = CachedSurface::SurfaceType;
183 205
@@ -200,11 +222,11 @@ static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pi
200 ASSERT(tuple_idx < depth_format_tuples.size()); 222 ASSERT(tuple_idx < depth_format_tuples.size());
201 tuple = depth_format_tuples[tuple_idx]; 223 tuple = depth_format_tuples[tuple_idx];
202 } else { 224 } else {
203 tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE }; 225 tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE};
204 } 226 }
205 227
206 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, 228 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, tuple.format,
207 tuple.format, tuple.type, nullptr); 229 tuple.type, nullptr);
208 230
209 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); 231 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
210 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 232 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
@@ -217,7 +239,8 @@ static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pi
217} 239}
218 240
219MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192)); 241MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192));
220CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create) { 242CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale,
243 bool load_if_create) {
221 using PixelFormat = CachedSurface::PixelFormat; 244 using PixelFormat = CachedSurface::PixelFormat;
222 using SurfaceType = CachedSurface::SurfaceType; 245 using SurfaceType = CachedSurface::SurfaceType;
223 246
@@ -225,29 +248,31 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
225 return nullptr; 248 return nullptr;
226 } 249 }
227 250
228 u32 params_size = params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8; 251 u32 params_size =
252 params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
229 253
230 // Check for an exact match in existing surfaces 254 // Check for an exact match in existing surfaces
231 CachedSurface* best_exact_surface = nullptr; 255 CachedSurface* best_exact_surface = nullptr;
232 float exact_surface_goodness = -1.f; 256 float exact_surface_goodness = -1.f;
233 257
234 auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size); 258 auto surface_interval =
259 boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
235 auto range = surface_cache.equal_range(surface_interval); 260 auto range = surface_cache.equal_range(surface_interval);
236 for (auto it = range.first; it != range.second; ++it) { 261 for (auto it = range.first; it != range.second; ++it) {
237 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { 262 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
238 CachedSurface* surface = it2->get(); 263 CachedSurface* surface = it2->get();
239 264
240 // Check if the request matches the surface exactly 265 // Check if the request matches the surface exactly
241 if (params.addr == surface->addr && 266 if (params.addr == surface->addr && params.width == surface->width &&
242 params.width == surface->width && params.height == surface->height && 267 params.height == surface->height && params.pixel_format == surface->pixel_format) {
243 params.pixel_format == surface->pixel_format)
244 {
245 // Make sure optional param-matching criteria are fulfilled 268 // Make sure optional param-matching criteria are fulfilled
246 bool tiling_match = (params.is_tiled == surface->is_tiled); 269 bool tiling_match = (params.is_tiled == surface->is_tiled);
247 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height); 270 bool res_scale_match = (params.res_scale_width == surface->res_scale_width &&
271 params.res_scale_height == surface->res_scale_height);
248 if (!match_res_scale || res_scale_match) { 272 if (!match_res_scale || res_scale_match) {
249 // Prioritize same-tiling and highest resolution surfaces 273 // Prioritize same-tiling and highest resolution surfaces
250 float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height; 274 float match_goodness =
275 (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
251 if (match_goodness > exact_surface_goodness || surface->dirty) { 276 if (match_goodness > exact_surface_goodness || surface->dirty) {
252 exact_surface_goodness = match_goodness; 277 exact_surface_goodness = match_goodness;
253 best_exact_surface = surface; 278 best_exact_surface = surface;
@@ -288,9 +313,11 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
288 313
289 if (!load_if_create) { 314 if (!load_if_create) {
290 // Don't load any data; just allocate the surface's texture 315 // Don't load any data; just allocate the surface's texture
291 AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); 316 AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format,
317 new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
292 } else { 318 } else {
293 // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead of memory upload below if that's a common scenario in some game 319 // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead
320 // of memory upload below if that's a common scenario in some game
294 321
295 Memory::RasterizerFlushRegion(params.addr, params_size); 322 Memory::RasterizerFlushRegion(params.addr, params_size);
296 323
@@ -318,7 +345,7 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
318 tuple = fb_format_tuples[(unsigned int)params.pixel_format]; 345 tuple = fb_format_tuples[(unsigned int)params.pixel_format];
319 } else { 346 } else {
320 // Texture 347 // Texture
321 tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE }; 348 tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE};
322 } 349 }
323 350
324 std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height); 351 std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height);
@@ -326,19 +353,23 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
326 Pica::DebugUtils::TextureInfo tex_info; 353 Pica::DebugUtils::TextureInfo tex_info;
327 tex_info.width = params.width; 354 tex_info.width = params.width;
328 tex_info.height = params.height; 355 tex_info.height = params.height;
329 tex_info.stride = params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8; 356 tex_info.stride =
357 params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
330 tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format; 358 tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format;
331 tex_info.physical_address = params.addr; 359 tex_info.physical_address = params.addr;
332 360
333 for (unsigned y = 0; y < params.height; ++y) { 361 for (unsigned y = 0; y < params.height; ++y) {
334 for (unsigned x = 0; x < params.width; ++x) { 362 for (unsigned x = 0; x < params.width; ++x) {
335 tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, params.height - 1 - y, tex_info); 363 tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture(
364 texture_src_data, x, params.height - 1 - y, tex_info);
336 } 365 }
337 } 366 }
338 367
339 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data()); 368 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height,
369 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data());
340 } else { 370 } else {
341 // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format 371 // Depth/Stencil formats need special treatment since they aren't sampleable using
372 // LookupTexture and can't use RGBA format
342 size_t tuple_idx = (size_t)params.pixel_format - 14; 373 size_t tuple_idx = (size_t)params.pixel_format - 14;
343 ASSERT(tuple_idx < depth_format_tuples.size()); 374 ASSERT(tuple_idx < depth_format_tuples.size());
344 const FormatTuple& tuple = depth_format_tuples[tuple_idx]; 375 const FormatTuple& tuple = depth_format_tuples[tuple_idx];
@@ -350,14 +381,18 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
350 381
351 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel; 382 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
352 383
353 std::vector<u8> temp_fb_depth_buffer(params.width * params.height * gl_bytes_per_pixel); 384 std::vector<u8> temp_fb_depth_buffer(params.width * params.height *
385 gl_bytes_per_pixel);
354 386
355 u8* temp_fb_depth_buffer_ptr = use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data(); 387 u8* temp_fb_depth_buffer_ptr =
388 use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data();
356 389
357 MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, true); 390 MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel,
391 gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr,
392 true);
358 393
359 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, 394 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height,
360 tuple.format, tuple.type, temp_fb_depth_buffer.data()); 395 0, tuple.format, tuple.type, temp_fb_depth_buffer.data());
361 } 396 }
362 } 397 }
363 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 398 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
@@ -367,10 +402,13 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
367 OGLTexture scaled_texture; 402 OGLTexture scaled_texture;
368 scaled_texture.Create(); 403 scaled_texture.Create();
369 404
370 AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); 405 AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format,
371 BlitTextures(new_surface->texture.handle, scaled_texture.handle, CachedSurface::GetFormatType(new_surface->pixel_format), 406 new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
372 MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height), 407 BlitTextures(new_surface->texture.handle, scaled_texture.handle,
373 MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(), new_surface->GetScaledHeight())); 408 CachedSurface::GetFormatType(new_surface->pixel_format),
409 MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height),
410 MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(),
411 new_surface->GetScaledHeight()));
374 412
375 new_surface->texture.Release(); 413 new_surface->texture.Release();
376 new_surface->texture.handle = scaled_texture.handle; 414 new_surface->texture.handle = scaled_texture.handle;
@@ -389,11 +427,15 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
389 } 427 }
390 428
391 Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1); 429 Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1);
392 surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open(new_surface->addr, new_surface->addr + new_surface->size), std::set<std::shared_ptr<CachedSurface>>({ new_surface }))); 430 surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open(
431 new_surface->addr, new_surface->addr + new_surface->size),
432 std::set<std::shared_ptr<CachedSurface>>({new_surface})));
393 return new_surface.get(); 433 return new_surface.get();
394} 434}
395 435
396CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect) { 436CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params,
437 bool match_res_scale, bool load_if_create,
438 MathUtil::Rectangle<int>& out_rect) {
397 if (params.addr == 0) { 439 if (params.addr == 0) {
398 return nullptr; 440 return nullptr;
399 } 441 }
@@ -405,7 +447,8 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params
405 CachedSurface* best_subrect_surface = nullptr; 447 CachedSurface* best_subrect_surface = nullptr;
406 float subrect_surface_goodness = -1.f; 448 float subrect_surface_goodness = -1.f;
407 449
408 auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size); 450 auto surface_interval =
451 boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
409 auto cache_upper_bound = surface_cache.upper_bound(surface_interval); 452 auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
410 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { 453 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
411 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { 454 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
@@ -414,14 +457,15 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params
414 // Check if the request is contained in the surface 457 // Check if the request is contained in the surface
415 if (params.addr >= surface->addr && 458 if (params.addr >= surface->addr &&
416 params.addr + params_size - 1 <= surface->addr + surface->size - 1 && 459 params.addr + params_size - 1 <= surface->addr + surface->size - 1 &&
417 params.pixel_format == surface->pixel_format) 460 params.pixel_format == surface->pixel_format) {
418 {
419 // Make sure optional param-matching criteria are fulfilled 461 // Make sure optional param-matching criteria are fulfilled
420 bool tiling_match = (params.is_tiled == surface->is_tiled); 462 bool tiling_match = (params.is_tiled == surface->is_tiled);
421 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height); 463 bool res_scale_match = (params.res_scale_width == surface->res_scale_width &&
464 params.res_scale_height == surface->res_scale_height);
422 if (!match_res_scale || res_scale_match) { 465 if (!match_res_scale || res_scale_match) {
423 // Prioritize same-tiling and highest resolution surfaces 466 // Prioritize same-tiling and highest resolution surfaces
424 float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height; 467 float match_goodness =
468 (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
425 if (match_goodness > subrect_surface_goodness || surface->dirty) { 469 if (match_goodness > subrect_surface_goodness || surface->dirty) {
426 subrect_surface_goodness = match_goodness; 470 subrect_surface_goodness = match_goodness;
427 best_subrect_surface = surface; 471 best_subrect_surface = surface;
@@ -433,7 +477,8 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params
433 477
434 // Return the best subrect surface if found 478 // Return the best subrect surface if found
435 if (best_subrect_surface != nullptr) { 479 if (best_subrect_surface != nullptr) {
436 unsigned int bytes_per_pixel = (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8); 480 unsigned int bytes_per_pixel =
481 (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8);
437 482
438 int x0, y0; 483 int x0, y0;
439 484
@@ -452,7 +497,9 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params
452 y0 = begin_tile_index / tiles_per_row * 8; 497 y0 = begin_tile_index / tiles_per_row * 8;
453 498
454 // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory. 499 // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory.
455 out_rect = MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width, best_subrect_surface->height - (y0 + params.height)); 500 out_rect =
501 MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width,
502 best_subrect_surface->height - (y0 + params.height));
456 } 503 }
457 504
458 out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width); 505 out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width);
@@ -465,16 +512,20 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params
465 512
466 // No subrect found - create and return a new surface 513 // No subrect found - create and return a new surface
467 if (!params.is_tiled) { 514 if (!params.is_tiled) {
468 out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width), (int)(params.height * params.res_scale_height)); 515 out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width),
516 (int)(params.height * params.res_scale_height));
469 } else { 517 } else {
470 out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height), (int)(params.width * params.res_scale_width), 0); 518 out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height),
519 (int)(params.width * params.res_scale_width), 0);
471 } 520 }
472 521
473 return GetSurface(params, match_res_scale, load_if_create); 522 return GetSurface(params, match_res_scale, load_if_create);
474} 523}
475 524
476CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTextureConfig& config) { 525CachedSurface*
477 Pica::DebugUtils::TextureInfo info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format); 526RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTextureConfig& config) {
527 Pica::DebugUtils::TextureInfo info =
528 Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format);
478 529
479 CachedSurface params; 530 CachedSurface params;
480 params.addr = info.physical_address; 531 params.addr = info.physical_address;
@@ -485,20 +536,28 @@ CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTe
485 return GetSurface(params, false, true); 536 return GetSurface(params, false, true);
486} 537}
487 538
488std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) { 539std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>>
540RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) {
489 const auto& regs = Pica::g_state.regs; 541 const auto& regs = Pica::g_state.regs;
490 542
491 // Make sur that framebuffers don't overlap if both color and depth are being used 543 // Make sur that framebuffers don't overlap if both color and depth are being used
492 u32 fb_area = config.GetWidth() * config.GetHeight(); 544 u32 fb_area = config.GetWidth() * config.GetHeight();
493 bool framebuffers_overlap = config.GetColorBufferPhysicalAddress() != 0 && 545 bool framebuffers_overlap =
494 config.GetDepthBufferPhysicalAddress() != 0 && 546 config.GetColorBufferPhysicalAddress() != 0 &&
495 MathUtil::IntervalsIntersect(config.GetColorBufferPhysicalAddress(), fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())), 547 config.GetDepthBufferPhysicalAddress() != 0 &&
496 config.GetDepthBufferPhysicalAddress(), fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format)); 548 MathUtil::IntervalsIntersect(
549 config.GetColorBufferPhysicalAddress(),
550 fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())),
551 config.GetDepthBufferPhysicalAddress(),
552 fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format));
497 bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0; 553 bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0;
498 bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && (regs.output_merger.depth_test_enable || regs.output_merger.depth_write_enable || !framebuffers_overlap); 554 bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 &&
555 (regs.output_merger.depth_test_enable ||
556 regs.output_merger.depth_write_enable || !framebuffers_overlap);
499 557
500 if (framebuffers_overlap && using_color_fb && using_depth_fb) { 558 if (framebuffers_overlap && using_color_fb && using_depth_fb) {
501 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!"); 559 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; "
560 "overlapping framebuffers not supported!");
502 using_depth_fb = false; 561 using_depth_fb = false;
503 } 562 }
504 563
@@ -512,8 +571,10 @@ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerC
512 auto layout = VideoCore::g_emu_window->GetFramebufferLayout(); 571 auto layout = VideoCore::g_emu_window->GetFramebufferLayout();
513 572
514 // Assume same scaling factor for top and bottom screens 573 // Assume same scaling factor for top and bottom screens
515 color_params.res_scale_width = depth_params.res_scale_width = (float)layout.top_screen.GetWidth() / VideoCore::kScreenTopWidth; 574 color_params.res_scale_width = depth_params.res_scale_width =
516 color_params.res_scale_height = depth_params.res_scale_height = (float)layout.top_screen.GetHeight() / VideoCore::kScreenTopHeight; 575 (float)layout.top_screen.GetWidth() / VideoCore::kScreenTopWidth;
576 color_params.res_scale_height = depth_params.res_scale_height =
577 (float)layout.top_screen.GetHeight() / VideoCore::kScreenTopHeight;
517 } 578 }
518 579
519 color_params.addr = config.GetColorBufferPhysicalAddress(); 580 color_params.addr = config.GetColorBufferPhysicalAddress();
@@ -523,22 +584,28 @@ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerC
523 depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format); 584 depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format);
524 585
525 MathUtil::Rectangle<int> color_rect; 586 MathUtil::Rectangle<int> color_rect;
526 CachedSurface* color_surface = using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr; 587 CachedSurface* color_surface =
588 using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr;
527 589
528 MathUtil::Rectangle<int> depth_rect; 590 MathUtil::Rectangle<int> depth_rect;
529 CachedSurface* depth_surface = using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr; 591 CachedSurface* depth_surface =
592 using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr;
530 593
531 // Sanity check to make sure found surfaces aren't the same 594 // Sanity check to make sure found surfaces aren't the same
532 if (using_depth_fb && using_color_fb && color_surface == depth_surface) { 595 if (using_depth_fb && using_color_fb && color_surface == depth_surface) {
533 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!"); 596 LOG_CRITICAL(
597 Render_OpenGL,
598 "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!");
534 using_depth_fb = false; 599 using_depth_fb = false;
535 depth_surface = nullptr; 600 depth_surface = nullptr;
536 } 601 }
537 602
538 MathUtil::Rectangle<int> rect; 603 MathUtil::Rectangle<int> rect;
539 604
540 if (color_surface != nullptr && depth_surface != nullptr && (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) { 605 if (color_surface != nullptr && depth_surface != nullptr &&
541 // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if they don't match 606 (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) {
607 // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if
608 // they don't match
542 if (color_rect.left != 0 || color_rect.top != 0) { 609 if (color_rect.left != 0 || color_rect.top != 0) {
543 color_surface = GetSurface(color_params, true, true); 610 color_surface = GetSurface(color_params, true, true);
544 } 611 }
@@ -548,9 +615,13 @@ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerC
548 } 615 }
549 616
550 if (!color_surface->is_tiled) { 617 if (!color_surface->is_tiled) {
551 rect = MathUtil::Rectangle<int>(0, 0, (int)(color_params.width * color_params.res_scale_width), (int)(color_params.height * color_params.res_scale_height)); 618 rect = MathUtil::Rectangle<int>(
619 0, 0, (int)(color_params.width * color_params.res_scale_width),
620 (int)(color_params.height * color_params.res_scale_height));
552 } else { 621 } else {
553 rect = MathUtil::Rectangle<int>(0, (int)(color_params.height * color_params.res_scale_height), (int)(color_params.width * color_params.res_scale_width), 0); 622 rect = MathUtil::Rectangle<int>(
623 0, (int)(color_params.height * color_params.res_scale_height),
624 (int)(color_params.width * color_params.res_scale_width), 0);
554 } 625 }
555 } else if (color_surface != nullptr) { 626 } else if (color_surface != nullptr) {
556 rect = color_rect; 627 rect = color_rect;
@@ -564,7 +635,8 @@ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerC
564} 635}
565 636
566CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) { 637CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) {
567 auto surface_interval = boost::icl::interval<PAddr>::right_open(config.GetStartAddress(), config.GetEndAddress()); 638 auto surface_interval =
639 boost::icl::interval<PAddr>::right_open(config.GetStartAddress(), config.GetEndAddress());
568 auto range = surface_cache.equal_range(surface_interval); 640 auto range = surface_cache.equal_range(surface_interval);
569 for (auto it = range.first; it != range.second; ++it) { 641 for (auto it = range.first; it != range.second; ++it) {
570 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { 642 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
@@ -581,8 +653,9 @@ CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryF
581 653
582 if (surface->addr == config.GetStartAddress() && 654 if (surface->addr == config.GetStartAddress() &&
583 CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value && 655 CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value &&
584 (surface->width * surface->height * CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == (config.GetEndAddress() - config.GetStartAddress())) 656 (surface->width * surface->height *
585 { 657 CachedSurface::GetFormatBpp(surface->pixel_format) / 8) ==
658 (config.GetEndAddress() - config.GetStartAddress())) {
586 return surface; 659 return surface;
587 } 660 }
588 } 661 }
@@ -617,8 +690,11 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
617 if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) { 690 if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) {
618 unscaled_tex.Create(); 691 unscaled_tex.Create();
619 692
620 AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, surface->height); 693 AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width,
621 BlitTextures(surface->texture.handle, unscaled_tex.handle, CachedSurface::GetFormatType(surface->pixel_format), 694 surface->height);
695 BlitTextures(
696 surface->texture.handle, unscaled_tex.handle,
697 CachedSurface::GetFormatType(surface->pixel_format),
622 MathUtil::Rectangle<int>(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()), 698 MathUtil::Rectangle<int>(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()),
623 MathUtil::Rectangle<int>(0, 0, surface->width, surface->height)); 699 MathUtil::Rectangle<int>(0, 0, surface->width, surface->height));
624 700
@@ -648,10 +724,14 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
648 724
649 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); 725 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
650 726
651 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary. 727 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion
652 MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), false); 728 // is necessary.
729 MortonCopyPixels(surface->pixel_format, surface->width, surface->height,
730 bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(),
731 false);
653 } else { 732 } else {
654 // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format 733 // Depth/Stencil formats need special treatment since they aren't sampleable using
734 // LookupTexture and can't use RGBA format
655 size_t tuple_idx = (size_t)surface->pixel_format - 14; 735 size_t tuple_idx = (size_t)surface->pixel_format - 14;
656 ASSERT(tuple_idx < depth_format_tuples.size()); 736 ASSERT(tuple_idx < depth_format_tuples.size());
657 const FormatTuple& tuple = depth_format_tuples[tuple_idx]; 737 const FormatTuple& tuple = depth_format_tuples[tuple_idx];
@@ -669,7 +749,9 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
669 749
670 u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data(); 750 u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data();
671 751
672 MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, false); 752 MortonCopyPixels(surface->pixel_format, surface->width, surface->height,
753 bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr,
754 false);
673 } 755 }
674 } 756 }
675 glPixelStorei(GL_PACK_ROW_LENGTH, 0); 757 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
@@ -680,7 +762,8 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
680 cur_state.Apply(); 762 cur_state.Apply();
681} 763}
682 764
683void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate) { 765void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface,
766 bool invalidate) {
684 if (size == 0) { 767 if (size == 0) {
685 return; 768 return;
686 } 769 }
@@ -691,8 +774,11 @@ void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurfac
691 auto surface_interval = boost::icl::interval<PAddr>::right_open(addr, addr + size); 774 auto surface_interval = boost::icl::interval<PAddr>::right_open(addr, addr + size);
692 auto cache_upper_bound = surface_cache.upper_bound(surface_interval); 775 auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
693 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { 776 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
694 std::copy_if(it->second.begin(), it->second.end(), std::inserter(touching_surfaces, touching_surfaces.end()), 777 std::copy_if(it->second.begin(), it->second.end(),
695 [skip_surface](std::shared_ptr<CachedSurface> surface) { return (surface.get() != skip_surface); }); 778 std::inserter(touching_surfaces, touching_surfaces.end()),
779 [skip_surface](std::shared_ptr<CachedSurface> surface) {
780 return (surface.get() != skip_surface);
781 });
696 } 782 }
697 783
698 // Flush and invalidate surfaces 784 // Flush and invalidate surfaces
@@ -700,7 +786,10 @@ void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurfac
700 FlushSurface(surface.get()); 786 FlushSurface(surface.get());
701 if (invalidate) { 787 if (invalidate) {
702 Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1); 788 Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1);
703 surface_cache.subtract(std::make_pair(boost::icl::interval<PAddr>::right_open(surface->addr, surface->addr + surface->size), std::set<std::shared_ptr<CachedSurface>>({ surface }))); 789 surface_cache.subtract(
790 std::make_pair(boost::icl::interval<PAddr>::right_open(
791 surface->addr, surface->addr + surface->size),
792 std::set<std::shared_ptr<CachedSurface>>({surface})));
704 } 793 }
705 } 794 }
706} 795}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 225596415..db5b649da 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -22,7 +22,8 @@
22#include "video_core/renderer_opengl/gl_resource_manager.h" 22#include "video_core/renderer_opengl/gl_resource_manager.h"
23 23
24namespace MathUtil { 24namespace MathUtil {
25template <class T> struct Rectangle; 25template <class T>
26struct Rectangle;
26} 27}
27 28
28struct CachedSurface; 29struct CachedSurface;
@@ -32,38 +33,38 @@ using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<Ca
32struct CachedSurface { 33struct CachedSurface {
33 enum class PixelFormat { 34 enum class PixelFormat {
34 // First 5 formats are shared between textures and color buffers 35 // First 5 formats are shared between textures and color buffers
35 RGBA8 = 0, 36 RGBA8 = 0,
36 RGB8 = 1, 37 RGB8 = 1,
37 RGB5A1 = 2, 38 RGB5A1 = 2,
38 RGB565 = 3, 39 RGB565 = 3,
39 RGBA4 = 4, 40 RGBA4 = 4,
40 41
41 // Texture-only formats 42 // Texture-only formats
42 IA8 = 5, 43 IA8 = 5,
43 RG8 = 6, 44 RG8 = 6,
44 I8 = 7, 45 I8 = 7,
45 A8 = 8, 46 A8 = 8,
46 IA4 = 9, 47 IA4 = 9,
47 I4 = 10, 48 I4 = 10,
48 A4 = 11, 49 A4 = 11,
49 ETC1 = 12, 50 ETC1 = 12,
50 ETC1A4 = 13, 51 ETC1A4 = 13,
51 52
52 // Depth buffer-only formats 53 // Depth buffer-only formats
53 D16 = 14, 54 D16 = 14,
54 // gap 55 // gap
55 D24 = 16, 56 D24 = 16,
56 D24S8 = 17, 57 D24S8 = 17,
57 58
58 Invalid = 255, 59 Invalid = 255,
59 }; 60 };
60 61
61 enum class SurfaceType { 62 enum class SurfaceType {
62 Color = 0, 63 Color = 0,
63 Texture = 1, 64 Texture = 1,
64 Depth = 2, 65 Depth = 2,
65 DepthStencil = 3, 66 DepthStencil = 3,
66 Invalid = 4, 67 Invalid = 4,
67 }; 68 };
68 69
69 static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { 70 static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) {
@@ -101,7 +102,8 @@ struct CachedSurface {
101 } 102 }
102 103
103 static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) { 104 static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) {
104 return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14) : PixelFormat::Invalid; 105 return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14)
106 : PixelFormat::Invalid;
105 } 107 }
106 108
107 static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) { 109 static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) {
@@ -120,7 +122,8 @@ struct CachedSurface {
120 SurfaceType a_type = GetFormatType(pixel_format_a); 122 SurfaceType a_type = GetFormatType(pixel_format_a);
121 SurfaceType b_type = GetFormatType(pixel_format_b); 123 SurfaceType b_type = GetFormatType(pixel_format_b);
122 124
123 if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) { 125 if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) &&
126 (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
124 return true; 127 return true;
125 } 128 }
126 129
@@ -187,22 +190,30 @@ public:
187 ~RasterizerCacheOpenGL(); 190 ~RasterizerCacheOpenGL();
188 191
189 /// Blits one texture to another 192 /// Blits one texture to another
190 bool BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect); 193 bool BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type,
194 const MathUtil::Rectangle<int>& src_rect,
195 const MathUtil::Rectangle<int>& dst_rect);
191 196
192 /// Attempt to blit one surface's texture to another 197 /// Attempt to blit one surface's texture to another
193 bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect); 198 bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect,
199 CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect);
194 200
195 /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) 201 /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
196 CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create); 202 CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale,
203 bool load_if_create);
197 204
198 /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from 3DS memory to OpenGL and caches it (if not already cached) 205 /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
199 CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect); 206 /// 3DS memory to OpenGL and caches it (if not already cached)
207 CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale,
208 bool load_if_create, MathUtil::Rectangle<int>& out_rect);
200 209
201 /// Gets a surface based on the texture configuration 210 /// Gets a surface based on the texture configuration
202 CachedSurface* GetTextureSurface(const Pica::Regs::FullTextureConfig& config); 211 CachedSurface* GetTextureSurface(const Pica::Regs::FullTextureConfig& config);
203 212
204 /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer configuration 213 /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer
205 std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config); 214 /// configuration
215 std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>>
216 GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config);
206 217
207 /// Attempt to get a surface that exactly matches the fill region and format 218 /// Attempt to get a surface that exactly matches the fill region and format
208 CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config); 219 CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config);
@@ -210,7 +221,8 @@ public:
210 /// Write the surface back to memory 221 /// Write the surface back to memory
211 void FlushSurface(CachedSurface* surface); 222 void FlushSurface(CachedSurface* surface);
212 223
213 /// Write any cached resources overlapping the region back to memory (if dirty) and optionally invalidate them in the cache 224 /// Write any cached resources overlapping the region back to memory (if dirty) and optionally
225 /// invalidate them in the cache
214 void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate); 226 void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate);
215 227
216 /// Flush all cached resources tracked by this cache manager 228 /// Flush all cached resources tracked by this cache manager
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index eb128966c..2f40eb646 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -16,19 +16,28 @@
16class OGLTexture : private NonCopyable { 16class OGLTexture : private NonCopyable {
17public: 17public:
18 OGLTexture() = default; 18 OGLTexture() = default;
19 OGLTexture(OGLTexture&& o) { std::swap(handle, o.handle); } 19 OGLTexture(OGLTexture&& o) {
20 ~OGLTexture() { Release(); } 20 std::swap(handle, o.handle);
21 OGLTexture& operator=(OGLTexture&& o) { std::swap(handle, o.handle); return *this; } 21 }
22 ~OGLTexture() {
23 Release();
24 }
25 OGLTexture& operator=(OGLTexture&& o) {
26 std::swap(handle, o.handle);
27 return *this;
28 }
22 29
23 /// Creates a new internal OpenGL resource and stores the handle 30 /// Creates a new internal OpenGL resource and stores the handle
24 void Create() { 31 void Create() {
25 if (handle != 0) return; 32 if (handle != 0)
33 return;
26 glGenTextures(1, &handle); 34 glGenTextures(1, &handle);
27 } 35 }
28 36
29 /// Deletes the internal OpenGL resource 37 /// Deletes the internal OpenGL resource
30 void Release() { 38 void Release() {
31 if (handle == 0) return; 39 if (handle == 0)
40 return;
32 glDeleteTextures(1, &handle); 41 glDeleteTextures(1, &handle);
33 OpenGLState::ResetTexture(handle); 42 OpenGLState::ResetTexture(handle);
34 handle = 0; 43 handle = 0;
@@ -40,19 +49,28 @@ public:
40class OGLSampler : private NonCopyable { 49class OGLSampler : private NonCopyable {
41public: 50public:
42 OGLSampler() = default; 51 OGLSampler() = default;
43 OGLSampler(OGLSampler&& o) { std::swap(handle, o.handle); } 52 OGLSampler(OGLSampler&& o) {
44 ~OGLSampler() { Release(); } 53 std::swap(handle, o.handle);
45 OGLSampler& operator=(OGLSampler&& o) { std::swap(handle, o.handle); return *this; } 54 }
55 ~OGLSampler() {
56 Release();
57 }
58 OGLSampler& operator=(OGLSampler&& o) {
59 std::swap(handle, o.handle);
60 return *this;
61 }
46 62
47 /// Creates a new internal OpenGL resource and stores the handle 63 /// Creates a new internal OpenGL resource and stores the handle
48 void Create() { 64 void Create() {
49 if (handle != 0) return; 65 if (handle != 0)
66 return;
50 glGenSamplers(1, &handle); 67 glGenSamplers(1, &handle);
51 } 68 }
52 69
53 /// Deletes the internal OpenGL resource 70 /// Deletes the internal OpenGL resource
54 void Release() { 71 void Release() {
55 if (handle == 0) return; 72 if (handle == 0)
73 return;
56 glDeleteSamplers(1, &handle); 74 glDeleteSamplers(1, &handle);
57 OpenGLState::ResetSampler(handle); 75 OpenGLState::ResetSampler(handle);
58 handle = 0; 76 handle = 0;
@@ -64,19 +82,28 @@ public:
64class OGLShader : private NonCopyable { 82class OGLShader : private NonCopyable {
65public: 83public:
66 OGLShader() = default; 84 OGLShader() = default;
67 OGLShader(OGLShader&& o) { std::swap(handle, o.handle); } 85 OGLShader(OGLShader&& o) {
68 ~OGLShader() { Release(); } 86 std::swap(handle, o.handle);
69 OGLShader& operator=(OGLShader&& o) { std::swap(handle, o.handle); return *this; } 87 }
88 ~OGLShader() {
89 Release();
90 }
91 OGLShader& operator=(OGLShader&& o) {
92 std::swap(handle, o.handle);
93 return *this;
94 }
70 95
71 /// Creates a new internal OpenGL resource and stores the handle 96 /// Creates a new internal OpenGL resource and stores the handle
72 void Create(const char* vert_shader, const char* frag_shader) { 97 void Create(const char* vert_shader, const char* frag_shader) {
73 if (handle != 0) return; 98 if (handle != 0)
99 return;
74 handle = GLShader::LoadProgram(vert_shader, frag_shader); 100 handle = GLShader::LoadProgram(vert_shader, frag_shader);
75 } 101 }
76 102
77 /// Deletes the internal OpenGL resource 103 /// Deletes the internal OpenGL resource
78 void Release() { 104 void Release() {
79 if (handle == 0) return; 105 if (handle == 0)
106 return;
80 glDeleteProgram(handle); 107 glDeleteProgram(handle);
81 OpenGLState::ResetProgram(handle); 108 OpenGLState::ResetProgram(handle);
82 handle = 0; 109 handle = 0;
@@ -88,19 +115,28 @@ public:
88class OGLBuffer : private NonCopyable { 115class OGLBuffer : private NonCopyable {
89public: 116public:
90 OGLBuffer() = default; 117 OGLBuffer() = default;
91 OGLBuffer(OGLBuffer&& o) { std::swap(handle, o.handle); } 118 OGLBuffer(OGLBuffer&& o) {
92 ~OGLBuffer() { Release(); } 119 std::swap(handle, o.handle);
93 OGLBuffer& operator=(OGLBuffer&& o) { std::swap(handle, o.handle); return *this; } 120 }
121 ~OGLBuffer() {
122 Release();
123 }
124 OGLBuffer& operator=(OGLBuffer&& o) {
125 std::swap(handle, o.handle);
126 return *this;
127 }
94 128
95 /// Creates a new internal OpenGL resource and stores the handle 129 /// Creates a new internal OpenGL resource and stores the handle
96 void Create() { 130 void Create() {
97 if (handle != 0) return; 131 if (handle != 0)
132 return;
98 glGenBuffers(1, &handle); 133 glGenBuffers(1, &handle);
99 } 134 }
100 135
101 /// Deletes the internal OpenGL resource 136 /// Deletes the internal OpenGL resource
102 void Release() { 137 void Release() {
103 if (handle == 0) return; 138 if (handle == 0)
139 return;
104 glDeleteBuffers(1, &handle); 140 glDeleteBuffers(1, &handle);
105 OpenGLState::ResetBuffer(handle); 141 OpenGLState::ResetBuffer(handle);
106 handle = 0; 142 handle = 0;
@@ -112,19 +148,28 @@ public:
112class OGLVertexArray : private NonCopyable { 148class OGLVertexArray : private NonCopyable {
113public: 149public:
114 OGLVertexArray() = default; 150 OGLVertexArray() = default;
115 OGLVertexArray(OGLVertexArray&& o) { std::swap(handle, o.handle); } 151 OGLVertexArray(OGLVertexArray&& o) {
116 ~OGLVertexArray() { Release(); } 152 std::swap(handle, o.handle);
117 OGLVertexArray& operator=(OGLVertexArray&& o) { std::swap(handle, o.handle); return *this; } 153 }
154 ~OGLVertexArray() {
155 Release();
156 }
157 OGLVertexArray& operator=(OGLVertexArray&& o) {
158 std::swap(handle, o.handle);
159 return *this;
160 }
118 161
119 /// Creates a new internal OpenGL resource and stores the handle 162 /// Creates a new internal OpenGL resource and stores the handle
120 void Create() { 163 void Create() {
121 if (handle != 0) return; 164 if (handle != 0)
165 return;
122 glGenVertexArrays(1, &handle); 166 glGenVertexArrays(1, &handle);
123 } 167 }
124 168
125 /// Deletes the internal OpenGL resource 169 /// Deletes the internal OpenGL resource
126 void Release() { 170 void Release() {
127 if (handle == 0) return; 171 if (handle == 0)
172 return;
128 glDeleteVertexArrays(1, &handle); 173 glDeleteVertexArrays(1, &handle);
129 OpenGLState::ResetVertexArray(handle); 174 OpenGLState::ResetVertexArray(handle);
130 handle = 0; 175 handle = 0;
@@ -136,19 +181,28 @@ public:
136class OGLFramebuffer : private NonCopyable { 181class OGLFramebuffer : private NonCopyable {
137public: 182public:
138 OGLFramebuffer() = default; 183 OGLFramebuffer() = default;
139 OGLFramebuffer(OGLFramebuffer&& o) { std::swap(handle, o.handle); } 184 OGLFramebuffer(OGLFramebuffer&& o) {
140 ~OGLFramebuffer() { Release(); } 185 std::swap(handle, o.handle);
141 OGLFramebuffer& operator=(OGLFramebuffer&& o) { std::swap(handle, o.handle); return *this; } 186 }
187 ~OGLFramebuffer() {
188 Release();
189 }
190 OGLFramebuffer& operator=(OGLFramebuffer&& o) {
191 std::swap(handle, o.handle);
192 return *this;
193 }
142 194
143 /// Creates a new internal OpenGL resource and stores the handle 195 /// Creates a new internal OpenGL resource and stores the handle
144 void Create() { 196 void Create() {
145 if (handle != 0) return; 197 if (handle != 0)
198 return;
146 glGenFramebuffers(1, &handle); 199 glGenFramebuffers(1, &handle);
147 } 200 }
148 201
149 /// Deletes the internal OpenGL resource 202 /// Deletes the internal OpenGL resource
150 void Release() { 203 void Release() {
151 if (handle == 0) return; 204 if (handle == 0)
205 return;
152 glDeleteFramebuffers(1, &handle); 206 glDeleteFramebuffers(1, &handle);
153 OpenGLState::ResetFramebuffer(handle); 207 OpenGLState::ResetFramebuffer(handle);
154 handle = 0; 208 handle = 0;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 3de372f67..f86cffee5 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -21,19 +21,18 @@ namespace GLShader {
21 21
22/// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code) 22/// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code)
23static bool IsPassThroughTevStage(const TevStageConfig& stage) { 23static bool IsPassThroughTevStage(const TevStageConfig& stage) {
24 return (stage.color_op == TevStageConfig::Operation::Replace && 24 return (stage.color_op == TevStageConfig::Operation::Replace &&
25 stage.alpha_op == TevStageConfig::Operation::Replace && 25 stage.alpha_op == TevStageConfig::Operation::Replace &&
26 stage.color_source1 == TevStageConfig::Source::Previous && 26 stage.color_source1 == TevStageConfig::Source::Previous &&
27 stage.alpha_source1 == TevStageConfig::Source::Previous && 27 stage.alpha_source1 == TevStageConfig::Source::Previous &&
28 stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && 28 stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor &&
29 stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && 29 stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha &&
30 stage.GetColorMultiplier() == 1 && 30 stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
31 stage.GetAlphaMultiplier() == 1);
32} 31}
33 32
34/// Writes the specified TEV stage source component(s) 33/// Writes the specified TEV stage source component(s)
35static void AppendSource(std::string& out, const PicaShaderConfig& config, TevStageConfig::Source source, 34static void AppendSource(std::string& out, const PicaShaderConfig& config,
36 const std::string& index_name) { 35 TevStageConfig::Source source, const std::string& index_name) {
37 const auto& state = config.state; 36 const auto& state = config.state;
38 using Source = TevStageConfig::Source; 37 using Source = TevStageConfig::Source;
39 switch (source) { 38 switch (source) {
@@ -48,7 +47,7 @@ static void AppendSource(std::string& out, const PicaShaderConfig& config, TevSt
48 break; 47 break;
49 case Source::Texture0: 48 case Source::Texture0:
50 // Only unit 0 respects the texturing type (according to 3DBrew) 49 // Only unit 0 respects the texturing type (according to 3DBrew)
51 switch(state.texture0_type) { 50 switch (state.texture0_type) {
52 case Pica::Regs::TextureConfig::Texture2D: 51 case Pica::Regs::TextureConfig::Texture2D:
53 out += "texture(tex[0], texcoord[0])"; 52 out += "texture(tex[0], texcoord[0])";
54 break; 53 break;
@@ -57,7 +56,8 @@ static void AppendSource(std::string& out, const PicaShaderConfig& config, TevSt
57 break; 56 break;
58 default: 57 default:
59 out += "texture(tex[0], texcoord[0])"; 58 out += "texture(tex[0], texcoord[0])";
60 LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", static_cast<int>(state.texture0_type)); 59 LOG_CRITICAL(HW_GPU, "Unhandled texture type %x",
60 static_cast<int>(state.texture0_type));
61 UNIMPLEMENTED(); 61 UNIMPLEMENTED();
62 break; 62 break;
63 } 63 }
@@ -85,8 +85,9 @@ static void AppendSource(std::string& out, const PicaShaderConfig& config, TevSt
85} 85}
86 86
87/// Writes the color components to use for the specified TEV stage color modifier 87/// Writes the color components to use for the specified TEV stage color modifier
88static void AppendColorModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::ColorModifier modifier, 88static void AppendColorModifier(std::string& out, const PicaShaderConfig& config,
89 TevStageConfig::Source source, const std::string& index_name) { 89 TevStageConfig::ColorModifier modifier,
90 TevStageConfig::Source source, const std::string& index_name) {
90 using ColorModifier = TevStageConfig::ColorModifier; 91 using ColorModifier = TevStageConfig::ColorModifier;
91 switch (modifier) { 92 switch (modifier) {
92 case ColorModifier::SourceColor: 93 case ColorModifier::SourceColor:
@@ -142,8 +143,9 @@ static void AppendColorModifier(std::string& out, const PicaShaderConfig& config
142} 143}
143 144
144/// Writes the alpha component to use for the specified TEV stage alpha modifier 145/// Writes the alpha component to use for the specified TEV stage alpha modifier
145static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::AlphaModifier modifier, 146static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config,
146 TevStageConfig::Source source, const std::string& index_name) { 147 TevStageConfig::AlphaModifier modifier,
148 TevStageConfig::Source source, const std::string& index_name) {
147 using AlphaModifier = TevStageConfig::AlphaModifier; 149 using AlphaModifier = TevStageConfig::AlphaModifier;
148 switch (modifier) { 150 switch (modifier) {
149 case AlphaModifier::SourceAlpha: 151 case AlphaModifier::SourceAlpha:
@@ -191,7 +193,7 @@ static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config
191 193
192/// Writes the combiner function for the color components for the specified TEV stage operation 194/// Writes the combiner function for the color components for the specified TEV stage operation
193static void AppendColorCombiner(std::string& out, TevStageConfig::Operation operation, 195static void AppendColorCombiner(std::string& out, TevStageConfig::Operation operation,
194 const std::string& variable_name) { 196 const std::string& variable_name) {
195 out += "clamp("; 197 out += "clamp(";
196 using Operation = TevStageConfig::Operation; 198 using Operation = TevStageConfig::Operation;
197 switch (operation) { 199 switch (operation) {
@@ -208,8 +210,10 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
208 out += variable_name + "[0] + " + variable_name + "[1] - vec3(0.5)"; 210 out += variable_name + "[0] + " + variable_name + "[1] - vec3(0.5)";
209 break; 211 break;
210 case Operation::Lerp: 212 case Operation::Lerp:
211 // TODO(bunnei): Verify if HW actually does this per-component, otherwise we can just use builtin lerp 213 // TODO(bunnei): Verify if HW actually does this per-component, otherwise we can just use
212 out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name + "[1] * (vec3(1.0) - " + variable_name + "[2])"; 214 // builtin lerp
215 out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name +
216 "[1] * (vec3(1.0) - " + variable_name + "[2])";
213 break; 217 break;
214 case Operation::Subtract: 218 case Operation::Subtract:
215 out += variable_name + "[0] - " + variable_name + "[1]"; 219 out += variable_name + "[0] - " + variable_name + "[1]";
@@ -218,10 +222,12 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
218 out += variable_name + "[0] * " + variable_name + "[1] + " + variable_name + "[2]"; 222 out += variable_name + "[0] * " + variable_name + "[1] + " + variable_name + "[2]";
219 break; 223 break;
220 case Operation::AddThenMultiply: 224 case Operation::AddThenMultiply:
221 out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + variable_name + "[2]"; 225 out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " +
226 variable_name + "[2]";
222 break; 227 break;
223 case Operation::Dot3_RGB: 228 case Operation::Dot3_RGB:
224 out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name + "[1] - vec3(0.5)) * 4.0)"; 229 out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name +
230 "[1] - vec3(0.5)) * 4.0)";
225 break; 231 break;
226 default: 232 default:
227 out += "vec3(0.0)"; 233 out += "vec3(0.0)";
@@ -233,7 +239,7 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
233 239
234/// Writes the combiner function for the alpha component for the specified TEV stage operation 240/// Writes the combiner function for the alpha component for the specified TEV stage operation
235static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation operation, 241static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation operation,
236 const std::string& variable_name) { 242 const std::string& variable_name) {
237 out += "clamp("; 243 out += "clamp(";
238 using Operation = TevStageConfig::Operation; 244 using Operation = TevStageConfig::Operation;
239 switch (operation) { 245 switch (operation) {
@@ -250,7 +256,8 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper
250 out += variable_name + "[0] + " + variable_name + "[1] - 0.5"; 256 out += variable_name + "[0] + " + variable_name + "[1] - 0.5";
251 break; 257 break;
252 case Operation::Lerp: 258 case Operation::Lerp:
253 out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name + "[1] * (1.0 - " + variable_name + "[2])"; 259 out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name +
260 "[1] * (1.0 - " + variable_name + "[2])";
254 break; 261 break;
255 case Operation::Subtract: 262 case Operation::Subtract:
256 out += variable_name + "[0] - " + variable_name + "[1]"; 263 out += variable_name + "[0] - " + variable_name + "[1]";
@@ -259,7 +266,8 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper
259 out += variable_name + "[0] * " + variable_name + "[1] + " + variable_name + "[2]"; 266 out += variable_name + "[0] * " + variable_name + "[1] + " + variable_name + "[2]";
260 break; 267 break;
261 case Operation::AddThenMultiply: 268 case Operation::AddThenMultiply:
262 out += "min(" + variable_name + "[0] + " + variable_name + "[1], 1.0) * " + variable_name + "[2]"; 269 out += "min(" + variable_name + "[0] + " + variable_name + "[1], 1.0) * " + variable_name +
270 "[2]";
263 break; 271 break;
264 default: 272 default:
265 out += "0.0"; 273 out += "0.0";
@@ -284,9 +292,10 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) {
284 case CompareFunc::LessThan: 292 case CompareFunc::LessThan:
285 case CompareFunc::LessThanOrEqual: 293 case CompareFunc::LessThanOrEqual:
286 case CompareFunc::GreaterThan: 294 case CompareFunc::GreaterThan:
287 case CompareFunc::GreaterThanOrEqual: 295 case CompareFunc::GreaterThanOrEqual: {
288 { 296 static const char* op[] = {
289 static const char* op[] = { "!=", "==", ">=", ">", "<=", "<", }; 297 "!=", "==", ">=", ">", "<=", "<",
298 };
290 unsigned index = (unsigned)func - (unsigned)CompareFunc::Equal; 299 unsigned index = (unsigned)func - (unsigned)CompareFunc::Equal;
291 out += "int(last_tex_env_out.a * 255.0f) " + std::string(op[index]) + " alphatest_ref"; 300 out += "int(last_tex_env_out.a * 255.0f) " + std::string(op[index]) + " alphatest_ref";
292 break; 301 break;
@@ -301,7 +310,8 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) {
301 310
302/// Writes the code to emulate the specified TEV stage 311/// Writes the code to emulate the specified TEV stage
303static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) { 312static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) {
304 const auto stage = static_cast<const Pica::Regs::TevStageConfig>(config.state.tev_stages[index]); 313 const auto stage =
314 static_cast<const Pica::Regs::TevStageConfig>(config.state.tev_stages[index]);
305 if (!IsPassThroughTevStage(stage)) { 315 if (!IsPassThroughTevStage(stage)) {
306 std::string index_name = std::to_string(index); 316 std::string index_name = std::to_string(index);
307 317
@@ -330,8 +340,12 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
330 out += ";\n"; 340 out += ";\n";
331 341
332 out += "last_tex_env_out = vec4(" 342 out += "last_tex_env_out = vec4("
333 "clamp(color_output_" + index_name + " * " + std::to_string(stage.GetColorMultiplier()) + ".0, vec3(0.0), vec3(1.0))," 343 "clamp(color_output_" +
334 "clamp(alpha_output_" + index_name + " * " + std::to_string(stage.GetAlphaMultiplier()) + ".0, 0.0, 1.0));\n"; 344 index_name + " * " + std::to_string(stage.GetColorMultiplier()) +
345 ".0, vec3(0.0), vec3(1.0)),"
346 "clamp(alpha_output_" +
347 index_name + " * " + std::to_string(stage.GetAlphaMultiplier()) +
348 ".0, 0.0, 1.0));\n";
335 } 349 }
336 350
337 out += "combiner_buffer = next_combiner_buffer;\n"; 351 out += "combiner_buffer = next_combiner_buffer;\n";
@@ -355,13 +369,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
355 369
356 // Compute fragment normals 370 // Compute fragment normals
357 if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { 371 if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) {
358 // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture 372 // Bump mapping is enabled using a normal map, read perturbation vector from the selected
373 // texture
359 std::string bump_selector = std::to_string(lighting.bump_selector); 374 std::string bump_selector = std::to_string(lighting.bump_selector);
360 out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; 375 out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" +
376 bump_selector + "]).rgb - 1.0;\n";
361 377
362 // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result 378 // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher
379 // precision result
363 if (lighting.bump_renorm) { 380 if (lighting.bump_renorm) {
364 std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; 381 std::string val =
382 "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))";
365 out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; 383 out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n";
366 } 384 }
367 } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { 385 } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) {
@@ -373,7 +391,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
373 out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; 391 out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n";
374 } 392 }
375 393
376 // Rotate the surface-local normal by the interpolated normal quaternion to convert it to eyespace 394 // Rotate the surface-local normal by the interpolated normal quaternion to convert it to
395 // eyespace
377 out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; 396 out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n";
378 397
379 // Gets the index into the specified lookup table for specular lighting 398 // Gets the index into the specified lookup table for specular lighting
@@ -406,12 +425,14 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
406 425
407 if (abs) { 426 if (abs) {
408 // LUT index is in the range of (0.0, 1.0) 427 // LUT index is in the range of (0.0, 1.0)
409 index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; 428 index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")"
429 : "max(" + index + ", 0.f)";
410 return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; 430 return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))";
411 } else { 431 } else {
412 // LUT index is in the range of (-1.0, 1.0) 432 // LUT index is in the range of (-1.0, 1.0)
413 index = "clamp(" + index + ", -1.0, 1.0)"; 433 index = "clamp(" + index + ", -1.0, 1.0)";
414 return "(FLOAT_255 * ((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0)"; 434 return "(FLOAT_255 * ((" + index + " < 0) ? " + index + " + 2.0 : " + index +
435 ") / 2.0)";
415 } 436 }
416 437
417 return std::string(); 438 return std::string();
@@ -434,52 +455,74 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
434 else 455 else
435 out += "light_vector = normalize(" + light_src + ".position + view);\n"; 456 out += "light_vector = normalize(" + light_src + ".position + view);\n";
436 457
437 // Compute dot product of light_vector and normal, adjust if lighting is one-sided or two-sided 458 // Compute dot product of light_vector and normal, adjust if lighting is one-sided or
438 std::string dot_product = light_config.two_sided_diffuse ? "abs(dot(light_vector, normal))" : "max(dot(light_vector, normal), 0.0)"; 459 // two-sided
460 std::string dot_product = light_config.two_sided_diffuse
461 ? "abs(dot(light_vector, normal))"
462 : "max(dot(light_vector, normal), 0.0)";
439 463
440 // If enabled, compute distance attenuation value 464 // If enabled, compute distance attenuation value
441 std::string dist_atten = "1.0"; 465 std::string dist_atten = "1.0";
442 if (light_config.dist_atten_enable) { 466 if (light_config.dist_atten_enable) {
443 std::string index = "(" + light_src + ".dist_atten_scale * length(-view - " + light_src + ".position) + " + light_src + ".dist_atten_bias)"; 467 std::string index = "(" + light_src + ".dist_atten_scale * length(-view - " +
468 light_src + ".position) + " + light_src + ".dist_atten_bias)";
444 index = "((clamp(" + index + ", 0.0, FLOAT_255)))"; 469 index = "((clamp(" + index + ", 0.0, FLOAT_255)))";
445 const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num); 470 const unsigned lut_num =
471 ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num);
446 dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index); 472 dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index);
447 } 473 }
448 474
449 // If enabled, clamp specular component if lighting result is negative 475 // If enabled, clamp specular component if lighting result is negative
450 std::string clamp_highlights = lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; 476 std::string clamp_highlights =
477 lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
451 478
452 // Specular 0 component 479 // Specular 0 component
453 std::string d0_lut_value = "1.0"; 480 std::string d0_lut_value = "1.0";
454 if (lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution0)) { 481 if (lighting.lut_d0.enable &&
482 Pica::Regs::IsLightingSamplerSupported(lighting.config,
483 Pica::Regs::LightingSampler::Distribution0)) {
455 // Lookup specular "distribution 0" LUT value 484 // Lookup specular "distribution 0" LUT value
456 std::string index = GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input); 485 std::string index =
457 d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; 486 GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input);
487 d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " +
488 GetLutValue(Regs::LightingSampler::Distribution0, index) + ")";
458 } 489 }
459 std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; 490 std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)";
460 491
461 // If enabled, lookup ReflectRed value, otherwise, 1.0 is used 492 // If enabled, lookup ReflectRed value, otherwise, 1.0 is used
462 if (lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { 493 if (lighting.lut_rr.enable &&
463 std::string index = GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input); 494 Pica::Regs::IsLightingSamplerSupported(lighting.config,
464 std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; 495 Pica::Regs::LightingSampler::ReflectRed)) {
496 std::string index =
497 GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input);
498 std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " +
499 GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")";
465 out += "refl_value.r = " + value + ";\n"; 500 out += "refl_value.r = " + value + ";\n";
466 } else { 501 } else {
467 out += "refl_value.r = 1.0;\n"; 502 out += "refl_value.r = 1.0;\n";
468 } 503 }
469 504
470 // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used 505 // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used
471 if (lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { 506 if (lighting.lut_rg.enable &&
472 std::string index = GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input); 507 Pica::Regs::IsLightingSamplerSupported(lighting.config,
473 std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; 508 Pica::Regs::LightingSampler::ReflectGreen)) {
509 std::string index =
510 GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input);
511 std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " +
512 GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")";
474 out += "refl_value.g = " + value + ";\n"; 513 out += "refl_value.g = " + value + ";\n";
475 } else { 514 } else {
476 out += "refl_value.g = refl_value.r;\n"; 515 out += "refl_value.g = refl_value.r;\n";
477 } 516 }
478 517
479 // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used 518 // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used
480 if (lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { 519 if (lighting.lut_rb.enable &&
481 std::string index = GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input); 520 Pica::Regs::IsLightingSamplerSupported(lighting.config,
482 std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; 521 Pica::Regs::LightingSampler::ReflectBlue)) {
522 std::string index =
523 GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input);
524 std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " +
525 GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")";
483 out += "refl_value.b = " + value + ";\n"; 526 out += "refl_value.b = " + value + ";\n";
484 } else { 527 } else {
485 out += "refl_value.b = refl_value.r;\n"; 528 out += "refl_value.b = refl_value.r;\n";
@@ -487,18 +530,26 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
487 530
488 // Specular 1 component 531 // Specular 1 component
489 std::string d1_lut_value = "1.0"; 532 std::string d1_lut_value = "1.0";
490 if (lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution1)) { 533 if (lighting.lut_d1.enable &&
534 Pica::Regs::IsLightingSamplerSupported(lighting.config,
535 Pica::Regs::LightingSampler::Distribution1)) {
491 // Lookup specular "distribution 1" LUT value 536 // Lookup specular "distribution 1" LUT value
492 std::string index = GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input); 537 std::string index =
493 d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; 538 GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input);
539 d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " +
540 GetLutValue(Regs::LightingSampler::Distribution1, index) + ")";
494 } 541 }
495 std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; 542 std::string specular_1 =
543 "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)";
496 544
497 // Fresnel 545 // Fresnel
498 if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Fresnel)) { 546 if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(
547 lighting.config, Pica::Regs::LightingSampler::Fresnel)) {
499 // Lookup fresnel LUT value 548 // Lookup fresnel LUT value
500 std::string index = GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input); 549 std::string index =
501 std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; 550 GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input);
551 std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " +
552 GetLutValue(Regs::LightingSampler::Fresnel, index) + ")";
502 553
503 // Enabled for difffuse lighting alpha component 554 // Enabled for difffuse lighting alpha component
504 if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || 555 if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha ||
@@ -512,10 +563,12 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
512 } 563 }
513 564
514 // Compute primary fragment color (diffuse lighting) function 565 // Compute primary fragment color (diffuse lighting) function
515 out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n"; 566 out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " +
567 light_src + ".ambient) * " + dist_atten + ";\n";
516 568
517 // Compute secondary fragment color (specular lighting) function 569 // Compute secondary fragment color (specular lighting) function
518 out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " + clamp_highlights + " * " + dist_atten + ";\n"; 570 out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " +
571 clamp_highlights + " * " + dist_atten + ";\n";
519 } 572 }
520 573
521 // Sum final lighting result 574 // Sum final lighting result
@@ -598,9 +651,9 @@ vec4 secondary_fragment_color = vec4(0.0);
598 out += "!"; 651 out += "!";
599 // x2,y2 have +1 added to cover the entire pixel area 652 // x2,y2 have +1 added to cover the entire pixel area
600 out += "(gl_FragCoord.x >= scissor_x1 * framebuffer_scale.x && " 653 out += "(gl_FragCoord.x >= scissor_x1 * framebuffer_scale.x && "
601 "gl_FragCoord.y >= scissor_y1 * framebuffer_scale.y && " 654 "gl_FragCoord.y >= scissor_y1 * framebuffer_scale.y && "
602 "gl_FragCoord.x < (scissor_x2 + 1) * framebuffer_scale.x && " 655 "gl_FragCoord.x < (scissor_x2 + 1) * framebuffer_scale.x && "
603 "gl_FragCoord.y < (scissor_y2 + 1) * framebuffer_scale.y)) discard;\n"; 656 "gl_FragCoord.y < (scissor_y2 + 1) * framebuffer_scale.y)) discard;\n";
604 } 657 }
605 658
606 out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n"; 659 out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n";
@@ -638,9 +691,11 @@ vec4 secondary_fragment_color = vec4(0.0);
638 out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n"; 691 out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n";
639 out += "float fog_f = fog_index - fog_i;\n"; 692 out += "float fog_f = fog_index - fog_i;\n";
640 out += "uint fog_lut_entry = texelFetch(fog_lut, int(fog_i), 0).r;\n"; 693 out += "uint fog_lut_entry = texelFetch(fog_lut, int(fog_i), 0).r;\n";
641 out += "float fog_lut_entry_difference = float(int((fog_lut_entry & 0x1FFFU) << 19U) >> 19);\n"; // Extract signed difference 694 out += "float fog_lut_entry_difference = float(int((fog_lut_entry & 0x1FFFU) << 19U) >> "
695 "19);\n"; // Extract signed difference
642 out += "float fog_lut_entry_value = float((fog_lut_entry >> 13U) & 0x7FFU);\n"; 696 out += "float fog_lut_entry_value = float((fog_lut_entry >> 13U) & 0x7FFU);\n";
643 out += "float fog_factor = (fog_lut_entry_value + fog_lut_entry_difference * fog_f) / 2047.0;\n"; 697 out += "float fog_factor = (fog_lut_entry_value + fog_lut_entry_difference * fog_f) / "
698 "2047.0;\n";
644 out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; 699 out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n";
645 700
646 // Blend the fog 701 // Blend the fog
@@ -658,14 +713,20 @@ vec4 secondary_fragment_color = vec4(0.0);
658std::string GenerateVertexShader() { 713std::string GenerateVertexShader() {
659 std::string out = "#version 330 core\n"; 714 std::string out = "#version 330 core\n";
660 715
661 out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; 716 out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) +
662 out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; 717 ") in vec4 vert_position;\n";
663 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; 718 out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n";
664 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; 719 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) +
665 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; 720 ") in vec2 vert_texcoord0;\n";
666 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) + ") in float vert_texcoord0_w;\n"; 721 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) +
667 out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; 722 ") in vec2 vert_texcoord1;\n";
668 out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; 723 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) +
724 ") in vec2 vert_texcoord2;\n";
725 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) +
726 ") in float vert_texcoord0_w;\n";
727 out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) +
728 ") in vec4 vert_normquat;\n";
729 out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n";
669 730
670 out += R"( 731 out += R"(
671out vec4 primary_color; 732out vec4 primary_color;
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index dded3db46..7d90ec6a3 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -56,7 +56,8 @@ GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) {
56 if (result) { 56 if (result) {
57 LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); 57 LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]);
58 } else { 58 } else {
59 LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s", &fragment_shader_error[0]); 59 LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s",
60 &fragment_shader_error[0]);
60 } 61 }
61 } 62 }
62 63
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 13ee986b9..a97269d44 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -106,11 +106,11 @@ void OpenGLState::Apply() const {
106 106
107 // Color mask 107 // Color mask
108 if (color_mask.red_enabled != cur_state.color_mask.red_enabled || 108 if (color_mask.red_enabled != cur_state.color_mask.red_enabled ||
109 color_mask.green_enabled != cur_state.color_mask.green_enabled || 109 color_mask.green_enabled != cur_state.color_mask.green_enabled ||
110 color_mask.blue_enabled != cur_state.color_mask.blue_enabled || 110 color_mask.blue_enabled != cur_state.color_mask.blue_enabled ||
111 color_mask.alpha_enabled != cur_state.color_mask.alpha_enabled) { 111 color_mask.alpha_enabled != cur_state.color_mask.alpha_enabled) {
112 glColorMask(color_mask.red_enabled, color_mask.green_enabled, 112 glColorMask(color_mask.red_enabled, color_mask.green_enabled, color_mask.blue_enabled,
113 color_mask.blue_enabled, color_mask.alpha_enabled); 113 color_mask.alpha_enabled);
114 } 114 }
115 115
116 // Stencil test 116 // Stencil test
@@ -123,15 +123,16 @@ void OpenGLState::Apply() const {
123 } 123 }
124 124
125 if (stencil.test_func != cur_state.stencil.test_func || 125 if (stencil.test_func != cur_state.stencil.test_func ||
126 stencil.test_ref != cur_state.stencil.test_ref || 126 stencil.test_ref != cur_state.stencil.test_ref ||
127 stencil.test_mask != cur_state.stencil.test_mask) { 127 stencil.test_mask != cur_state.stencil.test_mask) {
128 glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask); 128 glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask);
129 } 129 }
130 130
131 if (stencil.action_depth_fail != cur_state.stencil.action_depth_fail || 131 if (stencil.action_depth_fail != cur_state.stencil.action_depth_fail ||
132 stencil.action_depth_pass != cur_state.stencil.action_depth_pass || 132 stencil.action_depth_pass != cur_state.stencil.action_depth_pass ||
133 stencil.action_stencil_fail != cur_state.stencil.action_stencil_fail) { 133 stencil.action_stencil_fail != cur_state.stencil.action_stencil_fail) {
134 glStencilOp(stencil.action_stencil_fail, stencil.action_depth_fail, stencil.action_depth_pass); 134 glStencilOp(stencil.action_stencil_fail, stencil.action_depth_fail,
135 stencil.action_depth_pass);
135 } 136 }
136 137
137 // Stencil mask 138 // Stencil mask
@@ -154,23 +155,22 @@ void OpenGLState::Apply() const {
154 } 155 }
155 156
156 if (blend.color.red != cur_state.blend.color.red || 157 if (blend.color.red != cur_state.blend.color.red ||
157 blend.color.green != cur_state.blend.color.green || 158 blend.color.green != cur_state.blend.color.green ||
158 blend.color.blue != cur_state.blend.color.blue || 159 blend.color.blue != cur_state.blend.color.blue ||
159 blend.color.alpha != cur_state.blend.color.alpha) { 160 blend.color.alpha != cur_state.blend.color.alpha) {
160 glBlendColor(blend.color.red, blend.color.green, 161 glBlendColor(blend.color.red, blend.color.green, blend.color.blue, blend.color.alpha);
161 blend.color.blue, blend.color.alpha);
162 } 162 }
163 163
164 if (blend.src_rgb_func != cur_state.blend.src_rgb_func || 164 if (blend.src_rgb_func != cur_state.blend.src_rgb_func ||
165 blend.dst_rgb_func != cur_state.blend.dst_rgb_func || 165 blend.dst_rgb_func != cur_state.blend.dst_rgb_func ||
166 blend.src_a_func != cur_state.blend.src_a_func || 166 blend.src_a_func != cur_state.blend.src_a_func ||
167 blend.dst_a_func != cur_state.blend.dst_a_func) { 167 blend.dst_a_func != cur_state.blend.dst_a_func) {
168 glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, 168 glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func,
169 blend.src_a_func, blend.dst_a_func); 169 blend.dst_a_func);
170 } 170 }
171 171
172 if (blend.rgb_equation != cur_state.blend.rgb_equation || 172 if (blend.rgb_equation != cur_state.blend.rgb_equation ||
173 blend.a_equation != cur_state.blend.a_equation) { 173 blend.a_equation != cur_state.blend.a_equation) {
174 glBlendEquationSeparate(blend.rgb_equation, blend.a_equation); 174 glBlendEquationSeparate(blend.rgb_equation, blend.a_equation);
175 } 175 }
176 176
@@ -237,8 +237,11 @@ void OpenGLState::Apply() const {
237GLenum OpenGLState::CheckFBStatus(GLenum target) { 237GLenum OpenGLState::CheckFBStatus(GLenum target) {
238 GLenum fb_status = glCheckFramebufferStatus(target); 238 GLenum fb_status = glCheckFramebufferStatus(target);
239 if (fb_status != GL_FRAMEBUFFER_COMPLETE) { 239 if (fb_status != GL_FRAMEBUFFER_COMPLETE) {
240 const char* fb_description = (target == GL_READ_FRAMEBUFFER ? "READ" : (target == GL_DRAW_FRAMEBUFFER ? "DRAW" : "UNK")); 240 const char* fb_description =
241 LOG_CRITICAL(Render_OpenGL, "OpenGL %s framebuffer check failed, status %X", fb_description, fb_status); 241 (target == GL_READ_FRAMEBUFFER ? "READ"
242 : (target == GL_DRAW_FRAMEBUFFER ? "DRAW" : "UNK"));
243 LOG_CRITICAL(Render_OpenGL, "OpenGL %s framebuffer check failed, status %X", fb_description,
244 fb_status);
242 } 245 }
243 246
244 return fb_status; 247 return fb_status;
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 13c71b0a6..01dead883 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -9,14 +9,14 @@
9class OpenGLState { 9class OpenGLState {
10public: 10public:
11 struct { 11 struct {
12 bool enabled; // GL_CULL_FACE 12 bool enabled; // GL_CULL_FACE
13 GLenum mode; // GL_CULL_FACE_MODE 13 GLenum mode; // GL_CULL_FACE_MODE
14 GLenum front_face; // GL_FRONT_FACE 14 GLenum front_face; // GL_FRONT_FACE
15 } cull; 15 } cull;
16 16
17 struct { 17 struct {
18 bool test_enabled; // GL_DEPTH_TEST 18 bool test_enabled; // GL_DEPTH_TEST
19 GLenum test_func; // GL_DEPTH_FUNC 19 GLenum test_func; // GL_DEPTH_FUNC
20 GLboolean write_mask; // GL_DEPTH_WRITEMASK 20 GLboolean write_mask; // GL_DEPTH_WRITEMASK
21 } depth; 21 } depth;
22 22
@@ -28,24 +28,24 @@ public:
28 } color_mask; // GL_COLOR_WRITEMASK 28 } color_mask; // GL_COLOR_WRITEMASK
29 29
30 struct { 30 struct {
31 bool test_enabled; // GL_STENCIL_TEST 31 bool test_enabled; // GL_STENCIL_TEST
32 GLenum test_func; // GL_STENCIL_FUNC 32 GLenum test_func; // GL_STENCIL_FUNC
33 GLint test_ref; // GL_STENCIL_REF 33 GLint test_ref; // GL_STENCIL_REF
34 GLuint test_mask; // GL_STENCIL_VALUE_MASK 34 GLuint test_mask; // GL_STENCIL_VALUE_MASK
35 GLuint write_mask; // GL_STENCIL_WRITEMASK 35 GLuint write_mask; // GL_STENCIL_WRITEMASK
36 GLenum action_stencil_fail; // GL_STENCIL_FAIL 36 GLenum action_stencil_fail; // GL_STENCIL_FAIL
37 GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL 37 GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL
38 GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS 38 GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS
39 } stencil; 39 } stencil;
40 40
41 struct { 41 struct {
42 bool enabled; // GL_BLEND 42 bool enabled; // GL_BLEND
43 GLenum rgb_equation; // GL_BLEND_EQUATION_RGB 43 GLenum rgb_equation; // GL_BLEND_EQUATION_RGB
44 GLenum a_equation; // GL_BLEND_EQUATION_ALPHA 44 GLenum a_equation; // GL_BLEND_EQUATION_ALPHA
45 GLenum src_rgb_func; // GL_BLEND_SRC_RGB 45 GLenum src_rgb_func; // GL_BLEND_SRC_RGB
46 GLenum dst_rgb_func; // GL_BLEND_DST_RGB 46 GLenum dst_rgb_func; // GL_BLEND_DST_RGB
47 GLenum src_a_func; // GL_BLEND_SRC_ALPHA 47 GLenum src_a_func; // GL_BLEND_SRC_ALPHA
48 GLenum dst_a_func; // GL_BLEND_DST_ALPHA 48 GLenum dst_a_func; // GL_BLEND_DST_ALPHA
49 49
50 struct { 50 struct {
51 GLclampf red; 51 GLclampf red;
@@ -60,7 +60,7 @@ public:
60 // 3 texture units - one for each that is used in PICA fragment shader emulation 60 // 3 texture units - one for each that is used in PICA fragment shader emulation
61 struct { 61 struct {
62 GLuint texture_2d; // GL_TEXTURE_BINDING_2D 62 GLuint texture_2d; // GL_TEXTURE_BINDING_2D
63 GLuint sampler; // GL_SAMPLER_BINDING 63 GLuint sampler; // GL_SAMPLER_BINDING
64 } texture_units[3]; 64 } texture_units[3];
65 65
66 struct { 66 struct {
@@ -74,10 +74,10 @@ public:
74 struct { 74 struct {
75 GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING 75 GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
76 GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING 76 GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
77 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING 77 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
78 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING 78 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING
79 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING 79 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
80 GLuint shader_program; // GL_CURRENT_PROGRAM 80 GLuint shader_program; // GL_CURRENT_PROGRAM
81 } draw; 81 } draw;
82 82
83 OpenGLState(); 83 OpenGLState();
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index d9b9c9cc2..a604e94d4 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -25,8 +25,8 @@ namespace PicaToGL {
25 25
26inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) { 26inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) {
27 static const GLenum filter_mode_table[] = { 27 static const GLenum filter_mode_table[] = {
28 GL_NEAREST, // TextureFilter::Nearest 28 GL_NEAREST, // TextureFilter::Nearest
29 GL_LINEAR // TextureFilter::Linear 29 GL_LINEAR // TextureFilter::Linear
30 }; 30 };
31 31
32 // Range check table for input 32 // Range check table for input
@@ -52,10 +52,10 @@ inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) {
52 52
53inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) { 53inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) {
54 static const GLenum wrap_mode_table[] = { 54 static const GLenum wrap_mode_table[] = {
55 GL_CLAMP_TO_EDGE, // WrapMode::ClampToEdge 55 GL_CLAMP_TO_EDGE, // WrapMode::ClampToEdge
56 GL_CLAMP_TO_BORDER,// WrapMode::ClampToBorder 56 GL_CLAMP_TO_BORDER, // WrapMode::ClampToBorder
57 GL_REPEAT, // WrapMode::Repeat 57 GL_REPEAT, // WrapMode::Repeat
58 GL_MIRRORED_REPEAT // WrapMode::MirroredRepeat 58 GL_MIRRORED_REPEAT // WrapMode::MirroredRepeat
59 }; 59 };
60 60
61 // Range check table for input 61 // Range check table for input
@@ -131,22 +131,22 @@ inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) {
131 131
132inline GLenum LogicOp(Pica::Regs::LogicOp op) { 132inline GLenum LogicOp(Pica::Regs::LogicOp op) {
133 static const GLenum logic_op_table[] = { 133 static const GLenum logic_op_table[] = {
134 GL_CLEAR, // Clear 134 GL_CLEAR, // Clear
135 GL_AND, // And 135 GL_AND, // And
136 GL_AND_REVERSE, // AndReverse 136 GL_AND_REVERSE, // AndReverse
137 GL_COPY, // Copy 137 GL_COPY, // Copy
138 GL_SET, // Set 138 GL_SET, // Set
139 GL_COPY_INVERTED, // CopyInverted 139 GL_COPY_INVERTED, // CopyInverted
140 GL_NOOP, // NoOp 140 GL_NOOP, // NoOp
141 GL_INVERT, // Invert 141 GL_INVERT, // Invert
142 GL_NAND, // Nand 142 GL_NAND, // Nand
143 GL_OR, // Or 143 GL_OR, // Or
144 GL_NOR, // Nor 144 GL_NOR, // Nor
145 GL_XOR, // Xor 145 GL_XOR, // Xor
146 GL_EQUIV, // Equiv 146 GL_EQUIV, // Equiv
147 GL_AND_INVERTED, // AndInverted 147 GL_AND_INVERTED, // AndInverted
148 GL_OR_REVERSE, // OrReverse 148 GL_OR_REVERSE, // OrReverse
149 GL_OR_INVERTED, // OrInverted 149 GL_OR_INVERTED, // OrInverted
150 }; 150 };
151 151
152 // Range check table for input 152 // Range check table for input
@@ -185,14 +185,14 @@ inline GLenum CompareFunc(Pica::Regs::CompareFunc func) {
185 185
186inline GLenum StencilOp(Pica::Regs::StencilAction action) { 186inline GLenum StencilOp(Pica::Regs::StencilAction action) {
187 static const GLenum stencil_op_table[] = { 187 static const GLenum stencil_op_table[] = {
188 GL_KEEP, // StencilAction::Keep 188 GL_KEEP, // StencilAction::Keep
189 GL_ZERO, // StencilAction::Zero 189 GL_ZERO, // StencilAction::Zero
190 GL_REPLACE, // StencilAction::Replace 190 GL_REPLACE, // StencilAction::Replace
191 GL_INCR, // StencilAction::Increment 191 GL_INCR, // StencilAction::Increment
192 GL_DECR, // StencilAction::Decrement 192 GL_DECR, // StencilAction::Decrement
193 GL_INVERT, // StencilAction::Invert 193 GL_INVERT, // StencilAction::Invert
194 GL_INCR_WRAP, // StencilAction::IncrementWrap 194 GL_INCR_WRAP, // StencilAction::IncrementWrap
195 GL_DECR_WRAP // StencilAction::DecrementWrap 195 GL_DECR_WRAP // StencilAction::DecrementWrap
196 }; 196 };
197 197
198 // Range check table for input 198 // Range check table for input
@@ -207,18 +207,12 @@ inline GLenum StencilOp(Pica::Regs::StencilAction action) {
207} 207}
208 208
209inline GLvec4 ColorRGBA8(const u32 color) { 209inline GLvec4 ColorRGBA8(const u32 color) {
210 return { { (color >> 0 & 0xFF) / 255.0f, 210 return {{(color >> 0 & 0xFF) / 255.0f, (color >> 8 & 0xFF) / 255.0f,
211 (color >> 8 & 0xFF) / 255.0f, 211 (color >> 16 & 0xFF) / 255.0f, (color >> 24 & 0xFF) / 255.0f}};
212 (color >> 16 & 0xFF) / 255.0f,
213 (color >> 24 & 0xFF) / 255.0f
214 } };
215} 212}
216 213
217inline std::array<GLfloat, 3> LightColor(const Pica::Regs::LightColor& color) { 214inline std::array<GLfloat, 3> LightColor(const Pica::Regs::LightColor& color) {
218 return { { color.r / 255.0f, 215 return {{color.r / 255.0f, color.g / 255.0f, color.b / 255.0f}};
219 color.g / 255.0f,
220 color.b / 255.0f
221 } };
222} 216}
223 217
224} // namespace 218} // namespace
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 8410e0a64..3cabda8f9 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -89,8 +89,12 @@ struct ScreenRectVertex {
89static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) { 89static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) {
90 std::array<GLfloat, 3 * 2> matrix; 90 std::array<GLfloat, 3 * 2> matrix;
91 91
92 matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; 92 matrix[0] = 2.f / width;
93 matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; 93 matrix[2] = 0.f;
94 matrix[4] = -1.f;
95 matrix[1] = 0.f;
96 matrix[3] = -2.f / height;
97 matrix[5] = 1.f;
94 // Last matrix row is implicitly assumed to be [0, 0, 1]. 98 // Last matrix row is implicitly assumed to be [0, 0, 1].
95 99
96 return matrix; 100 return matrix;
@@ -98,7 +102,7 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
98 102
99/// RendererOpenGL constructor 103/// RendererOpenGL constructor
100RendererOpenGL::RendererOpenGL() { 104RendererOpenGL::RendererOpenGL() {
101 resolution_width = std::max(VideoCore::kScreenTopWidth, VideoCore::kScreenBottomWidth); 105 resolution_width = std::max(VideoCore::kScreenTopWidth, VideoCore::kScreenBottomWidth);
102 resolution_height = VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight; 106 resolution_height = VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight;
103} 107}
104 108
@@ -116,13 +120,15 @@ void RendererOpenGL::SwapBuffers() {
116 const auto& framebuffer = GPU::g_regs.framebuffer_config[i]; 120 const auto& framebuffer = GPU::g_regs.framebuffer_config[i];
117 121
118 // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 122 // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04
119 u32 lcd_color_addr = (i == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom); 123 u32 lcd_color_addr =
124 (i == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom);
120 lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr; 125 lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr;
121 LCD::Regs::ColorFill color_fill = {0}; 126 LCD::Regs::ColorFill color_fill = {0};
122 LCD::Read(color_fill.raw, lcd_color_addr); 127 LCD::Read(color_fill.raw, lcd_color_addr);
123 128
124 if (color_fill.is_enabled) { 129 if (color_fill.is_enabled) {
125 LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, screen_infos[i].texture); 130 LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b,
131 screen_infos[i].texture);
126 132
127 // Resize the texture in case the framebuffer size has changed 133 // Resize the texture in case the framebuffer size has changed
128 screen_infos[i].texture.width = 1; 134 screen_infos[i].texture.width = 1;
@@ -172,15 +178,14 @@ void RendererOpenGL::SwapBuffers() {
172 * Loads framebuffer from emulated memory into the active OpenGL texture. 178 * Loads framebuffer from emulated memory into the active OpenGL texture.
173 */ 179 */
174void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, 180void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
175 ScreenInfo& screen_info) { 181 ScreenInfo& screen_info) {
176 182
177 const PAddr framebuffer_addr = framebuffer.active_fb == 0 ? 183 const PAddr framebuffer_addr =
178 framebuffer.address_left1 : framebuffer.address_left2; 184 framebuffer.active_fb == 0 ? framebuffer.address_left1 : framebuffer.address_left2;
179 185
180 LOG_TRACE(Render_OpenGL, "0x%08x bytes from 0x%08x(%dx%d), fmt %x", 186 LOG_TRACE(Render_OpenGL, "0x%08x bytes from 0x%08x(%dx%d), fmt %x",
181 framebuffer.stride * framebuffer.height, 187 framebuffer.stride * framebuffer.height, framebuffer_addr, (int)framebuffer.width,
182 framebuffer_addr, (int)framebuffer.width, 188 (int)framebuffer.height, (int)framebuffer.format);
183 (int)framebuffer.height, (int)framebuffer.format);
184 189
185 int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); 190 int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format);
186 size_t pixel_stride = framebuffer.stride / bpp; 191 size_t pixel_stride = framebuffer.stride / bpp;
@@ -192,7 +197,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram
192 // only allows rows to have a memory alignement of 4. 197 // only allows rows to have a memory alignement of 4.
193 ASSERT(pixel_stride % 4 == 0); 198 ASSERT(pixel_stride % 4 == 0);
194 199
195 if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, static_cast<u32>(pixel_stride), screen_info)) { 200 if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr,
201 static_cast<u32>(pixel_stride), screen_info)) {
196 // Reset the screen info's display texture to its own permanent texture 202 // Reset the screen info's display texture to its own permanent texture
197 screen_info.display_texture = screen_info.texture.resource.handle; 203 screen_info.display_texture = screen_info.texture.resource.handle;
198 screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f); 204 screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
@@ -208,12 +214,14 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram
208 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride); 214 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
209 215
210 // Update existing texture 216 // Update existing texture
211 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they 217 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
218 // they
212 // differ from the LCD resolution. 219 // differ from the LCD resolution.
213 // TODO: Applications could theoretically crash Citra here by specifying too large 220 // TODO: Applications could theoretically crash Citra here by specifying too large
214 // framebuffer sizes. We should make sure that this cannot happen. 221 // framebuffer sizes. We should make sure that this cannot happen.
215 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height, 222 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
216 screen_info.texture.gl_format, screen_info.texture.gl_type, framebuffer_data); 223 screen_info.texture.gl_format, screen_info.texture.gl_type,
224 framebuffer_data);
217 225
218 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 226 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
219 227
@@ -224,7 +232,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram
224 232
225/** 233/**
226 * Fills active OpenGL texture with the given RGB color. 234 * Fills active OpenGL texture with the given RGB color.
227 * Since the color is solid, the texture can be 1x1 but will stretch across whatever it's rendered on. 235 * Since the color is solid, the texture can be 1x1 but will stretch across whatever it's rendered
236 * on.
228 * This has the added benefit of being *really fast*. 237 * This has the added benefit of being *really fast*.
229 */ 238 */
230void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, 239void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
@@ -233,7 +242,7 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
233 state.Apply(); 242 state.Apply();
234 243
235 glActiveTexture(GL_TEXTURE0); 244 glActiveTexture(GL_TEXTURE0);
236 u8 framebuffer_data[3] = { color_r, color_g, color_b }; 245 u8 framebuffer_data[3] = {color_r, color_g, color_b};
237 246
238 // Update existing texture 247 // Update existing texture
239 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data); 248 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data);
@@ -246,7 +255,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
246 * Initializes the OpenGL state and creates persistent objects. 255 * Initializes the OpenGL state and creates persistent objects.
247 */ 256 */
248void RendererOpenGL::InitOpenGLObjects() { 257void RendererOpenGL::InitOpenGLObjects() {
249 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); 258 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
259 0.0f);
250 260
251 // Link shaders and get variable locations 261 // Link shaders and get variable locations
252 shader.Create(vertex_shader, fragment_shader); 262 shader.Create(vertex_shader, fragment_shader);
@@ -270,8 +280,10 @@ void RendererOpenGL::InitOpenGLObjects() {
270 280
271 // Attach vertex data to VAO 281 // Attach vertex data to VAO
272 glBufferData(GL_ARRAY_BUFFER, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); 282 glBufferData(GL_ARRAY_BUFFER, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
273 glVertexAttribPointer(attrib_position, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), (GLvoid*)offsetof(ScreenRectVertex, position)); 283 glVertexAttribPointer(attrib_position, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex),
274 glVertexAttribPointer(attrib_tex_coord, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), (GLvoid*)offsetof(ScreenRectVertex, tex_coord)); 284 (GLvoid*)offsetof(ScreenRectVertex, position));
285 glVertexAttribPointer(attrib_tex_coord, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex),
286 (GLvoid*)offsetof(ScreenRectVertex, tex_coord));
275 glEnableVertexAttribArray(attrib_position); 287 glEnableVertexAttribArray(attrib_position);
276 glEnableVertexAttribArray(attrib_tex_coord); 288 glEnableVertexAttribArray(attrib_tex_coord);
277 289
@@ -352,23 +364,25 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
352 364
353 glActiveTexture(GL_TEXTURE0); 365 glActiveTexture(GL_TEXTURE0);
354 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, 366 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
355 texture.gl_format, texture.gl_type, nullptr); 367 texture.gl_format, texture.gl_type, nullptr);
356 368
357 state.texture_units[0].texture_2d = 0; 369 state.texture_units[0].texture_2d = 0;
358 state.Apply(); 370 state.Apply();
359} 371}
360 372
361/** 373/**
362 * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation. 374 * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD
375 * rotation.
363 */ 376 */
364void RendererOpenGL::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h) { 377void RendererOpenGL::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y,
378 float w, float h) {
365 auto& texcoords = screen_info.display_texcoords; 379 auto& texcoords = screen_info.display_texcoords;
366 380
367 std::array<ScreenRectVertex, 4> vertices = {{ 381 std::array<ScreenRectVertex, 4> vertices = {{
368 ScreenRectVertex(x, y, texcoords.bottom, texcoords.left), 382 ScreenRectVertex(x, y, texcoords.bottom, texcoords.left),
369 ScreenRectVertex(x+w, y, texcoords.bottom, texcoords.right), 383 ScreenRectVertex(x + w, y, texcoords.bottom, texcoords.right),
370 ScreenRectVertex(x, y+h, texcoords.top, texcoords.left), 384 ScreenRectVertex(x, y + h, texcoords.top, texcoords.left),
371 ScreenRectVertex(x+w, y+h, texcoords.top, texcoords.right), 385 ScreenRectVertex(x + w, y + h, texcoords.top, texcoords.right),
372 }}; 386 }};
373 387
374 state.texture_units[0].texture_2d = screen_info.display_texture; 388 state.texture_units[0].texture_2d = screen_info.display_texture;
@@ -391,18 +405,20 @@ void RendererOpenGL::DrawScreens() {
391 glClear(GL_COLOR_BUFFER_BIT); 405 glClear(GL_COLOR_BUFFER_BIT);
392 406
393 // Set projection matrix 407 // Set projection matrix
394 std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width, 408 std::array<GLfloat, 3 * 2> ortho_matrix =
395 (float)layout.height); 409 MakeOrthographicMatrix((float)layout.width, (float)layout.height);
396 glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data()); 410 glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data());
397 411
398 // Bind texture in Texture Unit 0 412 // Bind texture in Texture Unit 0
399 glActiveTexture(GL_TEXTURE0); 413 glActiveTexture(GL_TEXTURE0);
400 glUniform1i(uniform_color_texture, 0); 414 glUniform1i(uniform_color_texture, 0);
401 415
402 DrawSingleScreenRotated(screen_infos[0], (float)layout.top_screen.left, (float)layout.top_screen.top, 416 DrawSingleScreenRotated(screen_infos[0], (float)layout.top_screen.left,
403 (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight()); 417 (float)layout.top_screen.top, (float)layout.top_screen.GetWidth(),
404 DrawSingleScreenRotated(screen_infos[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top, 418 (float)layout.top_screen.GetHeight());
405 (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight()); 419 DrawSingleScreenRotated(screen_infos[1], (float)layout.bottom_screen.left,
420 (float)layout.bottom_screen.top, (float)layout.bottom_screen.GetWidth(),
421 (float)layout.bottom_screen.GetHeight());
406 422
407 m_current_frame++; 423 m_current_frame++;
408} 424}
@@ -420,14 +436,16 @@ void RendererOpenGL::SetWindow(EmuWindow* window) {
420} 436}
421 437
422static const char* GetSource(GLenum source) { 438static const char* GetSource(GLenum source) {
423#define RET(s) case GL_DEBUG_SOURCE_##s: return #s 439#define RET(s) \
440 case GL_DEBUG_SOURCE_##s: \
441 return #s
424 switch (source) { 442 switch (source) {
425 RET(API); 443 RET(API);
426 RET(WINDOW_SYSTEM); 444 RET(WINDOW_SYSTEM);
427 RET(SHADER_COMPILER); 445 RET(SHADER_COMPILER);
428 RET(THIRD_PARTY); 446 RET(THIRD_PARTY);
429 RET(APPLICATION); 447 RET(APPLICATION);
430 RET(OTHER); 448 RET(OTHER);
431 default: 449 default:
432 UNREACHABLE(); 450 UNREACHABLE();
433 } 451 }
@@ -435,23 +453,25 @@ static const char* GetSource(GLenum source) {
435} 453}
436 454
437static const char* GetType(GLenum type) { 455static const char* GetType(GLenum type) {
438#define RET(t) case GL_DEBUG_TYPE_##t: return #t 456#define RET(t) \
457 case GL_DEBUG_TYPE_##t: \
458 return #t
439 switch (type) { 459 switch (type) {
440 RET(ERROR); 460 RET(ERROR);
441 RET(DEPRECATED_BEHAVIOR); 461 RET(DEPRECATED_BEHAVIOR);
442 RET(UNDEFINED_BEHAVIOR); 462 RET(UNDEFINED_BEHAVIOR);
443 RET(PORTABILITY); 463 RET(PORTABILITY);
444 RET(PERFORMANCE); 464 RET(PERFORMANCE);
445 RET(OTHER); 465 RET(OTHER);
446 RET(MARKER); 466 RET(MARKER);
447 default: 467 default:
448 UNREACHABLE(); 468 UNREACHABLE();
449 } 469 }
450#undef RET 470#undef RET
451} 471}
452 472
453static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, 473static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity,
454 const GLchar* message, const void* user_param) { 474 GLsizei length, const GLchar* message, const void* user_param) {
455 Log::Level level; 475 Log::Level level;
456 switch (severity) { 476 switch (severity) {
457 case GL_DEBUG_SEVERITY_HIGH: 477 case GL_DEBUG_SEVERITY_HIGH:
@@ -465,8 +485,8 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
465 level = Log::Level::Debug; 485 level = Log::Level::Debug;
466 break; 486 break;
467 } 487 }
468 LOG_GENERIC(Log::Class::Render_OpenGL, level, "%s %s %d: %s", 488 LOG_GENERIC(Log::Class::Render_OpenGL, level, "%s %s %d: %s", GetSource(source), GetType(type),
469 GetSource(source), GetType(type), id, message); 489 id, message);
470} 490}
471 491
472/// Initialize the renderer 492/// Initialize the renderer
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 00e1044ab..faeb519ec 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -38,7 +38,6 @@ struct ScreenInfo {
38 38
39class RendererOpenGL : public RendererBase { 39class RendererOpenGL : public RendererBase {
40public: 40public:
41
42 RendererOpenGL(); 41 RendererOpenGL();
43 ~RendererOpenGL() override; 42 ~RendererOpenGL() override;
44 43
@@ -67,15 +66,14 @@ private:
67 66
68 // Loads framebuffer from emulated memory into the display information structure 67 // Loads framebuffer from emulated memory into the display information structure
69 void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, 68 void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
70 ScreenInfo& screen_info); 69 ScreenInfo& screen_info);
71 // Fills active OpenGL texture with the given RGB color. 70 // Fills active OpenGL texture with the given RGB color.
72 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, 71 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture);
73 const TextureInfo& texture);
74 72
75 EmuWindow* render_window; ///< Handle to render window 73 EmuWindow* render_window; ///< Handle to render window
76 74
77 int resolution_width; ///< Current resolution width 75 int resolution_width; ///< Current resolution width
78 int resolution_height; ///< Current resolution height 76 int resolution_height; ///< Current resolution height
79 77
80 OpenGLState state; 78 OpenGLState state;
81 79
@@ -83,7 +81,8 @@ private:
83 OGLVertexArray vertex_array; 81 OGLVertexArray vertex_array;
84 OGLBuffer vertex_buffer; 82 OGLBuffer vertex_buffer;
85 OGLShader shader; 83 OGLShader shader;
86 std::array<ScreenInfo, 2> screen_infos; ///< Display information for top and bottom screens respectively 84 std::array<ScreenInfo, 2>
85 screen_infos; ///< Display information for top and bottom screens respectively
87 // Shader uniform location indices 86 // Shader uniform location indices
88 GLuint uniform_modelview_matrix; 87 GLuint uniform_modelview_matrix;
89 GLuint uniform_color_texture; 88 GLuint uniform_color_texture;
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index f565e2c91..852c5a9a0 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -46,10 +46,8 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) {
46 46
47 const auto& output_register_map = g_state.regs.vs_output_attributes[index]; 47 const auto& output_register_map = g_state.regs.vs_output_attributes[index];
48 48
49 u32 semantics[4] = { 49 u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y,
50 output_register_map.map_x, output_register_map.map_y, 50 output_register_map.map_z, output_register_map.map_w};
51 output_register_map.map_z, output_register_map.map_w
52 };
53 51
54 for (unsigned comp = 0; comp < 4; ++comp) { 52 for (unsigned comp = 0; comp < 4; ++comp) {
55 float24* out = ((float24*)&ret) + semantics[comp]; 53 float24* out = ((float24*)&ret) + semantics[comp];
@@ -65,19 +63,20 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) {
65 index++; 63 index++;
66 } 64 }
67 65
68 // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation 66 // The hardware takes the absolute and saturates vertex colors like this, *before* doing
67 // interpolation
69 for (unsigned i = 0; i < 4; ++i) { 68 for (unsigned i = 0; i < 4; ++i) {
70 ret.color[i] = float24::FromFloat32( 69 ret.color[i] = float24::FromFloat32(std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
71 std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
72 } 70 }
73 71
74 LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " 72 LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
75 "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", 73 "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
76 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), 74 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(),
77 ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), 75 ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(),
78 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), 76 ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), ret.color.x.ToFloat32(),
79 ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), 77 ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
80 ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); 78 ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), ret.view.x.ToFloat32(),
79 ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
81 80
82 return ret; 81 return ret;
83} 82}
@@ -96,8 +95,9 @@ void ClearCache() {
96void ShaderSetup::Setup() { 95void ShaderSetup::Setup() {
97#ifdef ARCHITECTURE_x86_64 96#ifdef ARCHITECTURE_x86_64
98 if (VideoCore::g_shader_jit_enabled) { 97 if (VideoCore::g_shader_jit_enabled) {
99 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ 98 u64 cache_key =
100 Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data))); 99 (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
100 Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
101 101
102 auto iter = shader_map.find(cache_key); 102 auto iter = shader_map.find(cache_key);
103 if (iter != shader_map.end()) { 103 if (iter != shader_map.end()) {
@@ -127,7 +127,7 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num
127 const auto& attribute_register_map = config.input_register_map; 127 const auto& attribute_register_map = config.input_register_map;
128 128
129 for (unsigned i = 0; i < num_attributes; i++) 129 for (unsigned i = 0; i < num_attributes; i++)
130 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; 130 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
131 131
132 state.conditional_code[0] = false; 132 state.conditional_code[0] = false;
133 state.conditional_code[1] = false; 133 state.conditional_code[1] = false;
@@ -140,10 +140,11 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num
140#else 140#else
141 RunInterpreter(setup, state, config.main_offset); 141 RunInterpreter(setup, state, config.main_offset);
142#endif // ARCHITECTURE_x86_64 142#endif // ARCHITECTURE_x86_64
143
144} 143}
145 144
146DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { 145DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes,
146 const Regs::ShaderConfig& config,
147 const ShaderSetup& setup) {
147 UnitState<true> state; 148 UnitState<true> state;
148 149
149 state.debug.max_offset = 0; 150 state.debug.max_offset = 0;
@@ -155,7 +156,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_
155 boost::fill(state.registers.input, &dummy_register); 156 boost::fill(state.registers.input, &dummy_register);
156 157
157 for (unsigned i = 0; i < num_attributes; i++) 158 for (unsigned i = 0; i < num_attributes; i++)
158 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; 159 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
159 160
160 state.conditional_code[0] = false; 161 state.conditional_code[0] = false;
161 state.conditional_code[1] = false; 162 state.conditional_code[1] = false;
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index fee16df62..830d933a8 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -94,46 +94,46 @@ struct OutputRegisters {
94static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD"); 94static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD");
95 95
96// Helper structure used to keep track of data useful for inspection of shader emulation 96// Helper structure used to keep track of data useful for inspection of shader emulation
97template<bool full_debugging> 97template <bool full_debugging>
98struct DebugData; 98struct DebugData;
99 99
100template<> 100template <>
101struct DebugData<false> { 101struct DebugData<false> {
102 // TODO: Hide these behind and interface and move them to DebugData<true> 102 // TODO: Hide these behind and interface and move them to DebugData<true>
103 u32 max_offset; // maximum program counter ever reached 103 u32 max_offset; // maximum program counter ever reached
104 u32 max_opdesc_id; // maximum swizzle pattern index ever used 104 u32 max_opdesc_id; // maximum swizzle pattern index ever used
105}; 105};
106 106
107template<> 107template <>
108struct DebugData<true> { 108struct DebugData<true> {
109 // Records store the input and output operands of a particular instruction. 109 // Records store the input and output operands of a particular instruction.
110 struct Record { 110 struct Record {
111 enum Type { 111 enum Type {
112 // Floating point arithmetic operands 112 // Floating point arithmetic operands
113 SRC1 = 0x1, 113 SRC1 = 0x1,
114 SRC2 = 0x2, 114 SRC2 = 0x2,
115 SRC3 = 0x4, 115 SRC3 = 0x4,
116 116
117 // Initial and final output operand value 117 // Initial and final output operand value
118 DEST_IN = 0x8, 118 DEST_IN = 0x8,
119 DEST_OUT = 0x10, 119 DEST_OUT = 0x10,
120 120
121 // Current and next instruction offset (in words) 121 // Current and next instruction offset (in words)
122 CUR_INSTR = 0x20, 122 CUR_INSTR = 0x20,
123 NEXT_INSTR = 0x40, 123 NEXT_INSTR = 0x40,
124 124
125 // Output address register value 125 // Output address register value
126 ADDR_REG_OUT = 0x80, 126 ADDR_REG_OUT = 0x80,
127 127
128 // Result of a comparison instruction 128 // Result of a comparison instruction
129 CMP_RESULT = 0x100, 129 CMP_RESULT = 0x100,
130 130
131 // Input values for conditional flow control instructions 131 // Input values for conditional flow control instructions
132 COND_BOOL_IN = 0x200, 132 COND_BOOL_IN = 0x200,
133 COND_CMP_IN = 0x400, 133 COND_CMP_IN = 0x400,
134 134
135 // Input values for a loop 135 // Input values for a loop
136 LOOP_INT_IN = 0x800, 136 LOOP_INT_IN = 0x800,
137 }; 137 };
138 138
139 Math::Vec4<float24> src1; 139 Math::Vec4<float24> src1;
@@ -156,7 +156,7 @@ struct DebugData<true> {
156 unsigned mask = 0; 156 unsigned mask = 0;
157 }; 157 };
158 158
159 u32 max_offset; // maximum program counter ever reached 159 u32 max_offset; // maximum program counter ever reached
160 u32 max_opdesc_id; // maximum swizzle pattern index ever used 160 u32 max_opdesc_id; // maximum swizzle pattern index ever used
161 161
162 // List of records for each executed shader instruction 162 // List of records for each executed shader instruction
@@ -167,10 +167,10 @@ struct DebugData<true> {
167using DebugDataRecord = DebugData<true>::Record; 167using DebugDataRecord = DebugData<true>::Record;
168 168
169// Helper function to set a DebugData<true>::Record field based on the template enum parameter. 169// Helper function to set a DebugData<true>::Record field based on the template enum parameter.
170template<DebugDataRecord::Type type, typename ValueType> 170template <DebugDataRecord::Type type, typename ValueType>
171inline void SetField(DebugDataRecord& record, ValueType value); 171inline void SetField(DebugDataRecord& record, ValueType value);
172 172
173template<> 173template <>
174inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { 174inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) {
175 record.src1.x = value[0]; 175 record.src1.x = value[0];
176 record.src1.y = value[1]; 176 record.src1.y = value[1];
@@ -178,7 +178,7 @@ inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* va
178 record.src1.w = value[3]; 178 record.src1.w = value[3];
179} 179}
180 180
181template<> 181template <>
182inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { 182inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) {
183 record.src2.x = value[0]; 183 record.src2.x = value[0];
184 record.src2.y = value[1]; 184 record.src2.y = value[1];
@@ -186,7 +186,7 @@ inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* va
186 record.src2.w = value[3]; 186 record.src2.w = value[3];
187} 187}
188 188
189template<> 189template <>
190inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { 190inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) {
191 record.src3.x = value[0]; 191 record.src3.x = value[0];
192 record.src3.y = value[1]; 192 record.src3.y = value[1];
@@ -194,7 +194,7 @@ inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* va
194 record.src3.w = value[3]; 194 record.src3.w = value[3];
195} 195}
196 196
197template<> 197template <>
198inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { 198inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) {
199 record.dest_in.x = value[0]; 199 record.dest_in.x = value[0];
200 record.dest_in.y = value[1]; 200 record.dest_in.y = value[1];
@@ -202,7 +202,7 @@ inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24*
202 record.dest_in.w = value[3]; 202 record.dest_in.w = value[3];
203} 203}
204 204
205template<> 205template <>
206inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { 206inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) {
207 record.dest_out.x = value[0]; 207 record.dest_out.x = value[0];
208 record.dest_out.y = value[1]; 208 record.dest_out.y = value[1];
@@ -210,67 +210,66 @@ inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24
210 record.dest_out.w = value[3]; 210 record.dest_out.w = value[3];
211} 211}
212 212
213template<> 213template <>
214inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { 214inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) {
215 record.address_registers[0] = value[0]; 215 record.address_registers[0] = value[0];
216 record.address_registers[1] = value[1]; 216 record.address_registers[1] = value[1];
217} 217}
218 218
219template<> 219template <>
220inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { 220inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) {
221 record.conditional_code[0] = value[0]; 221 record.conditional_code[0] = value[0];
222 record.conditional_code[1] = value[1]; 222 record.conditional_code[1] = value[1];
223} 223}
224 224
225template<> 225template <>
226inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { 226inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) {
227 record.cond_bool = value; 227 record.cond_bool = value;
228} 228}
229 229
230template<> 230template <>
231inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { 231inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) {
232 record.cond_cmp[0] = value[0]; 232 record.cond_cmp[0] = value[0];
233 record.cond_cmp[1] = value[1]; 233 record.cond_cmp[1] = value[1];
234} 234}
235 235
236template<> 236template <>
237inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { 237inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) {
238 record.loop_int = value; 238 record.loop_int = value;
239} 239}
240 240
241template<> 241template <>
242inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { 242inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) {
243 record.instruction_offset = value; 243 record.instruction_offset = value;
244} 244}
245 245
246template<> 246template <>
247inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { 247inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) {
248 record.next_instruction = value; 248 record.next_instruction = value;
249} 249}
250 250
251// Helper function to set debug information on the current shader iteration. 251// Helper function to set debug information on the current shader iteration.
252template<DebugDataRecord::Type type, typename ValueType> 252template <DebugDataRecord::Type type, typename ValueType>
253inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { 253inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) {
254 // Debugging disabled => nothing to do 254 // Debugging disabled => nothing to do
255} 255}
256 256
257template<DebugDataRecord::Type type, typename ValueType> 257template <DebugDataRecord::Type type, typename ValueType>
258inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { 258inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) {
259 if (offset >= debug_data.records.size()) 259 if (offset >= debug_data.records.size())
260 debug_data.records.resize(offset + 1); 260 debug_data.records.resize(offset + 1);
261 261
262 SetField<type, ValueType>(debug_data.records[offset], value); 262 SetField<type, ValueType>(debug_data.records[offset], value);
263 debug_data.records[offset].mask |= type; 263 debug_data.records[offset].mask |= type;
264} 264}
265 265
266
267/** 266/**
268 * This structure contains the state information that needs to be unique for a shader unit. The 3DS 267 * This structure contains the state information that needs to be unique for a shader unit. The 3DS
269 * has four shader units that process shaders in parallel. At the present, Citra only implements a 268 * has four shader units that process shaders in parallel. At the present, Citra only implements a
270 * single shader unit that processes all shaders serially. Putting the state information in a struct 269 * single shader unit that processes all shaders serially. Putting the state information in a struct
271 * here will make it easier for us to parallelize the shader processing later. 270 * here will make it easier for us to parallelize the shader processing later.
272 */ 271 */
273template<bool Debug> 272template <bool Debug>
274struct UnitState { 273struct UnitState {
275 struct Registers { 274 struct Registers {
276 // The registers are accessed by the shader JIT using SSE instructions, and are therefore 275 // The registers are accessed by the shader JIT using SSE instructions, and are therefore
@@ -293,10 +292,12 @@ struct UnitState {
293 static size_t InputOffset(const SourceRegister& reg) { 292 static size_t InputOffset(const SourceRegister& reg) {
294 switch (reg.GetRegisterType()) { 293 switch (reg.GetRegisterType()) {
295 case RegisterType::Input: 294 case RegisterType::Input:
296 return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 295 return offsetof(UnitState, registers.input) +
296 reg.GetIndex() * sizeof(Math::Vec4<float24>);
297 297
298 case RegisterType::Temporary: 298 case RegisterType::Temporary:
299 return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 299 return offsetof(UnitState, registers.temporary) +
300 reg.GetIndex() * sizeof(Math::Vec4<float24>);
300 301
301 default: 302 default:
302 UNREACHABLE(); 303 UNREACHABLE();
@@ -307,10 +308,12 @@ struct UnitState {
307 static size_t OutputOffset(const DestRegister& reg) { 308 static size_t OutputOffset(const DestRegister& reg) {
308 switch (reg.GetRegisterType()) { 309 switch (reg.GetRegisterType()) {
309 case RegisterType::Output: 310 case RegisterType::Output:
310 return offsetof(UnitState, output_registers.value) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 311 return offsetof(UnitState, output_registers.value) +
312 reg.GetIndex() * sizeof(Math::Vec4<float24>);
311 313
312 case RegisterType::Temporary: 314 case RegisterType::Temporary:
313 return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 315 return offsetof(UnitState, registers.temporary) +
316 reg.GetIndex() * sizeof(Math::Vec4<float24>);
314 317
315 default: 318 default:
316 UNREACHABLE(); 319 UNREACHABLE();
@@ -336,13 +339,13 @@ struct ShaderSetup {
336 static size_t UniformOffset(RegisterType type, unsigned index) { 339 static size_t UniformOffset(RegisterType type, unsigned index) {
337 switch (type) { 340 switch (type) {
338 case RegisterType::FloatUniform: 341 case RegisterType::FloatUniform:
339 return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>); 342 return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>);
340 343
341 case RegisterType::BoolUniform: 344 case RegisterType::BoolUniform:
342 return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool); 345 return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool);
343 346
344 case RegisterType::IntUniform: 347 case RegisterType::IntUniform:
345 return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>); 348 return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>);
346 349
347 default: 350 default:
348 UNREACHABLE(); 351 UNREACHABLE();
@@ -354,7 +357,8 @@ struct ShaderSetup {
354 std::array<u32, 1024> swizzle_data; 357 std::array<u32, 1024> swizzle_data;
355 358
356 /** 359 /**
357 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per 360 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once
361 * per
358 * vertex, which would happen within the `Run` function). 362 * vertex, which would happen within the `Run` function).
359 */ 363 */
360 void Setup(); 364 void Setup();
@@ -375,8 +379,8 @@ struct ShaderSetup {
375 * @param setup Setup object for the shader pipeline 379 * @param setup Setup object for the shader pipeline
376 * @return Debug information for this shader with regards to the given vertex 380 * @return Debug information for this shader with regards to the given vertex
377 */ 381 */
378 DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); 382 DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
379 383 const Regs::ShaderConfig& config, const ShaderSetup& setup);
380}; 384};
381 385
382} // namespace Shader 386} // namespace Shader
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index f6c86a759..681ff9728 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -40,7 +40,7 @@ struct CallStackElement {
40 u32 loop_address; // The address where we'll return to after each loop iteration 40 u32 loop_address; // The address where we'll return to after each loop iteration
41}; 41};
42 42
43template<bool Debug> 43template <bool Debug>
44void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) { 44void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) {
45 // TODO: Is there a maximal size for this? 45 // TODO: Is there a maximal size for this?
46 boost::container::static_vector<CallStackElement, 16> call_stack; 46 boost::container::static_vector<CallStackElement, 16> call_stack;
@@ -74,14 +74,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
74 } 74 }
75 } 75 }
76 76
77 const Instruction instr = { program_code[program_counter] }; 77 const Instruction instr = {program_code[program_counter]};
78 const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; 78 const SwizzlePattern swizzle = {swizzle_data[instr.common.operand_desc_id]};
79 79
80 auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions, 80 auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset,
81 u32 return_offset, u8 repeat_count, u8 loop_increment) { 81 u32 num_instructions, u32 return_offset,
82 program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset 82 u8 repeat_count, u8 loop_increment) {
83 program_counter =
84 offset -
85 1; // -1 to make sure when incrementing the PC we end up at the correct offset
83 ASSERT(call_stack.size() < call_stack.capacity()); 86 ASSERT(call_stack.size() < call_stack.capacity());
84 call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); 87 call_stack.push_back(
88 {offset + num_instructions, return_offset, repeat_count, loop_increment, offset});
85 }; 89 };
86 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter); 90 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter);
87 if (iteration > 0) 91 if (iteration > 0)
@@ -106,24 +110,26 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
106 }; 110 };
107 111
108 switch (instr.opcode.Value().GetInfo().type) { 112 switch (instr.opcode.Value().GetInfo().type) {
109 case OpCode::Type::Arithmetic: 113 case OpCode::Type::Arithmetic: {
110 { 114 const bool is_inverted =
111 const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); 115 (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
112 116
113 const int address_offset = (instr.common.address_register_index == 0) 117 const int address_offset =
114 ? 0 : state.address_registers[instr.common.address_register_index - 1]; 118 (instr.common.address_register_index == 0)
119 ? 0
120 : state.address_registers[instr.common.address_register_index - 1];
115 121
116 const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset)); 122 const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) +
117 const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset)); 123 (!is_inverted * address_offset));
124 const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) +
125 (is_inverted * address_offset));
118 126
119 const bool negate_src1 = ((bool)swizzle.negate_src1 != false); 127 const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
120 const bool negate_src2 = ((bool)swizzle.negate_src2 != false); 128 const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
121 129
122 float24 src1[4] = { 130 float24 src1[4] = {
123 src1_[(int)swizzle.GetSelectorSrc1(0)], 131 src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)],
124 src1_[(int)swizzle.GetSelectorSrc1(1)], 132 src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)],
125 src1_[(int)swizzle.GetSelectorSrc1(2)],
126 src1_[(int)swizzle.GetSelectorSrc1(3)],
127 }; 133 };
128 if (negate_src1) { 134 if (negate_src1) {
129 src1[0] = src1[0] * float24::FromFloat32(-1); 135 src1[0] = src1[0] * float24::FromFloat32(-1);
@@ -132,10 +138,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
132 src1[3] = src1[3] * float24::FromFloat32(-1); 138 src1[3] = src1[3] * float24::FromFloat32(-1);
133 } 139 }
134 float24 src2[4] = { 140 float24 src2[4] = {
135 src2_[(int)swizzle.GetSelectorSrc2(0)], 141 src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)],
136 src2_[(int)swizzle.GetSelectorSrc2(1)], 142 src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)],
137 src2_[(int)swizzle.GetSelectorSrc2(2)],
138 src2_[(int)swizzle.GetSelectorSrc2(3)],
139 }; 143 };
140 if (negate_src2) { 144 if (negate_src2) {
141 src2[0] = src2[0] * float24::FromFloat32(-1); 145 src2[0] = src2[0] * float24::FromFloat32(-1);
@@ -144,15 +148,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
144 src2[3] = src2[3] * float24::FromFloat32(-1); 148 src2[3] = src2[3] * float24::FromFloat32(-1);
145 } 149 }
146 150
147 float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] 151 float24* dest =
148 : (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] 152 (instr.common.dest.Value() < 0x10)
149 : dummy_vec4_float24; 153 ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0]
154 : (instr.common.dest.Value() < 0x20)
155 ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
156 : dummy_vec4_float24;
150 157
151 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); 158 state.debug.max_opdesc_id =
159 std::max<u32>(state.debug.max_opdesc_id, 1 + instr.common.operand_desc_id);
152 160
153 switch (instr.opcode.Value().EffectiveOpCode()) { 161 switch (instr.opcode.Value().EffectiveOpCode()) {
154 case OpCode::Id::ADD: 162 case OpCode::Id::ADD: {
155 {
156 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 163 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
157 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); 164 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
158 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 165 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@@ -166,8 +173,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
166 break; 173 break;
167 } 174 }
168 175
169 case OpCode::Id::MUL: 176 case OpCode::Id::MUL: {
170 {
171 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 177 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
172 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); 178 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
173 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 179 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@@ -228,8 +234,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
228 case OpCode::Id::DP3: 234 case OpCode::Id::DP3:
229 case OpCode::Id::DP4: 235 case OpCode::Id::DP4:
230 case OpCode::Id::DPH: 236 case OpCode::Id::DPH:
231 case OpCode::Id::DPHI: 237 case OpCode::Id::DPHI: {
232 {
233 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 238 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
234 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); 239 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
235 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 240 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@@ -239,7 +244,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
239 src1[3] = float24::FromFloat32(1.0f); 244 src1[3] = float24::FromFloat32(1.0f);
240 245
241 int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4; 246 int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4;
242 float24 dot = std::inner_product(src1, src1 + num_components, src2, float24::FromFloat32(0.f)); 247 float24 dot = std::inner_product(src1, src1 + num_components, src2,
248 float24::FromFloat32(0.f));
243 249
244 for (int i = 0; i < 4; ++i) { 250 for (int i = 0; i < 4; ++i) {
245 if (!swizzle.DestComponentEnabled(i)) 251 if (!swizzle.DestComponentEnabled(i))
@@ -252,8 +258,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
252 } 258 }
253 259
254 // Reciprocal 260 // Reciprocal
255 case OpCode::Id::RCP: 261 case OpCode::Id::RCP: {
256 {
257 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 262 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
258 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 263 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
259 float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32()); 264 float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32());
@@ -268,8 +273,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
268 } 273 }
269 274
270 // Reciprocal Square Root 275 // Reciprocal Square Root
271 case OpCode::Id::RSQ: 276 case OpCode::Id::RSQ: {
272 {
273 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 277 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
274 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 278 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
275 float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32())); 279 float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32()));
@@ -283,8 +287,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
283 break; 287 break;
284 } 288 }
285 289
286 case OpCode::Id::MOVA: 290 case OpCode::Id::MOVA: {
287 {
288 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 291 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
289 for (int i = 0; i < 2; ++i) { 292 for (int i = 0; i < 2; ++i) {
290 if (!swizzle.DestComponentEnabled(i)) 293 if (!swizzle.DestComponentEnabled(i))
@@ -293,12 +296,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
293 // TODO: Figure out how the rounding is done on hardware 296 // TODO: Figure out how the rounding is done on hardware
294 state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); 297 state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32());
295 } 298 }
296 Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, state.address_registers); 299 Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration,
300 state.address_registers);
297 break; 301 break;
298 } 302 }
299 303
300 case OpCode::Id::MOV: 304 case OpCode::Id::MOV: {
301 {
302 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 305 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
303 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 306 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
304 for (int i = 0; i < 4; ++i) { 307 for (int i = 0; i < 4; ++i) {
@@ -320,7 +323,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
320 if (!swizzle.DestComponentEnabled(i)) 323 if (!swizzle.DestComponentEnabled(i))
321 continue; 324 continue;
322 325
323 dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); 326 dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f)
327 : float24::FromFloat32(0.0f);
324 } 328 }
325 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); 329 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
326 break; 330 break;
@@ -334,7 +338,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
334 if (!swizzle.DestComponentEnabled(i)) 338 if (!swizzle.DestComponentEnabled(i))
335 continue; 339 continue;
336 340
337 dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); 341 dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f)
342 : float24::FromFloat32(0.0f);
338 } 343 }
339 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); 344 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
340 break; 345 break;
@@ -349,40 +354,39 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
349 auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); 354 auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value();
350 355
351 switch (op) { 356 switch (op) {
352 case Instruction::Common::CompareOpType::Equal: 357 case Instruction::Common::CompareOpType::Equal:
353 state.conditional_code[i] = (src1[i] == src2[i]); 358 state.conditional_code[i] = (src1[i] == src2[i]);
354 break; 359 break;
355 360
356 case Instruction::Common::CompareOpType::NotEqual: 361 case Instruction::Common::CompareOpType::NotEqual:
357 state.conditional_code[i] = (src1[i] != src2[i]); 362 state.conditional_code[i] = (src1[i] != src2[i]);
358 break; 363 break;
359 364
360 case Instruction::Common::CompareOpType::LessThan: 365 case Instruction::Common::CompareOpType::LessThan:
361 state.conditional_code[i] = (src1[i] < src2[i]); 366 state.conditional_code[i] = (src1[i] < src2[i]);
362 break; 367 break;
363 368
364 case Instruction::Common::CompareOpType::LessEqual: 369 case Instruction::Common::CompareOpType::LessEqual:
365 state.conditional_code[i] = (src1[i] <= src2[i]); 370 state.conditional_code[i] = (src1[i] <= src2[i]);
366 break; 371 break;
367 372
368 case Instruction::Common::CompareOpType::GreaterThan: 373 case Instruction::Common::CompareOpType::GreaterThan:
369 state.conditional_code[i] = (src1[i] > src2[i]); 374 state.conditional_code[i] = (src1[i] > src2[i]);
370 break; 375 break;
371 376
372 case Instruction::Common::CompareOpType::GreaterEqual: 377 case Instruction::Common::CompareOpType::GreaterEqual:
373 state.conditional_code[i] = (src1[i] >= src2[i]); 378 state.conditional_code[i] = (src1[i] >= src2[i]);
374 break; 379 break;
375 380
376 default: 381 default:
377 LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op)); 382 LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op));
378 break; 383 break;
379 } 384 }
380 } 385 }
381 Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); 386 Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code);
382 break; 387 break;
383 388
384 case OpCode::Id::EX2: 389 case OpCode::Id::EX2: {
385 {
386 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 390 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
387 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 391 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
388 392
@@ -399,8 +403,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
399 break; 403 break;
400 } 404 }
401 405
402 case OpCode::Id::LG2: 406 case OpCode::Id::LG2: {
403 {
404 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 407 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
405 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 408 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
406 409
@@ -419,7 +422,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
419 422
420 default: 423 default:
421 LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", 424 LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
422 (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); 425 (int)instr.opcode.Value().EffectiveOpCode(),
426 instr.opcode.Value().GetInfo().name, instr.hex);
423 DEBUG_ASSERT(false); 427 DEBUG_ASSERT(false);
424 break; 428 break;
425 } 429 }
@@ -427,30 +431,32 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
427 break; 431 break;
428 } 432 }
429 433
430 case OpCode::Type::MultiplyAdd: 434 case OpCode::Type::MultiplyAdd: {
431 {
432 if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) || 435 if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||
433 (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) { 436 (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) {
434 const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(&swizzle_data[instr.mad.operand_desc_id]); 437 const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(
438 &swizzle_data[instr.mad.operand_desc_id]);
435 439
436 bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI); 440 bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI);
437 441
438 const int address_offset = (instr.mad.address_register_index == 0) 442 const int address_offset =
439 ? 0 : state.address_registers[instr.mad.address_register_index - 1]; 443 (instr.mad.address_register_index == 0)
444 ? 0
445 : state.address_registers[instr.mad.address_register_index - 1];
440 446
441 const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); 447 const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted));
442 const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + (!is_inverted * address_offset)); 448 const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) +
443 const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + ( is_inverted * address_offset)); 449 (!is_inverted * address_offset));
450 const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) +
451 (is_inverted * address_offset));
444 452
445 const bool negate_src1 = ((bool)swizzle.negate_src1 != false); 453 const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
446 const bool negate_src2 = ((bool)swizzle.negate_src2 != false); 454 const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
447 const bool negate_src3 = ((bool)swizzle.negate_src3 != false); 455 const bool negate_src3 = ((bool)swizzle.negate_src3 != false);
448 456
449 float24 src1[4] = { 457 float24 src1[4] = {
450 src1_[(int)swizzle.GetSelectorSrc1(0)], 458 src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)],
451 src1_[(int)swizzle.GetSelectorSrc1(1)], 459 src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)],
452 src1_[(int)swizzle.GetSelectorSrc1(2)],
453 src1_[(int)swizzle.GetSelectorSrc1(3)],
454 }; 460 };
455 if (negate_src1) { 461 if (negate_src1) {
456 src1[0] = src1[0] * float24::FromFloat32(-1); 462 src1[0] = src1[0] * float24::FromFloat32(-1);
@@ -459,10 +465,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
459 src1[3] = src1[3] * float24::FromFloat32(-1); 465 src1[3] = src1[3] * float24::FromFloat32(-1);
460 } 466 }
461 float24 src2[4] = { 467 float24 src2[4] = {
462 src2_[(int)swizzle.GetSelectorSrc2(0)], 468 src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)],
463 src2_[(int)swizzle.GetSelectorSrc2(1)], 469 src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)],
464 src2_[(int)swizzle.GetSelectorSrc2(2)],
465 src2_[(int)swizzle.GetSelectorSrc2(3)],
466 }; 470 };
467 if (negate_src2) { 471 if (negate_src2) {
468 src2[0] = src2[0] * float24::FromFloat32(-1); 472 src2[0] = src2[0] * float24::FromFloat32(-1);
@@ -471,10 +475,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
471 src2[3] = src2[3] * float24::FromFloat32(-1); 475 src2[3] = src2[3] * float24::FromFloat32(-1);
472 } 476 }
473 float24 src3[4] = { 477 float24 src3[4] = {
474 src3_[(int)swizzle.GetSelectorSrc3(0)], 478 src3_[(int)swizzle.GetSelectorSrc3(0)], src3_[(int)swizzle.GetSelectorSrc3(1)],
475 src3_[(int)swizzle.GetSelectorSrc3(1)], 479 src3_[(int)swizzle.GetSelectorSrc3(2)], src3_[(int)swizzle.GetSelectorSrc3(3)],
476 src3_[(int)swizzle.GetSelectorSrc3(2)],
477 src3_[(int)swizzle.GetSelectorSrc3(3)],
478 }; 480 };
479 if (negate_src3) { 481 if (negate_src3) {
480 src3[0] = src3[0] * float24::FromFloat32(-1); 482 src3[0] = src3[0] * float24::FromFloat32(-1);
@@ -483,9 +485,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
483 src3[3] = src3[3] * float24::FromFloat32(-1); 485 src3[3] = src3[3] * float24::FromFloat32(-1);
484 } 486 }
485 487
486 float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] 488 float24* dest =
487 : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] 489 (instr.mad.dest.Value() < 0x10)
488 : dummy_vec4_float24; 490 ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0]
491 : (instr.mad.dest.Value() < 0x20)
492 ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
493 : dummy_vec4_float24;
489 494
490 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 495 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
491 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); 496 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
@@ -500,16 +505,17 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
500 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); 505 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
501 } else { 506 } else {
502 LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", 507 LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x",
503 (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); 508 (int)instr.opcode.Value().EffectiveOpCode(),
509 instr.opcode.Value().GetInfo().name, instr.hex);
504 } 510 }
505 break; 511 break;
506 } 512 }
507 513
508 default: 514 default: {
509 { 515 static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy,
510 static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { 516 Instruction::FlowControlType flow_control) {
511 bool results[2] = { refx == state.conditional_code[0], 517 bool results[2] = {refx == state.conditional_code[0],
512 refy == state.conditional_code[1] }; 518 refy == state.conditional_code[1]};
513 519
514 switch (flow_control.op) { 520 switch (flow_control.op) {
515 case flow_control.Or: 521 case flow_control.Or:
@@ -533,44 +539,45 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
533 break; 539 break;
534 540
535 case OpCode::Id::JMPC: 541 case OpCode::Id::JMPC:
536 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 542 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
537 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 543 state.conditional_code);
544 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
545 instr.flow_control)) {
538 program_counter = instr.flow_control.dest_offset - 1; 546 program_counter = instr.flow_control.dest_offset - 1;
539 } 547 }
540 break; 548 break;
541 549
542 case OpCode::Id::JMPU: 550 case OpCode::Id::JMPU:
543 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 551 Record<DebugDataRecord::COND_BOOL_IN>(
552 state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
544 553
545 if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { 554 if (uniforms.b[instr.flow_control.bool_uniform_id] ==
555 !(instr.flow_control.num_instructions & 1)) {
546 program_counter = instr.flow_control.dest_offset - 1; 556 program_counter = instr.flow_control.dest_offset - 1;
547 } 557 }
548 break; 558 break;
549 559
550 case OpCode::Id::CALL: 560 case OpCode::Id::CALL:
551 call(state, 561 call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
552 instr.flow_control.dest_offset,
553 instr.flow_control.num_instructions,
554 program_counter + 1, 0, 0); 562 program_counter + 1, 0, 0);
555 break; 563 break;
556 564
557 case OpCode::Id::CALLU: 565 case OpCode::Id::CALLU:
558 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 566 Record<DebugDataRecord::COND_BOOL_IN>(
567 state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
559 if (uniforms.b[instr.flow_control.bool_uniform_id]) { 568 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
560 call(state, 569 call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
561 instr.flow_control.dest_offset, 570 program_counter + 1, 0, 0);
562 instr.flow_control.num_instructions,
563 program_counter + 1, 0, 0);
564 } 571 }
565 break; 572 break;
566 573
567 case OpCode::Id::CALLC: 574 case OpCode::Id::CALLC:
568 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 575 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
569 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 576 state.conditional_code);
570 call(state, 577 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
571 instr.flow_control.dest_offset, 578 instr.flow_control)) {
572 instr.flow_control.num_instructions, 579 call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
573 program_counter + 1, 0, 0); 580 program_counter + 1, 0, 0);
574 } 581 }
575 break; 582 break;
576 583
@@ -578,43 +585,42 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
578 break; 585 break;
579 586
580 case OpCode::Id::IFU: 587 case OpCode::Id::IFU:
581 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 588 Record<DebugDataRecord::COND_BOOL_IN>(
589 state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
582 if (uniforms.b[instr.flow_control.bool_uniform_id]) { 590 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
583 call(state, 591 call(state, program_counter + 1,
584 program_counter + 1,
585 instr.flow_control.dest_offset - program_counter - 1, 592 instr.flow_control.dest_offset - program_counter - 1,
586 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); 593 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
594 0);
587 } else { 595 } else {
588 call(state, 596 call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
589 instr.flow_control.dest_offset, 597 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
590 instr.flow_control.num_instructions, 598 0);
591 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
592 } 599 }
593 600
594 break; 601 break;
595 602
596 case OpCode::Id::IFC: 603 case OpCode::Id::IFC: {
597 {
598 // TODO: Do we need to consider swizzlers here? 604 // TODO: Do we need to consider swizzlers here?
599 605
600 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 606 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
601 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 607 state.conditional_code);
602 call(state, 608 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
603 program_counter + 1, 609 instr.flow_control)) {
610 call(state, program_counter + 1,
604 instr.flow_control.dest_offset - program_counter - 1, 611 instr.flow_control.dest_offset - program_counter - 1,
605 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); 612 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
613 0);
606 } else { 614 } else {
607 call(state, 615 call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
608 instr.flow_control.dest_offset, 616 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
609 instr.flow_control.num_instructions, 617 0);
610 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
611 } 618 }
612 619
613 break; 620 break;
614 } 621 }
615 622
616 case OpCode::Id::LOOP: 623 case OpCode::Id::LOOP: {
617 {
618 Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x, 624 Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x,
619 uniforms.i[instr.flow_control.int_uniform_id].y, 625 uniforms.i[instr.flow_control.int_uniform_id].y,
620 uniforms.i[instr.flow_control.int_uniform_id].z, 626 uniforms.i[instr.flow_control.int_uniform_id].z,
@@ -622,18 +628,16 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
622 state.address_registers[2] = loop_param.y; 628 state.address_registers[2] = loop_param.y;
623 629
624 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); 630 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param);
625 call(state, 631 call(state, program_counter + 1,
626 program_counter + 1,
627 instr.flow_control.dest_offset - program_counter + 1, 632 instr.flow_control.dest_offset - program_counter + 1,
628 instr.flow_control.dest_offset + 1, 633 instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z);
629 loop_param.x,
630 loop_param.z);
631 break; 634 break;
632 } 635 }
633 636
634 default: 637 default:
635 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", 638 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
636 (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); 639 (int)instr.opcode.Value().EffectiveOpCode(),
640 instr.opcode.Value().GetInfo().name, instr.hex);
637 break; 641 break;
638 } 642 }
639 643
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h
index bb3ce1c6e..48ede0a2e 100644
--- a/src/video_core/shader/shader_interpreter.h
+++ b/src/video_core/shader/shader_interpreter.h
@@ -8,9 +8,10 @@ namespace Pica {
8 8
9namespace Shader { 9namespace Shader {
10 10
11template <bool Debug> struct UnitState; 11template <bool Debug>
12struct UnitState;
12 13
13template<bool Debug> 14template <bool Debug>
14void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset); 15void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset);
15 16
16} // namespace 17} // namespace
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 43e7e6b4c..04e04ba1a 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -31,70 +31,70 @@ using namespace Gen;
31typedef void (JitShader::*JitFunction)(Instruction instr); 31typedef void (JitShader::*JitFunction)(Instruction instr);
32 32
33const JitFunction instr_table[64] = { 33const JitFunction instr_table[64] = {
34 &JitShader::Compile_ADD, // add 34 &JitShader::Compile_ADD, // add
35 &JitShader::Compile_DP3, // dp3 35 &JitShader::Compile_DP3, // dp3
36 &JitShader::Compile_DP4, // dp4 36 &JitShader::Compile_DP4, // dp4
37 &JitShader::Compile_DPH, // dph 37 &JitShader::Compile_DPH, // dph
38 nullptr, // unknown 38 nullptr, // unknown
39 &JitShader::Compile_EX2, // ex2 39 &JitShader::Compile_EX2, // ex2
40 &JitShader::Compile_LG2, // lg2 40 &JitShader::Compile_LG2, // lg2
41 nullptr, // unknown 41 nullptr, // unknown
42 &JitShader::Compile_MUL, // mul 42 &JitShader::Compile_MUL, // mul
43 &JitShader::Compile_SGE, // sge 43 &JitShader::Compile_SGE, // sge
44 &JitShader::Compile_SLT, // slt 44 &JitShader::Compile_SLT, // slt
45 &JitShader::Compile_FLR, // flr 45 &JitShader::Compile_FLR, // flr
46 &JitShader::Compile_MAX, // max 46 &JitShader::Compile_MAX, // max
47 &JitShader::Compile_MIN, // min 47 &JitShader::Compile_MIN, // min
48 &JitShader::Compile_RCP, // rcp 48 &JitShader::Compile_RCP, // rcp
49 &JitShader::Compile_RSQ, // rsq 49 &JitShader::Compile_RSQ, // rsq
50 nullptr, // unknown 50 nullptr, // unknown
51 nullptr, // unknown 51 nullptr, // unknown
52 &JitShader::Compile_MOVA, // mova 52 &JitShader::Compile_MOVA, // mova
53 &JitShader::Compile_MOV, // mov 53 &JitShader::Compile_MOV, // mov
54 nullptr, // unknown 54 nullptr, // unknown
55 nullptr, // unknown 55 nullptr, // unknown
56 nullptr, // unknown 56 nullptr, // unknown
57 nullptr, // unknown 57 nullptr, // unknown
58 &JitShader::Compile_DPH, // dphi 58 &JitShader::Compile_DPH, // dphi
59 nullptr, // unknown 59 nullptr, // unknown
60 &JitShader::Compile_SGE, // sgei 60 &JitShader::Compile_SGE, // sgei
61 &JitShader::Compile_SLT, // slti 61 &JitShader::Compile_SLT, // slti
62 nullptr, // unknown 62 nullptr, // unknown
63 nullptr, // unknown 63 nullptr, // unknown
64 nullptr, // unknown 64 nullptr, // unknown
65 nullptr, // unknown 65 nullptr, // unknown
66 nullptr, // unknown 66 nullptr, // unknown
67 &JitShader::Compile_NOP, // nop 67 &JitShader::Compile_NOP, // nop
68 &JitShader::Compile_END, // end 68 &JitShader::Compile_END, // end
69 nullptr, // break 69 nullptr, // break
70 &JitShader::Compile_CALL, // call 70 &JitShader::Compile_CALL, // call
71 &JitShader::Compile_CALLC, // callc 71 &JitShader::Compile_CALLC, // callc
72 &JitShader::Compile_CALLU, // callu 72 &JitShader::Compile_CALLU, // callu
73 &JitShader::Compile_IF, // ifu 73 &JitShader::Compile_IF, // ifu
74 &JitShader::Compile_IF, // ifc 74 &JitShader::Compile_IF, // ifc
75 &JitShader::Compile_LOOP, // loop 75 &JitShader::Compile_LOOP, // loop
76 nullptr, // emit 76 nullptr, // emit
77 nullptr, // sete 77 nullptr, // sete
78 &JitShader::Compile_JMP, // jmpc 78 &JitShader::Compile_JMP, // jmpc
79 &JitShader::Compile_JMP, // jmpu 79 &JitShader::Compile_JMP, // jmpu
80 &JitShader::Compile_CMP, // cmp 80 &JitShader::Compile_CMP, // cmp
81 &JitShader::Compile_CMP, // cmp 81 &JitShader::Compile_CMP, // cmp
82 &JitShader::Compile_MAD, // madi 82 &JitShader::Compile_MAD, // madi
83 &JitShader::Compile_MAD, // madi 83 &JitShader::Compile_MAD, // madi
84 &JitShader::Compile_MAD, // madi 84 &JitShader::Compile_MAD, // madi
85 &JitShader::Compile_MAD, // madi 85 &JitShader::Compile_MAD, // madi
86 &JitShader::Compile_MAD, // madi 86 &JitShader::Compile_MAD, // madi
87 &JitShader::Compile_MAD, // madi 87 &JitShader::Compile_MAD, // madi
88 &JitShader::Compile_MAD, // madi 88 &JitShader::Compile_MAD, // madi
89 &JitShader::Compile_MAD, // madi 89 &JitShader::Compile_MAD, // madi
90 &JitShader::Compile_MAD, // mad 90 &JitShader::Compile_MAD, // mad
91 &JitShader::Compile_MAD, // mad 91 &JitShader::Compile_MAD, // mad
92 &JitShader::Compile_MAD, // mad 92 &JitShader::Compile_MAD, // mad
93 &JitShader::Compile_MAD, // mad 93 &JitShader::Compile_MAD, // mad
94 &JitShader::Compile_MAD, // mad 94 &JitShader::Compile_MAD, // mad
95 &JitShader::Compile_MAD, // mad 95 &JitShader::Compile_MAD, // mad
96 &JitShader::Compile_MAD, // mad 96 &JitShader::Compile_MAD, // mad
97 &JitShader::Compile_MAD, // mad 97 &JitShader::Compile_MAD, // mad
98}; 98};
99 99
100// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can 100// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
@@ -136,9 +136,9 @@ static const X64Reg NEGBIT = XMM15;
136// State registers that must not be modified by external functions calls 136// State registers that must not be modified by external functions calls
137// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed 137// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
138static const BitSet32 persistent_regs = { 138static const BitSet32 persistent_regs = {
139 SETUP, STATE, // Pointers to register blocks 139 SETUP, STATE, // Pointers to register blocks
140 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers 140 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
141 ONE+16, NEGBIT+16, // Constants 141 ONE + 16, NEGBIT + 16, // Constants
142}; 142};
143 143
144/// Raw constant for the source register selector that indicates no swizzling is performed 144/// Raw constant for the source register selector that indicates no swizzling is performed
@@ -152,7 +152,7 @@ static const u8 NO_DEST_REG_MASK = 0xf;
152 * @return Instruction at the specified offset 152 * @return Instruction at the specified offset
153 */ 153 */
154static Instruction GetVertexShaderInstruction(size_t offset) { 154static Instruction GetVertexShaderInstruction(size_t offset) {
155 return { g_state.vs.program_code[offset] }; 155 return {g_state.vs.program_code[offset]};
156} 156}
157 157
158static void LogCritical(const char* msg) { 158static void LogCritical(const char* msg) {
@@ -172,7 +172,8 @@ void JitShader::Compile_Assert(bool condition, const char* msg) {
172 * @param src_reg SourceRegister object corresponding to the source register to load 172 * @param src_reg SourceRegister object corresponding to the source register to load
173 * @param dest Destination XMM register to store the loaded, swizzled source register 173 * @param dest Destination XMM register to store the loaded, swizzled source register
174 */ 174 */
175void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { 175void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
176 X64Reg dest) {
176 X64Reg src_ptr; 177 X64Reg src_ptr;
177 size_t src_offset; 178 size_t src_offset;
178 179
@@ -189,7 +190,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
189 190
190 unsigned operand_desc_id; 191 unsigned operand_desc_id;
191 192
192 const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); 193 const bool is_inverted =
194 (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
193 195
194 unsigned address_register_index; 196 unsigned address_register_index;
195 unsigned offset_src; 197 unsigned offset_src;
@@ -225,7 +227,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
225 MOVAPS(dest, MDisp(src_ptr, src_offset_disp)); 227 MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
226 } 228 }
227 229
228 SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] }; 230 SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]};
229 231
230 // Generate instructions for source register swizzling as needed 232 // Generate instructions for source register swizzling as needed
231 u8 sel = swiz.GetRawSelector(src_num); 233 u8 sel = swiz.GetRawSelector(src_num);
@@ -238,13 +240,13 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
238 } 240 }
239 241
240 // If the source register should be negated, flip the negative bit using XOR 242 // If the source register should be negated, flip the negative bit using XOR
241 const bool negate[] = { swiz.negate_src1, swiz.negate_src2, swiz.negate_src3 }; 243 const bool negate[] = {swiz.negate_src1, swiz.negate_src2, swiz.negate_src3};
242 if (negate[src_num - 1]) { 244 if (negate[src_num - 1]) {
243 XORPS(dest, R(NEGBIT)); 245 XORPS(dest, R(NEGBIT));
244 } 246 }
245} 247}
246 248
247void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { 249void JitShader::Compile_DestEnable(Instruction instr, X64Reg src) {
248 DestRegister dest; 250 DestRegister dest;
249 unsigned operand_desc_id; 251 unsigned operand_desc_id;
250 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || 252 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
@@ -256,10 +258,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
256 dest = instr.common.dest.Value(); 258 dest = instr.common.dest.Value();
257 } 259 }
258 260
259 SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] }; 261 SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]};
260 262
261 int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest); 263 int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest);
262 ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), "Destinaton offset too large for int type"); 264 ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest),
265 "Destinaton offset too large for int type");
263 266
264 // If all components are enabled, write the result to the destination register 267 // If all components are enabled, write the result to the destination register
265 if (swiz.dest_mask == NO_DEST_REG_MASK) { 268 if (swiz.dest_mask == NO_DEST_REG_MASK) {
@@ -267,18 +270,21 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
267 MOVAPS(MDisp(STATE, dest_offset_disp), src); 270 MOVAPS(MDisp(STATE, dest_offset_disp), src);
268 271
269 } else { 272 } else {
270 // Not all components are enabled, so mask the result when storing to the destination register... 273 // Not all components are enabled, so mask the result when storing to the destination
274 // register...
271 MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp)); 275 MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp));
272 276
273 if (Common::GetCPUCaps().sse4_1) { 277 if (Common::GetCPUCaps().sse4_1) {
274 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); 278 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) |
279 ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
275 BLENDPS(SCRATCH, R(src), mask); 280 BLENDPS(SCRATCH, R(src), mask);
276 } else { 281 } else {
277 MOVAPS(SCRATCH2, R(src)); 282 MOVAPS(SCRATCH2, R(src));
278 UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination 283 UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination
279 UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination 284 UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination
280 285
281 // Compute selector to selectively copy source components to destination for SHUFPS instruction 286 // Compute selector to selectively copy source components to destination for SHUFPS
287 // instruction
282 u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) | 288 u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) |
283 ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) | 289 ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) |
284 ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) | 290 ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) |
@@ -336,7 +342,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
336} 342}
337 343
338void JitShader::Compile_UniformCondition(Instruction instr) { 344void JitShader::Compile_UniformCondition(Instruction instr) {
339 int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); 345 int offset =
346 ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
340 CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0)); 347 CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0));
341} 348}
342 349
@@ -512,7 +519,7 @@ void JitShader::Compile_MIN(Instruction instr) {
512} 519}
513 520
514void JitShader::Compile_MOVA(Instruction instr) { 521void JitShader::Compile_MOVA(Instruction instr) {
515 SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] }; 522 SwizzlePattern swiz = {g_state.vs.swizzle_data[instr.common.operand_desc_id]};
516 523
517 if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { 524 if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
518 return; // NoOp 525 return; // NoOp
@@ -597,7 +604,7 @@ void JitShader::Compile_CALL(Instruction instr) {
597 604
598 // Call the subroutine 605 // Call the subroutine
599 FixupBranch b = CALL(); 606 FixupBranch b = CALL();
600 fixup_branches.push_back({ b, instr.flow_control.dest_offset }); 607 fixup_branches.push_back({b, instr.flow_control.dest_offset});
601 608
602 // Skip over the return offset that's on the stack 609 // Skip over the return offset that's on the stack
603 ADD(64, R(RSP), Imm32(8)); 610 ADD(64, R(RSP), Imm32(8));
@@ -628,7 +635,7 @@ void JitShader::Compile_CMP(Instruction instr) {
628 // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to 635 // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to
629 // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here 636 // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here
630 // because they don't match when used with NaNs. 637 // because they don't match when used with NaNs.
631 static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE }; 638 static const u8 cmp[] = {CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE};
632 639
633 bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual); 640 bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual);
634 Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1; 641 Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1;
@@ -678,7 +685,8 @@ void JitShader::Compile_MAD(Instruction instr) {
678} 685}
679 686
680void JitShader::Compile_IF(Instruction instr) { 687void JitShader::Compile_IF(Instruction instr) {
681 Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported"); 688 Compile_Assert(instr.flow_control.dest_offset >= program_counter,
689 "Backwards if-statements not supported");
682 690
683 // Evaluate the "IF" condition 691 // Evaluate the "IF" condition
684 if (instr.opcode.Value() == OpCode::Id::IFU) { 692 if (instr.opcode.Value() == OpCode::Id::IFU) {
@@ -709,29 +717,31 @@ void JitShader::Compile_IF(Instruction instr) {
709} 717}
710 718
711void JitShader::Compile_LOOP(Instruction instr) { 719void JitShader::Compile_LOOP(Instruction instr) {
712 Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported"); 720 Compile_Assert(instr.flow_control.dest_offset >= program_counter,
721 "Backwards loops not supported");
713 Compile_Assert(!looping, "Nested loops not supported"); 722 Compile_Assert(!looping, "Nested loops not supported");
714 723
715 looping = true; 724 looping = true;
716 725
717 int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); 726 int offset =
727 ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
718 MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); 728 MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
719 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); 729 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
720 SHR(32, R(LOOPCOUNT_REG), Imm8(8)); 730 SHR(32, R(LOOPCOUNT_REG), Imm8(8));
721 AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start 731 AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
722 MOV(32, R(LOOPINC), R(LOOPCOUNT)); 732 MOV(32, R(LOOPINC), R(LOOPCOUNT));
723 SHR(32, R(LOOPINC), Imm8(16)); 733 SHR(32, R(LOOPINC), Imm8(16));
724 MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer 734 MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer
725 MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count 735 MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count
726 ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1 736 ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1
727 737
728 auto loop_start = GetCodePtr(); 738 auto loop_start = GetCodePtr();
729 739
730 Compile_Block(instr.flow_control.dest_offset + 1); 740 Compile_Block(instr.flow_control.dest_offset + 1);
731 741
732 ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component 742 ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component
733 SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1 743 SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1
734 J_CC(CC_NZ, loop_start); // Loop if not equal 744 J_CC(CC_NZ, loop_start); // Loop if not equal
735 745
736 looping = false; 746 looping = false;
737} 747}
@@ -744,11 +754,11 @@ void JitShader::Compile_JMP(Instruction instr) {
744 else 754 else
745 UNREACHABLE(); 755 UNREACHABLE();
746 756
747 bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && 757 bool inverted_condition =
748 (instr.flow_control.num_instructions & 1); 758 (instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1);
749 759
750 FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); 760 FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true);
751 fixup_branches.push_back({ b, instr.flow_control.dest_offset }); 761 fixup_branches.push_back({b, instr.flow_control.dest_offset});
752} 762}
753 763
754void JitShader::Compile_Block(unsigned end) { 764void JitShader::Compile_Block(unsigned end) {
@@ -773,7 +783,8 @@ void JitShader::Compile_NextInstr() {
773 Compile_Return(); 783 Compile_Return();
774 } 784 }
775 785
776 ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!"); 786 ASSERT_MSG(code_ptr[program_counter] == nullptr,
787 "Tried to compile already compiled shader location!");
777 code_ptr[program_counter] = GetCodePtr(); 788 code_ptr[program_counter] = GetCodePtr();
778 789
779 Instruction instr = GetVertexShaderInstruction(program_counter++); 790 Instruction instr = GetVertexShaderInstruction(program_counter++);
@@ -787,7 +798,7 @@ void JitShader::Compile_NextInstr() {
787 } else { 798 } else {
788 // Unhandled instruction 799 // Unhandled instruction
789 LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)", 800 LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)",
790 instr.opcode.Value().EffectiveOpCode(), instr.hex); 801 instr.opcode.Value().EffectiveOpCode(), instr.hex);
791 } 802 }
792} 803}
793 804
@@ -801,7 +812,8 @@ void JitShader::FindReturnOffsets() {
801 case OpCode::Id::CALL: 812 case OpCode::Id::CALL:
802 case OpCode::Id::CALLC: 813 case OpCode::Id::CALLC:
803 case OpCode::Id::CALLU: 814 case OpCode::Id::CALLU:
804 return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions); 815 return_offsets.push_back(instr.flow_control.dest_offset +
816 instr.flow_control.num_instructions);
805 break; 817 break;
806 default: 818 default:
807 break; 819 break;
@@ -835,12 +847,12 @@ void JitShader::Compile() {
835 XOR(64, R(LOOPCOUNT_REG), R(LOOPCOUNT_REG)); 847 XOR(64, R(LOOPCOUNT_REG), R(LOOPCOUNT_REG));
836 848
837 // Used to set a register to one 849 // Used to set a register to one
838 static const __m128 one = { 1.f, 1.f, 1.f, 1.f }; 850 static const __m128 one = {1.f, 1.f, 1.f, 1.f};
839 MOV(PTRBITS, R(RAX), ImmPtr(&one)); 851 MOV(PTRBITS, R(RAX), ImmPtr(&one));
840 MOVAPS(ONE, MatR(RAX)); 852 MOVAPS(ONE, MatR(RAX));
841 853
842 // Used to negate registers 854 // Used to negate registers
843 static const __m128 neg = { -0.f, -0.f, -0.f, -0.f }; 855 static const __m128 neg = {-0.f, -0.f, -0.f, -0.f};
844 MOV(PTRBITS, R(RAX), ImmPtr(&neg)); 856 MOV(PTRBITS, R(RAX), ImmPtr(&neg));
845 MOVAPS(NEGBIT, MatR(RAX)); 857 MOVAPS(NEGBIT, MatR(RAX));
846 858
@@ -850,7 +862,8 @@ void JitShader::Compile() {
850 // Compile entire program 862 // Compile entire program
851 Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); 863 Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
852 864
853 // Set the target for any incomplete branches now that the entire shader program has been emitted 865 // Set the target for any incomplete branches now that the entire shader program has been
866 // emitted
854 for (const auto& branch : fixup_branches) { 867 for (const auto& branch : fixup_branches) {
855 SetJumpTarget(branch.first, code_ptr[branch.second]); 868 SetJumpTarget(branch.first, code_ptr[branch.second]);
856 } 869 }
@@ -861,7 +874,8 @@ void JitShader::Compile() {
861 fixup_branches.clear(); 874 fixup_branches.clear();
862 fixup_branches.shrink_to_fit(); 875 fixup_branches.shrink_to_fit();
863 876
864 uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program); 877 uintptr_t size =
878 reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
865 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); 879 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
866 880
867 LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size); 881 LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size);
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 5468459d4..2f37ef8bf 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -70,11 +70,11 @@ public:
70 void Compile_MAD(Instruction instr); 70 void Compile_MAD(Instruction instr);
71 71
72private: 72private:
73
74 void Compile_Block(unsigned end); 73 void Compile_Block(unsigned end);
75 void Compile_NextInstr(); 74 void Compile_NextInstr();
76 75
77 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); 76 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
77 Gen::X64Reg dest);
78 void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); 78 void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
79 79
80 /** 80 /**
@@ -111,8 +111,8 @@ private:
111 /// Offsets in code where a return needs to be inserted 111 /// Offsets in code where a return needs to be inserted
112 std::vector<unsigned> return_offsets; 112 std::vector<unsigned> return_offsets;
113 113
114 unsigned program_counter = 0; ///< Offset of the next instruction to decode 114 unsigned program_counter = 0; ///< Offset of the next instruction to decode
115 bool looping = false; ///< True if compiling a loop, used to check for nested loops 115 bool looping = false; ///< True if compiling a loop, used to check for nested loops
116 116
117 /// Branches that need to be fixed up once the entire shader program is compiled 117 /// Branches that need to be fixed up once the entire shader program is compiled
118 std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; 118 std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
diff --git a/src/video_core/swrasterizer.cpp b/src/video_core/swrasterizer.cpp
index 03df15b01..ba458746c 100644
--- a/src/video_core/swrasterizer.cpp
+++ b/src/video_core/swrasterizer.cpp
@@ -2,15 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "video_core/clipper.h"
6#include "video_core/swrasterizer.h" 5#include "video_core/swrasterizer.h"
6#include "video_core/clipper.h"
7 7
8namespace VideoCore { 8namespace VideoCore {
9 9
10void SWRasterizer::AddTriangle(const Pica::Shader::OutputVertex& v0, 10void SWRasterizer::AddTriangle(const Pica::Shader::OutputVertex& v0,
11 const Pica::Shader::OutputVertex& v1, 11 const Pica::Shader::OutputVertex& v1,
12 const Pica::Shader::OutputVertex& v2) { 12 const Pica::Shader::OutputVertex& v2) {
13 Pica::Clipper::ProcessTriangle(v0, v1, v2); 13 Pica::Clipper::ProcessTriangle(v0, v1, v2);
14} 14}
15
16} 15}
diff --git a/src/video_core/swrasterizer.h b/src/video_core/swrasterizer.h
index 0a028b774..bca9780e5 100644
--- a/src/video_core/swrasterizer.h
+++ b/src/video_core/swrasterizer.h
@@ -17,14 +17,17 @@ struct OutputVertex;
17namespace VideoCore { 17namespace VideoCore {
18 18
19class SWRasterizer : public RasterizerInterface { 19class SWRasterizer : public RasterizerInterface {
20 void AddTriangle(const Pica::Shader::OutputVertex& v0, 20 void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
21 const Pica::Shader::OutputVertex& v1, 21 const Pica::Shader::OutputVertex& v2) override;
22 const Pica::Shader::OutputVertex& v2) override; 22 void DrawTriangles() override {
23 void DrawTriangles() override {} 23 }
24 void NotifyPicaRegisterChanged(u32 id) override {} 24 void NotifyPicaRegisterChanged(u32 id) override {
25 void FlushAll() override {} 25 }
26 void FlushRegion(PAddr addr, u32 size) override {} 26 void FlushAll() override {
27 void FlushAndInvalidateRegion(PAddr addr, u32 size) override {} 27 }
28 void FlushRegion(PAddr addr, u32 size) override {
29 }
30 void FlushAndInvalidateRegion(PAddr addr, u32 size) override {
31 }
28}; 32};
29
30} 33}
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
index e40f0f1ee..9dcd9d748 100644
--- a/src/video_core/vertex_loader.cpp
+++ b/src/video_core/vertex_loader.cpp
@@ -41,24 +41,32 @@ void VertexLoader::Setup(const Pica::Regs& regs) {
41 // TODO: What happens if a loader overwrites a previous one's data? 41 // TODO: What happens if a loader overwrites a previous one's data?
42 for (unsigned component = 0; component < loader_config.component_count; ++component) { 42 for (unsigned component = 0; component < loader_config.component_count; ++component) {
43 if (component >= 12) { 43 if (component >= 12) {
44 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); 44 LOG_ERROR(HW_GPU,
45 "Overflow in the vertex attribute loader %u trying to load component %u",
46 loader, component);
45 continue; 47 continue;
46 } 48 }
47 49
48 u32 attribute_index = loader_config.GetComponent(component); 50 u32 attribute_index = loader_config.GetComponent(component);
49 if (attribute_index < 12) { 51 if (attribute_index < 12) {
50 offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index)); 52 offset = Common::AlignUp(offset,
53 attribute_config.GetElementSizeInBytes(attribute_index));
51 vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset; 54 vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset;
52 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); 55 vertex_attribute_strides[attribute_index] =
53 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); 56 static_cast<u32>(loader_config.byte_count);
54 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); 57 vertex_attribute_formats[attribute_index] =
58 attribute_config.GetFormat(attribute_index);
59 vertex_attribute_elements[attribute_index] =
60 attribute_config.GetNumElements(attribute_index);
55 offset += attribute_config.GetStride(attribute_index); 61 offset += attribute_config.GetStride(attribute_index);
56 } else if (attribute_index < 16) { 62 } else if (attribute_index < 16) {
57 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively 63 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings,
64 // respectively
58 offset = Common::AlignUp(offset, 4); 65 offset = Common::AlignUp(offset, 4);
59 offset += (attribute_index - 11) * 4; 66 offset += (attribute_index - 11) * 4;
60 } else { 67 } else {
61 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component 68 UNREACHABLE(); // This is truly unreachable due to the number of bits for each
69 // component
62 } 70 }
63 } 71 }
64 } 72 }
@@ -66,48 +74,55 @@ void VertexLoader::Setup(const Pica::Regs& regs) {
66 is_setup = true; 74 is_setup = true;
67} 75}
68 76
69void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) { 77void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input,
78 DebugUtils::MemoryAccessTracker& memory_accesses) {
70 ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices."); 79 ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices.");
71 80
72 for (int i = 0; i < num_total_attributes; ++i) { 81 for (int i = 0; i < num_total_attributes; ++i) {
73 if (vertex_attribute_elements[i] != 0) { 82 if (vertex_attribute_elements[i] != 0) {
74 // Load per-vertex data from the loader arrays 83 // Load per-vertex data from the loader arrays
75 u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex; 84 u32 source_addr =
85 base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex;
76 86
77 if (g_debug_context && Pica::g_debug_context->recorder) { 87 if (g_debug_context && Pica::g_debug_context->recorder) {
78 memory_accesses.AddAccess(source_addr, vertex_attribute_elements[i] * ( 88 memory_accesses.AddAccess(
79 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 89 source_addr,
80 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1)); 90 vertex_attribute_elements[i] *
91 ((vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT)
92 ? 4
93 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT)
94 ? 2
95 : 1));
81 } 96 }
82 97
83 switch (vertex_attribute_formats[i]) { 98 switch (vertex_attribute_formats[i]) {
84 case Regs::VertexAttributeFormat::BYTE: 99 case Regs::VertexAttributeFormat::BYTE: {
85 { 100 const s8* srcdata =
86 const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr)); 101 reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr));
87 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { 102 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
88 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); 103 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
89 } 104 }
90 break; 105 break;
91 } 106 }
92 case Regs::VertexAttributeFormat::UBYTE: 107 case Regs::VertexAttributeFormat::UBYTE: {
93 { 108 const u8* srcdata =
94 const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr)); 109 reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr));
95 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { 110 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
96 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); 111 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
97 } 112 }
98 break; 113 break;
99 } 114 }
100 case Regs::VertexAttributeFormat::SHORT: 115 case Regs::VertexAttributeFormat::SHORT: {
101 { 116 const s16* srcdata =
102 const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr)); 117 reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr));
103 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { 118 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
104 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); 119 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
105 } 120 }
106 break; 121 break;
107 } 122 }
108 case Regs::VertexAttributeFormat::FLOAT: 123 case Regs::VertexAttributeFormat::FLOAT: {
109 { 124 const float* srcdata =
110 const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr)); 125 reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr));
111 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { 126 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
112 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); 127 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
113 } 128 }
@@ -119,22 +134,23 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I
119 // is *not* carried over from the default attribute settings even if they're 134 // is *not* carried over from the default attribute settings even if they're
120 // enabled for this attribute. 135 // enabled for this attribute.
121 for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) { 136 for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) {
122 input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); 137 input.attr[i][comp] =
138 comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
123 } 139 }
124 140
125 LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f", 141 LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from "
126 vertex_attribute_elements[i], i, vertex, index, 142 "0x%08x + 0x%08x + 0x%04x: %f %f %f %f",
127 base_address, 143 vertex_attribute_elements[i], i, vertex, index, base_address,
128 vertex_attribute_sources[i], 144 vertex_attribute_sources[i], vertex_attribute_strides[i] * vertex,
129 vertex_attribute_strides[i] * vertex, 145 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
130 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); 146 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
131 } else if (vertex_attribute_is_default[i]) { 147 } else if (vertex_attribute_is_default[i]) {
132 // Load the default attribute if we're configured to do so 148 // Load the default attribute if we're configured to do so
133 input.attr[i] = g_state.vs_default_attributes[i]; 149 input.attr[i] = g_state.vs_default_attributes[i];
134 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", 150 LOG_TRACE(HW_GPU,
135 i, vertex, index, 151 "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", i,
136 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), 152 vertex, index, input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
137 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); 153 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
138 } else { 154 } else {
139 // TODO(yuriks): In this case, no data gets loaded and the vertex 155 // TODO(yuriks): In this case, no data gets loaded and the vertex
140 // remains with the last value it had. This isn't currently maintained 156 // remains with the last value it had. This isn't currently maintained
@@ -143,4 +159,4 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I
143 } 159 }
144} 160}
145 161
146} // namespace Pica 162} // namespace Pica
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h
index ac162c254..4f8d79f14 100644
--- a/src/video_core/vertex_loader.h
+++ b/src/video_core/vertex_loader.h
@@ -23,9 +23,12 @@ public:
23 } 23 }
24 24
25 void Setup(const Pica::Regs& regs); 25 void Setup(const Pica::Regs& regs);
26 void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses); 26 void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input,
27 DebugUtils::MemoryAccessTracker& memory_accesses);
27 28
28 int GetNumTotalAttributes() const { return num_total_attributes; } 29 int GetNumTotalAttributes() const {
30 return num_total_attributes;
31 }
29 32
30private: 33private:
31 std::array<u32, 16> vertex_attribute_sources; 34 std::array<u32, 16> vertex_attribute_sources;
@@ -37,4 +40,4 @@ private:
37 bool is_setup = false; 40 bool is_setup = false;
38}; 41};
39 42
40} // namespace Pica 43} // namespace Pica
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index bd6e5eb6b..880f4990d 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -8,16 +8,16 @@
8 8
9#include "video_core/pica.h" 9#include "video_core/pica.h"
10#include "video_core/renderer_base.h" 10#include "video_core/renderer_base.h"
11#include "video_core/video_core.h"
12#include "video_core/renderer_opengl/renderer_opengl.h" 11#include "video_core/renderer_opengl/renderer_opengl.h"
12#include "video_core/video_core.h"
13 13
14//////////////////////////////////////////////////////////////////////////////////////////////////// 14////////////////////////////////////////////////////////////////////////////////////////////////////
15// Video Core namespace 15// Video Core namespace
16 16
17namespace VideoCore { 17namespace VideoCore {
18 18
19EmuWindow* g_emu_window = nullptr; ///< Frontend emulator window 19EmuWindow* g_emu_window = nullptr; ///< Frontend emulator window
20std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin 20std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
21 21
22std::atomic<bool> g_hw_renderer_enabled; 22std::atomic<bool> g_hw_renderer_enabled;
23std::atomic<bool> g_shader_jit_enabled; 23std::atomic<bool> g_shader_jit_enabled;
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index 30267489e..e2d725ab1 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -22,18 +22,19 @@ namespace VideoCore {
22// framebuffers in video memory are stored in column-major order and rendered sideways, causing 22// framebuffers in video memory are stored in column-major order and rendered sideways, causing
23// the widths and heights of the framebuffers read by the LCD to be switched compared to the 23// the widths and heights of the framebuffers read by the LCD to be switched compared to the
24// heights and widths of the screens listed here. 24// heights and widths of the screens listed here.
25static const int kScreenTopWidth = 400; ///< 3DS top screen width 25static const int kScreenTopWidth = 400; ///< 3DS top screen width
26static const int kScreenTopHeight = 240; ///< 3DS top screen height 26static const int kScreenTopHeight = 240; ///< 3DS top screen height
27static const int kScreenBottomWidth = 320; ///< 3DS bottom screen width 27static const int kScreenBottomWidth = 320; ///< 3DS bottom screen width
28static const int kScreenBottomHeight = 240; ///< 3DS bottom screen height 28static const int kScreenBottomHeight = 240; ///< 3DS bottom screen height
29 29
30// Video core renderer 30// Video core renderer
31// --------------------- 31// ---------------------
32 32
33extern std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin 33extern std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
34extern EmuWindow* g_emu_window; ///< Emu window 34extern EmuWindow* g_emu_window; ///< Emu window
35 35
36// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui) 36// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from
37// qt ui)
37extern std::atomic<bool> g_hw_renderer_enabled; 38extern std::atomic<bool> g_hw_renderer_enabled;
38extern std::atomic<bool> g_shader_jit_enabled; 39extern std::atomic<bool> g_shader_jit_enabled;
39extern std::atomic<bool> g_scaled_resolution_enabled; 40extern std::atomic<bool> g_scaled_resolution_enabled;