summaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
authorGravatar Yuri Kunde Schlesner2016-09-21 11:29:48 -0700
committerGravatar GitHub2016-09-21 11:29:48 -0700
commitd5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a (patch)
tree8a22ca73ff838f3f0090b29a548ae81087fc90ed /src/video_core
parentREADME: Specify master branch for Travis CI badge (diff)
parentFix Travis clang-format check (diff)
downloadyuzu-d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a.tar.gz
yuzu-d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a.tar.xz
yuzu-d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a.zip
Merge pull request #2086 from linkmauve/clang-format
Add clang-format as part of our {commit,travis}-time checks
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/clipper.cpp72
-rw-r--r--src/video_core/clipper.h2
-rw-r--r--src/video_core/command_processor.cpp651
-rw-r--r--src/video_core/command_processor.h7
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp429
-rw-r--r--src/video_core/debug_utils/debug_utils.h39
-rw-r--r--src/video_core/gpu_debugger.h41
-rw-r--r--src/video_core/pica.cpp922
-rw-r--r--src/video_core/pica.h644
-rw-r--r--src/video_core/pica_state.h8
-rw-r--r--src/video_core/pica_types.h36
-rw-r--r--src/video_core/primitive_assembly.cpp67
-rw-r--r--src/video_core/primitive_assembly.h8
-rw-r--r--src/video_core/rasterizer.cpp436
-rw-r--r--src/video_core/rasterizer.h5
-rw-r--r--src/video_core/rasterizer_interface.h19
-rw-r--r--src/video_core/renderer_base.cpp5
-rw-r--r--src/video_core/renderer_base.h16
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp238
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h53
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp289
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h82
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h117
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp213
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp51
-rw-r--r--src/video_core/renderer_opengl/gl_state.h40
-rw-r--r--src/video_core/renderer_opengl/pica_to_gl.h79
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp134
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h22
-rw-r--r--src/video_core/shader/shader.cpp44
-rw-r--r--src/video_core/shader/shader.h97
-rw-r--r--src/video_core/shader/shader_interpreter.cpp292
-rw-r--r--src/video_core/shader/shader_interpreter.h5
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp221
-rw-r--r--src/video_core/shader/shader_jit_x64.h11
-rw-r--r--src/video_core/swrasterizer.cpp5
-rw-r--r--src/video_core/swrasterizer.h7
-rw-r--r--src/video_core/vertex_loader.cpp92
-rw-r--r--src/video_core/vertex_loader.h10
-rw-r--r--src/video_core/video_core.cpp8
-rw-r--r--src/video_core/video_core.h15
42 files changed, 2927 insertions, 2610 deletions
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index db99ce666..05b5cea73 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -5,15 +5,12 @@
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7#include <cstddef> 7#include <cstddef>
8
9#include <boost/container/static_vector.hpp> 8#include <boost/container/static_vector.hpp>
10#include <boost/container/vector.hpp> 9#include <boost/container/vector.hpp>
11
12#include "common/bit_field.h" 10#include "common/bit_field.h"
13#include "common/common_types.h" 11#include "common/common_types.h"
14#include "common/logging/log.h" 12#include "common/logging/log.h"
15#include "common/vector_math.h" 13#include "common/vector_math.h"
16
17#include "video_core/clipper.h" 14#include "video_core/clipper.h"
18#include "video_core/pica.h" 15#include "video_core/pica.h"
19#include "video_core/pica_state.h" 16#include "video_core/pica_state.h"
@@ -27,15 +24,10 @@ namespace Clipper {
27 24
28struct ClippingEdge { 25struct ClippingEdge {
29public: 26public:
30 ClippingEdge(Math::Vec4<float24> coeffs, 27 ClippingEdge(Math::Vec4<float24> coeffs, Math::Vec4<float24> bias = Math::Vec4<float24>(
31 Math::Vec4<float24> bias = Math::Vec4<float24>(float24::FromFloat32(0), 28 float24::FromFloat32(0), float24::FromFloat32(0),
32 float24::FromFloat32(0), 29 float24::FromFloat32(0), float24::FromFloat32(0)))
33 float24::FromFloat32(0), 30 : coeffs(coeffs), bias(bias) {}
34 float24::FromFloat32(0)))
35 : coeffs(coeffs),
36 bias(bias)
37 {
38 }
39 31
40 bool IsInside(const OutputVertex& vertex) const { 32 bool IsInside(const OutputVertex& vertex) const {
41 return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0); 33 return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0);
@@ -59,8 +51,7 @@ private:
59 Math::Vec4<float24> bias; 51 Math::Vec4<float24> bias;
60}; 52};
61 53
62static void InitScreenCoordinates(OutputVertex& vtx) 54static void InitScreenCoordinates(OutputVertex& vtx) {
63{
64 struct { 55 struct {
65 float24 halfsize_x; 56 float24 halfsize_x;
66 float24 offset_x; 57 float24 offset_x;
@@ -73,8 +64,8 @@ static void InitScreenCoordinates(OutputVertex& vtx)
73 const auto& regs = g_state.regs; 64 const auto& regs = g_state.regs;
74 viewport.halfsize_x = float24::FromRaw(regs.viewport_size_x); 65 viewport.halfsize_x = float24::FromRaw(regs.viewport_size_x);
75 viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); 66 viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y);
76 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); 67 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x));
77 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); 68 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y));
78 69
79 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; 70 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w;
80 vtx.color *= inv_w; 71 vtx.color *= inv_w;
@@ -85,12 +76,14 @@ static void InitScreenCoordinates(OutputVertex& vtx)
85 vtx.tc2 *= inv_w; 76 vtx.tc2 *= inv_w;
86 vtx.pos.w = inv_w; 77 vtx.pos.w = inv_w;
87 78
88 vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; 79 vtx.screenpos[0] =
89 vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; 80 (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x;
81 vtx.screenpos[1] =
82 (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;
90 vtx.screenpos[2] = vtx.pos.z * inv_w; 83 vtx.screenpos[2] = vtx.pos.z * inv_w;
91} 84}
92 85
93void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) { 86void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) {
94 using boost::container::static_vector; 87 using boost::container::static_vector;
95 88
96 // Clipping a planar n-gon against a plane will remove at least 1 vertex and introduces 2 at 89 // Clipping a planar n-gon against a plane will remove at least 1 vertex and introduces 2 at
@@ -98,10 +91,10 @@ void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const Outpu
98 // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a 91 // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a
99 // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9. 92 // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9.
100 static const size_t MAX_VERTICES = 9; 93 static const size_t MAX_VERTICES = 9;
101 static_vector<OutputVertex, MAX_VERTICES> buffer_a = { v0, v1, v2 }; 94 static_vector<OutputVertex, MAX_VERTICES> buffer_a = {v0, v1, v2};
102 static_vector<OutputVertex, MAX_VERTICES> buffer_b; 95 static_vector<OutputVertex, MAX_VERTICES> buffer_b;
103 auto* output_list = &buffer_a; 96 auto* output_list = &buffer_a;
104 auto* input_list = &buffer_b; 97 auto* input_list = &buffer_b;
105 98
106 // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value. 99 // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value.
107 // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest 100 // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest
@@ -110,13 +103,13 @@ void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const Outpu
110 static const float24 f0 = float24::FromFloat32(0.0); 103 static const float24 f0 = float24::FromFloat32(0.0);
111 static const float24 f1 = float24::FromFloat32(1.0); 104 static const float24 f1 = float24::FromFloat32(1.0);
112 static const std::array<ClippingEdge, 7> clipping_edges = {{ 105 static const std::array<ClippingEdge, 7> clipping_edges = {{
113 { Math::MakeVec( f1, f0, f0, -f1) }, // x = +w 106 {Math::MakeVec(f1, f0, f0, -f1)}, // x = +w
114 { Math::MakeVec(-f1, f0, f0, -f1) }, // x = -w 107 {Math::MakeVec(-f1, f0, f0, -f1)}, // x = -w
115 { Math::MakeVec( f0, f1, f0, -f1) }, // y = +w 108 {Math::MakeVec(f0, f1, f0, -f1)}, // y = +w
116 { Math::MakeVec( f0, -f1, f0, -f1) }, // y = -w 109 {Math::MakeVec(f0, -f1, f0, -f1)}, // y = -w
117 { Math::MakeVec( f0, f0, f1, f0) }, // z = 0 110 {Math::MakeVec(f0, f0, f1, f0)}, // z = 0
118 { Math::MakeVec( f0, f0, -f1, -f1) }, // z = -w 111 {Math::MakeVec(f0, f0, -f1, -f1)}, // z = -w
119 { Math::MakeVec( f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON) }, // w = EPSILON 112 {Math::MakeVec(f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON)}, // w = EPSILON
120 }}; 113 }};
121 114
122 // TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii) 115 // TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii)
@@ -154,10 +147,10 @@ void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const Outpu
154 InitScreenCoordinates((*output_list)[0]); 147 InitScreenCoordinates((*output_list)[0]);
155 InitScreenCoordinates((*output_list)[1]); 148 InitScreenCoordinates((*output_list)[1]);
156 149
157 for (size_t i = 0; i < output_list->size() - 2; i ++) { 150 for (size_t i = 0; i < output_list->size() - 2; i++) {
158 OutputVertex& vtx0 = (*output_list)[0]; 151 OutputVertex& vtx0 = (*output_list)[0];
159 OutputVertex& vtx1 = (*output_list)[i+1]; 152 OutputVertex& vtx1 = (*output_list)[i + 1];
160 OutputVertex& vtx2 = (*output_list)[i+2]; 153 OutputVertex& vtx2 = (*output_list)[i + 2];
161 154
162 InitScreenCoordinates(vtx2); 155 InitScreenCoordinates(vtx2);
163 156
@@ -165,19 +158,20 @@ void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const Outpu
165 "Triangle %lu/%lu at position (%.3f, %.3f, %.3f, %.3f), " 158 "Triangle %lu/%lu at position (%.3f, %.3f, %.3f, %.3f), "
166 "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " 159 "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and "
167 "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", 160 "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)",
168 i + 1, output_list->size() - 2, 161 i + 1, output_list->size() - 2, vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(),
169 vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), 162 vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), vtx1.pos.x.ToFloat32(),
170 vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), 163 vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(),
171 vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(), 164 vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(),
172 vtx0.screenpos.x.ToFloat32(), vtx0.screenpos.y.ToFloat32(), vtx0.screenpos.z.ToFloat32(), 165 vtx2.pos.w.ToFloat32(), vtx0.screenpos.x.ToFloat32(),
173 vtx1.screenpos.x.ToFloat32(), vtx1.screenpos.y.ToFloat32(), vtx1.screenpos.z.ToFloat32(), 166 vtx0.screenpos.y.ToFloat32(), vtx0.screenpos.z.ToFloat32(),
174 vtx2.screenpos.x.ToFloat32(), vtx2.screenpos.y.ToFloat32(), vtx2.screenpos.z.ToFloat32()); 167 vtx1.screenpos.x.ToFloat32(), vtx1.screenpos.y.ToFloat32(),
168 vtx1.screenpos.z.ToFloat32(), vtx2.screenpos.x.ToFloat32(),
169 vtx2.screenpos.y.ToFloat32(), vtx2.screenpos.z.ToFloat32());
175 170
176 Rasterizer::ProcessTriangle(vtx0, vtx1, vtx2); 171 Rasterizer::ProcessTriangle(vtx0, vtx1, vtx2);
177 } 172 }
178} 173}
179 174
180
181} // namespace 175} // namespace
182 176
183} // namespace 177} // namespace
diff --git a/src/video_core/clipper.h b/src/video_core/clipper.h
index f85d8d4c9..b51af0af9 100644
--- a/src/video_core/clipper.h
+++ b/src/video_core/clipper.h
@@ -7,7 +7,7 @@
7namespace Pica { 7namespace Pica {
8 8
9namespace Shader { 9namespace Shader {
10 struct OutputVertex; 10struct OutputVertex;
11} 11}
12 12
13namespace Clipper { 13namespace Clipper {
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 689859049..bb618cacd 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -6,17 +6,14 @@
6#include <cstddef> 6#include <cstddef>
7#include <memory> 7#include <memory>
8#include <utility> 8#include <utility>
9
10#include "common/assert.h" 9#include "common/assert.h"
11#include "common/logging/log.h" 10#include "common/logging/log.h"
12#include "common/microprofile.h" 11#include "common/microprofile.h"
13#include "common/vector_math.h" 12#include "common/vector_math.h"
14
15#include "core/hle/service/gsp_gpu.h" 13#include "core/hle/service/gsp_gpu.h"
16#include "core/hw/gpu.h" 14#include "core/hw/gpu.h"
17#include "core/memory.h" 15#include "core/memory.h"
18#include "core/tracer/recorder.h" 16#include "core/tracer/recorder.h"
19
20#include "video_core/command_processor.h" 17#include "video_core/command_processor.h"
21#include "video_core/debug_utils/debug_utils.h" 18#include "video_core/debug_utils/debug_utils.h"
22#include "video_core/pica.h" 19#include "video_core/pica.h"
@@ -43,10 +40,8 @@ static u32 default_attr_write_buffer[3];
43 40
44// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF 41// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
45static const u32 expand_bits_to_bytes[] = { 42static const u32 expand_bits_to_bytes[] = {
46 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 43 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff,
47 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff, 44 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff,
48 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff,
49 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff
50}; 45};
51 46
52MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240)); 47MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240));
@@ -68,383 +63,393 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
68 63
69 regs[id] = (old_value & ~write_mask) | (value & write_mask); 64 regs[id] = (old_value & ~write_mask) | (value & write_mask);
70 65
71 DebugUtils::OnPicaRegWrite({ (u16)id, (u16)mask, regs[id] }); 66 DebugUtils::OnPicaRegWrite({(u16)id, (u16)mask, regs[id]});
72 67
73 if (g_debug_context) 68 if (g_debug_context)
74 g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded, reinterpret_cast<void*>(&id)); 69 g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded,
75 70 reinterpret_cast<void*>(&id));
76 switch(id) { 71
77 // Trigger IRQ 72 switch (id) {
78 case PICA_REG_INDEX(trigger_irq): 73 // Trigger IRQ
79 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D); 74 case PICA_REG_INDEX(trigger_irq):
80 break; 75 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D);
81 76 break;
82 case PICA_REG_INDEX_WORKAROUND(triangle_topology, 0x25E): 77
83 g_state.primitive_assembler.Reconfigure(regs.triangle_topology); 78 case PICA_REG_INDEX_WORKAROUND(triangle_topology, 0x25E):
84 break; 79 g_state.primitive_assembler.Reconfigure(regs.triangle_topology);
85 80 break;
86 case PICA_REG_INDEX_WORKAROUND(restart_primitive, 0x25F): 81
87 g_state.primitive_assembler.Reset(); 82 case PICA_REG_INDEX_WORKAROUND(restart_primitive, 0x25F):
88 break; 83 g_state.primitive_assembler.Reset();
89 84 break;
90 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.index, 0x232): 85
91 g_state.immediate.current_attribute = 0; 86 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.index, 0x232):
87 g_state.immediate.current_attribute = 0;
88 default_attr_counter = 0;
89 break;
90
91 // Load default vertex input attributes
92 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233):
93 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234):
94 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235): {
95 // TODO: Does actual hardware indeed keep an intermediate buffer or does
96 // it directly write the values?
97 default_attr_write_buffer[default_attr_counter++] = value;
98
99 // Default attributes are written in a packed format such that four float24 values are
100 // encoded in
101 // three 32-bit numbers. We write to internal memory once a full such vector is
102 // written.
103 if (default_attr_counter >= 3) {
92 default_attr_counter = 0; 104 default_attr_counter = 0;
93 break;
94
95 // Load default vertex input attributes
96 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233):
97 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234):
98 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235):
99 {
100 // TODO: Does actual hardware indeed keep an intermediate buffer or does
101 // it directly write the values?
102 default_attr_write_buffer[default_attr_counter++] = value;
103
104 // Default attributes are written in a packed format such that four float24 values are encoded in
105 // three 32-bit numbers. We write to internal memory once a full such vector is
106 // written.
107 if (default_attr_counter >= 3) {
108 default_attr_counter = 0;
109
110 auto& setup = regs.vs_default_attributes_setup;
111
112 if (setup.index >= 16) {
113 LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
114 break;
115 }
116 105
117 Math::Vec4<float24> attribute; 106 auto& setup = regs.vs_default_attributes_setup;
118 107
119 // NOTE: The destination component order indeed is "backwards" 108 if (setup.index >= 16) {
120 attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8); 109 LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
121 attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); 110 break;
122 attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); 111 }
123 attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
124 112
125 LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, 113 Math::Vec4<float24> attribute;
126 attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
127 attribute.w.ToFloat32());
128 114
129 // TODO: Verify that this actually modifies the register! 115 // NOTE: The destination component order indeed is "backwards"
130 if (setup.index < 15) { 116 attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8);
131 g_state.vs_default_attributes[setup.index] = attribute; 117 attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) |
132 setup.index++; 118 ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
133 } else { 119 attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) |
134 // Put each attribute into an immediate input buffer. 120 ((default_attr_write_buffer[2] >> 24) & 0xFF));
135 // When all specified immediate attributes are present, the Vertex Shader is invoked and everything is 121 attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
136 // sent to the primitive assembler.
137 122
138 auto& immediate_input = g_state.immediate.input_vertex; 123 LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
139 auto& immediate_attribute_id = g_state.immediate.current_attribute; 124 attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
125 attribute.w.ToFloat32());
140 126
141 immediate_input.attr[immediate_attribute_id++] = attribute; 127 // TODO: Verify that this actually modifies the register!
128 if (setup.index < 15) {
129 g_state.vs_default_attributes[setup.index] = attribute;
130 setup.index++;
131 } else {
132 // Put each attribute into an immediate input buffer.
133 // When all specified immediate attributes are present, the Vertex Shader is invoked
134 // and everything is
135 // sent to the primitive assembler.
142 136
143 if (immediate_attribute_id >= regs.vs.num_input_attributes+1) { 137 auto& immediate_input = g_state.immediate.input_vertex;
144 immediate_attribute_id = 0; 138 auto& immediate_attribute_id = g_state.immediate.current_attribute;
145 139
146 Shader::UnitState<false> shader_unit; 140 immediate_input.attr[immediate_attribute_id++] = attribute;
147 g_state.vs.Setup();
148 141
149 // Send to vertex shader 142 if (immediate_attribute_id >= regs.vs.num_input_attributes + 1) {
150 if (g_debug_context) 143 immediate_attribute_id = 0;
151 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast<void*>(&immediate_input));
152 g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1);
153 Shader::OutputVertex output_vertex = shader_unit.output_registers.ToVertex(regs.vs);
154 144
155 // Send to renderer 145 Shader::UnitState<false> shader_unit;
156 using Pica::Shader::OutputVertex; 146 g_state.vs.Setup();
157 auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) {
158 VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
159 };
160 147
161 g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle); 148 // Send to vertex shader
162 } 149 if (g_debug_context)
150 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
151 static_cast<void*>(&immediate_input));
152 g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes + 1);
153 Shader::OutputVertex output_vertex =
154 shader_unit.output_registers.ToVertex(regs.vs);
155
156 // Send to renderer
157 using Pica::Shader::OutputVertex;
158 auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
159 const OutputVertex& v2) {
160 VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
161 };
162
163 g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle);
163 } 164 }
164 } 165 }
165 break;
166 } 166 }
167 break;
168 }
167 169
168 case PICA_REG_INDEX(gpu_mode): 170 case PICA_REG_INDEX(gpu_mode):
169 if (regs.gpu_mode == Regs::GPUMode::Configuring) { 171 if (regs.gpu_mode == Regs::GPUMode::Configuring) {
170 // Draw immediate mode triangles when GPU Mode is set to GPUMode::Configuring 172 // Draw immediate mode triangles when GPU Mode is set to GPUMode::Configuring
171 VideoCore::g_renderer->Rasterizer()->DrawTriangles(); 173 VideoCore::g_renderer->Rasterizer()->DrawTriangles();
172 174
173 if (g_debug_context) { 175 if (g_debug_context) {
174 g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); 176 g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
175 }
176 } 177 }
177 break;
178
179 case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c):
180 case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d):
181 {
182 unsigned index = static_cast<unsigned>(id - PICA_REG_INDEX(command_buffer.trigger[0]));
183 u32* head_ptr = (u32*)Memory::GetPhysicalPointer(regs.command_buffer.GetPhysicalAddress(index));
184 g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr;
185 g_state.cmd_list.length = regs.command_buffer.GetSize(index) / sizeof(u32);
186 break;
187 } 178 }
179 break;
180
181 case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c):
182 case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d): {
183 unsigned index = static_cast<unsigned>(id - PICA_REG_INDEX(command_buffer.trigger[0]));
184 u32* head_ptr =
185 (u32*)Memory::GetPhysicalPointer(regs.command_buffer.GetPhysicalAddress(index));
186 g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr;
187 g_state.cmd_list.length = regs.command_buffer.GetSize(index) / sizeof(u32);
188 break;
189 }
188 190
189 // It seems like these trigger vertex rendering 191 // It seems like these trigger vertex rendering
190 case PICA_REG_INDEX(trigger_draw): 192 case PICA_REG_INDEX(trigger_draw):
191 case PICA_REG_INDEX(trigger_draw_indexed): 193 case PICA_REG_INDEX(trigger_draw_indexed): {
192 { 194 MICROPROFILE_SCOPE(GPU_Drawing);
193 MICROPROFILE_SCOPE(GPU_Drawing);
194 195
195#if PICA_LOG_TEV 196#if PICA_LOG_TEV
196 DebugUtils::DumpTevStageConfig(regs.GetTevStages()); 197 DebugUtils::DumpTevStageConfig(regs.GetTevStages());
197#endif 198#endif
198 if (g_debug_context) 199 if (g_debug_context)
199 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); 200 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
200 201
201 // Processes information about internal vertex attributes to figure out how a vertex is loaded. 202 // Processes information about internal vertex attributes to figure out how a vertex is
202 // Later, these can be compiled and cached. 203 // loaded.
203 const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress(); 204 // Later, these can be compiled and cached.
204 VertexLoader loader(regs); 205 const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress();
205 206 VertexLoader loader(regs);
206 // Load vertices 207
207 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); 208 // Load vertices
208 209 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
209 const auto& index_info = regs.index_array; 210
210 const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); 211 const auto& index_info = regs.index_array;
211 const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); 212 const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
212 bool index_u16 = index_info.format != 0; 213 const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
213 214 bool index_u16 = index_info.format != 0;
214 PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler; 215
215 216 PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler;
216 if (g_debug_context) { 217
217 for (int i = 0; i < 3; ++i) { 218 if (g_debug_context) {
218 const auto texture = regs.GetTextures()[i]; 219 for (int i = 0; i < 3; ++i) {
219 if (!texture.enabled) 220 const auto texture = regs.GetTextures()[i];
220 continue; 221 if (!texture.enabled)
221 222 continue;
222 u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); 223
223 if (g_debug_context && Pica::g_debug_context->recorder) 224 u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
224 g_debug_context->recorder->MemoryAccessed(texture_data, Pica::Regs::NibblesPerPixel(texture.format) * texture.config.width / 2 * texture.config.height, texture.config.GetPhysicalAddress()); 225 if (g_debug_context && Pica::g_debug_context->recorder)
225 } 226 g_debug_context->recorder->MemoryAccessed(
227 texture_data, Pica::Regs::NibblesPerPixel(texture.format) *
228 texture.config.width / 2 * texture.config.height,
229 texture.config.GetPhysicalAddress());
226 } 230 }
231 }
227 232
228 DebugUtils::MemoryAccessTracker memory_accesses; 233 DebugUtils::MemoryAccessTracker memory_accesses;
229
230 // Simple circular-replacement vertex cache
231 // The size has been tuned for optimal balance between hit-rate and the cost of lookup
232 const size_t VERTEX_CACHE_SIZE = 32;
233 std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
234 std::array<Shader::OutputRegisters, VERTEX_CACHE_SIZE> vertex_cache;
235 234
236 unsigned int vertex_cache_pos = 0; 235 // Simple circular-replacement vertex cache
237 vertex_cache_ids.fill(-1); 236 // The size has been tuned for optimal balance between hit-rate and the cost of lookup
237 const size_t VERTEX_CACHE_SIZE = 32;
238 std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
239 std::array<Shader::OutputRegisters, VERTEX_CACHE_SIZE> vertex_cache;
238 240
239 Shader::UnitState<false> shader_unit; 241 unsigned int vertex_cache_pos = 0;
240 g_state.vs.Setup(); 242 vertex_cache_ids.fill(-1);
241 243
242 for (unsigned int index = 0; index < regs.num_vertices; ++index) 244 Shader::UnitState<false> shader_unit;
243 { 245 g_state.vs.Setup();
244 // Indexed rendering doesn't use the start offset
245 unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : (index + regs.vertex_offset);
246 246
247 // -1 is a common special value used for primitive restart. Since it's unknown if 247 for (unsigned int index = 0; index < regs.num_vertices; ++index) {
248 // the PICA supports it, and it would mess up the caching, guard against it here. 248 // Indexed rendering doesn't use the start offset
249 ASSERT(vertex != -1); 249 unsigned int vertex =
250 is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index])
251 : (index + regs.vertex_offset);
250 252
251 bool vertex_cache_hit = false; 253 // -1 is a common special value used for primitive restart. Since it's unknown if
252 Shader::OutputRegisters output_registers; 254 // the PICA supports it, and it would mess up the caching, guard against it here.
255 ASSERT(vertex != -1);
253 256
254 if (is_indexed) { 257 bool vertex_cache_hit = false;
255 if (g_debug_context && Pica::g_debug_context->recorder) { 258 Shader::OutputRegisters output_registers;
256 int size = index_u16 ? 2 : 1;
257 memory_accesses.AddAccess(base_address + index_info.offset + size * index, size);
258 }
259 259
260 for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { 260 if (is_indexed) {
261 if (vertex == vertex_cache_ids[i]) { 261 if (g_debug_context && Pica::g_debug_context->recorder) {
262 output_registers = vertex_cache[i]; 262 int size = index_u16 ? 2 : 1;
263 vertex_cache_hit = true; 263 memory_accesses.AddAccess(base_address + index_info.offset + size * index,
264 break; 264 size);
265 }
266 }
267 } 265 }
268 266
269 if (!vertex_cache_hit) { 267 for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
270 // Initialize data for the current vertex 268 if (vertex == vertex_cache_ids[i]) {
271 Shader::InputVertex input; 269 output_registers = vertex_cache[i];
272 loader.LoadVertex(base_address, index, vertex, input, memory_accesses); 270 vertex_cache_hit = true;
273 271 break;
274 // Send to vertex shader
275 if (g_debug_context)
276 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input);
277 g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes());
278 output_registers = shader_unit.output_registers;
279
280 if (is_indexed) {
281 vertex_cache[vertex_cache_pos] = output_registers;
282 vertex_cache_ids[vertex_cache_pos] = vertex;
283 vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
284 } 272 }
285 } 273 }
274 }
286 275
287 // Retreive vertex from register data 276 if (!vertex_cache_hit) {
288 Shader::OutputVertex output_vertex = output_registers.ToVertex(regs.vs); 277 // Initialize data for the current vertex
278 Shader::InputVertex input;
279 loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
289 280
290 // Send to renderer 281 // Send to vertex shader
291 using Pica::Shader::OutputVertex; 282 if (g_debug_context)
292 auto AddTriangle = []( 283 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
293 const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) { 284 (void*)&input);
294 VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); 285 g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes());
295 }; 286 output_registers = shader_unit.output_registers;
296 287
297 primitive_assembler.SubmitVertex(output_vertex, AddTriangle); 288 if (is_indexed) {
289 vertex_cache[vertex_cache_pos] = output_registers;
290 vertex_cache_ids[vertex_cache_pos] = vertex;
291 vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
292 }
298 } 293 }
299 294
300 for (auto& range : memory_accesses.ranges) { 295 // Retreive vertex from register data
301 g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first), 296 Shader::OutputVertex output_vertex = output_registers.ToVertex(regs.vs);
302 range.second, range.first); 297
303 } 298 // Send to renderer
299 using Pica::Shader::OutputVertex;
300 auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
301 const OutputVertex& v2) {
302 VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
303 };
304 304
305 break; 305 primitive_assembler.SubmitVertex(output_vertex, AddTriangle);
306 } 306 }
307 307
308 case PICA_REG_INDEX(vs.bool_uniforms): 308 for (auto& range : memory_accesses.ranges) {
309 for (unsigned i = 0; i < 16; ++i) 309 g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first),
310 g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0; 310 range.second, range.first);
311
312 break;
313
314 case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1):
315 case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2):
316 case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3):
317 case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4):
318 {
319 int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1));
320 auto values = regs.vs.int_uniforms[index];
321 g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w);
322 LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x",
323 index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value());
324 break;
325 } 311 }
326 312
327 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[0], 0x2c1): 313 break;
328 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[1], 0x2c2): 314 }
329 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[2], 0x2c3):
330 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[3], 0x2c4):
331 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[4], 0x2c5):
332 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6):
333 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7):
334 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8):
335 {
336 auto& uniform_setup = regs.vs.uniform_setup;
337
338 // TODO: Does actual hardware indeed keep an intermediate buffer or does
339 // it directly write the values?
340 uniform_write_buffer[float_regs_counter++] = value;
341
342 // Uniforms are written in a packed format such that four float24 values are encoded in
343 // three 32-bit numbers. We write to internal memory once a full such vector is
344 // written.
345 if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) ||
346 (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) {
347 float_regs_counter = 0;
348
349 auto& uniform = g_state.vs.uniforms.f[uniform_setup.index];
350
351 if (uniform_setup.index > 95) {
352 LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index);
353 break;
354 }
355 315
356 // NOTE: The destination component order indeed is "backwards" 316 case PICA_REG_INDEX(vs.bool_uniforms):
357 if (uniform_setup.IsFloat32()) { 317 for (unsigned i = 0; i < 16; ++i)
358 for (auto i : {0,1,2,3}) 318 g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0;
359 uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); 319
360 } else { 320 break;
361 // TODO: Untested 321
362 uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); 322 case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1):
363 uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); 323 case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2):
364 uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); 324 case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3):
365 uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); 325 case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): {
366 } 326 int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1));
327 auto values = regs.vs.int_uniforms[index];
328 g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w);
329 LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", index, values.x.Value(),
330 values.y.Value(), values.z.Value(), values.w.Value());
331 break;
332 }
367 333
368 LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, 334 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[0], 0x2c1):
369 uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(), 335 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[1], 0x2c2):
370 uniform.w.ToFloat32()); 336 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[2], 0x2c3):
337 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[3], 0x2c4):
338 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[4], 0x2c5):
339 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6):
340 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7):
341 case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): {
342 auto& uniform_setup = regs.vs.uniform_setup;
343
344 // TODO: Does actual hardware indeed keep an intermediate buffer or does
345 // it directly write the values?
346 uniform_write_buffer[float_regs_counter++] = value;
347
348 // Uniforms are written in a packed format such that four float24 values are encoded in
349 // three 32-bit numbers. We write to internal memory once a full such vector is
350 // written.
351 if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) ||
352 (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) {
353 float_regs_counter = 0;
354
355 auto& uniform = g_state.vs.uniforms.f[uniform_setup.index];
356
357 if (uniform_setup.index > 95) {
358 LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index);
359 break;
360 }
371 361
372 // TODO: Verify that this actually modifies the register! 362 // NOTE: The destination component order indeed is "backwards"
373 uniform_setup.index.Assign(uniform_setup.index + 1); 363 if (uniform_setup.IsFloat32()) {
364 for (auto i : {0, 1, 2, 3})
365 uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i]));
366 } else {
367 // TODO: Untested
368 uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8);
369 uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) |
370 ((uniform_write_buffer[1] >> 16) & 0xFFFF));
371 uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) |
372 ((uniform_write_buffer[2] >> 24) & 0xFF));
373 uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF);
374 } 374 }
375 break;
376 }
377 375
378 // Load shader program code 376 LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index,
379 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc): 377 uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(),
380 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd): 378 uniform.w.ToFloat32());
381 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce):
382 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[3], 0x2cf):
383 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[4], 0x2d0):
384 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1):
385 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2):
386 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3):
387 {
388 g_state.vs.program_code[regs.vs.program.offset] = value;
389 regs.vs.program.offset++;
390 break;
391 }
392 379
393 // Load swizzle pattern data 380 // TODO: Verify that this actually modifies the register!
394 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6): 381 uniform_setup.index.Assign(uniform_setup.index + 1);
395 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7):
396 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8):
397 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[3], 0x2d9):
398 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[4], 0x2da):
399 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db):
400 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc):
401 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd):
402 {
403 g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value;
404 regs.vs.swizzle_patterns.offset++;
405 break;
406 } 382 }
383 break;
384 }
407 385
408 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8): 386 // Load shader program code
409 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9): 387 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc):
410 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca): 388 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd):
411 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb): 389 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce):
412 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): 390 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[3], 0x2cf):
413 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): 391 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[4], 0x2d0):
414 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): 392 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1):
415 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): 393 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2):
416 { 394 case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): {
417 auto& lut_config = regs.lighting.lut_config; 395 g_state.vs.program_code[regs.vs.program.offset] = value;
418 396 regs.vs.program.offset++;
419 ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!"); 397 break;
420 398 }
421 g_state.lighting.luts[lut_config.type][lut_config.index].raw = value;
422 lut_config.index.Assign(lut_config.index + 1);
423 break;
424 }
425 399
426 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[0], 0xe8): 400 // Load swizzle pattern data
427 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[1], 0xe9): 401 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6):
428 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[2], 0xea): 402 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7):
429 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[3], 0xeb): 403 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8):
430 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[4], 0xec): 404 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[3], 0x2d9):
431 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[5], 0xed): 405 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[4], 0x2da):
432 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[6], 0xee): 406 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db):
433 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[7], 0xef): 407 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc):
434 { 408 case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): {
435 g_state.fog.lut[regs.fog_lut_offset % 128].raw = value; 409 g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value;
436 regs.fog_lut_offset.Assign(regs.fog_lut_offset + 1); 410 regs.vs.swizzle_patterns.offset++;
437 break; 411 break;
438 } 412 }
413
414 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8):
415 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9):
416 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca):
417 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb):
418 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc):
419 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd):
420 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce):
421 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): {
422 auto& lut_config = regs.lighting.lut_config;
423
424 ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!");
425
426 g_state.lighting.luts[lut_config.type][lut_config.index].raw = value;
427 lut_config.index.Assign(lut_config.index + 1);
428 break;
429 }
439 430
440 default: 431 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[0], 0xe8):
441 break; 432 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[1], 0xe9):
433 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[2], 0xea):
434 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[3], 0xeb):
435 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[4], 0xec):
436 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[5], 0xed):
437 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[6], 0xee):
438 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[7], 0xef): {
439 g_state.fog.lut[regs.fog_lut_offset % 128].raw = value;
440 regs.fog_lut_offset.Assign(regs.fog_lut_offset + 1);
441 break;
442 }
443
444 default:
445 break;
442 } 446 }
443 447
444 VideoCore::g_renderer->Rasterizer()->NotifyPicaRegisterChanged(id); 448 VideoCore::g_renderer->Rasterizer()->NotifyPicaRegisterChanged(id);
445 449
446 if (g_debug_context) 450 if (g_debug_context)
447 g_debug_context->OnEvent(DebugContext::Event::PicaCommandProcessed, reinterpret_cast<void*>(&id)); 451 g_debug_context->OnEvent(DebugContext::Event::PicaCommandProcessed,
452 reinterpret_cast<void*>(&id));
448} 453}
449 454
450void ProcessCommandList(const u32* list, u32 size) { 455void ProcessCommandList(const u32* list, u32 size) {
@@ -458,14 +463,14 @@ void ProcessCommandList(const u32* list, u32 size) {
458 ++g_state.cmd_list.current_ptr; 463 ++g_state.cmd_list.current_ptr;
459 464
460 u32 value = *g_state.cmd_list.current_ptr++; 465 u32 value = *g_state.cmd_list.current_ptr++;
461 const CommandHeader header = { *g_state.cmd_list.current_ptr++ }; 466 const CommandHeader header = {*g_state.cmd_list.current_ptr++};
462 467
463 WritePicaReg(header.cmd_id, value, header.parameter_mask); 468 WritePicaReg(header.cmd_id, value, header.parameter_mask);
464 469
465 for (unsigned i = 0; i < header.extra_data_length; ++i) { 470 for (unsigned i = 0; i < header.extra_data_length; ++i) {
466 u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0); 471 u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0);
467 WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, header.parameter_mask); 472 WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, header.parameter_mask);
468 } 473 }
469 } 474 }
470} 475}
471 476
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h
index 022a71f5e..62ad2d3f3 100644
--- a/src/video_core/command_processor.h
+++ b/src/video_core/command_processor.h
@@ -5,7 +5,6 @@
5#pragma once 5#pragma once
6 6
7#include <type_traits> 7#include <type_traits>
8
9#include "common/bit_field.h" 8#include "common/bit_field.h"
10#include "common/common_types.h" 9#include "common/common_types.h"
11 10
@@ -16,7 +15,7 @@ namespace CommandProcessor {
16union CommandHeader { 15union CommandHeader {
17 u32 hex; 16 u32 hex;
18 17
19 BitField< 0, 16, u32> cmd_id; 18 BitField<0, 16, u32> cmd_id;
20 19
21 // parameter_mask: 20 // parameter_mask:
22 // Mask applied to the input value to make it possible to update 21 // Mask applied to the input value to make it possible to update
@@ -25,11 +24,11 @@ union CommandHeader {
25 // second bit: 0x0000FF00 24 // second bit: 0x0000FF00
26 // third bit: 0x00FF0000 25 // third bit: 0x00FF0000
27 // fourth bit: 0xFF000000 26 // fourth bit: 0xFF000000
28 BitField<16, 4, u32> parameter_mask; 27 BitField<16, 4, u32> parameter_mask;
29 28
30 BitField<20, 11, u32> extra_data_length; 29 BitField<20, 11, u32> extra_data_length;
31 30
32 BitField<31, 1, u32> group_commands; 31 BitField<31, 1, u32> group_commands;
33}; 32};
34static_assert(std::is_standard_layout<CommandHeader>::value == true, 33static_assert(std::is_standard_layout<CommandHeader>::value == true,
35 "CommandHeader does not use standard layout"); 34 "CommandHeader does not use standard layout");
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index bfa686380..8806464d9 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -20,7 +20,6 @@
20#include <nihstro/bit_field.h> 20#include <nihstro/bit_field.h>
21#include <nihstro/float24.h> 21#include <nihstro/float24.h>
22#include <nihstro/shader_binary.h> 22#include <nihstro/shader_binary.h>
23
24#include "common/assert.h" 23#include "common/assert.h"
25#include "common/bit_field.h" 24#include "common/bit_field.h"
26#include "common/color.h" 25#include "common/color.h"
@@ -29,7 +28,6 @@
29#include "common/logging/log.h" 28#include "common/logging/log.h"
30#include "common/math_util.h" 29#include "common/math_util.h"
31#include "common/vector_math.h" 30#include "common/vector_math.h"
32
33#include "video_core/debug_utils/debug_utils.h" 31#include "video_core/debug_utils/debug_utils.h"
34#include "video_core/pica.h" 32#include "video_core/pica.h"
35#include "video_core/pica_state.h" 33#include "video_core/pica_state.h"
@@ -50,7 +48,8 @@ void DebugContext::DoOnEvent(Event event, void* data) {
50 { 48 {
51 std::unique_lock<std::mutex> lock(breakpoint_mutex); 49 std::unique_lock<std::mutex> lock(breakpoint_mutex);
52 50
53 // Commit the rasterizer's caches so framebuffers, render targets, etc. will show on debug widgets 51 // Commit the rasterizer's caches so framebuffers, render targets, etc. will show on debug
52 // widgets
54 VideoCore::g_renderer->Rasterizer()->FlushAll(); 53 VideoCore::g_renderer->Rasterizer()->FlushAll();
55 54
56 // TODO: Should stop the CPU thread here once we multithread emulation. 55 // TODO: Should stop the CPU thread here once we multithread emulation.
@@ -64,7 +63,7 @@ void DebugContext::DoOnEvent(Event event, void* data) {
64 } 63 }
65 64
66 // Wait until another thread tells us to Resume() 65 // Wait until another thread tells us to Resume()
67 resume_from_breakpoint.wait(lock, [&]{ return !at_breakpoint; }); 66 resume_from_breakpoint.wait(lock, [&] { return !at_breakpoint; });
68 } 67 }
69} 68}
70 69
@@ -88,8 +87,9 @@ std::shared_ptr<DebugContext> g_debug_context; // TODO: Get rid of this global
88 87
89namespace DebugUtils { 88namespace DebugUtils {
90 89
91void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes) 90void DumpShader(const std::string& filename, const Regs::ShaderConfig& config,
92{ 91 const Shader::ShaderSetup& setup,
92 const Regs::VSOutputAttributes* output_attributes) {
93 struct StuffToWrite { 93 struct StuffToWrite {
94 const u8* pointer; 94 const u8* pointer;
95 u32 size; 95 u32 size;
@@ -97,7 +97,7 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
97 std::vector<StuffToWrite> writing_queue; 97 std::vector<StuffToWrite> writing_queue;
98 u32 write_offset = 0; 98 u32 write_offset = 0;
99 99
100 auto QueueForWriting = [&writing_queue,&write_offset](const u8* pointer, u32 size) { 100 auto QueueForWriting = [&writing_queue, &write_offset](const u8* pointer, u32 size) {
101 writing_queue.push_back({pointer, size}); 101 writing_queue.push_back({pointer, size});
102 u32 old_write_offset = write_offset; 102 u32 old_write_offset = write_offset;
103 write_offset += size; 103 write_offset += size;
@@ -108,99 +108,95 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
108 // into shbin format (separate type and component mask). 108 // into shbin format (separate type and component mask).
109 union OutputRegisterInfo { 109 union OutputRegisterInfo {
110 enum Type : u64 { 110 enum Type : u64 {
111 POSITION = 0, 111 POSITION = 0,
112 QUATERNION = 1, 112 QUATERNION = 1,
113 COLOR = 2, 113 COLOR = 2,
114 TEXCOORD0 = 3, 114 TEXCOORD0 = 3,
115 TEXCOORD1 = 5, 115 TEXCOORD1 = 5,
116 TEXCOORD2 = 6, 116 TEXCOORD2 = 6,
117 117
118 VIEW = 8, 118 VIEW = 8,
119 }; 119 };
120 120
121 BitField< 0, 64, u64> hex; 121 BitField<0, 64, u64> hex;
122 122
123 BitField< 0, 16, Type> type; 123 BitField<0, 16, Type> type;
124 BitField<16, 16, u64> id; 124 BitField<16, 16, u64> id;
125 BitField<32, 4, u64> component_mask; 125 BitField<32, 4, u64> component_mask;
126 }; 126 };
127 127
128 // This is put into a try-catch block to make sure we notice unknown configurations. 128 // This is put into a try-catch block to make sure we notice unknown configurations.
129 std::vector<OutputRegisterInfo> output_info_table; 129 std::vector<OutputRegisterInfo> output_info_table;
130 for (unsigned i = 0; i < 7; ++i) { 130 for (unsigned i = 0; i < 7; ++i) {
131 using OutputAttributes = Pica::Regs::VSOutputAttributes; 131 using OutputAttributes = Pica::Regs::VSOutputAttributes;
132 132
133 // TODO: It's still unclear how the attribute components map to the register! 133 // TODO: It's still unclear how the attribute components map to the register!
134 // Once we know that, this code probably will not make much sense anymore. 134 // Once we know that, this code probably will not make much sense anymore.
135 std::map<OutputAttributes::Semantic, std::pair<OutputRegisterInfo::Type, u32> > map = { 135 std::map<OutputAttributes::Semantic, std::pair<OutputRegisterInfo::Type, u32>> map = {
136 { OutputAttributes::POSITION_X, { OutputRegisterInfo::POSITION, 1} }, 136 {OutputAttributes::POSITION_X, {OutputRegisterInfo::POSITION, 1}},
137 { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} }, 137 {OutputAttributes::POSITION_Y, {OutputRegisterInfo::POSITION, 2}},
138 { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} }, 138 {OutputAttributes::POSITION_Z, {OutputRegisterInfo::POSITION, 4}},
139 { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} }, 139 {OutputAttributes::POSITION_W, {OutputRegisterInfo::POSITION, 8}},
140 { OutputAttributes::QUATERNION_X, { OutputRegisterInfo::QUATERNION, 1} }, 140 {OutputAttributes::QUATERNION_X, {OutputRegisterInfo::QUATERNION, 1}},
141 { OutputAttributes::QUATERNION_Y, { OutputRegisterInfo::QUATERNION, 2} }, 141 {OutputAttributes::QUATERNION_Y, {OutputRegisterInfo::QUATERNION, 2}},
142 { OutputAttributes::QUATERNION_Z, { OutputRegisterInfo::QUATERNION, 4} }, 142 {OutputAttributes::QUATERNION_Z, {OutputRegisterInfo::QUATERNION, 4}},
143 { OutputAttributes::QUATERNION_W, { OutputRegisterInfo::QUATERNION, 8} }, 143 {OutputAttributes::QUATERNION_W, {OutputRegisterInfo::QUATERNION, 8}},
144 { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} }, 144 {OutputAttributes::COLOR_R, {OutputRegisterInfo::COLOR, 1}},
145 { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} }, 145 {OutputAttributes::COLOR_G, {OutputRegisterInfo::COLOR, 2}},
146 { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} }, 146 {OutputAttributes::COLOR_B, {OutputRegisterInfo::COLOR, 4}},
147 { OutputAttributes::COLOR_A, { OutputRegisterInfo::COLOR, 8} }, 147 {OutputAttributes::COLOR_A, {OutputRegisterInfo::COLOR, 8}},
148 { OutputAttributes::TEXCOORD0_U, { OutputRegisterInfo::TEXCOORD0, 1} }, 148 {OutputAttributes::TEXCOORD0_U, {OutputRegisterInfo::TEXCOORD0, 1}},
149 { OutputAttributes::TEXCOORD0_V, { OutputRegisterInfo::TEXCOORD0, 2} }, 149 {OutputAttributes::TEXCOORD0_V, {OutputRegisterInfo::TEXCOORD0, 2}},
150 { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} }, 150 {OutputAttributes::TEXCOORD1_U, {OutputRegisterInfo::TEXCOORD1, 1}},
151 { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} }, 151 {OutputAttributes::TEXCOORD1_V, {OutputRegisterInfo::TEXCOORD1, 2}},
152 { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} }, 152 {OutputAttributes::TEXCOORD2_U, {OutputRegisterInfo::TEXCOORD2, 1}},
153 { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} }, 153 {OutputAttributes::TEXCOORD2_V, {OutputRegisterInfo::TEXCOORD2, 2}},
154 { OutputAttributes::VIEW_X, { OutputRegisterInfo::VIEW, 1} }, 154 {OutputAttributes::VIEW_X, {OutputRegisterInfo::VIEW, 1}},
155 { OutputAttributes::VIEW_Y, { OutputRegisterInfo::VIEW, 2} }, 155 {OutputAttributes::VIEW_Y, {OutputRegisterInfo::VIEW, 2}},
156 { OutputAttributes::VIEW_Z, { OutputRegisterInfo::VIEW, 4} } 156 {OutputAttributes::VIEW_Z, {OutputRegisterInfo::VIEW, 4}},
157 }; 157 };
158 158
159 for (const auto& semantic : std::vector<OutputAttributes::Semantic>{ 159 for (const auto& semantic : std::vector<OutputAttributes::Semantic>{
160 output_attributes[i].map_x, 160 output_attributes[i].map_x, output_attributes[i].map_y, output_attributes[i].map_z,
161 output_attributes[i].map_y, 161 output_attributes[i].map_w}) {
162 output_attributes[i].map_z, 162 if (semantic == OutputAttributes::INVALID)
163 output_attributes[i].map_w }) { 163 continue;
164 if (semantic == OutputAttributes::INVALID) 164
165 continue; 165 try {
166 166 OutputRegisterInfo::Type type = map.at(semantic).first;
167 try { 167 u32 component_mask = map.at(semantic).second;
168 OutputRegisterInfo::Type type = map.at(semantic).first; 168
169 u32 component_mask = map.at(semantic).second; 169 auto it = std::find_if(output_info_table.begin(), output_info_table.end(),
170 170 [&i, &type](const OutputRegisterInfo& info) {
171 auto it = std::find_if(output_info_table.begin(), output_info_table.end(), 171 return info.id == i && info.type == type;
172 [&i, &type](const OutputRegisterInfo& info) { 172 });
173 return info.id == i && info.type == type; 173
174 } 174 if (it == output_info_table.end()) {
175 ); 175 output_info_table.emplace_back();
176 176 output_info_table.back().type.Assign(type);
177 if (it == output_info_table.end()) { 177 output_info_table.back().component_mask.Assign(component_mask);
178 output_info_table.emplace_back(); 178 output_info_table.back().id.Assign(i);
179 output_info_table.back().type.Assign(type); 179 } else {
180 output_info_table.back().component_mask.Assign(component_mask); 180 it->component_mask.Assign(it->component_mask | component_mask);
181 output_info_table.back().id.Assign(i);
182 } else {
183 it->component_mask.Assign(it->component_mask | component_mask);
184 }
185 } catch (const std::out_of_range& ) {
186 DEBUG_ASSERT_MSG(false, "Unknown output attribute mapping");
187 LOG_ERROR(HW_GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x",
188 (int)output_attributes[i].map_x.Value(),
189 (int)output_attributes[i].map_y.Value(),
190 (int)output_attributes[i].map_z.Value(),
191 (int)output_attributes[i].map_w.Value());
192 } 181 }
182 } catch (const std::out_of_range&) {
183 DEBUG_ASSERT_MSG(false, "Unknown output attribute mapping");
184 LOG_ERROR(HW_GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x",
185 (int)output_attributes[i].map_x.Value(),
186 (int)output_attributes[i].map_y.Value(),
187 (int)output_attributes[i].map_z.Value(),
188 (int)output_attributes[i].map_w.Value());
193 } 189 }
194 } 190 }
195 191 }
196 192
197 struct { 193 struct {
198 DVLBHeader header; 194 DVLBHeader header;
199 u32 dvle_offset; 195 u32 dvle_offset;
200 } dvlb{ {DVLBHeader::MAGIC_WORD, 1 } }; // 1 DVLE 196 } dvlb{{DVLBHeader::MAGIC_WORD, 1}}; // 1 DVLE
201 197
202 DVLPHeader dvlp{ DVLPHeader::MAGIC_WORD }; 198 DVLPHeader dvlp{DVLPHeader::MAGIC_WORD};
203 DVLEHeader dvle{ DVLEHeader::MAGIC_WORD }; 199 DVLEHeader dvle{DVLEHeader::MAGIC_WORD};
204 200
205 QueueForWriting(reinterpret_cast<const u8*>(&dvlb), sizeof(dvlb)); 201 QueueForWriting(reinterpret_cast<const u8*>(&dvlb), sizeof(dvlb));
206 u32 dvlp_offset = QueueForWriting(reinterpret_cast<const u8*>(&dvlp), sizeof(dvlp)); 202 u32 dvlp_offset = QueueForWriting(reinterpret_cast<const u8*>(&dvlp), sizeof(dvlp));
@@ -216,14 +212,16 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
216 dvlp.swizzle_info_num_entries = static_cast<uint32_t>(setup.swizzle_data.size()); 212 dvlp.swizzle_info_num_entries = static_cast<uint32_t>(setup.swizzle_data.size());
217 u32 dummy = 0; 213 u32 dummy = 0;
218 for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) { 214 for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) {
219 QueueForWriting(reinterpret_cast<const u8*>(&setup.swizzle_data[i]), sizeof(setup.swizzle_data[i])); 215 QueueForWriting(reinterpret_cast<const u8*>(&setup.swizzle_data[i]),
216 sizeof(setup.swizzle_data[i]));
220 QueueForWriting(reinterpret_cast<const u8*>(&dummy), sizeof(dummy)); 217 QueueForWriting(reinterpret_cast<const u8*>(&dummy), sizeof(dummy));
221 } 218 }
222 219
223 dvle.main_offset_words = config.main_offset; 220 dvle.main_offset_words = config.main_offset;
224 dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; 221 dvle.output_register_table_offset = write_offset - dvlb.dvle_offset;
225 dvle.output_register_table_size = static_cast<u32>(output_info_table.size()); 222 dvle.output_register_table_size = static_cast<u32>(output_info_table.size());
226 QueueForWriting(reinterpret_cast<const u8*>(output_info_table.data()), static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo))); 223 QueueForWriting(reinterpret_cast<const u8*>(output_info_table.data()),
224 static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo)));
227 225
228 // TODO: Create a label table for "main" 226 // TODO: Create a label table for "main"
229 227
@@ -258,10 +256,8 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
258 constant.f.w = nihstro::to_float24(setup.uniforms.f[i].w.ToFloat32()); 256 constant.f.w = nihstro::to_float24(setup.uniforms.f[i].w.ToFloat32());
259 257
260 // Store constant if it's different from zero.. 258 // Store constant if it's different from zero..
261 if (setup.uniforms.f[i].x.ToFloat32() != 0.0 || 259 if (setup.uniforms.f[i].x.ToFloat32() != 0.0 || setup.uniforms.f[i].y.ToFloat32() != 0.0 ||
262 setup.uniforms.f[i].y.ToFloat32() != 0.0 || 260 setup.uniforms.f[i].z.ToFloat32() != 0.0 || setup.uniforms.f[i].w.ToFloat32() != 0.0)
263 setup.uniforms.f[i].z.ToFloat32() != 0.0 ||
264 setup.uniforms.f[i].w.ToFloat32() != 0.0)
265 constant_table.emplace_back(constant); 261 constant_table.emplace_back(constant);
266 } 262 }
267 dvle.constant_table_offset = write_offset - dvlb.dvle_offset; 263 dvle.constant_table_offset = write_offset - dvlb.dvle_offset;
@@ -282,8 +278,7 @@ static std::unique_ptr<PicaTrace> pica_trace;
282static std::mutex pica_trace_mutex; 278static std::mutex pica_trace_mutex;
283static int is_pica_tracing = false; 279static int is_pica_tracing = false;
284 280
285void StartPicaTracing() 281void StartPicaTracing() {
286{
287 if (is_pica_tracing) { 282 if (is_pica_tracing) {
288 LOG_WARNING(HW_GPU, "StartPicaTracing called even though tracing already running!"); 283 LOG_WARNING(HW_GPU, "StartPicaTracing called even though tracing already running!");
289 return; 284 return;
@@ -295,13 +290,11 @@ void StartPicaTracing()
295 is_pica_tracing = true; 290 is_pica_tracing = true;
296} 291}
297 292
298bool IsPicaTracing() 293bool IsPicaTracing() {
299{
300 return is_pica_tracing != 0; 294 return is_pica_tracing != 0;
301} 295}
302 296
303void OnPicaRegWrite(PicaTrace::Write write) 297void OnPicaRegWrite(PicaTrace::Write write) {
304{
305 // Double check for is_pica_tracing to avoid pointless locking overhead 298 // Double check for is_pica_tracing to avoid pointless locking overhead
306 if (!is_pica_tracing) 299 if (!is_pica_tracing)
307 return; 300 return;
@@ -314,8 +307,7 @@ void OnPicaRegWrite(PicaTrace::Write write)
314 pica_trace->writes.push_back(write); 307 pica_trace->writes.push_back(write);
315} 308}
316 309
317std::unique_ptr<PicaTrace> FinishPicaTracing() 310std::unique_ptr<PicaTrace> FinishPicaTracing() {
318{
319 if (!is_pica_tracing) { 311 if (!is_pica_tracing) {
320 LOG_WARNING(HW_GPU, "FinishPicaTracing called even though tracing isn't running!"); 312 LOG_WARNING(HW_GPU, "FinishPicaTracing called even though tracing isn't running!");
321 return {}; 313 return {};
@@ -331,12 +323,12 @@ std::unique_ptr<PicaTrace> FinishPicaTracing()
331 return ret; 323 return ret;
332} 324}
333 325
334const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) { 326const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info,
327 bool disable_alpha) {
335 const unsigned int coarse_x = x & ~7; 328 const unsigned int coarse_x = x & ~7;
336 const unsigned int coarse_y = y & ~7; 329 const unsigned int coarse_y = y & ~7;
337 330
338 if (info.format != Regs::TextureFormat::ETC1 && 331 if (info.format != Regs::TextureFormat::ETC1 && info.format != Regs::TextureFormat::ETC1A4) {
339 info.format != Regs::TextureFormat::ETC1A4) {
340 // TODO(neobrain): Fix code design to unify vertical block offsets! 332 // TODO(neobrain): Fix code design to unify vertical block offsets!
341 source += coarse_y * info.stride; 333 source += coarse_y * info.stride;
342 } 334 }
@@ -344,73 +336,63 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
344 // TODO: Assert that width/height are multiples of block dimensions 336 // TODO: Assert that width/height are multiples of block dimensions
345 337
346 switch (info.format) { 338 switch (info.format) {
347 case Regs::TextureFormat::RGBA8: 339 case Regs::TextureFormat::RGBA8: {
348 {
349 auto res = Color::DecodeRGBA8(source + VideoCore::GetMortonOffset(x, y, 4)); 340 auto res = Color::DecodeRGBA8(source + VideoCore::GetMortonOffset(x, y, 4));
350 return { res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a()) }; 341 return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
351 } 342 }
352 343
353 case Regs::TextureFormat::RGB8: 344 case Regs::TextureFormat::RGB8: {
354 {
355 auto res = Color::DecodeRGB8(source + VideoCore::GetMortonOffset(x, y, 3)); 345 auto res = Color::DecodeRGB8(source + VideoCore::GetMortonOffset(x, y, 3));
356 return { res.r(), res.g(), res.b(), 255 }; 346 return {res.r(), res.g(), res.b(), 255};
357 } 347 }
358 348
359 case Regs::TextureFormat::RGB5A1: 349 case Regs::TextureFormat::RGB5A1: {
360 {
361 auto res = Color::DecodeRGB5A1(source + VideoCore::GetMortonOffset(x, y, 2)); 350 auto res = Color::DecodeRGB5A1(source + VideoCore::GetMortonOffset(x, y, 2));
362 return { res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a()) }; 351 return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
363 } 352 }
364 353
365 case Regs::TextureFormat::RGB565: 354 case Regs::TextureFormat::RGB565: {
366 {
367 auto res = Color::DecodeRGB565(source + VideoCore::GetMortonOffset(x, y, 2)); 355 auto res = Color::DecodeRGB565(source + VideoCore::GetMortonOffset(x, y, 2));
368 return { res.r(), res.g(), res.b(), 255 }; 356 return {res.r(), res.g(), res.b(), 255};
369 } 357 }
370 358
371 case Regs::TextureFormat::RGBA4: 359 case Regs::TextureFormat::RGBA4: {
372 {
373 auto res = Color::DecodeRGBA4(source + VideoCore::GetMortonOffset(x, y, 2)); 360 auto res = Color::DecodeRGBA4(source + VideoCore::GetMortonOffset(x, y, 2));
374 return { res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a()) }; 361 return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
375 } 362 }
376 363
377 case Regs::TextureFormat::IA8: 364 case Regs::TextureFormat::IA8: {
378 {
379 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2); 365 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2);
380 366
381 if (disable_alpha) { 367 if (disable_alpha) {
382 // Show intensity as red, alpha as green 368 // Show intensity as red, alpha as green
383 return { source_ptr[1], source_ptr[0], 0, 255 }; 369 return {source_ptr[1], source_ptr[0], 0, 255};
384 } else { 370 } else {
385 return { source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0] }; 371 return {source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]};
386 } 372 }
387 } 373 }
388 374
389 case Regs::TextureFormat::RG8: 375 case Regs::TextureFormat::RG8: {
390 {
391 auto res = Color::DecodeRG8(source + VideoCore::GetMortonOffset(x, y, 2)); 376 auto res = Color::DecodeRG8(source + VideoCore::GetMortonOffset(x, y, 2));
392 return { res.r(), res.g(), 0, 255 }; 377 return {res.r(), res.g(), 0, 255};
393 } 378 }
394 379
395 case Regs::TextureFormat::I8: 380 case Regs::TextureFormat::I8: {
396 {
397 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); 381 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
398 return { *source_ptr, *source_ptr, *source_ptr, 255 }; 382 return {*source_ptr, *source_ptr, *source_ptr, 255};
399 } 383 }
400 384
401 case Regs::TextureFormat::A8: 385 case Regs::TextureFormat::A8: {
402 {
403 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); 386 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
404 387
405 if (disable_alpha) { 388 if (disable_alpha) {
406 return { *source_ptr, *source_ptr, *source_ptr, 255 }; 389 return {*source_ptr, *source_ptr, *source_ptr, 255};
407 } else { 390 } else {
408 return { 0, 0, 0, *source_ptr }; 391 return {0, 0, 0, *source_ptr};
409 } 392 }
410 } 393 }
411 394
412 case Regs::TextureFormat::IA4: 395 case Regs::TextureFormat::IA4: {
413 {
414 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); 396 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
415 397
416 u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); 398 u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4);
@@ -418,25 +400,23 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
418 400
419 if (disable_alpha) { 401 if (disable_alpha) {
420 // Show intensity as red, alpha as green 402 // Show intensity as red, alpha as green
421 return { i, a, 0, 255 }; 403 return {i, a, 0, 255};
422 } else { 404 } else {
423 return { i, i, i, a }; 405 return {i, i, i, a};
424 } 406 }
425 } 407 }
426 408
427 case Regs::TextureFormat::I4: 409 case Regs::TextureFormat::I4: {
428 {
429 u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1); 410 u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1);
430 const u8* source_ptr = source + morton_offset / 2; 411 const u8* source_ptr = source + morton_offset / 2;
431 412
432 u8 i = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF); 413 u8 i = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF);
433 i = Color::Convert4To8(i); 414 i = Color::Convert4To8(i);
434 415
435 return { i, i, i, 255 }; 416 return {i, i, i, 255};
436 } 417 }
437 418
438 case Regs::TextureFormat::A4: 419 case Regs::TextureFormat::A4: {
439 {
440 u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1); 420 u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1);
441 const u8* source_ptr = source + morton_offset / 2; 421 const u8* source_ptr = source + morton_offset / 2;
442 422
@@ -444,15 +424,14 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
444 a = Color::Convert4To8(a); 424 a = Color::Convert4To8(a);
445 425
446 if (disable_alpha) { 426 if (disable_alpha) {
447 return { a, a, a, 255 }; 427 return {a, a, a, 255};
448 } else { 428 } else {
449 return { 0, 0, 0, a }; 429 return {0, 0, 0, a};
450 } 430 }
451 } 431 }
452 432
453 case Regs::TextureFormat::ETC1: 433 case Regs::TextureFormat::ETC1:
454 case Regs::TextureFormat::ETC1A4: 434 case Regs::TextureFormat::ETC1A4: {
455 {
456 bool has_alpha = (info.format == Regs::TextureFormat::ETC1A4); 435 bool has_alpha = (info.format == Regs::TextureFormat::ETC1A4);
457 436
458 // ETC1 further subdivides each 8x8 tile into four 4x4 subtiles 437 // ETC1 further subdivides each 8x8 tile into four 4x4 subtiles
@@ -462,10 +441,9 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
462 int subtile_index = ((x / subtile_width) & 1) + 2 * ((y / subtile_height) & 1); 441 int subtile_index = ((x / subtile_width) & 1) + 2 * ((y / subtile_height) & 1);
463 unsigned subtile_bytes = has_alpha ? 2 : 1; // TODO: Name... 442 unsigned subtile_bytes = has_alpha ? 2 : 1; // TODO: Name...
464 443
465 const u64* source_ptr = (const u64*)(source 444 const u64* source_ptr = (const u64*)(source + coarse_x * subtile_bytes * 4 +
466 + coarse_x * subtile_bytes * 4 445 coarse_y * subtile_bytes * 4 * (info.width / 8) +
467 + coarse_y * subtile_bytes * 4 * (info.width / 8) 446 subtile_index * subtile_bytes * 8);
468 + subtile_index * subtile_bytes * 8);
469 u64 alpha = 0xFFFFFFFFFFFFFFFF; 447 u64 alpha = 0xFFFFFFFFFFFFFFFF;
470 if (has_alpha) { 448 if (has_alpha) {
471 alpha = *source_ptr; 449 alpha = *source_ptr;
@@ -474,7 +452,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
474 452
475 union ETC1Tile { 453 union ETC1Tile {
476 // Each of these two is a collection of 16 bits (one per lookup value) 454 // Each of these two is a collection of 16 bits (one per lookup value)
477 BitField< 0, 16, u64> table_subindexes; 455 BitField<0, 16, u64> table_subindexes;
478 BitField<16, 16, u64> negation_flags; 456 BitField<16, 16, u64> negation_flags;
479 457
480 unsigned GetTableSubIndex(unsigned index) const { 458 unsigned GetTableSubIndex(unsigned index) const {
@@ -547,11 +525,18 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
547 } 525 }
548 526
549 // Add modifier 527 // Add modifier
550 unsigned table_index = static_cast<int>((x < 2) ? table_index_1.Value() : table_index_2.Value()); 528 unsigned table_index =
529 static_cast<int>((x < 2) ? table_index_1.Value() : table_index_2.Value());
551 530
552 static const std::array<std::array<u8, 2>, 8> etc1_modifier_table = {{ 531 static const std::array<std::array<u8, 2>, 8> etc1_modifier_table = {{
553 {{ 2, 8 }}, {{ 5, 17 }}, {{ 9, 29 }}, {{ 13, 42 }}, 532 {{2, 8}},
554 {{ 18, 60 }}, {{ 24, 80 }}, {{ 33, 106 }}, {{ 47, 183 }} 533 {{5, 17}},
534 {{9, 29}},
535 {{13, 42}},
536 {{18, 60}},
537 {{24, 80}},
538 {{33, 106}},
539 {{47, 183}},
555 }}; 540 }};
556 541
557 int modifier = etc1_modifier_table.at(table_index).at(GetTableSubIndex(texel)); 542 int modifier = etc1_modifier_table.at(table_index).at(GetTableSubIndex(texel));
@@ -564,7 +549,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
564 549
565 return ret.Cast<u8>(); 550 return ret.Cast<u8>();
566 } 551 }
567 } const *etc1_tile = reinterpret_cast<const ETC1Tile*>(source_ptr); 552 } const* etc1_tile = reinterpret_cast<const ETC1Tile*>(source_ptr);
568 553
569 alpha >>= 4 * ((x & 3) * 4 + (y & 3)); 554 alpha >>= 4 * ((x & 3) * 4 + (y & 3));
570 return Math::MakeVec(etc1_tile->GetRGB(x & 3, y & 3), 555 return Math::MakeVec(etc1_tile->GetRGB(x & 3, y & 3),
@@ -579,8 +564,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
579} 564}
580 565
581TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config, 566TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config,
582 const Regs::TextureFormat& format) 567 const Regs::TextureFormat& format) {
583{
584 TextureInfo info; 568 TextureInfo info;
585 info.physical_address = config.GetPhysicalAddress(); 569 info.physical_address = config.GetPhysicalAddress();
586 info.width = config.width; 570 info.width = config.width;
@@ -595,13 +579,13 @@ TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config,
595static void WriteIOFile(png_structp png_ptr, png_bytep data, png_size_t length) { 579static void WriteIOFile(png_structp png_ptr, png_bytep data, png_size_t length) {
596 auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr)); 580 auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr));
597 if (!fp->WriteBytes(data, length)) 581 if (!fp->WriteBytes(data, length))
598 png_error(png_ptr, "Failed to write to output PNG file."); 582 png_error(png_ptr, "Failed to write to output PNG file.");
599} 583}
600 584
601static void FlushIOFile(png_structp png_ptr) { 585static void FlushIOFile(png_structp png_ptr) {
602 auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr)); 586 auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr));
603 if (!fp->Flush()) 587 if (!fp->Flush())
604 png_error(png_ptr, "Failed to flush to output PNG file."); 588 png_error(png_ptr, "Failed to flush to output PNG file.");
605} 589}
606#endif 590#endif
607 591
@@ -614,7 +598,8 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
614 598
615 // Write data to file 599 // Write data to file
616 static int dump_index = 0; 600 static int dump_index = 0;
617 std::string filename = std::string("texture_dump") + std::to_string(++dump_index) + std::string(".png"); 601 std::string filename =
602 std::string("texture_dump") + std::to_string(++dump_index) + std::string(".png");
618 u32 row_stride = texture_config.width * 3; 603 u32 row_stride = texture_config.width * 3;
619 604
620 u8* buf; 605 u8* buf;
@@ -632,7 +617,6 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
632 if (png_ptr == nullptr) { 617 if (png_ptr == nullptr) {
633 LOG_ERROR(Debug_GPU, "Could not allocate write struct"); 618 LOG_ERROR(Debug_GPU, "Could not allocate write struct");
634 goto finalise; 619 goto finalise;
635
636 } 620 }
637 621
638 // Initialize info structure 622 // Initialize info structure
@@ -651,9 +635,9 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
651 png_set_write_fn(png_ptr, static_cast<void*>(&fp), WriteIOFile, FlushIOFile); 635 png_set_write_fn(png_ptr, static_cast<void*>(&fp), WriteIOFile, FlushIOFile);
652 636
653 // Write header (8 bit color depth) 637 // Write header (8 bit color depth)
654 png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height, 638 png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height, 8,
655 8, PNG_COLOR_TYPE_RGB /*_ALPHA*/, PNG_INTERLACE_NONE, 639 PNG_COLOR_TYPE_RGB /*_ALPHA*/, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE,
656 PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); 640 PNG_FILTER_TYPE_BASE);
657 641
658 png_text title_text; 642 png_text title_text;
659 title_text.compression = PNG_TEXT_COMPRESSION_NONE; 643 title_text.compression = PNG_TEXT_COMPRESSION_NONE;
@@ -672,15 +656,14 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
672 info.stride = row_stride; 656 info.stride = row_stride;
673 info.format = g_state.regs.texture0_format; 657 info.format = g_state.regs.texture0_format;
674 Math::Vec4<u8> texture_color = LookupTexture(data, x, y, info); 658 Math::Vec4<u8> texture_color = LookupTexture(data, x, y, info);
675 buf[3 * x + y * row_stride ] = texture_color.r(); 659 buf[3 * x + y * row_stride] = texture_color.r();
676 buf[3 * x + y * row_stride + 1] = texture_color.g(); 660 buf[3 * x + y * row_stride + 1] = texture_color.g();
677 buf[3 * x + y * row_stride + 2] = texture_color.b(); 661 buf[3 * x + y * row_stride + 2] = texture_color.b();
678 } 662 }
679 } 663 }
680 664
681 // Write image data 665 // Write image data
682 for (unsigned y = 0; y < texture_config.height; ++y) 666 for (unsigned y = 0; y < texture_config.height; ++y) {
683 {
684 u8* row_ptr = (u8*)buf + y * row_stride; 667 u8* row_ptr = (u8*)buf + y * row_stride;
685 png_write_row(png_ptr, row_ptr); 668 png_write_row(png_ptr, row_ptr);
686 } 669 }
@@ -691,12 +674,15 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
691 png_write_end(png_ptr, nullptr); 674 png_write_end(png_ptr, nullptr);
692 675
693finalise: 676finalise:
694 if (info_ptr != nullptr) png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1); 677 if (info_ptr != nullptr)
695 if (png_ptr != nullptr) png_destroy_write_struct(&png_ptr, (png_infopp)nullptr); 678 png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1);
679 if (png_ptr != nullptr)
680 png_destroy_write_struct(&png_ptr, (png_infopp) nullptr);
696#endif 681#endif
697} 682}
698 683
699static std::string ReplacePattern(const std::string& input, const std::string& pattern, const std::string& replacement) { 684static std::string ReplacePattern(const std::string& input, const std::string& pattern,
685 const std::string& replacement) {
700 size_t start = input.find(pattern); 686 size_t start = input.find(pattern);
701 if (start == std::string::npos) 687 if (start == std::string::npos)
702 return input; 688 return input;
@@ -709,16 +695,16 @@ static std::string ReplacePattern(const std::string& input, const std::string& p
709static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfig::Source& source) { 695static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfig::Source& source) {
710 using Source = Pica::Regs::TevStageConfig::Source; 696 using Source = Pica::Regs::TevStageConfig::Source;
711 static const std::map<Source, std::string> source_map = { 697 static const std::map<Source, std::string> source_map = {
712 { Source::PrimaryColor, "PrimaryColor" }, 698 {Source::PrimaryColor, "PrimaryColor"},
713 { Source::PrimaryFragmentColor, "PrimaryFragmentColor" }, 699 {Source::PrimaryFragmentColor, "PrimaryFragmentColor"},
714 { Source::SecondaryFragmentColor, "SecondaryFragmentColor" }, 700 {Source::SecondaryFragmentColor, "SecondaryFragmentColor"},
715 { Source::Texture0, "Texture0" }, 701 {Source::Texture0, "Texture0"},
716 { Source::Texture1, "Texture1" }, 702 {Source::Texture1, "Texture1"},
717 { Source::Texture2, "Texture2" }, 703 {Source::Texture2, "Texture2"},
718 { Source::Texture3, "Texture3" }, 704 {Source::Texture3, "Texture3"},
719 { Source::PreviousBuffer, "PreviousBuffer" }, 705 {Source::PreviousBuffer, "PreviousBuffer"},
720 { Source::Constant, "Constant" }, 706 {Source::Constant, "Constant"},
721 { Source::Previous, "Previous" }, 707 {Source::Previous, "Previous"},
722 }; 708 };
723 709
724 const auto src_it = source_map.find(source); 710 const auto src_it = source_map.find(source);
@@ -728,19 +714,21 @@ static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfi
728 return src_it->second; 714 return src_it->second;
729} 715}
730 716
731static std::string GetTevStageConfigColorSourceString(const Pica::Regs::TevStageConfig::Source& source, const Pica::Regs::TevStageConfig::ColorModifier modifier) { 717static std::string GetTevStageConfigColorSourceString(
718 const Pica::Regs::TevStageConfig::Source& source,
719 const Pica::Regs::TevStageConfig::ColorModifier modifier) {
732 using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier; 720 using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier;
733 static const std::map<ColorModifier, std::string> color_modifier_map = { 721 static const std::map<ColorModifier, std::string> color_modifier_map = {
734 { ColorModifier::SourceColor, "%source.rgb" }, 722 {ColorModifier::SourceColor, "%source.rgb"},
735 { ColorModifier::OneMinusSourceColor, "(1.0 - %source.rgb)" }, 723 {ColorModifier::OneMinusSourceColor, "(1.0 - %source.rgb)"},
736 { ColorModifier::SourceAlpha, "%source.aaa" }, 724 {ColorModifier::SourceAlpha, "%source.aaa"},
737 { ColorModifier::OneMinusSourceAlpha, "(1.0 - %source.aaa)" }, 725 {ColorModifier::OneMinusSourceAlpha, "(1.0 - %source.aaa)"},
738 { ColorModifier::SourceRed, "%source.rrr" }, 726 {ColorModifier::SourceRed, "%source.rrr"},
739 { ColorModifier::OneMinusSourceRed, "(1.0 - %source.rrr)" }, 727 {ColorModifier::OneMinusSourceRed, "(1.0 - %source.rrr)"},
740 { ColorModifier::SourceGreen, "%source.ggg" }, 728 {ColorModifier::SourceGreen, "%source.ggg"},
741 { ColorModifier::OneMinusSourceGreen, "(1.0 - %source.ggg)" }, 729 {ColorModifier::OneMinusSourceGreen, "(1.0 - %source.ggg)"},
742 { ColorModifier::SourceBlue, "%source.bbb" }, 730 {ColorModifier::SourceBlue, "%source.bbb"},
743 { ColorModifier::OneMinusSourceBlue, "(1.0 - %source.bbb)" }, 731 {ColorModifier::OneMinusSourceBlue, "(1.0 - %source.bbb)"},
744 }; 732 };
745 733
746 auto src_str = GetTevStageConfigSourceString(source); 734 auto src_str = GetTevStageConfigSourceString(source);
@@ -752,17 +740,19 @@ static std::string GetTevStageConfigColorSourceString(const Pica::Regs::TevStage
752 return ReplacePattern(modifier_str, "%source", src_str); 740 return ReplacePattern(modifier_str, "%source", src_str);
753} 741}
754 742
755static std::string GetTevStageConfigAlphaSourceString(const Pica::Regs::TevStageConfig::Source& source, const Pica::Regs::TevStageConfig::AlphaModifier modifier) { 743static std::string GetTevStageConfigAlphaSourceString(
744 const Pica::Regs::TevStageConfig::Source& source,
745 const Pica::Regs::TevStageConfig::AlphaModifier modifier) {
756 using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier; 746 using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier;
757 static const std::map<AlphaModifier, std::string> alpha_modifier_map = { 747 static const std::map<AlphaModifier, std::string> alpha_modifier_map = {
758 { AlphaModifier::SourceAlpha, "%source.a" }, 748 {AlphaModifier::SourceAlpha, "%source.a"},
759 { AlphaModifier::OneMinusSourceAlpha, "(1.0 - %source.a)" }, 749 {AlphaModifier::OneMinusSourceAlpha, "(1.0 - %source.a)"},
760 { AlphaModifier::SourceRed, "%source.r" }, 750 {AlphaModifier::SourceRed, "%source.r"},
761 { AlphaModifier::OneMinusSourceRed, "(1.0 - %source.r)" }, 751 {AlphaModifier::OneMinusSourceRed, "(1.0 - %source.r)"},
762 { AlphaModifier::SourceGreen, "%source.g" }, 752 {AlphaModifier::SourceGreen, "%source.g"},
763 { AlphaModifier::OneMinusSourceGreen, "(1.0 - %source.g)" }, 753 {AlphaModifier::OneMinusSourceGreen, "(1.0 - %source.g)"},
764 { AlphaModifier::SourceBlue, "%source.b" }, 754 {AlphaModifier::SourceBlue, "%source.b"},
765 { AlphaModifier::OneMinusSourceBlue, "(1.0 - %source.b)" }, 755 {AlphaModifier::OneMinusSourceBlue, "(1.0 - %source.b)"},
766 }; 756 };
767 757
768 auto src_str = GetTevStageConfigSourceString(source); 758 auto src_str = GetTevStageConfigSourceString(source);
@@ -774,18 +764,19 @@ static std::string GetTevStageConfigAlphaSourceString(const Pica::Regs::TevStage
774 return ReplacePattern(modifier_str, "%source", src_str); 764 return ReplacePattern(modifier_str, "%source", src_str);
775} 765}
776 766
777static std::string GetTevStageConfigOperationString(const Pica::Regs::TevStageConfig::Operation& operation) { 767static std::string GetTevStageConfigOperationString(
768 const Pica::Regs::TevStageConfig::Operation& operation) {
778 using Operation = Pica::Regs::TevStageConfig::Operation; 769 using Operation = Pica::Regs::TevStageConfig::Operation;
779 static const std::map<Operation, std::string> combiner_map = { 770 static const std::map<Operation, std::string> combiner_map = {
780 { Operation::Replace, "%source1" }, 771 {Operation::Replace, "%source1"},
781 { Operation::Modulate, "(%source1 * %source2)" }, 772 {Operation::Modulate, "(%source1 * %source2)"},
782 { Operation::Add, "(%source1 + %source2)" }, 773 {Operation::Add, "(%source1 + %source2)"},
783 { Operation::AddSigned, "(%source1 + %source2) - 0.5" }, 774 {Operation::AddSigned, "(%source1 + %source2) - 0.5"},
784 { Operation::Lerp, "lerp(%source1, %source2, %source3)" }, 775 {Operation::Lerp, "lerp(%source1, %source2, %source3)"},
785 { Operation::Subtract, "(%source1 - %source2)" }, 776 {Operation::Subtract, "(%source1 - %source2)"},
786 { Operation::Dot3_RGB, "dot(%source1, %source2)" }, 777 {Operation::Dot3_RGB, "dot(%source1, %source2)"},
787 { Operation::MultiplyThenAdd, "((%source1 * %source2) + %source3)" }, 778 {Operation::MultiplyThenAdd, "((%source1 * %source2) + %source3)"},
788 { Operation::AddThenMultiply, "((%source1 + %source2) * %source3)" }, 779 {Operation::AddThenMultiply, "((%source1 + %source2) * %source3)"},
789 }; 780 };
790 781
791 const auto op_it = combiner_map.find(operation); 782 const auto op_it = combiner_map.find(operation);
@@ -797,23 +788,37 @@ static std::string GetTevStageConfigOperationString(const Pica::Regs::TevStageCo
797 788
798std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage) { 789std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage) {
799 auto op_str = GetTevStageConfigOperationString(tev_stage.color_op); 790 auto op_str = GetTevStageConfigOperationString(tev_stage.color_op);
800 op_str = ReplacePattern(op_str, "%source1", GetTevStageConfigColorSourceString(tev_stage.color_source1, tev_stage.color_modifier1)); 791 op_str = ReplacePattern(
801 op_str = ReplacePattern(op_str, "%source2", GetTevStageConfigColorSourceString(tev_stage.color_source2, tev_stage.color_modifier2)); 792 op_str, "%source1",
802 return ReplacePattern(op_str, "%source3", GetTevStageConfigColorSourceString(tev_stage.color_source3, tev_stage.color_modifier3)); 793 GetTevStageConfigColorSourceString(tev_stage.color_source1, tev_stage.color_modifier1));
794 op_str = ReplacePattern(
795 op_str, "%source2",
796 GetTevStageConfigColorSourceString(tev_stage.color_source2, tev_stage.color_modifier2));
797 return ReplacePattern(
798 op_str, "%source3",
799 GetTevStageConfigColorSourceString(tev_stage.color_source3, tev_stage.color_modifier3));
803} 800}
804 801
805std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage) { 802std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage) {
806 auto op_str = GetTevStageConfigOperationString(tev_stage.alpha_op); 803 auto op_str = GetTevStageConfigOperationString(tev_stage.alpha_op);
807 op_str = ReplacePattern(op_str, "%source1", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source1, tev_stage.alpha_modifier1)); 804 op_str = ReplacePattern(
808 op_str = ReplacePattern(op_str, "%source2", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source2, tev_stage.alpha_modifier2)); 805 op_str, "%source1",
809 return ReplacePattern(op_str, "%source3", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source3, tev_stage.alpha_modifier3)); 806 GetTevStageConfigAlphaSourceString(tev_stage.alpha_source1, tev_stage.alpha_modifier1));
807 op_str = ReplacePattern(
808 op_str, "%source2",
809 GetTevStageConfigAlphaSourceString(tev_stage.alpha_source2, tev_stage.alpha_modifier2));
810 return ReplacePattern(
811 op_str, "%source3",
812 GetTevStageConfigAlphaSourceString(tev_stage.alpha_source3, tev_stage.alpha_modifier3));
810} 813}
811 814
812void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages) { 815void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages) {
813 std::string stage_info = "Tev setup:\n"; 816 std::string stage_info = "Tev setup:\n";
814 for (size_t index = 0; index < stages.size(); ++index) { 817 for (size_t index = 0; index < stages.size(); ++index) {
815 const auto& tev_stage = stages[index]; 818 const auto& tev_stage = stages[index];
816 stage_info += "Stage " + std::to_string(index) + ": " + GetTevStageConfigColorCombinerString(tev_stage) + " " + GetTevStageConfigAlphaCombinerString(tev_stage) + "\n"; 819 stage_info += "Stage " + std::to_string(index) + ": " +
820 GetTevStageConfigColorCombinerString(tev_stage) + " " +
821 GetTevStageConfigAlphaCombinerString(tev_stage) + "\n";
817 } 822 }
818 LOG_TRACE(HW_GPU, "%s", stage_info.c_str()); 823 LOG_TRACE(HW_GPU, "%s", stage_info.c_str());
819} 824}
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index 92e9734ae..189c93abb 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -15,10 +15,8 @@
15#include <string> 15#include <string>
16#include <utility> 16#include <utility>
17#include <vector> 17#include <vector>
18
19#include "common/common_types.h" 18#include "common/common_types.h"
20#include "common/vector_math.h" 19#include "common/vector_math.h"
21
22#include "video_core/pica.h" 20#include "video_core/pica.h"
23 21
24namespace CiTrace { 22namespace CiTrace {
@@ -53,13 +51,16 @@ public:
53 * Most importantly this is used for our debugger GUI. 51 * Most importantly this is used for our debugger GUI.
54 * 52 *
55 * To implement event handling, override the OnPicaBreakPointHit and OnPicaResume methods. 53 * To implement event handling, override the OnPicaBreakPointHit and OnPicaResume methods.
56 * @warning All BreakPointObservers need to be on the same thread to guarantee thread-safe state access 54 * @warning All BreakPointObservers need to be on the same thread to guarantee thread-safe state
57 * @todo Evaluate an alternative interface, in which there is only one managing observer and multiple child observers running (by design) on the same thread. 55 * access
56 * @todo Evaluate an alternative interface, in which there is only one managing observer and
57 * multiple child observers running (by design) on the same thread.
58 */ 58 */
59 class BreakPointObserver { 59 class BreakPointObserver {
60 public: 60 public:
61 /// Constructs the object such that it observes events of the given DebugContext. 61 /// Constructs the object such that it observes events of the given DebugContext.
62 BreakPointObserver(std::shared_ptr<DebugContext> debug_context) : context_weak(debug_context) { 62 BreakPointObserver(std::shared_ptr<DebugContext> debug_context)
63 : context_weak(debug_context) {
63 std::unique_lock<std::mutex> lock(debug_context->breakpoint_mutex); 64 std::unique_lock<std::mutex> lock(debug_context->breakpoint_mutex);
64 debug_context->breakpoint_observers.push_back(this); 65 debug_context->breakpoint_observers.push_back(this);
65 } 66 }
@@ -84,15 +85,13 @@ public:
84 * @param data Optional data pointer (if unused, this is a nullptr) 85 * @param data Optional data pointer (if unused, this is a nullptr)
85 * @note This function will perform nothing unless it is overridden in the child class. 86 * @note This function will perform nothing unless it is overridden in the child class.
86 */ 87 */
87 virtual void OnPicaBreakPointHit(Event, void*) { 88 virtual void OnPicaBreakPointHit(Event, void*) {}
88 }
89 89
90 /** 90 /**
91 * Action to perform when emulation is resumed from a breakpoint. 91 * Action to perform when emulation is resumed from a breakpoint.
92 * @note This function will perform nothing unless it is overridden in the child class. 92 * @note This function will perform nothing unless it is overridden in the child class.
93 */ 93 */
94 virtual void OnPicaResume() { 94 virtual void OnPicaResume() {}
95 }
96 95
97 protected: 96 protected:
98 /** 97 /**
@@ -122,7 +121,8 @@ public:
122 * The current thread then is halted until Resume() is called from another thread (or until 121 * The current thread then is halted until Resume() is called from another thread (or until
123 * emulation is stopped). 122 * emulation is stopped).
124 * @param event Event which has happened 123 * @param event Event which has happened
125 * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until Resume() is called. 124 * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until
125 * Resume() is called.
126 */ 126 */
127 void OnEvent(Event event, void* data) { 127 void OnEvent(Event event, void* data) {
128 // This check is left in the header to allow the compiler to inline it. 128 // This check is left in the header to allow the compiler to inline it.
@@ -132,11 +132,12 @@ public:
132 DoOnEvent(event, data); 132 DoOnEvent(event, data);
133 } 133 }
134 134
135 void DoOnEvent(Event event, void *data); 135 void DoOnEvent(Event event, void* data);
136 136
137 /** 137 /**
138 * Resume from the current breakpoint. 138 * Resume from the current breakpoint.
139 * @warning Calling this from the same thread that OnEvent was called in will cause a deadlock. Calling from any other thread is safe. 139 * @warning Calling this from the same thread that OnEvent was called in will cause a deadlock.
140 * Calling from any other thread is safe.
140 */ 141 */
141 void Resume(); 142 void Resume();
142 143
@@ -144,7 +145,7 @@ public:
144 * Delete all set breakpoints and resume emulation. 145 * Delete all set breakpoints and resume emulation.
145 */ 146 */
146 void ClearBreakpoints() { 147 void ClearBreakpoints() {
147 for (auto &bp : breakpoints) { 148 for (auto& bp : breakpoints) {
148 bp.enabled = false; 149 bp.enabled = false;
149 } 150 }
150 Resume(); 151 Resume();
@@ -182,8 +183,8 @@ namespace DebugUtils {
182#define PICA_LOG_TEV 0 183#define PICA_LOG_TEV 0
183 184
184void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, 185void DumpShader(const std::string& filename, const Regs::ShaderConfig& config,
185 const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes); 186 const Shader::ShaderSetup& setup,
186 187 const Regs::VSOutputAttributes* output_attributes);
187 188
188// Utility class to log Pica commands. 189// Utility class to log Pica commands.
189struct PicaTrace { 190struct PicaTrace {
@@ -216,7 +217,10 @@ struct TextureInfo {
216 * @param source Source pointer to read data from 217 * @param source Source pointer to read data from
217 * @param s,t Texture coordinates to read from 218 * @param s,t Texture coordinates to read from
218 * @param info TextureInfo object describing the texture setup 219 * @param info TextureInfo object describing the texture setup
219 * @param disable_alpha This is used for debug widgets which use this method to display textures without providing a good way to visualize alpha by themselves. If true, this will return 255 for the alpha component, and either drop the information entirely or store it in an "unused" color channel. 220 * @param disable_alpha This is used for debug widgets which use this method to display textures
221 * without providing a good way to visualize alpha by themselves. If true, this will return 255 for
222 * the alpha component, and either drop the information entirely or store it in an "unused" color
223 * channel.
220 * @todo Eventually we should get rid of the disable_alpha parameter. 224 * @todo Eventually we should get rid of the disable_alpha parameter.
221 */ 225 */
222const Math::Vec4<u8> LookupTexture(const u8* source, int s, int t, const TextureInfo& info, 226const Math::Vec4<u8> LookupTexture(const u8* source, int s, int t, const TextureInfo& info,
@@ -237,7 +241,8 @@ class MemoryAccessTracker {
237 /// Combine overlapping and close ranges 241 /// Combine overlapping and close ranges
238 void SimplifyRanges() { 242 void SimplifyRanges() {
239 for (auto it = ranges.begin(); it != ranges.end(); ++it) { 243 for (auto it = ranges.begin(); it != ranges.end(); ++it) {
240 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too 244 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined,
245 // too
241 auto it2 = std::next(it); 246 auto it2 = std::next(it);
242 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) { 247 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
243 it->second = std::max(it->second, it2->first + it2->second - it->first); 248 it->second = std::max(it->second, it2->first + it2->second - it->first);
diff --git a/src/video_core/gpu_debugger.h b/src/video_core/gpu_debugger.h
index a3aab216c..3c6636d66 100644
--- a/src/video_core/gpu_debugger.h
+++ b/src/video_core/gpu_debugger.h
@@ -7,20 +7,16 @@
7#include <algorithm> 7#include <algorithm>
8#include <functional> 8#include <functional>
9#include <vector> 9#include <vector>
10
11#include "core/hle/service/gsp_gpu.h" 10#include "core/hle/service/gsp_gpu.h"
12 11
13class GraphicsDebugger 12class GraphicsDebugger {
14{
15public: 13public:
16 // Base class for all objects which need to be notified about GPU events 14 // Base class for all objects which need to be notified about GPU events
17 class DebuggerObserver 15 class DebuggerObserver {
18 {
19 public: 16 public:
20 DebuggerObserver() : observed(nullptr) { } 17 DebuggerObserver() : observed(nullptr) {}
21 18
22 virtual ~DebuggerObserver() 19 virtual ~DebuggerObserver() {
23 {
24 if (observed) 20 if (observed)
25 observed->UnregisterObserver(this); 21 observed->UnregisterObserver(this);
26 } 22 }
@@ -31,15 +27,13 @@ public:
31 * @param total_command_count Total number of commands in the GX history 27 * @param total_command_count Total number of commands in the GX history
32 * @note All methods in this class are called from the GSP thread 28 * @note All methods in this class are called from the GSP thread
33 */ 29 */
34 virtual void GXCommandProcessed(int total_command_count) 30 virtual void GXCommandProcessed(int total_command_count) {
35 { 31 const GSP_GPU::Command& cmd = observed->ReadGXCommandHistory(total_command_count - 1);
36 const GSP_GPU::Command& cmd = observed->ReadGXCommandHistory(total_command_count-1);
37 LOG_TRACE(Debug_GPU, "Received command: id=%x", (int)cmd.id.Value()); 32 LOG_TRACE(Debug_GPU, "Received command: id=%x", (int)cmd.id.Value());
38 } 33 }
39 34
40 protected: 35 protected:
41 const GraphicsDebugger* GetDebugger() const 36 const GraphicsDebugger* GetDebugger() const {
42 {
43 return observed; 37 return observed;
44 } 38 }
45 39
@@ -49,8 +43,7 @@ public:
49 friend class GraphicsDebugger; 43 friend class GraphicsDebugger;
50 }; 44 };
51 45
52 void GXCommandProcessed(u8* command_data) 46 void GXCommandProcessed(u8* command_data) {
53 {
54 if (observers.empty()) 47 if (observers.empty())
55 return; 48 return;
56 49
@@ -60,33 +53,29 @@ public:
60 memcpy(&cmd, command_data, sizeof(GSP_GPU::Command)); 53 memcpy(&cmd, command_data, sizeof(GSP_GPU::Command));
61 54
62 ForEachObserver([this](DebuggerObserver* observer) { 55 ForEachObserver([this](DebuggerObserver* observer) {
63 observer->GXCommandProcessed(static_cast<int>(this->gx_command_history.size())); 56 observer->GXCommandProcessed(static_cast<int>(this->gx_command_history.size()));
64 } ); 57 });
65 } 58 }
66 59
67 const GSP_GPU::Command& ReadGXCommandHistory(int index) const 60 const GSP_GPU::Command& ReadGXCommandHistory(int index) const {
68 {
69 // TODO: Is this thread-safe? 61 // TODO: Is this thread-safe?
70 return gx_command_history[index]; 62 return gx_command_history[index];
71 } 63 }
72 64
73 void RegisterObserver(DebuggerObserver* observer) 65 void RegisterObserver(DebuggerObserver* observer) {
74 {
75 // TODO: Check for duplicates 66 // TODO: Check for duplicates
76 observers.push_back(observer); 67 observers.push_back(observer);
77 observer->observed = this; 68 observer->observed = this;
78 } 69 }
79 70
80 void UnregisterObserver(DebuggerObserver* observer) 71 void UnregisterObserver(DebuggerObserver* observer) {
81 {
82 observers.erase(std::remove(observers.begin(), observers.end(), observer), observers.end()); 72 observers.erase(std::remove(observers.begin(), observers.end(), observer), observers.end());
83 observer->observed = nullptr; 73 observer->observed = nullptr;
84 } 74 }
85 75
86private: 76private:
87 void ForEachObserver(std::function<void (DebuggerObserver*)> func) 77 void ForEachObserver(std::function<void(DebuggerObserver*)> func) {
88 { 78 std::for_each(observers.begin(), observers.end(), func);
89 std::for_each(observers.begin(),observers.end(), func);
90 } 79 }
91 80
92 std::vector<DebuggerObserver*> observers; 81 std::vector<DebuggerObserver*> observers;
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp
index ec78f9593..ce2bd455e 100644
--- a/src/video_core/pica.cpp
+++ b/src/video_core/pica.cpp
@@ -6,7 +6,6 @@
6#include <iterator> 6#include <iterator>
7#include <unordered_map> 7#include <unordered_map>
8#include <utility> 8#include <utility>
9
10#include "video_core/pica.h" 9#include "video_core/pica.h"
11#include "video_core/pica_state.h" 10#include "video_core/pica_state.h"
12#include "video_core/primitive_assembly.h" 11#include "video_core/primitive_assembly.h"
@@ -17,466 +16,466 @@ namespace Pica {
17State g_state; 16State g_state;
18 17
19static const std::pair<u16, const char*> register_names[] = { 18static const std::pair<u16, const char*> register_names[] = {
20 { 0x010, "GPUREG_FINALIZE" }, 19 {0x010, "GPUREG_FINALIZE"},
21 20
22 { 0x040, "GPUREG_FACECULLING_CONFIG" }, 21 {0x040, "GPUREG_FACECULLING_CONFIG"},
23 { 0x041, "GPUREG_VIEWPORT_WIDTH" }, 22 {0x041, "GPUREG_VIEWPORT_WIDTH"},
24 { 0x042, "GPUREG_VIEWPORT_INVW" }, 23 {0x042, "GPUREG_VIEWPORT_INVW"},
25 { 0x043, "GPUREG_VIEWPORT_HEIGHT" }, 24 {0x043, "GPUREG_VIEWPORT_HEIGHT"},
26 { 0x044, "GPUREG_VIEWPORT_INVH" }, 25 {0x044, "GPUREG_VIEWPORT_INVH"},
27 26
28 { 0x047, "GPUREG_FRAGOP_CLIP" }, 27 {0x047, "GPUREG_FRAGOP_CLIP"},
29 { 0x048, "GPUREG_FRAGOP_CLIP_DATA0" }, 28 {0x048, "GPUREG_FRAGOP_CLIP_DATA0"},
30 { 0x049, "GPUREG_FRAGOP_CLIP_DATA1" }, 29 {0x049, "GPUREG_FRAGOP_CLIP_DATA1"},
31 { 0x04A, "GPUREG_FRAGOP_CLIP_DATA2" }, 30 {0x04A, "GPUREG_FRAGOP_CLIP_DATA2"},
32 { 0x04B, "GPUREG_FRAGOP_CLIP_DATA3" }, 31 {0x04B, "GPUREG_FRAGOP_CLIP_DATA3"},
33 32
34 { 0x04D, "GPUREG_DEPTHMAP_SCALE" }, 33 {0x04D, "GPUREG_DEPTHMAP_SCALE"},
35 { 0x04E, "GPUREG_DEPTHMAP_OFFSET" }, 34 {0x04E, "GPUREG_DEPTHMAP_OFFSET"},
36 { 0x04F, "GPUREG_SH_OUTMAP_TOTAL" }, 35 {0x04F, "GPUREG_SH_OUTMAP_TOTAL"},
37 { 0x050, "GPUREG_SH_OUTMAP_O0" }, 36 {0x050, "GPUREG_SH_OUTMAP_O0"},
38 { 0x051, "GPUREG_SH_OUTMAP_O1" }, 37 {0x051, "GPUREG_SH_OUTMAP_O1"},
39 { 0x052, "GPUREG_SH_OUTMAP_O2" }, 38 {0x052, "GPUREG_SH_OUTMAP_O2"},
40 { 0x053, "GPUREG_SH_OUTMAP_O3" }, 39 {0x053, "GPUREG_SH_OUTMAP_O3"},
41 { 0x054, "GPUREG_SH_OUTMAP_O4" }, 40 {0x054, "GPUREG_SH_OUTMAP_O4"},
42 { 0x055, "GPUREG_SH_OUTMAP_O5" }, 41 {0x055, "GPUREG_SH_OUTMAP_O5"},
43 { 0x056, "GPUREG_SH_OUTMAP_O6" }, 42 {0x056, "GPUREG_SH_OUTMAP_O6"},
44 43
45 { 0x061, "GPUREG_EARLYDEPTH_FUNC" }, 44 {0x061, "GPUREG_EARLYDEPTH_FUNC"},
46 { 0x062, "GPUREG_EARLYDEPTH_TEST1" }, 45 {0x062, "GPUREG_EARLYDEPTH_TEST1"},
47 { 0x063, "GPUREG_EARLYDEPTH_CLEAR" }, 46 {0x063, "GPUREG_EARLYDEPTH_CLEAR"},
48 { 0x064, "GPUREG_SH_OUTATTR_MODE" }, 47 {0x064, "GPUREG_SH_OUTATTR_MODE"},
49 { 0x065, "GPUREG_SCISSORTEST_MODE" }, 48 {0x065, "GPUREG_SCISSORTEST_MODE"},
50 { 0x066, "GPUREG_SCISSORTEST_POS" }, 49 {0x066, "GPUREG_SCISSORTEST_POS"},
51 { 0x067, "GPUREG_SCISSORTEST_DIM" }, 50 {0x067, "GPUREG_SCISSORTEST_DIM"},
52 { 0x068, "GPUREG_VIEWPORT_XY" }, 51 {0x068, "GPUREG_VIEWPORT_XY"},
53 52
54 { 0x06A, "GPUREG_EARLYDEPTH_DATA" }, 53 {0x06A, "GPUREG_EARLYDEPTH_DATA"},
55 54
56 { 0x06D, "GPUREG_DEPTHMAP_ENABLE" }, 55 {0x06D, "GPUREG_DEPTHMAP_ENABLE"},
57 { 0x06E, "GPUREG_RENDERBUF_DIM" }, 56 {0x06E, "GPUREG_RENDERBUF_DIM"},
58 { 0x06F, "GPUREG_SH_OUTATTR_CLOCK" }, 57 {0x06F, "GPUREG_SH_OUTATTR_CLOCK"},
59 58
60 { 0x080, "GPUREG_TEXUNIT_CONFIG" }, 59 {0x080, "GPUREG_TEXUNIT_CONFIG"},
61 { 0x081, "GPUREG_TEXUNIT0_BORDER_COLOR" }, 60 {0x081, "GPUREG_TEXUNIT0_BORDER_COLOR"},
62 { 0x082, "GPUREG_TEXUNIT0_DIM" }, 61 {0x082, "GPUREG_TEXUNIT0_DIM"},
63 { 0x083, "GPUREG_TEXUNIT0_PARAM" }, 62 {0x083, "GPUREG_TEXUNIT0_PARAM"},
64 { 0x084, "GPUREG_TEXUNIT0_LOD" }, 63 {0x084, "GPUREG_TEXUNIT0_LOD"},
65 { 0x085, "GPUREG_TEXUNIT0_ADDR1" }, 64 {0x085, "GPUREG_TEXUNIT0_ADDR1"},
66 { 0x086, "GPUREG_TEXUNIT0_ADDR2" }, 65 {0x086, "GPUREG_TEXUNIT0_ADDR2"},
67 { 0x087, "GPUREG_TEXUNIT0_ADDR3" }, 66 {0x087, "GPUREG_TEXUNIT0_ADDR3"},
68 { 0x088, "GPUREG_TEXUNIT0_ADDR4" }, 67 {0x088, "GPUREG_TEXUNIT0_ADDR4"},
69 { 0x089, "GPUREG_TEXUNIT0_ADDR5" }, 68 {0x089, "GPUREG_TEXUNIT0_ADDR5"},
70 { 0x08A, "GPUREG_TEXUNIT0_ADDR6" }, 69 {0x08A, "GPUREG_TEXUNIT0_ADDR6"},
71 { 0x08B, "GPUREG_TEXUNIT0_SHADOW" }, 70 {0x08B, "GPUREG_TEXUNIT0_SHADOW"},
72 71
73 { 0x08E, "GPUREG_TEXUNIT0_TYPE" }, 72 {0x08E, "GPUREG_TEXUNIT0_TYPE"},
74 { 0x08F, "GPUREG_LIGHTING_ENABLE0" }, 73 {0x08F, "GPUREG_LIGHTING_ENABLE0"},
75 74
76 { 0x091, "GPUREG_TEXUNIT1_BORDER_COLOR" }, 75 {0x091, "GPUREG_TEXUNIT1_BORDER_COLOR"},
77 { 0x092, "GPUREG_TEXUNIT1_DIM" }, 76 {0x092, "GPUREG_TEXUNIT1_DIM"},
78 { 0x093, "GPUREG_TEXUNIT1_PARAM" }, 77 {0x093, "GPUREG_TEXUNIT1_PARAM"},
79 { 0x094, "GPUREG_TEXUNIT1_LOD" }, 78 {0x094, "GPUREG_TEXUNIT1_LOD"},
80 { 0x095, "GPUREG_TEXUNIT1_ADDR" }, 79 {0x095, "GPUREG_TEXUNIT1_ADDR"},
81 { 0x096, "GPUREG_TEXUNIT1_TYPE" }, 80 {0x096, "GPUREG_TEXUNIT1_TYPE"},
82 81
83 { 0x099, "GPUREG_TEXUNIT2_BORDER_COLOR" }, 82 {0x099, "GPUREG_TEXUNIT2_BORDER_COLOR"},
84 { 0x09A, "GPUREG_TEXUNIT2_DIM" }, 83 {0x09A, "GPUREG_TEXUNIT2_DIM"},
85 { 0x09B, "GPUREG_TEXUNIT2_PARAM" }, 84 {0x09B, "GPUREG_TEXUNIT2_PARAM"},
86 { 0x09C, "GPUREG_TEXUNIT2_LOD" }, 85 {0x09C, "GPUREG_TEXUNIT2_LOD"},
87 { 0x09D, "GPUREG_TEXUNIT2_ADDR" }, 86 {0x09D, "GPUREG_TEXUNIT2_ADDR"},
88 { 0x09E, "GPUREG_TEXUNIT2_TYPE" }, 87 {0x09E, "GPUREG_TEXUNIT2_TYPE"},
89 88
90 { 0x0A8, "GPUREG_TEXUNIT3_PROCTEX0" }, 89 {0x0A8, "GPUREG_TEXUNIT3_PROCTEX0"},
91 { 0x0A9, "GPUREG_TEXUNIT3_PROCTEX1" }, 90 {0x0A9, "GPUREG_TEXUNIT3_PROCTEX1"},
92 { 0x0AA, "GPUREG_TEXUNIT3_PROCTEX2" }, 91 {0x0AA, "GPUREG_TEXUNIT3_PROCTEX2"},
93 { 0x0AB, "GPUREG_TEXUNIT3_PROCTEX3" }, 92 {0x0AB, "GPUREG_TEXUNIT3_PROCTEX3"},
94 { 0x0AC, "GPUREG_TEXUNIT3_PROCTEX4" }, 93 {0x0AC, "GPUREG_TEXUNIT3_PROCTEX4"},
95 { 0x0AD, "GPUREG_TEXUNIT3_PROCTEX5" }, 94 {0x0AD, "GPUREG_TEXUNIT3_PROCTEX5"},
96 95
97 { 0x0AF, "GPUREG_PROCTEX_LUT" }, 96 {0x0AF, "GPUREG_PROCTEX_LUT"},
98 { 0x0B0, "GPUREG_PROCTEX_LUT_DATA0" }, 97 {0x0B0, "GPUREG_PROCTEX_LUT_DATA0"},
99 { 0x0B1, "GPUREG_PROCTEX_LUT_DATA1" }, 98 {0x0B1, "GPUREG_PROCTEX_LUT_DATA1"},
100 { 0x0B2, "GPUREG_PROCTEX_LUT_DATA2" }, 99 {0x0B2, "GPUREG_PROCTEX_LUT_DATA2"},
101 { 0x0B3, "GPUREG_PROCTEX_LUT_DATA3" }, 100 {0x0B3, "GPUREG_PROCTEX_LUT_DATA3"},
102 { 0x0B4, "GPUREG_PROCTEX_LUT_DATA4" }, 101 {0x0B4, "GPUREG_PROCTEX_LUT_DATA4"},
103 { 0x0B5, "GPUREG_PROCTEX_LUT_DATA5" }, 102 {0x0B5, "GPUREG_PROCTEX_LUT_DATA5"},
104 { 0x0B6, "GPUREG_PROCTEX_LUT_DATA6" }, 103 {0x0B6, "GPUREG_PROCTEX_LUT_DATA6"},
105 { 0x0B7, "GPUREG_PROCTEX_LUT_DATA7" }, 104 {0x0B7, "GPUREG_PROCTEX_LUT_DATA7"},
106 105
107 { 0x0C0, "GPUREG_TEXENV0_SOURCE" }, 106 {0x0C0, "GPUREG_TEXENV0_SOURCE"},
108 { 0x0C1, "GPUREG_TEXENV0_OPERAND" }, 107 {0x0C1, "GPUREG_TEXENV0_OPERAND"},
109 { 0x0C2, "GPUREG_TEXENV0_COMBINER" }, 108 {0x0C2, "GPUREG_TEXENV0_COMBINER"},
110 { 0x0C3, "GPUREG_TEXENV0_COLOR" }, 109 {0x0C3, "GPUREG_TEXENV0_COLOR"},
111 { 0x0C4, "GPUREG_TEXENV0_SCALE" }, 110 {0x0C4, "GPUREG_TEXENV0_SCALE"},
112 111
113 { 0x0C8, "GPUREG_TEXENV1_SOURCE" }, 112 {0x0C8, "GPUREG_TEXENV1_SOURCE"},
114 { 0x0C9, "GPUREG_TEXENV1_OPERAND" }, 113 {0x0C9, "GPUREG_TEXENV1_OPERAND"},
115 { 0x0CA, "GPUREG_TEXENV1_COMBINER" }, 114 {0x0CA, "GPUREG_TEXENV1_COMBINER"},
116 { 0x0CB, "GPUREG_TEXENV1_COLOR" }, 115 {0x0CB, "GPUREG_TEXENV1_COLOR"},
117 { 0x0CC, "GPUREG_TEXENV1_SCALE" }, 116 {0x0CC, "GPUREG_TEXENV1_SCALE"},
118 117
119 { 0x0D0, "GPUREG_TEXENV2_SOURCE" }, 118 {0x0D0, "GPUREG_TEXENV2_SOURCE"},
120 { 0x0D1, "GPUREG_TEXENV2_OPERAND" }, 119 {0x0D1, "GPUREG_TEXENV2_OPERAND"},
121 { 0x0D2, "GPUREG_TEXENV2_COMBINER" }, 120 {0x0D2, "GPUREG_TEXENV2_COMBINER"},
122 { 0x0D3, "GPUREG_TEXENV2_COLOR" }, 121 {0x0D3, "GPUREG_TEXENV2_COLOR"},
123 { 0x0D4, "GPUREG_TEXENV2_SCALE" }, 122 {0x0D4, "GPUREG_TEXENV2_SCALE"},
124 123
125 { 0x0D8, "GPUREG_TEXENV3_SOURCE" }, 124 {0x0D8, "GPUREG_TEXENV3_SOURCE"},
126 { 0x0D9, "GPUREG_TEXENV3_OPERAND" }, 125 {0x0D9, "GPUREG_TEXENV3_OPERAND"},
127 { 0x0DA, "GPUREG_TEXENV3_COMBINER" }, 126 {0x0DA, "GPUREG_TEXENV3_COMBINER"},
128 { 0x0DB, "GPUREG_TEXENV3_COLOR" }, 127 {0x0DB, "GPUREG_TEXENV3_COLOR"},
129 { 0x0DC, "GPUREG_TEXENV3_SCALE" }, 128 {0x0DC, "GPUREG_TEXENV3_SCALE"},
130 129
131 { 0x0E0, "GPUREG_TEXENV_UPDATE_BUFFER" }, 130 {0x0E0, "GPUREG_TEXENV_UPDATE_BUFFER"},
132 { 0x0E1, "GPUREG_FOG_COLOR" }, 131 {0x0E1, "GPUREG_FOG_COLOR"},
133 132
134 { 0x0E4, "GPUREG_GAS_ATTENUATION" }, 133 {0x0E4, "GPUREG_GAS_ATTENUATION"},
135 { 0x0E5, "GPUREG_GAS_ACCMAX" }, 134 {0x0E5, "GPUREG_GAS_ACCMAX"},
136 { 0x0E6, "GPUREG_FOG_LUT_INDEX" }, 135 {0x0E6, "GPUREG_FOG_LUT_INDEX"},
137 136
138 { 0x0E8, "GPUREG_FOG_LUT_DATA0" }, 137 {0x0E8, "GPUREG_FOG_LUT_DATA0"},
139 { 0x0E9, "GPUREG_FOG_LUT_DATA1" }, 138 {0x0E9, "GPUREG_FOG_LUT_DATA1"},
140 { 0x0EA, "GPUREG_FOG_LUT_DATA2" }, 139 {0x0EA, "GPUREG_FOG_LUT_DATA2"},
141 { 0x0EB, "GPUREG_FOG_LUT_DATA3" }, 140 {0x0EB, "GPUREG_FOG_LUT_DATA3"},
142 { 0x0EC, "GPUREG_FOG_LUT_DATA4" }, 141 {0x0EC, "GPUREG_FOG_LUT_DATA4"},
143 { 0x0ED, "GPUREG_FOG_LUT_DATA5" }, 142 {0x0ED, "GPUREG_FOG_LUT_DATA5"},
144 { 0x0EE, "GPUREG_FOG_LUT_DATA6" }, 143 {0x0EE, "GPUREG_FOG_LUT_DATA6"},
145 { 0x0EF, "GPUREG_FOG_LUT_DATA7" }, 144 {0x0EF, "GPUREG_FOG_LUT_DATA7"},
146 { 0x0F0, "GPUREG_TEXENV4_SOURCE" }, 145 {0x0F0, "GPUREG_TEXENV4_SOURCE"},
147 { 0x0F1, "GPUREG_TEXENV4_OPERAND" }, 146 {0x0F1, "GPUREG_TEXENV4_OPERAND"},
148 { 0x0F2, "GPUREG_TEXENV4_COMBINER" }, 147 {0x0F2, "GPUREG_TEXENV4_COMBINER"},
149 { 0x0F3, "GPUREG_TEXENV4_COLOR" }, 148 {0x0F3, "GPUREG_TEXENV4_COLOR"},
150 { 0x0F4, "GPUREG_TEXENV4_SCALE" }, 149 {0x0F4, "GPUREG_TEXENV4_SCALE"},
151 150
152 { 0x0F8, "GPUREG_TEXENV5_SOURCE" }, 151 {0x0F8, "GPUREG_TEXENV5_SOURCE"},
153 { 0x0F9, "GPUREG_TEXENV5_OPERAND" }, 152 {0x0F9, "GPUREG_TEXENV5_OPERAND"},
154 { 0x0FA, "GPUREG_TEXENV5_COMBINER" }, 153 {0x0FA, "GPUREG_TEXENV5_COMBINER"},
155 { 0x0FB, "GPUREG_TEXENV5_COLOR" }, 154 {0x0FB, "GPUREG_TEXENV5_COLOR"},
156 { 0x0FC, "GPUREG_TEXENV5_SCALE" }, 155 {0x0FC, "GPUREG_TEXENV5_SCALE"},
157 { 0x0FD, "GPUREG_TEXENV_BUFFER_COLOR" }, 156 {0x0FD, "GPUREG_TEXENV_BUFFER_COLOR"},
158 157
159 { 0x100, "GPUREG_COLOR_OPERATION" }, 158 {0x100, "GPUREG_COLOR_OPERATION"},
160 { 0x101, "GPUREG_BLEND_FUNC" }, 159 {0x101, "GPUREG_BLEND_FUNC"},
161 { 0x102, "GPUREG_LOGIC_OP" }, 160 {0x102, "GPUREG_LOGIC_OP"},
162 { 0x103, "GPUREG_BLEND_COLOR" }, 161 {0x103, "GPUREG_BLEND_COLOR"},
163 { 0x104, "GPUREG_FRAGOP_ALPHA_TEST" }, 162 {0x104, "GPUREG_FRAGOP_ALPHA_TEST"},
164 { 0x105, "GPUREG_STENCIL_TEST" }, 163 {0x105, "GPUREG_STENCIL_TEST"},
165 { 0x106, "GPUREG_STENCIL_OP" }, 164 {0x106, "GPUREG_STENCIL_OP"},
166 { 0x107, "GPUREG_DEPTH_COLOR_MASK" }, 165 {0x107, "GPUREG_DEPTH_COLOR_MASK"},
167 166
168 { 0x110, "GPUREG_FRAMEBUFFER_INVALIDATE" }, 167 {0x110, "GPUREG_FRAMEBUFFER_INVALIDATE"},
169 { 0x111, "GPUREG_FRAMEBUFFER_FLUSH" }, 168 {0x111, "GPUREG_FRAMEBUFFER_FLUSH"},
170 { 0x112, "GPUREG_COLORBUFFER_READ" }, 169 {0x112, "GPUREG_COLORBUFFER_READ"},
171 { 0x113, "GPUREG_COLORBUFFER_WRITE" }, 170 {0x113, "GPUREG_COLORBUFFER_WRITE"},
172 { 0x114, "GPUREG_DEPTHBUFFER_READ" }, 171 {0x114, "GPUREG_DEPTHBUFFER_READ"},
173 { 0x115, "GPUREG_DEPTHBUFFER_WRITE" }, 172 {0x115, "GPUREG_DEPTHBUFFER_WRITE"},
174 { 0x116, "GPUREG_DEPTHBUFFER_FORMAT" }, 173 {0x116, "GPUREG_DEPTHBUFFER_FORMAT"},
175 { 0x117, "GPUREG_COLORBUFFER_FORMAT" }, 174 {0x117, "GPUREG_COLORBUFFER_FORMAT"},
176 { 0x118, "GPUREG_EARLYDEPTH_TEST2" }, 175 {0x118, "GPUREG_EARLYDEPTH_TEST2"},
177 176
178 { 0x11B, "GPUREG_FRAMEBUFFER_BLOCK32" }, 177 {0x11B, "GPUREG_FRAMEBUFFER_BLOCK32"},
179 { 0x11C, "GPUREG_DEPTHBUFFER_LOC" }, 178 {0x11C, "GPUREG_DEPTHBUFFER_LOC"},
180 { 0x11D, "GPUREG_COLORBUFFER_LOC" }, 179 {0x11D, "GPUREG_COLORBUFFER_LOC"},
181 { 0x11E, "GPUREG_FRAMEBUFFER_DIM" }, 180 {0x11E, "GPUREG_FRAMEBUFFER_DIM"},
182 181
183 { 0x120, "GPUREG_GAS_LIGHT_XY" }, 182 {0x120, "GPUREG_GAS_LIGHT_XY"},
184 { 0x121, "GPUREG_GAS_LIGHT_Z" }, 183 {0x121, "GPUREG_GAS_LIGHT_Z"},
185 { 0x122, "GPUREG_GAS_LIGHT_Z_COLOR" }, 184 {0x122, "GPUREG_GAS_LIGHT_Z_COLOR"},
186 { 0x123, "GPUREG_GAS_LUT_INDEX" }, 185 {0x123, "GPUREG_GAS_LUT_INDEX"},
187 { 0x124, "GPUREG_GAS_LUT_DATA" }, 186 {0x124, "GPUREG_GAS_LUT_DATA"},
188 187
189 { 0x126, "GPUREG_GAS_DELTAZ_DEPTH" }, 188 {0x126, "GPUREG_GAS_DELTAZ_DEPTH"},
190 189
191 { 0x130, "GPUREG_FRAGOP_SHADOW" }, 190 {0x130, "GPUREG_FRAGOP_SHADOW"},
192 191
193 { 0x140, "GPUREG_LIGHT0_SPECULAR0" }, 192 {0x140, "GPUREG_LIGHT0_SPECULAR0"},
194 { 0x141, "GPUREG_LIGHT0_SPECULAR1" }, 193 {0x141, "GPUREG_LIGHT0_SPECULAR1"},
195 { 0x142, "GPUREG_LIGHT0_DIFFUSE" }, 194 {0x142, "GPUREG_LIGHT0_DIFFUSE"},
196 { 0x143, "GPUREG_LIGHT0_AMBIENT" }, 195 {0x143, "GPUREG_LIGHT0_AMBIENT"},
197 { 0x144, "GPUREG_LIGHT0_XY" }, 196 {0x144, "GPUREG_LIGHT0_XY"},
198 { 0x145, "GPUREG_LIGHT0_Z" }, 197 {0x145, "GPUREG_LIGHT0_Z"},
199 { 0x146, "GPUREG_LIGHT0_SPOTDIR_XY" }, 198 {0x146, "GPUREG_LIGHT0_SPOTDIR_XY"},
200 { 0x147, "GPUREG_LIGHT0_SPOTDIR_Z" }, 199 {0x147, "GPUREG_LIGHT0_SPOTDIR_Z"},
201 200
202 { 0x149, "GPUREG_LIGHT0_CONFIG" }, 201 {0x149, "GPUREG_LIGHT0_CONFIG"},
203 { 0x14A, "GPUREG_LIGHT0_ATTENUATION_BIAS" }, 202 {0x14A, "GPUREG_LIGHT0_ATTENUATION_BIAS"},
204 { 0x14B, "GPUREG_LIGHT0_ATTENUATION_SCALE" }, 203 {0x14B, "GPUREG_LIGHT0_ATTENUATION_SCALE"},
205 204
206 { 0x150, "GPUREG_LIGHT1_SPECULAR0" }, 205 {0x150, "GPUREG_LIGHT1_SPECULAR0"},
207 { 0x151, "GPUREG_LIGHT1_SPECULAR1" }, 206 {0x151, "GPUREG_LIGHT1_SPECULAR1"},
208 { 0x152, "GPUREG_LIGHT1_DIFFUSE" }, 207 {0x152, "GPUREG_LIGHT1_DIFFUSE"},
209 { 0x153, "GPUREG_LIGHT1_AMBIENT" }, 208 {0x153, "GPUREG_LIGHT1_AMBIENT"},
210 { 0x154, "GPUREG_LIGHT1_XY" }, 209 {0x154, "GPUREG_LIGHT1_XY"},
211 { 0x155, "GPUREG_LIGHT1_Z" }, 210 {0x155, "GPUREG_LIGHT1_Z"},
212 { 0x156, "GPUREG_LIGHT1_SPOTDIR_XY" }, 211 {0x156, "GPUREG_LIGHT1_SPOTDIR_XY"},
213 { 0x157, "GPUREG_LIGHT1_SPOTDIR_Z" }, 212 {0x157, "GPUREG_LIGHT1_SPOTDIR_Z"},
214 213
215 { 0x159, "GPUREG_LIGHT1_CONFIG" }, 214 {0x159, "GPUREG_LIGHT1_CONFIG"},
216 { 0x15A, "GPUREG_LIGHT1_ATTENUATION_BIAS" }, 215 {0x15A, "GPUREG_LIGHT1_ATTENUATION_BIAS"},
217 { 0x15B, "GPUREG_LIGHT1_ATTENUATION_SCALE" }, 216 {0x15B, "GPUREG_LIGHT1_ATTENUATION_SCALE"},
218 217
219 { 0x160, "GPUREG_LIGHT2_SPECULAR0" }, 218 {0x160, "GPUREG_LIGHT2_SPECULAR0"},
220 { 0x161, "GPUREG_LIGHT2_SPECULAR1" }, 219 {0x161, "GPUREG_LIGHT2_SPECULAR1"},
221 { 0x162, "GPUREG_LIGHT2_DIFFUSE" }, 220 {0x162, "GPUREG_LIGHT2_DIFFUSE"},
222 { 0x163, "GPUREG_LIGHT2_AMBIENT" }, 221 {0x163, "GPUREG_LIGHT2_AMBIENT"},
223 { 0x164, "GPUREG_LIGHT2_XY" }, 222 {0x164, "GPUREG_LIGHT2_XY"},
224 { 0x165, "GPUREG_LIGHT2_Z" }, 223 {0x165, "GPUREG_LIGHT2_Z"},
225 { 0x166, "GPUREG_LIGHT2_SPOTDIR_XY" }, 224 {0x166, "GPUREG_LIGHT2_SPOTDIR_XY"},
226 { 0x167, "GPUREG_LIGHT2_SPOTDIR_Z" }, 225 {0x167, "GPUREG_LIGHT2_SPOTDIR_Z"},
227 226
228 { 0x169, "GPUREG_LIGHT2_CONFIG" }, 227 {0x169, "GPUREG_LIGHT2_CONFIG"},
229 { 0x16A, "GPUREG_LIGHT2_ATTENUATION_BIAS" }, 228 {0x16A, "GPUREG_LIGHT2_ATTENUATION_BIAS"},
230 { 0x16B, "GPUREG_LIGHT2_ATTENUATION_SCALE" }, 229 {0x16B, "GPUREG_LIGHT2_ATTENUATION_SCALE"},
231 230
232 { 0x170, "GPUREG_LIGHT3_SPECULAR0" }, 231 {0x170, "GPUREG_LIGHT3_SPECULAR0"},
233 { 0x171, "GPUREG_LIGHT3_SPECULAR1" }, 232 {0x171, "GPUREG_LIGHT3_SPECULAR1"},
234 { 0x172, "GPUREG_LIGHT3_DIFFUSE" }, 233 {0x172, "GPUREG_LIGHT3_DIFFUSE"},
235 { 0x173, "GPUREG_LIGHT3_AMBIENT" }, 234 {0x173, "GPUREG_LIGHT3_AMBIENT"},
236 { 0x174, "GPUREG_LIGHT3_XY" }, 235 {0x174, "GPUREG_LIGHT3_XY"},
237 { 0x175, "GPUREG_LIGHT3_Z" }, 236 {0x175, "GPUREG_LIGHT3_Z"},
238 { 0x176, "GPUREG_LIGHT3_SPOTDIR_XY" }, 237 {0x176, "GPUREG_LIGHT3_SPOTDIR_XY"},
239 { 0x177, "GPUREG_LIGHT3_SPOTDIR_Z" }, 238 {0x177, "GPUREG_LIGHT3_SPOTDIR_Z"},
240 239
241 { 0x179, "GPUREG_LIGHT3_CONFIG" }, 240 {0x179, "GPUREG_LIGHT3_CONFIG"},
242 { 0x17A, "GPUREG_LIGHT3_ATTENUATION_BIAS" }, 241 {0x17A, "GPUREG_LIGHT3_ATTENUATION_BIAS"},
243 { 0x17B, "GPUREG_LIGHT3_ATTENUATION_SCALE" }, 242 {0x17B, "GPUREG_LIGHT3_ATTENUATION_SCALE"},
244 243
245 { 0x180, "GPUREG_LIGHT4_SPECULAR0" }, 244 {0x180, "GPUREG_LIGHT4_SPECULAR0"},
246 { 0x181, "GPUREG_LIGHT4_SPECULAR1" }, 245 {0x181, "GPUREG_LIGHT4_SPECULAR1"},
247 { 0x182, "GPUREG_LIGHT4_DIFFUSE" }, 246 {0x182, "GPUREG_LIGHT4_DIFFUSE"},
248 { 0x183, "GPUREG_LIGHT4_AMBIENT" }, 247 {0x183, "GPUREG_LIGHT4_AMBIENT"},
249 { 0x184, "GPUREG_LIGHT4_XY" }, 248 {0x184, "GPUREG_LIGHT4_XY"},
250 { 0x185, "GPUREG_LIGHT4_Z" }, 249 {0x185, "GPUREG_LIGHT4_Z"},
251 { 0x186, "GPUREG_LIGHT4_SPOTDIR_XY" }, 250 {0x186, "GPUREG_LIGHT4_SPOTDIR_XY"},
252 { 0x187, "GPUREG_LIGHT4_SPOTDIR_Z" }, 251 {0x187, "GPUREG_LIGHT4_SPOTDIR_Z"},
253 252
254 { 0x189, "GPUREG_LIGHT4_CONFIG" }, 253 {0x189, "GPUREG_LIGHT4_CONFIG"},
255 { 0x18A, "GPUREG_LIGHT4_ATTENUATION_BIAS" }, 254 {0x18A, "GPUREG_LIGHT4_ATTENUATION_BIAS"},
256 { 0x18B, "GPUREG_LIGHT4_ATTENUATION_SCALE" }, 255 {0x18B, "GPUREG_LIGHT4_ATTENUATION_SCALE"},
257 256
258 { 0x190, "GPUREG_LIGHT5_SPECULAR0" }, 257 {0x190, "GPUREG_LIGHT5_SPECULAR0"},
259 { 0x191, "GPUREG_LIGHT5_SPECULAR1" }, 258 {0x191, "GPUREG_LIGHT5_SPECULAR1"},
260 { 0x192, "GPUREG_LIGHT5_DIFFUSE" }, 259 {0x192, "GPUREG_LIGHT5_DIFFUSE"},
261 { 0x193, "GPUREG_LIGHT5_AMBIENT" }, 260 {0x193, "GPUREG_LIGHT5_AMBIENT"},
262 { 0x194, "GPUREG_LIGHT5_XY" }, 261 {0x194, "GPUREG_LIGHT5_XY"},
263 { 0x195, "GPUREG_LIGHT5_Z" }, 262 {0x195, "GPUREG_LIGHT5_Z"},
264 { 0x196, "GPUREG_LIGHT5_SPOTDIR_XY" }, 263 {0x196, "GPUREG_LIGHT5_SPOTDIR_XY"},
265 { 0x197, "GPUREG_LIGHT5_SPOTDIR_Z" }, 264 {0x197, "GPUREG_LIGHT5_SPOTDIR_Z"},
266 265
267 { 0x199, "GPUREG_LIGHT5_CONFIG" }, 266 {0x199, "GPUREG_LIGHT5_CONFIG"},
268 { 0x19A, "GPUREG_LIGHT5_ATTENUATION_BIAS" }, 267 {0x19A, "GPUREG_LIGHT5_ATTENUATION_BIAS"},
269 { 0x19B, "GPUREG_LIGHT5_ATTENUATION_SCALE" }, 268 {0x19B, "GPUREG_LIGHT5_ATTENUATION_SCALE"},
270 269
271 { 0x1A0, "GPUREG_LIGHT6_SPECULAR0" }, 270 {0x1A0, "GPUREG_LIGHT6_SPECULAR0"},
272 { 0x1A1, "GPUREG_LIGHT6_SPECULAR1" }, 271 {0x1A1, "GPUREG_LIGHT6_SPECULAR1"},
273 { 0x1A2, "GPUREG_LIGHT6_DIFFUSE" }, 272 {0x1A2, "GPUREG_LIGHT6_DIFFUSE"},
274 { 0x1A3, "GPUREG_LIGHT6_AMBIENT" }, 273 {0x1A3, "GPUREG_LIGHT6_AMBIENT"},
275 { 0x1A4, "GPUREG_LIGHT6_XY" }, 274 {0x1A4, "GPUREG_LIGHT6_XY"},
276 { 0x1A5, "GPUREG_LIGHT6_Z" }, 275 {0x1A5, "GPUREG_LIGHT6_Z"},
277 { 0x1A6, "GPUREG_LIGHT6_SPOTDIR_XY" }, 276 {0x1A6, "GPUREG_LIGHT6_SPOTDIR_XY"},
278 { 0x1A7, "GPUREG_LIGHT6_SPOTDIR_Z" }, 277 {0x1A7, "GPUREG_LIGHT6_SPOTDIR_Z"},
279 278
280 { 0x1A9, "GPUREG_LIGHT6_CONFIG" }, 279 {0x1A9, "GPUREG_LIGHT6_CONFIG"},
281 { 0x1AA, "GPUREG_LIGHT6_ATTENUATION_BIAS" }, 280 {0x1AA, "GPUREG_LIGHT6_ATTENUATION_BIAS"},
282 { 0x1AB, "GPUREG_LIGHT6_ATTENUATION_SCALE" }, 281 {0x1AB, "GPUREG_LIGHT6_ATTENUATION_SCALE"},
283 282
284 { 0x1B0, "GPUREG_LIGHT7_SPECULAR0" }, 283 {0x1B0, "GPUREG_LIGHT7_SPECULAR0"},
285 { 0x1B1, "GPUREG_LIGHT7_SPECULAR1" }, 284 {0x1B1, "GPUREG_LIGHT7_SPECULAR1"},
286 { 0x1B2, "GPUREG_LIGHT7_DIFFUSE" }, 285 {0x1B2, "GPUREG_LIGHT7_DIFFUSE"},
287 { 0x1B3, "GPUREG_LIGHT7_AMBIENT" }, 286 {0x1B3, "GPUREG_LIGHT7_AMBIENT"},
288 { 0x1B4, "GPUREG_LIGHT7_XY" }, 287 {0x1B4, "GPUREG_LIGHT7_XY"},
289 { 0x1B5, "GPUREG_LIGHT7_Z" }, 288 {0x1B5, "GPUREG_LIGHT7_Z"},
290 { 0x1B6, "GPUREG_LIGHT7_SPOTDIR_XY" }, 289 {0x1B6, "GPUREG_LIGHT7_SPOTDIR_XY"},
291 { 0x1B7, "GPUREG_LIGHT7_SPOTDIR_Z" }, 290 {0x1B7, "GPUREG_LIGHT7_SPOTDIR_Z"},
292 291
293 { 0x1B9, "GPUREG_LIGHT7_CONFIG" }, 292 {0x1B9, "GPUREG_LIGHT7_CONFIG"},
294 { 0x1BA, "GPUREG_LIGHT7_ATTENUATION_BIAS" }, 293 {0x1BA, "GPUREG_LIGHT7_ATTENUATION_BIAS"},
295 { 0x1BB, "GPUREG_LIGHT7_ATTENUATION_SCALE" }, 294 {0x1BB, "GPUREG_LIGHT7_ATTENUATION_SCALE"},
296 295
297 { 0x1C0, "GPUREG_LIGHTING_AMBIENT" }, 296 {0x1C0, "GPUREG_LIGHTING_AMBIENT"},
298 297
299 { 0x1C2, "GPUREG_LIGHTING_NUM_LIGHTS" }, 298 {0x1C2, "GPUREG_LIGHTING_NUM_LIGHTS"},
300 { 0x1C3, "GPUREG_LIGHTING_CONFIG0" }, 299 {0x1C3, "GPUREG_LIGHTING_CONFIG0"},
301 { 0x1C4, "GPUREG_LIGHTING_CONFIG1" }, 300 {0x1C4, "GPUREG_LIGHTING_CONFIG1"},
302 { 0x1C5, "GPUREG_LIGHTING_LUT_INDEX" }, 301 {0x1C5, "GPUREG_LIGHTING_LUT_INDEX"},
303 { 0x1C6, "GPUREG_LIGHTING_ENABLE1" }, 302 {0x1C6, "GPUREG_LIGHTING_ENABLE1"},
304 303
305 { 0x1C8, "GPUREG_LIGHTING_LUT_DATA0" }, 304 {0x1C8, "GPUREG_LIGHTING_LUT_DATA0"},
306 { 0x1C9, "GPUREG_LIGHTING_LUT_DATA1" }, 305 {0x1C9, "GPUREG_LIGHTING_LUT_DATA1"},
307 { 0x1CA, "GPUREG_LIGHTING_LUT_DATA2" }, 306 {0x1CA, "GPUREG_LIGHTING_LUT_DATA2"},
308 { 0x1CB, "GPUREG_LIGHTING_LUT_DATA3" }, 307 {0x1CB, "GPUREG_LIGHTING_LUT_DATA3"},
309 { 0x1CC, "GPUREG_LIGHTING_LUT_DATA4" }, 308 {0x1CC, "GPUREG_LIGHTING_LUT_DATA4"},
310 { 0x1CD, "GPUREG_LIGHTING_LUT_DATA5" }, 309 {0x1CD, "GPUREG_LIGHTING_LUT_DATA5"},
311 { 0x1CE, "GPUREG_LIGHTING_LUT_DATA6" }, 310 {0x1CE, "GPUREG_LIGHTING_LUT_DATA6"},
312 { 0x1CF, "GPUREG_LIGHTING_LUT_DATA7" }, 311 {0x1CF, "GPUREG_LIGHTING_LUT_DATA7"},
313 { 0x1D0, "GPUREG_LIGHTING_LUTINPUT_ABS" }, 312 {0x1D0, "GPUREG_LIGHTING_LUTINPUT_ABS"},
314 { 0x1D1, "GPUREG_LIGHTING_LUTINPUT_SELECT" }, 313 {0x1D1, "GPUREG_LIGHTING_LUTINPUT_SELECT"},
315 { 0x1D2, "GPUREG_LIGHTING_LUTINPUT_SCALE" }, 314 {0x1D2, "GPUREG_LIGHTING_LUTINPUT_SCALE"},
316 315
317 { 0x1D9, "GPUREG_LIGHTING_LIGHT_PERMUTATION" }, 316 {0x1D9, "GPUREG_LIGHTING_LIGHT_PERMUTATION"},
318 317
319 { 0x200, "GPUREG_ATTRIBBUFFERS_LOC" }, 318 {0x200, "GPUREG_ATTRIBBUFFERS_LOC"},
320 { 0x201, "GPUREG_ATTRIBBUFFERS_FORMAT_LOW" }, 319 {0x201, "GPUREG_ATTRIBBUFFERS_FORMAT_LOW"},
321 { 0x202, "GPUREG_ATTRIBBUFFERS_FORMAT_HIGH" }, 320 {0x202, "GPUREG_ATTRIBBUFFERS_FORMAT_HIGH"},
322 { 0x203, "GPUREG_ATTRIBBUFFER0_OFFSET" }, 321 {0x203, "GPUREG_ATTRIBBUFFER0_OFFSET"},
323 { 0x204, "GPUREG_ATTRIBBUFFER0_CONFIG1" }, 322 {0x204, "GPUREG_ATTRIBBUFFER0_CONFIG1"},
324 { 0x205, "GPUREG_ATTRIBBUFFER0_CONFIG2" }, 323 {0x205, "GPUREG_ATTRIBBUFFER0_CONFIG2"},
325 { 0x206, "GPUREG_ATTRIBBUFFER1_OFFSET" }, 324 {0x206, "GPUREG_ATTRIBBUFFER1_OFFSET"},
326 { 0x207, "GPUREG_ATTRIBBUFFER1_CONFIG1" }, 325 {0x207, "GPUREG_ATTRIBBUFFER1_CONFIG1"},
327 { 0x208, "GPUREG_ATTRIBBUFFER1_CONFIG2" }, 326 {0x208, "GPUREG_ATTRIBBUFFER1_CONFIG2"},
328 { 0x209, "GPUREG_ATTRIBBUFFER2_OFFSET" }, 327 {0x209, "GPUREG_ATTRIBBUFFER2_OFFSET"},
329 { 0x20A, "GPUREG_ATTRIBBUFFER2_CONFIG1" }, 328 {0x20A, "GPUREG_ATTRIBBUFFER2_CONFIG1"},
330 { 0x20B, "GPUREG_ATTRIBBUFFER2_CONFIG2" }, 329 {0x20B, "GPUREG_ATTRIBBUFFER2_CONFIG2"},
331 { 0x20C, "GPUREG_ATTRIBBUFFER3_OFFSET" }, 330 {0x20C, "GPUREG_ATTRIBBUFFER3_OFFSET"},
332 { 0x20D, "GPUREG_ATTRIBBUFFER3_CONFIG1" }, 331 {0x20D, "GPUREG_ATTRIBBUFFER3_CONFIG1"},
333 { 0x20E, "GPUREG_ATTRIBBUFFER3_CONFIG2" }, 332 {0x20E, "GPUREG_ATTRIBBUFFER3_CONFIG2"},
334 { 0x20F, "GPUREG_ATTRIBBUFFER4_OFFSET" }, 333 {0x20F, "GPUREG_ATTRIBBUFFER4_OFFSET"},
335 { 0x210, "GPUREG_ATTRIBBUFFER4_CONFIG1" }, 334 {0x210, "GPUREG_ATTRIBBUFFER4_CONFIG1"},
336 { 0x211, "GPUREG_ATTRIBBUFFER4_CONFIG2" }, 335 {0x211, "GPUREG_ATTRIBBUFFER4_CONFIG2"},
337 { 0x212, "GPUREG_ATTRIBBUFFER5_OFFSET" }, 336 {0x212, "GPUREG_ATTRIBBUFFER5_OFFSET"},
338 { 0x213, "GPUREG_ATTRIBBUFFER5_CONFIG1" }, 337 {0x213, "GPUREG_ATTRIBBUFFER5_CONFIG1"},
339 { 0x214, "GPUREG_ATTRIBBUFFER5_CONFIG2" }, 338 {0x214, "GPUREG_ATTRIBBUFFER5_CONFIG2"},
340 { 0x215, "GPUREG_ATTRIBBUFFER6_OFFSET" }, 339 {0x215, "GPUREG_ATTRIBBUFFER6_OFFSET"},
341 { 0x216, "GPUREG_ATTRIBBUFFER6_CONFIG1" }, 340 {0x216, "GPUREG_ATTRIBBUFFER6_CONFIG1"},
342 { 0x217, "GPUREG_ATTRIBBUFFER6_CONFIG2" }, 341 {0x217, "GPUREG_ATTRIBBUFFER6_CONFIG2"},
343 { 0x218, "GPUREG_ATTRIBBUFFER7_OFFSET" }, 342 {0x218, "GPUREG_ATTRIBBUFFER7_OFFSET"},
344 { 0x219, "GPUREG_ATTRIBBUFFER7_CONFIG1" }, 343 {0x219, "GPUREG_ATTRIBBUFFER7_CONFIG1"},
345 { 0x21A, "GPUREG_ATTRIBBUFFER7_CONFIG2" }, 344 {0x21A, "GPUREG_ATTRIBBUFFER7_CONFIG2"},
346 { 0x21B, "GPUREG_ATTRIBBUFFER8_OFFSET" }, 345 {0x21B, "GPUREG_ATTRIBBUFFER8_OFFSET"},
347 { 0x21C, "GPUREG_ATTRIBBUFFER8_CONFIG1" }, 346 {0x21C, "GPUREG_ATTRIBBUFFER8_CONFIG1"},
348 { 0x21D, "GPUREG_ATTRIBBUFFER8_CONFIG2" }, 347 {0x21D, "GPUREG_ATTRIBBUFFER8_CONFIG2"},
349 { 0x21E, "GPUREG_ATTRIBBUFFER9_OFFSET" }, 348 {0x21E, "GPUREG_ATTRIBBUFFER9_OFFSET"},
350 { 0x21F, "GPUREG_ATTRIBBUFFER9_CONFIG1" }, 349 {0x21F, "GPUREG_ATTRIBBUFFER9_CONFIG1"},
351 { 0x220, "GPUREG_ATTRIBBUFFER9_CONFIG2" }, 350 {0x220, "GPUREG_ATTRIBBUFFER9_CONFIG2"},
352 { 0x221, "GPUREG_ATTRIBBUFFER10_OFFSET" }, 351 {0x221, "GPUREG_ATTRIBBUFFER10_OFFSET"},
353 { 0x222, "GPUREG_ATTRIBBUFFER10_CONFIG1" }, 352 {0x222, "GPUREG_ATTRIBBUFFER10_CONFIG1"},
354 { 0x223, "GPUREG_ATTRIBBUFFER10_CONFIG2" }, 353 {0x223, "GPUREG_ATTRIBBUFFER10_CONFIG2"},
355 { 0x224, "GPUREG_ATTRIBBUFFER11_OFFSET" }, 354 {0x224, "GPUREG_ATTRIBBUFFER11_OFFSET"},
356 { 0x225, "GPUREG_ATTRIBBUFFER11_CONFIG1" }, 355 {0x225, "GPUREG_ATTRIBBUFFER11_CONFIG1"},
357 { 0x226, "GPUREG_ATTRIBBUFFER11_CONFIG2" }, 356 {0x226, "GPUREG_ATTRIBBUFFER11_CONFIG2"},
358 { 0x227, "GPUREG_INDEXBUFFER_CONFIG" }, 357 {0x227, "GPUREG_INDEXBUFFER_CONFIG"},
359 { 0x228, "GPUREG_NUMVERTICES" }, 358 {0x228, "GPUREG_NUMVERTICES"},
360 { 0x229, "GPUREG_GEOSTAGE_CONFIG" }, 359 {0x229, "GPUREG_GEOSTAGE_CONFIG"},
361 { 0x22A, "GPUREG_VERTEX_OFFSET" }, 360 {0x22A, "GPUREG_VERTEX_OFFSET"},
362 361
363 { 0x22D, "GPUREG_POST_VERTEX_CACHE_NUM" }, 362 {0x22D, "GPUREG_POST_VERTEX_CACHE_NUM"},
364 { 0x22E, "GPUREG_DRAWARRAYS" }, 363 {0x22E, "GPUREG_DRAWARRAYS"},
365 { 0x22F, "GPUREG_DRAWELEMENTS" }, 364 {0x22F, "GPUREG_DRAWELEMENTS"},
366 365
367 { 0x231, "GPUREG_VTX_FUNC" }, 366 {0x231, "GPUREG_VTX_FUNC"},
368 { 0x232, "GPUREG_FIXEDATTRIB_INDEX" }, 367 {0x232, "GPUREG_FIXEDATTRIB_INDEX"},
369 { 0x233, "GPUREG_FIXEDATTRIB_DATA0" }, 368 {0x233, "GPUREG_FIXEDATTRIB_DATA0"},
370 { 0x234, "GPUREG_FIXEDATTRIB_DATA1" }, 369 {0x234, "GPUREG_FIXEDATTRIB_DATA1"},
371 { 0x235, "GPUREG_FIXEDATTRIB_DATA2" }, 370 {0x235, "GPUREG_FIXEDATTRIB_DATA2"},
372 371
373 { 0x238, "GPUREG_CMDBUF_SIZE0" }, 372 {0x238, "GPUREG_CMDBUF_SIZE0"},
374 { 0x239, "GPUREG_CMDBUF_SIZE1" }, 373 {0x239, "GPUREG_CMDBUF_SIZE1"},
375 { 0x23A, "GPUREG_CMDBUF_ADDR0" }, 374 {0x23A, "GPUREG_CMDBUF_ADDR0"},
376 { 0x23B, "GPUREG_CMDBUF_ADDR1" }, 375 {0x23B, "GPUREG_CMDBUF_ADDR1"},
377 { 0x23C, "GPUREG_CMDBUF_JUMP0" }, 376 {0x23C, "GPUREG_CMDBUF_JUMP0"},
378 { 0x23D, "GPUREG_CMDBUF_JUMP1" }, 377 {0x23D, "GPUREG_CMDBUF_JUMP1"},
379 378
380 { 0x242, "GPUREG_VSH_NUM_ATTR" }, 379 {0x242, "GPUREG_VSH_NUM_ATTR"},
381 380
382 { 0x244, "GPUREG_VSH_COM_MODE" }, 381 {0x244, "GPUREG_VSH_COM_MODE"},
383 { 0x245, "GPUREG_START_DRAW_FUNC0" }, 382 {0x245, "GPUREG_START_DRAW_FUNC0"},
384 383
385 { 0x24A, "GPUREG_VSH_OUTMAP_TOTAL1" }, 384 {0x24A, "GPUREG_VSH_OUTMAP_TOTAL1"},
386 385
387 { 0x251, "GPUREG_VSH_OUTMAP_TOTAL2" }, 386 {0x251, "GPUREG_VSH_OUTMAP_TOTAL2"},
388 { 0x252, "GPUREG_GSH_MISC0" }, 387 {0x252, "GPUREG_GSH_MISC0"},
389 { 0x253, "GPUREG_GEOSTAGE_CONFIG2" }, 388 {0x253, "GPUREG_GEOSTAGE_CONFIG2"},
390 { 0x254, "GPUREG_GSH_MISC1" }, 389 {0x254, "GPUREG_GSH_MISC1"},
391 390
392 { 0x25E, "GPUREG_PRIMITIVE_CONFIG" }, 391 {0x25E, "GPUREG_PRIMITIVE_CONFIG"},
393 { 0x25F, "GPUREG_RESTART_PRIMITIVE" }, 392 {0x25F, "GPUREG_RESTART_PRIMITIVE"},
394 393
395 { 0x280, "GPUREG_GSH_BOOLUNIFORM" }, 394 {0x280, "GPUREG_GSH_BOOLUNIFORM"},
396 { 0x281, "GPUREG_GSH_INTUNIFORM_I0" }, 395 {0x281, "GPUREG_GSH_INTUNIFORM_I0"},
397 { 0x282, "GPUREG_GSH_INTUNIFORM_I1" }, 396 {0x282, "GPUREG_GSH_INTUNIFORM_I1"},
398 { 0x283, "GPUREG_GSH_INTUNIFORM_I2" }, 397 {0x283, "GPUREG_GSH_INTUNIFORM_I2"},
399 { 0x284, "GPUREG_GSH_INTUNIFORM_I3" }, 398 {0x284, "GPUREG_GSH_INTUNIFORM_I3"},
400 399
401 { 0x289, "GPUREG_GSH_INPUTBUFFER_CONFIG" }, 400 {0x289, "GPUREG_GSH_INPUTBUFFER_CONFIG"},
402 { 0x28A, "GPUREG_GSH_ENTRYPOINT" }, 401 {0x28A, "GPUREG_GSH_ENTRYPOINT"},
403 { 0x28B, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW" }, 402 {0x28B, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW"},
404 { 0x28C, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_HIGH" }, 403 {0x28C, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_HIGH"},
405 { 0x28D, "GPUREG_GSH_OUTMAP_MASK" }, 404 {0x28D, "GPUREG_GSH_OUTMAP_MASK"},
406 405
407 { 0x28F, "GPUREG_GSH_CODETRANSFER_END" }, 406 {0x28F, "GPUREG_GSH_CODETRANSFER_END"},
408 { 0x290, "GPUREG_GSH_FLOATUNIFORM_INDEX" }, 407 {0x290, "GPUREG_GSH_FLOATUNIFORM_INDEX"},
409 { 0x291, "GPUREG_GSH_FLOATUNIFORM_DATA0" }, 408 {0x291, "GPUREG_GSH_FLOATUNIFORM_DATA0"},
410 { 0x292, "GPUREG_GSH_FLOATUNIFORM_DATA1" }, 409 {0x292, "GPUREG_GSH_FLOATUNIFORM_DATA1"},
411 { 0x293, "GPUREG_GSH_FLOATUNIFORM_DATA2" }, 410 {0x293, "GPUREG_GSH_FLOATUNIFORM_DATA2"},
412 { 0x294, "GPUREG_GSH_FLOATUNIFORM_DATA3" }, 411 {0x294, "GPUREG_GSH_FLOATUNIFORM_DATA3"},
413 { 0x295, "GPUREG_GSH_FLOATUNIFORM_DATA4" }, 412 {0x295, "GPUREG_GSH_FLOATUNIFORM_DATA4"},
414 { 0x296, "GPUREG_GSH_FLOATUNIFORM_DATA5" }, 413 {0x296, "GPUREG_GSH_FLOATUNIFORM_DATA5"},
415 { 0x297, "GPUREG_GSH_FLOATUNIFORM_DATA6" }, 414 {0x297, "GPUREG_GSH_FLOATUNIFORM_DATA6"},
416 { 0x298, "GPUREG_GSH_FLOATUNIFORM_DATA7" }, 415 {0x298, "GPUREG_GSH_FLOATUNIFORM_DATA7"},
417 416
418 { 0x29B, "GPUREG_GSH_CODETRANSFER_INDEX" }, 417 {0x29B, "GPUREG_GSH_CODETRANSFER_INDEX"},
419 { 0x29C, "GPUREG_GSH_CODETRANSFER_DATA0" }, 418 {0x29C, "GPUREG_GSH_CODETRANSFER_DATA0"},
420 { 0x29D, "GPUREG_GSH_CODETRANSFER_DATA1" }, 419 {0x29D, "GPUREG_GSH_CODETRANSFER_DATA1"},
421 { 0x29E, "GPUREG_GSH_CODETRANSFER_DATA2" }, 420 {0x29E, "GPUREG_GSH_CODETRANSFER_DATA2"},
422 { 0x29F, "GPUREG_GSH_CODETRANSFER_DATA3" }, 421 {0x29F, "GPUREG_GSH_CODETRANSFER_DATA3"},
423 { 0x2A0, "GPUREG_GSH_CODETRANSFER_DATA4" }, 422 {0x2A0, "GPUREG_GSH_CODETRANSFER_DATA4"},
424 { 0x2A1, "GPUREG_GSH_CODETRANSFER_DATA5" }, 423 {0x2A1, "GPUREG_GSH_CODETRANSFER_DATA5"},
425 { 0x2A2, "GPUREG_GSH_CODETRANSFER_DATA6" }, 424 {0x2A2, "GPUREG_GSH_CODETRANSFER_DATA6"},
426 { 0x2A3, "GPUREG_GSH_CODETRANSFER_DATA7" }, 425 {0x2A3, "GPUREG_GSH_CODETRANSFER_DATA7"},
427 426
428 { 0x2A5, "GPUREG_GSH_OPDESCS_INDEX" }, 427 {0x2A5, "GPUREG_GSH_OPDESCS_INDEX"},
429 { 0x2A6, "GPUREG_GSH_OPDESCS_DATA0" }, 428 {0x2A6, "GPUREG_GSH_OPDESCS_DATA0"},
430 { 0x2A7, "GPUREG_GSH_OPDESCS_DATA1" }, 429 {0x2A7, "GPUREG_GSH_OPDESCS_DATA1"},
431 { 0x2A8, "GPUREG_GSH_OPDESCS_DATA2" }, 430 {0x2A8, "GPUREG_GSH_OPDESCS_DATA2"},
432 { 0x2A9, "GPUREG_GSH_OPDESCS_DATA3" }, 431 {0x2A9, "GPUREG_GSH_OPDESCS_DATA3"},
433 { 0x2AA, "GPUREG_GSH_OPDESCS_DATA4" }, 432 {0x2AA, "GPUREG_GSH_OPDESCS_DATA4"},
434 { 0x2AB, "GPUREG_GSH_OPDESCS_DATA5" }, 433 {0x2AB, "GPUREG_GSH_OPDESCS_DATA5"},
435 { 0x2AC, "GPUREG_GSH_OPDESCS_DATA6" }, 434 {0x2AC, "GPUREG_GSH_OPDESCS_DATA6"},
436 { 0x2AD, "GPUREG_GSH_OPDESCS_DATA7" }, 435 {0x2AD, "GPUREG_GSH_OPDESCS_DATA7"},
437 436
438 { 0x2B0, "GPUREG_VSH_BOOLUNIFORM" }, 437 {0x2B0, "GPUREG_VSH_BOOLUNIFORM"},
439 { 0x2B1, "GPUREG_VSH_INTUNIFORM_I0" }, 438 {0x2B1, "GPUREG_VSH_INTUNIFORM_I0"},
440 { 0x2B2, "GPUREG_VSH_INTUNIFORM_I1" }, 439 {0x2B2, "GPUREG_VSH_INTUNIFORM_I1"},
441 { 0x2B3, "GPUREG_VSH_INTUNIFORM_I2" }, 440 {0x2B3, "GPUREG_VSH_INTUNIFORM_I2"},
442 { 0x2B4, "GPUREG_VSH_INTUNIFORM_I3" }, 441 {0x2B4, "GPUREG_VSH_INTUNIFORM_I3"},
443 442
444 { 0x2B9, "GPUREG_VSH_INPUTBUFFER_CONFIG" }, 443 {0x2B9, "GPUREG_VSH_INPUTBUFFER_CONFIG"},
445 { 0x2BA, "GPUREG_VSH_ENTRYPOINT" }, 444 {0x2BA, "GPUREG_VSH_ENTRYPOINT"},
446 { 0x2BB, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW" }, 445 {0x2BB, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW"},
447 { 0x2BC, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_HIGH" }, 446 {0x2BC, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_HIGH"},
448 { 0x2BD, "GPUREG_VSH_OUTMAP_MASK" }, 447 {0x2BD, "GPUREG_VSH_OUTMAP_MASK"},
449 448
450 { 0x2BF, "GPUREG_VSH_CODETRANSFER_END" }, 449 {0x2BF, "GPUREG_VSH_CODETRANSFER_END"},
451 { 0x2C0, "GPUREG_VSH_FLOATUNIFORM_INDEX" }, 450 {0x2C0, "GPUREG_VSH_FLOATUNIFORM_INDEX"},
452 { 0x2C1, "GPUREG_VSH_FLOATUNIFORM_DATA0" }, 451 {0x2C1, "GPUREG_VSH_FLOATUNIFORM_DATA0"},
453 { 0x2C2, "GPUREG_VSH_FLOATUNIFORM_DATA1" }, 452 {0x2C2, "GPUREG_VSH_FLOATUNIFORM_DATA1"},
454 { 0x2C3, "GPUREG_VSH_FLOATUNIFORM_DATA2" }, 453 {0x2C3, "GPUREG_VSH_FLOATUNIFORM_DATA2"},
455 { 0x2C4, "GPUREG_VSH_FLOATUNIFORM_DATA3" }, 454 {0x2C4, "GPUREG_VSH_FLOATUNIFORM_DATA3"},
456 { 0x2C5, "GPUREG_VSH_FLOATUNIFORM_DATA4" }, 455 {0x2C5, "GPUREG_VSH_FLOATUNIFORM_DATA4"},
457 { 0x2C6, "GPUREG_VSH_FLOATUNIFORM_DATA5" }, 456 {0x2C6, "GPUREG_VSH_FLOATUNIFORM_DATA5"},
458 { 0x2C7, "GPUREG_VSH_FLOATUNIFORM_DATA6" }, 457 {0x2C7, "GPUREG_VSH_FLOATUNIFORM_DATA6"},
459 { 0x2C8, "GPUREG_VSH_FLOATUNIFORM_DATA7" }, 458 {0x2C8, "GPUREG_VSH_FLOATUNIFORM_DATA7"},
460 459
461 { 0x2CB, "GPUREG_VSH_CODETRANSFER_INDEX" }, 460 {0x2CB, "GPUREG_VSH_CODETRANSFER_INDEX"},
462 { 0x2CC, "GPUREG_VSH_CODETRANSFER_DATA0" }, 461 {0x2CC, "GPUREG_VSH_CODETRANSFER_DATA0"},
463 { 0x2CD, "GPUREG_VSH_CODETRANSFER_DATA1" }, 462 {0x2CD, "GPUREG_VSH_CODETRANSFER_DATA1"},
464 { 0x2CE, "GPUREG_VSH_CODETRANSFER_DATA2" }, 463 {0x2CE, "GPUREG_VSH_CODETRANSFER_DATA2"},
465 { 0x2CF, "GPUREG_VSH_CODETRANSFER_DATA3" }, 464 {0x2CF, "GPUREG_VSH_CODETRANSFER_DATA3"},
466 { 0x2D0, "GPUREG_VSH_CODETRANSFER_DATA4" }, 465 {0x2D0, "GPUREG_VSH_CODETRANSFER_DATA4"},
467 { 0x2D1, "GPUREG_VSH_CODETRANSFER_DATA5" }, 466 {0x2D1, "GPUREG_VSH_CODETRANSFER_DATA5"},
468 { 0x2D2, "GPUREG_VSH_CODETRANSFER_DATA6" }, 467 {0x2D2, "GPUREG_VSH_CODETRANSFER_DATA6"},
469 { 0x2D3, "GPUREG_VSH_CODETRANSFER_DATA7" }, 468 {0x2D3, "GPUREG_VSH_CODETRANSFER_DATA7"},
470 469
471 { 0x2D5, "GPUREG_VSH_OPDESCS_INDEX" }, 470 {0x2D5, "GPUREG_VSH_OPDESCS_INDEX"},
472 { 0x2D6, "GPUREG_VSH_OPDESCS_DATA0" }, 471 {0x2D6, "GPUREG_VSH_OPDESCS_DATA0"},
473 { 0x2D7, "GPUREG_VSH_OPDESCS_DATA1" }, 472 {0x2D7, "GPUREG_VSH_OPDESCS_DATA1"},
474 { 0x2D8, "GPUREG_VSH_OPDESCS_DATA2" }, 473 {0x2D8, "GPUREG_VSH_OPDESCS_DATA2"},
475 { 0x2D9, "GPUREG_VSH_OPDESCS_DATA3" }, 474 {0x2D9, "GPUREG_VSH_OPDESCS_DATA3"},
476 { 0x2DA, "GPUREG_VSH_OPDESCS_DATA4" }, 475 {0x2DA, "GPUREG_VSH_OPDESCS_DATA4"},
477 { 0x2DB, "GPUREG_VSH_OPDESCS_DATA5" }, 476 {0x2DB, "GPUREG_VSH_OPDESCS_DATA5"},
478 { 0x2DC, "GPUREG_VSH_OPDESCS_DATA6" }, 477 {0x2DC, "GPUREG_VSH_OPDESCS_DATA6"},
479 { 0x2DD, "GPUREG_VSH_OPDESCS_DATA7" }, 478 {0x2DD, "GPUREG_VSH_OPDESCS_DATA7"},
480}; 479};
481 480
482std::string Regs::GetCommandName(int index) { 481std::string Regs::GetCommandName(int index) {
@@ -516,5 +515,4 @@ void State::Reset() {
516 Zero(immediate); 515 Zero(immediate);
517 primitive_assembler.Reconfigure(Regs::TriangleTopology::List); 516 primitive_assembler.Reconfigure(Regs::TriangleTopology::List);
518} 517}
519
520} 518}
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 7099c31a0..b2db609ec 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -16,15 +16,16 @@
16#include "common/bit_field.h" 16#include "common/bit_field.h"
17#include "common/common_funcs.h" 17#include "common/common_funcs.h"
18#include "common/common_types.h" 18#include "common/common_types.h"
19#include "common/vector_math.h"
20#include "common/logging/log.h" 19#include "common/logging/log.h"
20#include "common/vector_math.h"
21 21
22namespace Pica { 22namespace Pica {
23 23
24// Returns index corresponding to the Regs member labeled by field_name 24// Returns index corresponding to the Regs member labeled by field_name
25// TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions 25// TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions
26// when used with array elements (e.g. PICA_REG_INDEX(vs_uniform_setup.set_value[1])). 26// when used with array elements (e.g. PICA_REG_INDEX(vs_uniform_setup.set_value[1])).
27// For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members 27// For details cf.
28// https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
28// Hopefully, this will be fixed sometime in the future. 29// Hopefully, this will be fixed sometime in the future.
29// For lack of better alternatives, we currently hardcode the offsets when constant 30// For lack of better alternatives, we currently hardcode the offsets when constant
30// expressions are needed via PICA_REG_INDEX_WORKAROUND (on sane compilers, static_asserts 31// expressions are needed via PICA_REG_INDEX_WORKAROUND (on sane compilers, static_asserts
@@ -37,8 +38,9 @@ namespace Pica {
37// really is this annoying. This macro just forwards its first argument to PICA_REG_INDEX 38// really is this annoying. This macro just forwards its first argument to PICA_REG_INDEX
38// and then performs a (no-op) cast to size_t iff the second argument matches the expected 39// and then performs a (no-op) cast to size_t iff the second argument matches the expected
39// field offset. Otherwise, the compiler will fail to compile this code. 40// field offset. Otherwise, the compiler will fail to compile this code.
40#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ 41#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \
41 ((typename std::enable_if<backup_workaround_index == PICA_REG_INDEX(field_name), size_t>::type)PICA_REG_INDEX(field_name)) 42 ((typename std::enable_if<backup_workaround_index == PICA_REG_INDEX(field_name), \
43 size_t>::type)PICA_REG_INDEX(field_name))
42#endif // _MSC_VER 44#endif // _MSC_VER
43 45
44struct Regs { 46struct Regs {
@@ -51,8 +53,8 @@ struct Regs {
51 53
52 enum class CullMode : u32 { 54 enum class CullMode : u32 {
53 // Select which polygons are considered to be "frontfacing". 55 // Select which polygons are considered to be "frontfacing".
54 KeepAll = 0, 56 KeepAll = 0,
55 KeepClockWise = 1, 57 KeepClockWise = 1,
56 KeepCounterClockWise = 2, 58 KeepCounterClockWise = 2,
57 // TODO: What does the third value imply? 59 // TODO: What does the third value imply?
58 }; 60 };
@@ -69,48 +71,47 @@ struct Regs {
69 71
70 INSERT_PADDING_WORDS(0x9); 72 INSERT_PADDING_WORDS(0x9);
71 73
72 BitField<0, 24, u32> viewport_depth_range; // float24 74 BitField<0, 24, u32> viewport_depth_range; // float24
73 BitField<0, 24, u32> viewport_depth_near_plane; // float24 75 BitField<0, 24, u32> viewport_depth_near_plane; // float24
74 76
75 BitField<0, 3, u32> vs_output_total; 77 BitField<0, 3, u32> vs_output_total;
76 78
77 union VSOutputAttributes { 79 union VSOutputAttributes {
78 // Maps components of output vertex attributes to semantics 80 // Maps components of output vertex attributes to semantics
79 enum Semantic : u32 81 enum Semantic : u32 {
80 { 82 POSITION_X = 0,
81 POSITION_X = 0, 83 POSITION_Y = 1,
82 POSITION_Y = 1, 84 POSITION_Z = 2,
83 POSITION_Z = 2, 85 POSITION_W = 3,
84 POSITION_W = 3, 86
85 87 QUATERNION_X = 4,
86 QUATERNION_X = 4, 88 QUATERNION_Y = 5,
87 QUATERNION_Y = 5, 89 QUATERNION_Z = 6,
88 QUATERNION_Z = 6, 90 QUATERNION_W = 7,
89 QUATERNION_W = 7, 91
90 92 COLOR_R = 8,
91 COLOR_R = 8, 93 COLOR_G = 9,
92 COLOR_G = 9, 94 COLOR_B = 10,
93 COLOR_B = 10, 95 COLOR_A = 11,
94 COLOR_A = 11, 96
95 97 TEXCOORD0_U = 12,
96 TEXCOORD0_U = 12, 98 TEXCOORD0_V = 13,
97 TEXCOORD0_V = 13, 99 TEXCOORD1_U = 14,
98 TEXCOORD1_U = 14, 100 TEXCOORD1_V = 15,
99 TEXCOORD1_V = 15,
100 101
101 // TODO: Not verified 102 // TODO: Not verified
102 VIEW_X = 18, 103 VIEW_X = 18,
103 VIEW_Y = 19, 104 VIEW_Y = 19,
104 VIEW_Z = 20, 105 VIEW_Z = 20,
105 106
106 TEXCOORD2_U = 22, 107 TEXCOORD2_U = 22,
107 TEXCOORD2_V = 23, 108 TEXCOORD2_V = 23,
108 109
109 INVALID = 31, 110 INVALID = 31,
110 }; 111 };
111 112
112 BitField< 0, 5, Semantic> map_x; 113 BitField<0, 5, Semantic> map_x;
113 BitField< 8, 5, Semantic> map_y; 114 BitField<8, 5, Semantic> map_y;
114 BitField<16, 5, Semantic> map_z; 115 BitField<16, 5, Semantic> map_z;
115 BitField<24, 5, Semantic> map_w; 116 BitField<24, 5, Semantic> map_w;
116 } vs_output_attributes[7]; 117 } vs_output_attributes[7];
@@ -128,77 +129,78 @@ struct Regs {
128 BitField<0, 2, ScissorMode> mode; 129 BitField<0, 2, ScissorMode> mode;
129 130
130 union { 131 union {
131 BitField< 0, 16, u32> x1; 132 BitField<0, 16, u32> x1;
132 BitField<16, 16, u32> y1; 133 BitField<16, 16, u32> y1;
133 }; 134 };
134 135
135 union { 136 union {
136 BitField< 0, 16, u32> x2; 137 BitField<0, 16, u32> x2;
137 BitField<16, 16, u32> y2; 138 BitField<16, 16, u32> y2;
138 }; 139 };
139 } scissor_test; 140 } scissor_test;
140 141
141 union { 142 union {
142 BitField< 0, 10, s32> x; 143 BitField<0, 10, s32> x;
143 BitField<16, 10, s32> y; 144 BitField<16, 10, s32> y;
144 } viewport_corner; 145 } viewport_corner;
145 146
146 INSERT_PADDING_WORDS(0x1); 147 INSERT_PADDING_WORDS(0x1);
147 148
148 //TODO: early depth 149 // TODO: early depth
149 INSERT_PADDING_WORDS(0x1); 150 INSERT_PADDING_WORDS(0x1);
150 151
151 INSERT_PADDING_WORDS(0x2); 152 INSERT_PADDING_WORDS(0x2);
152 153
153 enum DepthBuffering : u32 { 154 enum DepthBuffering : u32 {
154 WBuffering = 0, 155 WBuffering = 0,
155 ZBuffering = 1, 156 ZBuffering = 1,
156 }; 157 };
157 BitField< 0, 1, DepthBuffering> depthmap_enable; 158 BitField<0, 1, DepthBuffering> depthmap_enable;
158 159
159 INSERT_PADDING_WORDS(0x12); 160 INSERT_PADDING_WORDS(0x12);
160 161
161 struct TextureConfig { 162 struct TextureConfig {
162 enum TextureType : u32 { 163 enum TextureType : u32 {
163 Texture2D = 0, 164 Texture2D = 0,
164 TextureCube = 1, 165 TextureCube = 1,
165 Shadow2D = 2, 166 Shadow2D = 2,
166 Projection2D = 3, 167 Projection2D = 3,
167 ShadowCube = 4, 168 ShadowCube = 4,
168 Disabled = 5, 169 Disabled = 5,
169 }; 170 };
170 171
171 enum WrapMode : u32 { 172 enum WrapMode : u32 {
172 ClampToEdge = 0, 173 ClampToEdge = 0,
173 ClampToBorder = 1, 174 ClampToBorder = 1,
174 Repeat = 2, 175 Repeat = 2,
175 MirroredRepeat = 3, 176 MirroredRepeat = 3,
176 }; 177 };
177 178
178 enum TextureFilter : u32 { 179 enum TextureFilter : u32 {
179 Nearest = 0, 180 Nearest = 0,
180 Linear = 1 181 Linear = 1,
181 }; 182 };
182 183
183 union { 184 union {
184 u32 raw; 185 u32 raw;
185 BitField< 0, 8, u32> r; 186 BitField<0, 8, u32> r;
186 BitField< 8, 8, u32> g; 187 BitField<8, 8, u32> g;
187 BitField<16, 8, u32> b; 188 BitField<16, 8, u32> b;
188 BitField<24, 8, u32> a; 189 BitField<24, 8, u32> a;
189 } border_color; 190 } border_color;
190 191
191 union { 192 union {
192 BitField< 0, 16, u32> height; 193 BitField<0, 16, u32> height;
193 BitField<16, 16, u32> width; 194 BitField<16, 16, u32> width;
194 }; 195 };
195 196
196 union { 197 union {
197 BitField< 1, 1, TextureFilter> mag_filter; 198 BitField<1, 1, TextureFilter> mag_filter;
198 BitField< 2, 1, TextureFilter> min_filter; 199 BitField<2, 1, TextureFilter> min_filter;
199 BitField< 8, 2, WrapMode> wrap_t; 200 BitField<8, 2, WrapMode> wrap_t;
200 BitField<12, 2, WrapMode> wrap_s; 201 BitField<12, 2, WrapMode> wrap_s;
201 BitField<28, 2, TextureType> type; ///< @note Only valid for texture 0 according to 3DBrew. 202 BitField<28, 2, TextureType>
203 type; ///< @note Only valid for texture 0 according to 3DBrew.
202 }; 204 };
203 205
204 INSERT_PADDING_WORDS(0x1); 206 INSERT_PADDING_WORDS(0x1);
@@ -216,39 +218,39 @@ struct Regs {
216 }; 218 };
217 219
218 enum class TextureFormat : u32 { 220 enum class TextureFormat : u32 {
219 RGBA8 = 0, 221 RGBA8 = 0,
220 RGB8 = 1, 222 RGB8 = 1,
221 RGB5A1 = 2, 223 RGB5A1 = 2,
222 RGB565 = 3, 224 RGB565 = 3,
223 RGBA4 = 4, 225 RGBA4 = 4,
224 IA8 = 5, 226 IA8 = 5,
225 RG8 = 6, ///< @note Also called HILO8 in 3DBrew. 227 RG8 = 6, ///< @note Also called HILO8 in 3DBrew.
226 I8 = 7, 228 I8 = 7,
227 A8 = 8, 229 A8 = 8,
228 IA4 = 9, 230 IA4 = 9,
229 I4 = 10, 231 I4 = 10,
230 A4 = 11, 232 A4 = 11,
231 ETC1 = 12, // compressed 233 ETC1 = 12, // compressed
232 ETC1A4 = 13, // compressed 234 ETC1A4 = 13, // compressed
233 }; 235 };
234 236
235 enum class LogicOp : u32 { 237 enum class LogicOp : u32 {
236 Clear = 0, 238 Clear = 0,
237 And = 1, 239 And = 1,
238 AndReverse = 2, 240 AndReverse = 2,
239 Copy = 3, 241 Copy = 3,
240 Set = 4, 242 Set = 4,
241 CopyInverted = 5, 243 CopyInverted = 5,
242 NoOp = 6, 244 NoOp = 6,
243 Invert = 7, 245 Invert = 7,
244 Nand = 8, 246 Nand = 8,
245 Or = 9, 247 Or = 9,
246 Nor = 10, 248 Nor = 10,
247 Xor = 11, 249 Xor = 11,
248 Equiv = 12, 250 Equiv = 12,
249 AndInverted = 13, 251 AndInverted = 13,
250 OrReverse = 14, 252 OrReverse = 14,
251 OrInverted = 15, 253 OrInverted = 15,
252 }; 254 };
253 255
254 static unsigned NibblesPerPixel(TextureFormat format) { 256 static unsigned NibblesPerPixel(TextureFormat format) {
@@ -273,15 +275,15 @@ struct Regs {
273 case TextureFormat::I8: 275 case TextureFormat::I8:
274 case TextureFormat::A8: 276 case TextureFormat::A8:
275 case TextureFormat::IA4: 277 case TextureFormat::IA4:
276 default: // placeholder for yet unknown formats 278 default: // placeholder for yet unknown formats
277 return 2; 279 return 2;
278 } 280 }
279 } 281 }
280 282
281 union { 283 union {
282 BitField< 0, 1, u32> texture0_enable; 284 BitField<0, 1, u32> texture0_enable;
283 BitField< 1, 1, u32> texture1_enable; 285 BitField<1, 1, u32> texture1_enable;
284 BitField< 2, 1, u32> texture2_enable; 286 BitField<2, 1, u32> texture2_enable;
285 }; 287 };
286 TextureConfig texture0; 288 TextureConfig texture0;
287 INSERT_PADDING_WORDS(0x8); 289 INSERT_PADDING_WORDS(0x8);
@@ -302,63 +304,63 @@ struct Regs {
302 }; 304 };
303 const std::array<FullTextureConfig, 3> GetTextures() const { 305 const std::array<FullTextureConfig, 3> GetTextures() const {
304 return {{ 306 return {{
305 { texture0_enable.ToBool(), texture0, texture0_format }, 307 {texture0_enable.ToBool(), texture0, texture0_format},
306 { texture1_enable.ToBool(), texture1, texture1_format }, 308 {texture1_enable.ToBool(), texture1, texture1_format},
307 { texture2_enable.ToBool(), texture2, texture2_format } 309 {texture2_enable.ToBool(), texture2, texture2_format},
308 }}; 310 }};
309 } 311 }
310 312
311 // 0xc0-0xff: Texture Combiner (akin to glTexEnv) 313 // 0xc0-0xff: Texture Combiner (akin to glTexEnv)
312 struct TevStageConfig { 314 struct TevStageConfig {
313 enum class Source : u32 { 315 enum class Source : u32 {
314 PrimaryColor = 0x0, 316 PrimaryColor = 0x0,
315 PrimaryFragmentColor = 0x1, 317 PrimaryFragmentColor = 0x1,
316 SecondaryFragmentColor = 0x2, 318 SecondaryFragmentColor = 0x2,
317 319
318 Texture0 = 0x3, 320 Texture0 = 0x3,
319 Texture1 = 0x4, 321 Texture1 = 0x4,
320 Texture2 = 0x5, 322 Texture2 = 0x5,
321 Texture3 = 0x6, 323 Texture3 = 0x6,
322 324
323 PreviousBuffer = 0xd, 325 PreviousBuffer = 0xd,
324 Constant = 0xe, 326 Constant = 0xe,
325 Previous = 0xf, 327 Previous = 0xf,
326 }; 328 };
327 329
328 enum class ColorModifier : u32 { 330 enum class ColorModifier : u32 {
329 SourceColor = 0x0, 331 SourceColor = 0x0,
330 OneMinusSourceColor = 0x1, 332 OneMinusSourceColor = 0x1,
331 SourceAlpha = 0x2, 333 SourceAlpha = 0x2,
332 OneMinusSourceAlpha = 0x3, 334 OneMinusSourceAlpha = 0x3,
333 SourceRed = 0x4, 335 SourceRed = 0x4,
334 OneMinusSourceRed = 0x5, 336 OneMinusSourceRed = 0x5,
335 337
336 SourceGreen = 0x8, 338 SourceGreen = 0x8,
337 OneMinusSourceGreen = 0x9, 339 OneMinusSourceGreen = 0x9,
338 340
339 SourceBlue = 0xc, 341 SourceBlue = 0xc,
340 OneMinusSourceBlue = 0xd, 342 OneMinusSourceBlue = 0xd,
341 }; 343 };
342 344
343 enum class AlphaModifier : u32 { 345 enum class AlphaModifier : u32 {
344 SourceAlpha = 0x0, 346 SourceAlpha = 0x0,
345 OneMinusSourceAlpha = 0x1, 347 OneMinusSourceAlpha = 0x1,
346 SourceRed = 0x2, 348 SourceRed = 0x2,
347 OneMinusSourceRed = 0x3, 349 OneMinusSourceRed = 0x3,
348 SourceGreen = 0x4, 350 SourceGreen = 0x4,
349 OneMinusSourceGreen = 0x5, 351 OneMinusSourceGreen = 0x5,
350 SourceBlue = 0x6, 352 SourceBlue = 0x6,
351 OneMinusSourceBlue = 0x7, 353 OneMinusSourceBlue = 0x7,
352 }; 354 };
353 355
354 enum class Operation : u32 { 356 enum class Operation : u32 {
355 Replace = 0, 357 Replace = 0,
356 Modulate = 1, 358 Modulate = 1,
357 Add = 2, 359 Add = 2,
358 AddSigned = 3, 360 AddSigned = 3,
359 Lerp = 4, 361 Lerp = 4,
360 Subtract = 5, 362 Subtract = 5,
361 Dot3_RGB = 6, 363 Dot3_RGB = 6,
362 364
363 MultiplyThenAdd = 8, 365 MultiplyThenAdd = 8,
364 AddThenMultiply = 9, 366 AddThenMultiply = 9,
@@ -366,9 +368,9 @@ struct Regs {
366 368
367 union { 369 union {
368 u32 sources_raw; 370 u32 sources_raw;
369 BitField< 0, 4, Source> color_source1; 371 BitField<0, 4, Source> color_source1;
370 BitField< 4, 4, Source> color_source2; 372 BitField<4, 4, Source> color_source2;
371 BitField< 8, 4, Source> color_source3; 373 BitField<8, 4, Source> color_source3;
372 BitField<16, 4, Source> alpha_source1; 374 BitField<16, 4, Source> alpha_source1;
373 BitField<20, 4, Source> alpha_source2; 375 BitField<20, 4, Source> alpha_source2;
374 BitField<24, 4, Source> alpha_source3; 376 BitField<24, 4, Source> alpha_source3;
@@ -376,9 +378,9 @@ struct Regs {
376 378
377 union { 379 union {
378 u32 modifiers_raw; 380 u32 modifiers_raw;
379 BitField< 0, 4, ColorModifier> color_modifier1; 381 BitField<0, 4, ColorModifier> color_modifier1;
380 BitField< 4, 4, ColorModifier> color_modifier2; 382 BitField<4, 4, ColorModifier> color_modifier2;
381 BitField< 8, 4, ColorModifier> color_modifier3; 383 BitField<8, 4, ColorModifier> color_modifier3;
382 BitField<12, 3, AlphaModifier> alpha_modifier1; 384 BitField<12, 3, AlphaModifier> alpha_modifier1;
383 BitField<16, 3, AlphaModifier> alpha_modifier2; 385 BitField<16, 3, AlphaModifier> alpha_modifier2;
384 BitField<20, 3, AlphaModifier> alpha_modifier3; 386 BitField<20, 3, AlphaModifier> alpha_modifier3;
@@ -386,21 +388,21 @@ struct Regs {
386 388
387 union { 389 union {
388 u32 ops_raw; 390 u32 ops_raw;
389 BitField< 0, 4, Operation> color_op; 391 BitField<0, 4, Operation> color_op;
390 BitField<16, 4, Operation> alpha_op; 392 BitField<16, 4, Operation> alpha_op;
391 }; 393 };
392 394
393 union { 395 union {
394 u32 const_color; 396 u32 const_color;
395 BitField< 0, 8, u32> const_r; 397 BitField<0, 8, u32> const_r;
396 BitField< 8, 8, u32> const_g; 398 BitField<8, 8, u32> const_g;
397 BitField<16, 8, u32> const_b; 399 BitField<16, 8, u32> const_b;
398 BitField<24, 8, u32> const_a; 400 BitField<24, 8, u32> const_a;
399 }; 401 };
400 402
401 union { 403 union {
402 u32 scales_raw; 404 u32 scales_raw;
403 BitField< 0, 2, u32> color_scale; 405 BitField<0, 2, u32> color_scale;
404 BitField<16, 2, u32> alpha_scale; 406 BitField<16, 2, u32> alpha_scale;
405 }; 407 };
406 408
@@ -424,8 +426,8 @@ struct Regs {
424 426
425 enum class FogMode : u32 { 427 enum class FogMode : u32 {
426 None = 0, 428 None = 0,
427 Fog = 5, 429 Fog = 5,
428 Gas = 7, 430 Gas = 7,
429 }; 431 };
430 432
431 union { 433 union {
@@ -435,7 +437,7 @@ struct Regs {
435 union { 437 union {
436 // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in 438 // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in
437 // these masks are set 439 // these masks are set
438 BitField< 8, 4, u32> update_mask_rgb; 440 BitField<8, 4, u32> update_mask_rgb;
439 BitField<12, 4, u32> update_mask_a; 441 BitField<12, 4, u32> update_mask_a;
440 442
441 bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { 443 bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
@@ -450,8 +452,8 @@ struct Regs {
450 452
451 union { 453 union {
452 u32 raw; 454 u32 raw;
453 BitField< 0, 8, u32> r; 455 BitField<0, 8, u32> r;
454 BitField< 8, 8, u32> g; 456 BitField<8, 8, u32> g;
455 BitField<16, 8, u32> b; 457 BitField<16, 8, u32> b;
456 } fog_color; 458 } fog_color;
457 459
@@ -469,66 +471,64 @@ struct Regs {
469 471
470 union { 472 union {
471 u32 raw; 473 u32 raw;
472 BitField< 0, 8, u32> r; 474 BitField<0, 8, u32> r;
473 BitField< 8, 8, u32> g; 475 BitField<8, 8, u32> g;
474 BitField<16, 8, u32> b; 476 BitField<16, 8, u32> b;
475 BitField<24, 8, u32> a; 477 BitField<24, 8, u32> a;
476 } tev_combiner_buffer_color; 478 } tev_combiner_buffer_color;
477 479
478 INSERT_PADDING_WORDS(0x2); 480 INSERT_PADDING_WORDS(0x2);
479 481
480 const std::array<Regs::TevStageConfig,6> GetTevStages() const { 482 const std::array<Regs::TevStageConfig, 6> GetTevStages() const {
481 return {{ tev_stage0, tev_stage1, 483 return {{tev_stage0, tev_stage1, tev_stage2, tev_stage3, tev_stage4, tev_stage5}};
482 tev_stage2, tev_stage3,
483 tev_stage4, tev_stage5 }};
484 }; 484 };
485 485
486 enum class BlendEquation : u32 { 486 enum class BlendEquation : u32 {
487 Add = 0, 487 Add = 0,
488 Subtract = 1, 488 Subtract = 1,
489 ReverseSubtract = 2, 489 ReverseSubtract = 2,
490 Min = 3, 490 Min = 3,
491 Max = 4, 491 Max = 4,
492 }; 492 };
493 493
494 enum class BlendFactor : u32 { 494 enum class BlendFactor : u32 {
495 Zero = 0, 495 Zero = 0,
496 One = 1, 496 One = 1,
497 SourceColor = 2, 497 SourceColor = 2,
498 OneMinusSourceColor = 3, 498 OneMinusSourceColor = 3,
499 DestColor = 4, 499 DestColor = 4,
500 OneMinusDestColor = 5, 500 OneMinusDestColor = 5,
501 SourceAlpha = 6, 501 SourceAlpha = 6,
502 OneMinusSourceAlpha = 7, 502 OneMinusSourceAlpha = 7,
503 DestAlpha = 8, 503 DestAlpha = 8,
504 OneMinusDestAlpha = 9, 504 OneMinusDestAlpha = 9,
505 ConstantColor = 10, 505 ConstantColor = 10,
506 OneMinusConstantColor = 11, 506 OneMinusConstantColor = 11,
507 ConstantAlpha = 12, 507 ConstantAlpha = 12,
508 OneMinusConstantAlpha = 13, 508 OneMinusConstantAlpha = 13,
509 SourceAlphaSaturate = 14, 509 SourceAlphaSaturate = 14,
510 }; 510 };
511 511
512 enum class CompareFunc : u32 { 512 enum class CompareFunc : u32 {
513 Never = 0, 513 Never = 0,
514 Always = 1, 514 Always = 1,
515 Equal = 2, 515 Equal = 2,
516 NotEqual = 3, 516 NotEqual = 3,
517 LessThan = 4, 517 LessThan = 4,
518 LessThanOrEqual = 5, 518 LessThanOrEqual = 5,
519 GreaterThan = 6, 519 GreaterThan = 6,
520 GreaterThanOrEqual = 7, 520 GreaterThanOrEqual = 7,
521 }; 521 };
522 522
523 enum class StencilAction : u32 { 523 enum class StencilAction : u32 {
524 Keep = 0, 524 Keep = 0,
525 Zero = 1, 525 Zero = 1,
526 Replace = 2, 526 Replace = 2,
527 Increment = 3, 527 Increment = 3,
528 Decrement = 4, 528 Decrement = 4,
529 Invert = 5, 529 Invert = 5,
530 IncrementWrap = 6, 530 IncrementWrap = 6,
531 DecrementWrap = 7 531 DecrementWrap = 7,
532 }; 532 };
533 533
534 struct { 534 struct {
@@ -538,8 +538,8 @@ struct Regs {
538 }; 538 };
539 539
540 union { 540 union {
541 BitField< 0, 8, BlendEquation> blend_equation_rgb; 541 BitField<0, 8, BlendEquation> blend_equation_rgb;
542 BitField< 8, 8, BlendEquation> blend_equation_a; 542 BitField<8, 8, BlendEquation> blend_equation_a;
543 543
544 BitField<16, 4, BlendFactor> factor_source_rgb; 544 BitField<16, 4, BlendFactor> factor_source_rgb;
545 BitField<20, 4, BlendFactor> factor_dest_rgb; 545 BitField<20, 4, BlendFactor> factor_dest_rgb;
@@ -554,16 +554,16 @@ struct Regs {
554 554
555 union { 555 union {
556 u32 raw; 556 u32 raw;
557 BitField< 0, 8, u32> r; 557 BitField<0, 8, u32> r;
558 BitField< 8, 8, u32> g; 558 BitField<8, 8, u32> g;
559 BitField<16, 8, u32> b; 559 BitField<16, 8, u32> b;
560 BitField<24, 8, u32> a; 560 BitField<24, 8, u32> a;
561 } blend_const; 561 } blend_const;
562 562
563 union { 563 union {
564 BitField< 0, 1, u32> enable; 564 BitField<0, 1, u32> enable;
565 BitField< 4, 3, CompareFunc> func; 565 BitField<4, 3, CompareFunc> func;
566 BitField< 8, 8, u32> ref; 566 BitField<8, 8, u32> ref;
567 } alpha_test; 567 } alpha_test;
568 568
569 struct { 569 struct {
@@ -572,13 +572,13 @@ struct Regs {
572 u32 raw_func; 572 u32 raw_func;
573 573
574 // If true, enable stencil testing 574 // If true, enable stencil testing
575 BitField< 0, 1, u32> enable; 575 BitField<0, 1, u32> enable;
576 576
577 // Comparison operation for stencil testing 577 // Comparison operation for stencil testing
578 BitField< 4, 3, CompareFunc> func; 578 BitField<4, 3, CompareFunc> func;
579 579
580 // Mask used to control writing to the stencil buffer 580 // Mask used to control writing to the stencil buffer
581 BitField< 8, 8, u32> write_mask; 581 BitField<8, 8, u32> write_mask;
582 582
583 // Value to compare against for stencil testing 583 // Value to compare against for stencil testing
584 BitField<16, 8, u32> reference_value; 584 BitField<16, 8, u32> reference_value;
@@ -592,21 +592,21 @@ struct Regs {
592 u32 raw_op; 592 u32 raw_op;
593 593
594 // Action to perform when the stencil test fails 594 // Action to perform when the stencil test fails
595 BitField< 0, 3, StencilAction> action_stencil_fail; 595 BitField<0, 3, StencilAction> action_stencil_fail;
596 596
597 // Action to perform when stencil testing passed but depth testing fails 597 // Action to perform when stencil testing passed but depth testing fails
598 BitField< 4, 3, StencilAction> action_depth_fail; 598 BitField<4, 3, StencilAction> action_depth_fail;
599 599
600 // Action to perform when both stencil and depth testing pass 600 // Action to perform when both stencil and depth testing pass
601 BitField< 8, 3, StencilAction> action_depth_pass; 601 BitField<8, 3, StencilAction> action_depth_pass;
602 }; 602 };
603 } stencil_test; 603 } stencil_test;
604 604
605 union { 605 union {
606 BitField< 0, 1, u32> depth_test_enable; 606 BitField<0, 1, u32> depth_test_enable;
607 BitField< 4, 3, CompareFunc> depth_test_func; 607 BitField<4, 3, CompareFunc> depth_test_func;
608 BitField< 8, 1, u32> red_enable; 608 BitField<8, 1, u32> red_enable;
609 BitField< 9, 1, u32> green_enable; 609 BitField<9, 1, u32> green_enable;
610 BitField<10, 1, u32> blue_enable; 610 BitField<10, 1, u32> blue_enable;
611 BitField<11, 1, u32> alpha_enable; 611 BitField<11, 1, u32> alpha_enable;
612 BitField<12, 1, u32> depth_write_enable; 612 BitField<12, 1, u32> depth_write_enable;
@@ -617,16 +617,16 @@ struct Regs {
617 617
618 // Components are laid out in reverse byte order, most significant bits first. 618 // Components are laid out in reverse byte order, most significant bits first.
619 enum class ColorFormat : u32 { 619 enum class ColorFormat : u32 {
620 RGBA8 = 0, 620 RGBA8 = 0,
621 RGB8 = 1, 621 RGB8 = 1,
622 RGB5A1 = 2, 622 RGB5A1 = 2,
623 RGB565 = 3, 623 RGB565 = 3,
624 RGBA4 = 4, 624 RGBA4 = 4,
625 }; 625 };
626 626
627 enum class DepthFormat : u32 { 627 enum class DepthFormat : u32 {
628 D16 = 0, 628 D16 = 0,
629 D24 = 2, 629 D24 = 2,
630 D24S8 = 3, 630 D24S8 = 3,
631 }; 631 };
632 632
@@ -673,7 +673,7 @@ struct Regs {
673 // while the height is stored as the actual height minus one. 673 // while the height is stored as the actual height minus one.
674 // Hence, don't access these fields directly but use the accessors 674 // Hence, don't access these fields directly but use the accessors
675 // GetWidth() and GetHeight() instead. 675 // GetWidth() and GetHeight() instead.
676 BitField< 0, 11, u32> width; 676 BitField<0, 11, u32> width;
677 BitField<12, 10, u32> height; 677 BitField<12, 10, u32> height;
678 }; 678 };
679 679
@@ -759,10 +759,12 @@ struct Regs {
759 759
760 /// Selects which lighting components are affected by fresnel 760 /// Selects which lighting components are affected by fresnel
761 enum class LightingFresnelSelector { 761 enum class LightingFresnelSelector {
762 None = 0, ///< Fresnel is disabled 762 None = 0, ///< Fresnel is disabled
763 PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel 763 PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel
764 SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel 764 SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel
765 Both = PrimaryAlpha | SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel 765 Both =
766 PrimaryAlpha |
767 SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel
766 }; 768 };
767 769
768 /// Factor used to scale the output of a lighting LUT 770 /// Factor used to scale the output of a lighting LUT
@@ -789,57 +791,63 @@ struct Regs {
789 }; 791 };
790 792
791 union LightColor { 793 union LightColor {
792 BitField< 0, 10, u32> b; 794 BitField<0, 10, u32> b;
793 BitField<10, 10, u32> g; 795 BitField<10, 10, u32> g;
794 BitField<20, 10, u32> r; 796 BitField<20, 10, u32> r;
795 797
796 Math::Vec3f ToVec3f() const { 798 Math::Vec3f ToVec3f() const {
797 // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color component 799 // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color
800 // component
798 return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f); 801 return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f);
799 } 802 }
800 }; 803 };
801 804
802 /// Returns true if the specified lighting sampler is supported by the current Pica lighting configuration 805 /// Returns true if the specified lighting sampler is supported by the current Pica lighting
806 /// configuration
803 static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) { 807 static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) {
804 switch (sampler) { 808 switch (sampler) {
805 case LightingSampler::Distribution0: 809 case LightingSampler::Distribution0:
806 return (config != LightingConfig::Config1); 810 return (config != LightingConfig::Config1);
807 811
808 case LightingSampler::Distribution1: 812 case LightingSampler::Distribution1:
809 return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5); 813 return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) &&
814 (config != LightingConfig::Config5);
810 815
811 case LightingSampler::Fresnel: 816 case LightingSampler::Fresnel:
812 return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && (config != LightingConfig::Config4); 817 return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) &&
818 (config != LightingConfig::Config4);
813 819
814 case LightingSampler::ReflectRed: 820 case LightingSampler::ReflectRed:
815 return (config != LightingConfig::Config3); 821 return (config != LightingConfig::Config3);
816 822
817 case LightingSampler::ReflectGreen: 823 case LightingSampler::ReflectGreen:
818 case LightingSampler::ReflectBlue: 824 case LightingSampler::ReflectBlue:
819 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7); 825 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) ||
826 (config == LightingConfig::Config7);
820 default: 827 default:
821 UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached " 828 UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached "
822 "unreachable section, sampler should be one " 829 "unreachable section, sampler should be one "
823 "of Distribution0, Distribution1, Fresnel, " 830 "of Distribution0, Distribution1, Fresnel, "
824 "ReflectRed, ReflectGreen or ReflectBlue, instead " 831 "ReflectRed, ReflectGreen or ReflectBlue, instead "
825 "got %i", static_cast<int>(config)); 832 "got %i",
833 static_cast<int>(config));
826 } 834 }
827 } 835 }
828 836
829 struct { 837 struct {
830 struct LightSrc { 838 struct LightSrc {
831 LightColor specular_0; // material.specular_0 * light.specular_0 839 LightColor specular_0; // material.specular_0 * light.specular_0
832 LightColor specular_1; // material.specular_1 * light.specular_1 840 LightColor specular_1; // material.specular_1 * light.specular_1
833 LightColor diffuse; // material.diffuse * light.diffuse 841 LightColor diffuse; // material.diffuse * light.diffuse
834 LightColor ambient; // material.ambient * light.ambient 842 LightColor ambient; // material.ambient * light.ambient
835 843
836 // Encoded as 16-bit floating point 844 // Encoded as 16-bit floating point
837 union { 845 union {
838 BitField< 0, 16, u32> x; 846 BitField<0, 16, u32> x;
839 BitField<16, 16, u32> y; 847 BitField<16, 16, u32> y;
840 }; 848 };
841 union { 849 union {
842 BitField< 0, 16, u32> z; 850 BitField<0, 16, u32> z;
843 }; 851 };
844 852
845 INSERT_PADDING_WORDS(0x3); 853 INSERT_PADDING_WORDS(0x3);
@@ -854,7 +862,8 @@ struct Regs {
854 862
855 INSERT_PADDING_WORDS(0x4); 863 INSERT_PADDING_WORDS(0x4);
856 }; 864 };
857 static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words"); 865 static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32),
866 "LightSrc structure must be 0x10 words");
858 867
859 LightSrc light[8]; 868 LightSrc light[8];
860 LightColor global_ambient; // Emission + (material.ambient * lighting.ambient) 869 LightColor global_ambient; // Emission + (material.ambient * lighting.ambient)
@@ -862,8 +871,8 @@ struct Regs {
862 BitField<0, 3, u32> num_lights; // Number of enabled lights - 1 871 BitField<0, 3, u32> num_lights; // Number of enabled lights - 1
863 872
864 union { 873 union {
865 BitField< 2, 2, LightingFresnelSelector> fresnel_selector; 874 BitField<2, 2, LightingFresnelSelector> fresnel_selector;
866 BitField< 4, 4, LightingConfig> config; 875 BitField<4, 4, LightingConfig> config;
867 BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2 876 BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2
868 BitField<27, 1, u32> clamp_highlights; 877 BitField<27, 1, u32> clamp_highlights;
869 BitField<28, 2, LightingBumpMode> bump_mode; 878 BitField<28, 2, LightingBumpMode> bump_mode;
@@ -892,16 +901,17 @@ struct Regs {
892 } config1; 901 } config1;
893 902
894 bool IsDistAttenDisabled(unsigned index) const { 903 bool IsDistAttenDisabled(unsigned index) const {
895 const unsigned disable[] = { config1.disable_dist_atten_light_0, config1.disable_dist_atten_light_1, 904 const unsigned disable[] = {
896 config1.disable_dist_atten_light_2, config1.disable_dist_atten_light_3, 905 config1.disable_dist_atten_light_0, config1.disable_dist_atten_light_1,
897 config1.disable_dist_atten_light_4, config1.disable_dist_atten_light_5, 906 config1.disable_dist_atten_light_2, config1.disable_dist_atten_light_3,
898 config1.disable_dist_atten_light_6, config1.disable_dist_atten_light_7 }; 907 config1.disable_dist_atten_light_4, config1.disable_dist_atten_light_5,
908 config1.disable_dist_atten_light_6, config1.disable_dist_atten_light_7};
899 return disable[index] != 0; 909 return disable[index] != 0;
900 } 910 }
901 911
902 union { 912 union {
903 BitField<0, 8, u32> index; ///< Index at which to set data in the LUT 913 BitField<0, 8, u32> index; ///< Index at which to set data in the LUT
904 BitField<8, 5, u32> type; ///< Type of LUT for which to set data 914 BitField<8, 5, u32> type; ///< Type of LUT for which to set data
905 } lut_config; 915 } lut_config;
906 916
907 BitField<0, 1, u32> disable; 917 BitField<0, 1, u32> disable;
@@ -917,9 +927,9 @@ struct Regs {
917 // abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in 927 // abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in
918 // the range of (0.0, 1.0). 928 // the range of (0.0, 1.0).
919 union { 929 union {
920 BitField< 1, 1, u32> disable_d0; 930 BitField<1, 1, u32> disable_d0;
921 BitField< 5, 1, u32> disable_d1; 931 BitField<5, 1, u32> disable_d1;
922 BitField< 9, 1, u32> disable_sp; 932 BitField<9, 1, u32> disable_sp;
923 BitField<13, 1, u32> disable_fr; 933 BitField<13, 1, u32> disable_fr;
924 BitField<17, 1, u32> disable_rb; 934 BitField<17, 1, u32> disable_rb;
925 BitField<21, 1, u32> disable_rg; 935 BitField<21, 1, u32> disable_rg;
@@ -927,9 +937,9 @@ struct Regs {
927 } abs_lut_input; 937 } abs_lut_input;
928 938
929 union { 939 union {
930 BitField< 0, 3, LightingLutInput> d0; 940 BitField<0, 3, LightingLutInput> d0;
931 BitField< 4, 3, LightingLutInput> d1; 941 BitField<4, 3, LightingLutInput> d1;
932 BitField< 8, 3, LightingLutInput> sp; 942 BitField<8, 3, LightingLutInput> sp;
933 BitField<12, 3, LightingLutInput> fr; 943 BitField<12, 3, LightingLutInput> fr;
934 BitField<16, 3, LightingLutInput> rb; 944 BitField<16, 3, LightingLutInput> rb;
935 BitField<20, 3, LightingLutInput> rg; 945 BitField<20, 3, LightingLutInput> rg;
@@ -937,9 +947,9 @@ struct Regs {
937 } lut_input; 947 } lut_input;
938 948
939 union { 949 union {
940 BitField< 0, 3, LightingScale> d0; 950 BitField<0, 3, LightingScale> d0;
941 BitField< 4, 3, LightingScale> d1; 951 BitField<4, 3, LightingScale> d1;
942 BitField< 8, 3, LightingScale> sp; 952 BitField<8, 3, LightingScale> sp;
943 BitField<12, 3, LightingScale> fr; 953 BitField<12, 3, LightingScale> fr;
944 BitField<16, 3, LightingScale> rb; 954 BitField<16, 3, LightingScale> rb;
945 BitField<20, 3, LightingScale> rg; 955 BitField<20, 3, LightingScale> rg;
@@ -972,9 +982,9 @@ struct Regs {
972 // above), the first N slots below will be set to integers within the range of 0-7, 982 // above), the first N slots below will be set to integers within the range of 0-7,
973 // corresponding to the actual light that is enabled for each slot. 983 // corresponding to the actual light that is enabled for each slot.
974 984
975 BitField< 0, 3, u32> slot_0; 985 BitField<0, 3, u32> slot_0;
976 BitField< 4, 3, u32> slot_1; 986 BitField<4, 3, u32> slot_1;
977 BitField< 8, 3, u32> slot_2; 987 BitField<8, 3, u32> slot_2;
978 BitField<12, 3, u32> slot_3; 988 BitField<12, 3, u32> slot_3;
979 BitField<16, 3, u32> slot_4; 989 BitField<16, 3, u32> slot_4;
980 BitField<20, 3, u32> slot_5; 990 BitField<20, 3, u32> slot_5;
@@ -982,7 +992,8 @@ struct Regs {
982 BitField<28, 3, u32> slot_7; 992 BitField<28, 3, u32> slot_7;
983 993
984 unsigned GetNum(unsigned index) const { 994 unsigned GetNum(unsigned index) const {
985 const unsigned enable_slots[] = { slot_0, slot_1, slot_2, slot_3, slot_4, slot_5, slot_6, slot_7 }; 995 const unsigned enable_slots[] = {slot_0, slot_1, slot_2, slot_3,
996 slot_4, slot_5, slot_6, slot_7};
986 return enable_slots[index]; 997 return enable_slots[index];
987 } 998 }
988 } light_enable; 999 } light_enable;
@@ -1006,58 +1017,54 @@ struct Regs {
1006 1017
1007 // Descriptor for internal vertex attributes 1018 // Descriptor for internal vertex attributes
1008 union { 1019 union {
1009 BitField< 0, 2, VertexAttributeFormat> format0; // size of one element 1020 BitField<0, 2, VertexAttributeFormat> format0; // size of one element
1010 BitField< 2, 2, u64> size0; // number of elements minus 1 1021 BitField<2, 2, u64> size0; // number of elements minus 1
1011 BitField< 4, 2, VertexAttributeFormat> format1; 1022 BitField<4, 2, VertexAttributeFormat> format1;
1012 BitField< 6, 2, u64> size1; 1023 BitField<6, 2, u64> size1;
1013 BitField< 8, 2, VertexAttributeFormat> format2; 1024 BitField<8, 2, VertexAttributeFormat> format2;
1014 BitField<10, 2, u64> size2; 1025 BitField<10, 2, u64> size2;
1015 BitField<12, 2, VertexAttributeFormat> format3; 1026 BitField<12, 2, VertexAttributeFormat> format3;
1016 BitField<14, 2, u64> size3; 1027 BitField<14, 2, u64> size3;
1017 BitField<16, 2, VertexAttributeFormat> format4; 1028 BitField<16, 2, VertexAttributeFormat> format4;
1018 BitField<18, 2, u64> size4; 1029 BitField<18, 2, u64> size4;
1019 BitField<20, 2, VertexAttributeFormat> format5; 1030 BitField<20, 2, VertexAttributeFormat> format5;
1020 BitField<22, 2, u64> size5; 1031 BitField<22, 2, u64> size5;
1021 BitField<24, 2, VertexAttributeFormat> format6; 1032 BitField<24, 2, VertexAttributeFormat> format6;
1022 BitField<26, 2, u64> size6; 1033 BitField<26, 2, u64> size6;
1023 BitField<28, 2, VertexAttributeFormat> format7; 1034 BitField<28, 2, VertexAttributeFormat> format7;
1024 BitField<30, 2, u64> size7; 1035 BitField<30, 2, u64> size7;
1025 BitField<32, 2, VertexAttributeFormat> format8; 1036 BitField<32, 2, VertexAttributeFormat> format8;
1026 BitField<34, 2, u64> size8; 1037 BitField<34, 2, u64> size8;
1027 BitField<36, 2, VertexAttributeFormat> format9; 1038 BitField<36, 2, VertexAttributeFormat> format9;
1028 BitField<38, 2, u64> size9; 1039 BitField<38, 2, u64> size9;
1029 BitField<40, 2, VertexAttributeFormat> format10; 1040 BitField<40, 2, VertexAttributeFormat> format10;
1030 BitField<42, 2, u64> size10; 1041 BitField<42, 2, u64> size10;
1031 BitField<44, 2, VertexAttributeFormat> format11; 1042 BitField<44, 2, VertexAttributeFormat> format11;
1032 BitField<46, 2, u64> size11; 1043 BitField<46, 2, u64> size11;
1033 1044
1034 BitField<48, 12, u64> attribute_mask; 1045 BitField<48, 12, u64> attribute_mask;
1035 1046
1036 // number of total attributes minus 1 1047 // number of total attributes minus 1
1037 BitField<60, 4, u64> num_extra_attributes; 1048 BitField<60, 4, u64> num_extra_attributes;
1038 }; 1049 };
1039 1050
1040 inline VertexAttributeFormat GetFormat(int n) const { 1051 inline VertexAttributeFormat GetFormat(int n) const {
1041 VertexAttributeFormat formats[] = { 1052 VertexAttributeFormat formats[] = {format0, format1, format2, format3,
1042 format0, format1, format2, format3, 1053 format4, format5, format6, format7,
1043 format4, format5, format6, format7, 1054 format8, format9, format10, format11};
1044 format8, format9, format10, format11
1045 };
1046 return formats[n]; 1055 return formats[n];
1047 } 1056 }
1048 1057
1049 inline int GetNumElements(int n) const { 1058 inline int GetNumElements(int n) const {
1050 u64 sizes[] = { 1059 u64 sizes[] = {size0, size1, size2, size3, size4, size5,
1051 size0, size1, size2, size3, 1060 size6, size7, size8, size9, size10, size11};
1052 size4, size5, size6, size7, 1061 return (int)sizes[n] + 1;
1053 size8, size9, size10, size11
1054 };
1055 return (int)sizes[n]+1;
1056 } 1062 }
1057 1063
1058 inline int GetElementSizeInBytes(int n) const { 1064 inline int GetElementSizeInBytes(int n) const {
1059 return (GetFormat(n) == VertexAttributeFormat::FLOAT) ? 4 : 1065 return (GetFormat(n) == VertexAttributeFormat::FLOAT)
1060 (GetFormat(n) == VertexAttributeFormat::SHORT) ? 2 : 1; 1066 ? 4
1067 : (GetFormat(n) == VertexAttributeFormat::SHORT) ? 2 : 1;
1061 } 1068 }
1062 1069
1063 inline int GetStride(int n) const { 1070 inline int GetStride(int n) const {
@@ -1069,7 +1076,7 @@ struct Regs {
1069 } 1076 }
1070 1077
1071 inline int GetNumTotalAttributes() const { 1078 inline int GetNumTotalAttributes() const {
1072 return (int)num_extra_attributes+1; 1079 return (int)num_extra_attributes + 1;
1073 } 1080 }
1074 1081
1075 // Attribute loaders map the source vertex data to input attributes 1082 // Attribute loaders map the source vertex data to input attributes
@@ -1079,9 +1086,9 @@ struct Regs {
1079 u32 data_offset; 1086 u32 data_offset;
1080 1087
1081 union { 1088 union {
1082 BitField< 0, 4, u64> comp0; 1089 BitField<0, 4, u64> comp0;
1083 BitField< 4, 4, u64> comp1; 1090 BitField<4, 4, u64> comp1;
1084 BitField< 8, 4, u64> comp2; 1091 BitField<8, 4, u64> comp2;
1085 BitField<12, 4, u64> comp3; 1092 BitField<12, 4, u64> comp3;
1086 BitField<16, 4, u64> comp4; 1093 BitField<16, 4, u64> comp4;
1087 BitField<20, 4, u64> comp5; 1094 BitField<20, 4, u64> comp5;
@@ -1099,11 +1106,8 @@ struct Regs {
1099 }; 1106 };
1100 1107
1101 inline int GetComponent(int n) const { 1108 inline int GetComponent(int n) const {
1102 u64 components[] = { 1109 u64 components[] = {comp0, comp1, comp2, comp3, comp4, comp5,
1103 comp0, comp1, comp2, comp3, 1110 comp6, comp7, comp8, comp9, comp10, comp11};
1104 comp4, comp5, comp6, comp7,
1105 comp8, comp9, comp10, comp11
1106 };
1107 return (int)components[n]; 1111 return (int)components[n];
1108 } 1112 }
1109 } attribute_loaders[12]; 1113 } attribute_loaders[12];
@@ -1157,8 +1161,8 @@ struct Regs {
1157 // kicked off. 1161 // kicked off.
1158 // 2) Games can configure these registers to provide a command list subroutine mechanism. 1162 // 2) Games can configure these registers to provide a command list subroutine mechanism.
1159 1163
1160 BitField< 0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer 1164 BitField<0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer
1161 BitField< 0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer 1165 BitField<0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer
1162 u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to 1166 u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to
1163 1167
1164 unsigned GetSize(unsigned index) const { 1168 unsigned GetSize(unsigned index) const {
@@ -1176,7 +1180,7 @@ struct Regs {
1176 1180
1177 enum class GPUMode : u32 { 1181 enum class GPUMode : u32 {
1178 Drawing = 0, 1182 Drawing = 0,
1179 Configuring = 1 1183 Configuring = 1,
1180 }; 1184 };
1181 1185
1182 GPUMode gpu_mode; 1186 GPUMode gpu_mode;
@@ -1184,9 +1188,9 @@ struct Regs {
1184 INSERT_PADDING_WORDS(0x18); 1188 INSERT_PADDING_WORDS(0x18);
1185 1189
1186 enum class TriangleTopology : u32 { 1190 enum class TriangleTopology : u32 {
1187 List = 0, 1191 List = 0,
1188 Strip = 1, 1192 Strip = 1,
1189 Fan = 2, 1193 Fan = 2,
1190 Shader = 3, // Programmable setup unit implemented in a geometry shader 1194 Shader = 3, // Programmable setup unit implemented in a geometry shader
1191 }; 1195 };
1192 1196
@@ -1200,8 +1204,8 @@ struct Regs {
1200 BitField<0, 16, u32> bool_uniforms; 1204 BitField<0, 16, u32> bool_uniforms;
1201 1205
1202 union { 1206 union {
1203 BitField< 0, 8, u32> x; 1207 BitField<0, 8, u32> x;
1204 BitField< 8, 8, u32> y; 1208 BitField<8, 8, u32> y;
1205 BitField<16, 8, u32> z; 1209 BitField<16, 8, u32> z;
1206 BitField<24, 8, u32> w; 1210 BitField<24, 8, u32> w;
1207 } int_uniforms[4]; 1211 } int_uniforms[4];
@@ -1217,9 +1221,9 @@ struct Regs {
1217 BitField<0, 16, u32> main_offset; 1221 BitField<0, 16, u32> main_offset;
1218 1222
1219 union { 1223 union {
1220 BitField< 0, 4, u64> attribute0_register; 1224 BitField<0, 4, u64> attribute0_register;
1221 BitField< 4, 4, u64> attribute1_register; 1225 BitField<4, 4, u64> attribute1_register;
1222 BitField< 8, 4, u64> attribute2_register; 1226 BitField<8, 4, u64> attribute2_register;
1223 BitField<12, 4, u64> attribute3_register; 1227 BitField<12, 4, u64> attribute3_register;
1224 BitField<16, 4, u64> attribute4_register; 1228 BitField<16, 4, u64> attribute4_register;
1225 BitField<20, 4, u64> attribute5_register; 1229 BitField<20, 4, u64> attribute5_register;
@@ -1236,10 +1240,12 @@ struct Regs {
1236 1240
1237 int GetRegisterForAttribute(int attribute_index) const { 1241 int GetRegisterForAttribute(int attribute_index) const {
1238 u64 fields[] = { 1242 u64 fields[] = {
1239 attribute0_register, attribute1_register, attribute2_register, attribute3_register, 1243 attribute0_register, attribute1_register, attribute2_register,
1240 attribute4_register, attribute5_register, attribute6_register, attribute7_register, 1244 attribute3_register, attribute4_register, attribute5_register,
1241 attribute8_register, attribute9_register, attribute10_register, attribute11_register, 1245 attribute6_register, attribute7_register, attribute8_register,
1242 attribute12_register, attribute13_register, attribute14_register, attribute15_register, 1246 attribute9_register, attribute10_register, attribute11_register,
1247 attribute12_register, attribute13_register, attribute14_register,
1248 attribute15_register,
1243 }; 1249 };
1244 return (int)fields[attribute_index]; 1250 return (int)fields[attribute_index];
1245 } 1251 }
@@ -1251,10 +1257,9 @@ struct Regs {
1251 INSERT_PADDING_WORDS(0x2); 1257 INSERT_PADDING_WORDS(0x2);
1252 1258
1253 struct { 1259 struct {
1254 enum Format : u32 1260 enum Format : u32 {
1255 {
1256 FLOAT24 = 0, 1261 FLOAT24 = 0,
1257 FLOAT32 = 1 1262 FLOAT32 = 1,
1258 }; 1263 };
1259 1264
1260 bool IsFloat32() const { 1265 bool IsFloat32() const {
@@ -1263,7 +1268,8 @@ struct Regs {
1263 1268
1264 union { 1269 union {
1265 // Index of the next uniform to write to 1270 // Index of the next uniform to write to
1266 // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices 1271 // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid
1272 // indices
1267 // TODO: Maybe the uppermost index is for the geometry shader? Investigate! 1273 // TODO: Maybe the uppermost index is for the geometry shader? Investigate!
1268 BitField<0, 7, u32> index; 1274 BitField<0, 7, u32> index;
1269 1275
@@ -1315,12 +1321,12 @@ struct Regs {
1315 return sizeof(Regs) / sizeof(u32); 1321 return sizeof(Regs) / sizeof(u32);
1316 } 1322 }
1317 1323
1318 const u32& operator [] (int index) const { 1324 const u32& operator[](int index) const {
1319 const u32* content = reinterpret_cast<const u32*>(this); 1325 const u32* content = reinterpret_cast<const u32*>(this);
1320 return content[index]; 1326 return content[index];
1321 } 1327 }
1322 1328
1323 u32& operator [] (int index) { 1329 u32& operator[](int index) {
1324 u32* content = reinterpret_cast<u32*>(this); 1330 u32* content = reinterpret_cast<u32*>(this);
1325 return content[index]; 1331 return content[index];
1326 } 1332 }
@@ -1339,7 +1345,9 @@ private:
1339// is technically allowed since C++11. This macro should be enabled once MSVC adds 1345// is technically allowed since C++11. This macro should be enabled once MSVC adds
1340// support for that. 1346// support for that.
1341#ifndef _MSC_VER 1347#ifndef _MSC_VER
1342#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position") 1348#define ASSERT_REG_POSITION(field_name, position) \
1349 static_assert(offsetof(Regs, field_name) == position * 4, \
1350 "Field " #field_name " has invalid position")
1343 1351
1344ASSERT_REG_POSITION(trigger_irq, 0x10); 1352ASSERT_REG_POSITION(trigger_irq, 0x10);
1345ASSERT_REG_POSITION(cull_mode, 0x40); 1353ASSERT_REG_POSITION(cull_mode, 0x40);
@@ -1392,11 +1400,15 @@ ASSERT_REG_POSITION(vs, 0x2b0);
1392#undef ASSERT_REG_POSITION 1400#undef ASSERT_REG_POSITION
1393#endif // !defined(_MSC_VER) 1401#endif // !defined(_MSC_VER)
1394 1402
1395static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig structure has incorrect size"); 1403static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32),
1404 "ShaderConfig structure has incorrect size");
1396 1405
1397// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. 1406// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value
1398static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); 1407// anyway.
1399static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); 1408static_assert(sizeof(Regs) <= 0x300 * sizeof(u32),
1409 "Register set structure larger than it should be");
1410static_assert(sizeof(Regs) >= 0x300 * sizeof(u32),
1411 "Register set structure smaller than it should be");
1400 1412
1401/// Initialize Pica state 1413/// Initialize Pica state
1402void Init(); 1414void Init();
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
index 01f4285a8..e4f2e6d5d 100644
--- a/src/video_core/pica_state.h
+++ b/src/video_core/pica_state.h
@@ -5,10 +5,8 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8
9#include "common/bit_field.h" 8#include "common/bit_field.h"
10#include "common/common_types.h" 9#include "common/common_types.h"
11
12#include "video_core/pica.h" 10#include "video_core/pica.h"
13#include "video_core/primitive_assembly.h" 11#include "video_core/primitive_assembly.h"
14#include "video_core/shader/shader.h" 12#include "video_core/shader/shader.h"
@@ -33,7 +31,7 @@ struct State {
33 u32 raw; 31 u32 raw;
34 32
35 // LUT value, encoded as 12-bit fixed point, with 12 fraction bits 33 // LUT value, encoded as 12-bit fixed point, with 12 fraction bits
36 BitField< 0, 12, u32> value; // 0.0.12 fixed point 34 BitField<0, 12, u32> value; // 0.0.12 fixed point
37 35
38 // Used by HW for efficient interpolation, Citra does not use these 36 // Used by HW for efficient interpolation, Citra does not use these
39 BitField<12, 12, s32> difference; // 1.0.11 fixed point 37 BitField<12, 12, s32> difference; // 1.0.11 fixed point
@@ -51,8 +49,8 @@ struct State {
51 // Used for raw access 49 // Used for raw access
52 u32 raw; 50 u32 raw;
53 51
54 BitField< 0, 13, s32> difference; // 1.1.11 fixed point 52 BitField<0, 13, s32> difference; // 1.1.11 fixed point
55 BitField<13, 11, u32> value; // 0.0.11 fixed point 53 BitField<13, 11, u32> value; // 0.0.11 fixed point
56 }; 54 };
57 55
58 std::array<LutEntry, 128> lut; 56 std::array<LutEntry, 128> lut;
diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h
index 3b7bfbdca..5d7e10066 100644
--- a/src/video_core/pica_types.h
+++ b/src/video_core/pica_types.h
@@ -6,7 +6,6 @@
6 6
7#include <cmath> 7#include <cmath>
8#include <cstring> 8#include <cstring>
9
10#include "common/common_types.h" 9#include "common/common_types.h"
11 10
12namespace Pica { 11namespace Pica {
@@ -22,7 +21,7 @@ namespace Pica {
22 * 21 *
23 * @todo Verify on HW if this conversion is sufficiently accurate. 22 * @todo Verify on HW if this conversion is sufficiently accurate.
24 */ 23 */
25template<unsigned M, unsigned E> 24template <unsigned M, unsigned E>
26struct Float { 25struct Float {
27public: 26public:
28 static Float<M, E> FromFloat32(float val) { 27 static Float<M, E> FromFloat32(float val) {
@@ -58,7 +57,7 @@ public:
58 return value; 57 return value;
59 } 58 }
60 59
61 Float<M, E> operator * (const Float<M, E>& flt) const { 60 Float<M, E> operator*(const Float<M, E>& flt) const {
62 if ((this->value == 0.f && !std::isnan(flt.value)) || 61 if ((this->value == 0.f && !std::isnan(flt.value)) ||
63 (flt.value == 0.f && !std::isnan(this->value))) 62 (flt.value == 0.f && !std::isnan(this->value)))
64 // PICA gives 0 instead of NaN when multiplying by inf 63 // PICA gives 0 instead of NaN when multiplying by inf
@@ -66,67 +65,68 @@ public:
66 return Float<M, E>::FromFloat32(ToFloat32() * flt.ToFloat32()); 65 return Float<M, E>::FromFloat32(ToFloat32() * flt.ToFloat32());
67 } 66 }
68 67
69 Float<M, E> operator / (const Float<M, E>& flt) const { 68 Float<M, E> operator/(const Float<M, E>& flt) const {
70 return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32()); 69 return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32());
71 } 70 }
72 71
73 Float<M, E> operator + (const Float<M, E>& flt) const { 72 Float<M, E> operator+(const Float<M, E>& flt) const {
74 return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32()); 73 return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32());
75 } 74 }
76 75
77 Float<M, E> operator - (const Float<M, E>& flt) const { 76 Float<M, E> operator-(const Float<M, E>& flt) const {
78 return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32()); 77 return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32());
79 } 78 }
80 79
81 Float<M, E>& operator *= (const Float<M, E>& flt) { 80 Float<M, E>& operator*=(const Float<M, E>& flt) {
82 if ((this->value == 0.f && !std::isnan(flt.value)) || 81 if ((this->value == 0.f && !std::isnan(flt.value)) ||
83 (flt.value == 0.f && !std::isnan(this->value))) 82 (flt.value == 0.f && !std::isnan(this->value)))
84 // PICA gives 0 instead of NaN when multiplying by inf 83 // PICA gives 0 instead of NaN when multiplying by inf
85 *this = Zero(); 84 *this = Zero();
86 else value *= flt.ToFloat32(); 85 else
86 value *= flt.ToFloat32();
87 return *this; 87 return *this;
88 } 88 }
89 89
90 Float<M, E>& operator /= (const Float<M, E>& flt) { 90 Float<M, E>& operator/=(const Float<M, E>& flt) {
91 value /= flt.ToFloat32(); 91 value /= flt.ToFloat32();
92 return *this; 92 return *this;
93 } 93 }
94 94
95 Float<M, E>& operator += (const Float<M, E>& flt) { 95 Float<M, E>& operator+=(const Float<M, E>& flt) {
96 value += flt.ToFloat32(); 96 value += flt.ToFloat32();
97 return *this; 97 return *this;
98 } 98 }
99 99
100 Float<M, E>& operator -= (const Float<M, E>& flt) { 100 Float<M, E>& operator-=(const Float<M, E>& flt) {
101 value -= flt.ToFloat32(); 101 value -= flt.ToFloat32();
102 return *this; 102 return *this;
103 } 103 }
104 104
105 Float<M, E> operator - () const { 105 Float<M, E> operator-() const {
106 return Float<M, E>::FromFloat32(-ToFloat32()); 106 return Float<M, E>::FromFloat32(-ToFloat32());
107 } 107 }
108 108
109 bool operator < (const Float<M, E>& flt) const { 109 bool operator<(const Float<M, E>& flt) const {
110 return ToFloat32() < flt.ToFloat32(); 110 return ToFloat32() < flt.ToFloat32();
111 } 111 }
112 112
113 bool operator > (const Float<M, E>& flt) const { 113 bool operator>(const Float<M, E>& flt) const {
114 return ToFloat32() > flt.ToFloat32(); 114 return ToFloat32() > flt.ToFloat32();
115 } 115 }
116 116
117 bool operator >= (const Float<M, E>& flt) const { 117 bool operator>=(const Float<M, E>& flt) const {
118 return ToFloat32() >= flt.ToFloat32(); 118 return ToFloat32() >= flt.ToFloat32();
119 } 119 }
120 120
121 bool operator <= (const Float<M, E>& flt) const { 121 bool operator<=(const Float<M, E>& flt) const {
122 return ToFloat32() <= flt.ToFloat32(); 122 return ToFloat32() <= flt.ToFloat32();
123 } 123 }
124 124
125 bool operator == (const Float<M, E>& flt) const { 125 bool operator==(const Float<M, E>& flt) const {
126 return ToFloat32() == flt.ToFloat32(); 126 return ToFloat32() == flt.ToFloat32();
127 } 127 }
128 128
129 bool operator != (const Float<M, E>& flt) const { 129 bool operator!=(const Float<M, E>& flt) const {
130 return ToFloat32() != flt.ToFloat32(); 130 return ToFloat32() != flt.ToFloat32();
131 } 131 }
132 132
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp
index 68ea3c08a..be7377290 100644
--- a/src/video_core/primitive_assembly.cpp
+++ b/src/video_core/primitive_assembly.cpp
@@ -3,69 +3,66 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6
7#include "video_core/pica.h" 6#include "video_core/pica.h"
8#include "video_core/primitive_assembly.h" 7#include "video_core/primitive_assembly.h"
9#include "video_core/shader/shader.h" 8#include "video_core/shader/shader.h"
10 9
11namespace Pica { 10namespace Pica {
12 11
13template<typename VertexType> 12template <typename VertexType>
14PrimitiveAssembler<VertexType>::PrimitiveAssembler(Regs::TriangleTopology topology) 13PrimitiveAssembler<VertexType>::PrimitiveAssembler(Regs::TriangleTopology topology)
15 : topology(topology), buffer_index(0) { 14 : topology(topology), buffer_index(0) {}
16}
17 15
18template<typename VertexType> 16template <typename VertexType>
19void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler) 17void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx,
20{ 18 TriangleHandler triangle_handler) {
21 switch (topology) { 19 switch (topology) {
22 // TODO: Figure out what's different with TriangleTopology::Shader. 20 // TODO: Figure out what's different with TriangleTopology::Shader.
23 case Regs::TriangleTopology::List: 21 case Regs::TriangleTopology::List:
24 case Regs::TriangleTopology::Shader: 22 case Regs::TriangleTopology::Shader:
25 if (buffer_index < 2) { 23 if (buffer_index < 2) {
26 buffer[buffer_index++] = vtx; 24 buffer[buffer_index++] = vtx;
27 } else { 25 } else {
28 buffer_index = 0; 26 buffer_index = 0;
29 27
30 triangle_handler(buffer[0], buffer[1], vtx); 28 triangle_handler(buffer[0], buffer[1], vtx);
31 } 29 }
32 break; 30 break;
33 31
34 case Regs::TriangleTopology::Strip: 32 case Regs::TriangleTopology::Strip:
35 case Regs::TriangleTopology::Fan: 33 case Regs::TriangleTopology::Fan:
36 if (strip_ready) 34 if (strip_ready)
37 triangle_handler(buffer[0], buffer[1], vtx); 35 triangle_handler(buffer[0], buffer[1], vtx);
38 36
39 buffer[buffer_index] = vtx; 37 buffer[buffer_index] = vtx;
40 38
41 strip_ready |= (buffer_index == 1); 39 strip_ready |= (buffer_index == 1);
42 40
43 if (topology == Regs::TriangleTopology::Strip) 41 if (topology == Regs::TriangleTopology::Strip)
44 buffer_index = !buffer_index; 42 buffer_index = !buffer_index;
45 else if (topology == Regs::TriangleTopology::Fan) 43 else if (topology == Regs::TriangleTopology::Fan)
46 buffer_index = 1; 44 buffer_index = 1;
47 break; 45 break;
48 46
49 default: 47 default:
50 LOG_ERROR(HW_GPU, "Unknown triangle topology %x:", (int)topology); 48 LOG_ERROR(HW_GPU, "Unknown triangle topology %x:", (int)topology);
51 break; 49 break;
52 } 50 }
53} 51}
54 52
55template<typename VertexType> 53template <typename VertexType>
56void PrimitiveAssembler<VertexType>::Reset() { 54void PrimitiveAssembler<VertexType>::Reset() {
57 buffer_index = 0; 55 buffer_index = 0;
58 strip_ready = false; 56 strip_ready = false;
59} 57}
60 58
61template<typename VertexType> 59template <typename VertexType>
62void PrimitiveAssembler<VertexType>::Reconfigure(Regs::TriangleTopology topology) { 60void PrimitiveAssembler<VertexType>::Reconfigure(Regs::TriangleTopology topology) {
63 Reset(); 61 Reset();
64 this->topology = topology; 62 this->topology = topology;
65} 63}
66 64
67// explicitly instantiate use cases 65// explicitly instantiate use cases
68template 66template struct PrimitiveAssembler<Shader::OutputVertex>;
69struct PrimitiveAssembler<Shader::OutputVertex>;
70 67
71} // namespace 68} // namespace
diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h
index 9396b4c85..0384d5984 100644
--- a/src/video_core/primitive_assembly.h
+++ b/src/video_core/primitive_assembly.h
@@ -5,7 +5,6 @@
5#pragma once 5#pragma once
6 6
7#include <functional> 7#include <functional>
8
9#include "video_core/pica.h" 8#include "video_core/pica.h"
10 9
11namespace Pica { 10namespace Pica {
@@ -14,11 +13,9 @@ namespace Pica {
14 * Utility class to build triangles from a series of vertices, 13 * Utility class to build triangles from a series of vertices,
15 * according to a given triangle topology. 14 * according to a given triangle topology.
16 */ 15 */
17template<typename VertexType> 16template <typename VertexType>
18struct PrimitiveAssembler { 17struct PrimitiveAssembler {
19 using TriangleHandler = std::function<void(VertexType& v0, 18 using TriangleHandler = std::function<void(VertexType& v0, VertexType& v1, VertexType& v2)>;
20 VertexType& v1,
21 VertexType& v2)>;
22 19
23 PrimitiveAssembler(Regs::TriangleTopology topology = Regs::TriangleTopology::List); 20 PrimitiveAssembler(Regs::TriangleTopology topology = Regs::TriangleTopology::List);
24 21
@@ -48,5 +45,4 @@ private:
48 bool strip_ready = false; 45 bool strip_ready = false;
49}; 46};
50 47
51
52} // namespace 48} // namespace
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 6f369a00e..6c4bbed33 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -5,7 +5,6 @@
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7#include <cmath> 7#include <cmath>
8
9#include "common/assert.h" 8#include "common/assert.h"
10#include "common/bit_field.h" 9#include "common/bit_field.h"
11#include "common/color.h" 10#include "common/color.h"
@@ -14,17 +13,15 @@
14#include "common/math_util.h" 13#include "common/math_util.h"
15#include "common/microprofile.h" 14#include "common/microprofile.h"
16#include "common/vector_math.h" 15#include "common/vector_math.h"
17
18#include "core/memory.h"
19#include "core/hw/gpu.h" 16#include "core/hw/gpu.h"
20 17#include "core/memory.h"
21#include "video_core/debug_utils/debug_utils.h" 18#include "video_core/debug_utils/debug_utils.h"
22#include "video_core/pica.h" 19#include "video_core/pica.h"
23#include "video_core/pica_state.h" 20#include "video_core/pica_state.h"
24#include "video_core/pica_types.h" 21#include "video_core/pica_types.h"
25#include "video_core/rasterizer.h" 22#include "video_core/rasterizer.h"
26#include "video_core/utils.h"
27#include "video_core/shader/shader.h" 23#include "video_core/shader/shader.h"
24#include "video_core/utils.h"
28 25
29namespace Pica { 26namespace Pica {
30 27
@@ -39,8 +36,10 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
39 y = framebuffer.height - y; 36 y = framebuffer.height - y;
40 37
41 const u32 coarse_y = y & ~7; 38 const u32 coarse_y = y & ~7;
42 u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); 39 u32 bytes_per_pixel =
43 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; 40 GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
41 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
42 coarse_y * framebuffer.width * bytes_per_pixel;
44 u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; 43 u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset;
45 44
46 switch (framebuffer.color_format) { 45 switch (framebuffer.color_format) {
@@ -65,7 +64,8 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
65 break; 64 break;
66 65
67 default: 66 default:
68 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); 67 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x",
68 framebuffer.color_format.Value());
69 UNIMPLEMENTED(); 69 UNIMPLEMENTED();
70 } 70 }
71} 71}
@@ -77,8 +77,10 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
77 y = framebuffer.height - y; 77 y = framebuffer.height - y;
78 78
79 const u32 coarse_y = y & ~7; 79 const u32 coarse_y = y & ~7;
80 u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); 80 u32 bytes_per_pixel =
81 u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; 81 GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
82 u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
83 coarse_y * framebuffer.width * bytes_per_pixel;
82 u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; 84 u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset;
83 85
84 switch (framebuffer.color_format) { 86 switch (framebuffer.color_format) {
@@ -98,7 +100,8 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
98 return Color::DecodeRGBA4(src_pixel); 100 return Color::DecodeRGBA4(src_pixel);
99 101
100 default: 102 default:
101 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); 103 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x",
104 framebuffer.color_format.Value());
102 UNIMPLEMENTED(); 105 UNIMPLEMENTED();
103 } 106 }
104 107
@@ -120,16 +123,16 @@ static u32 GetDepth(int x, int y) {
120 u8* src_pixel = depth_buffer + src_offset; 123 u8* src_pixel = depth_buffer + src_offset;
121 124
122 switch (framebuffer.depth_format) { 125 switch (framebuffer.depth_format) {
123 case Regs::DepthFormat::D16: 126 case Regs::DepthFormat::D16:
124 return Color::DecodeD16(src_pixel); 127 return Color::DecodeD16(src_pixel);
125 case Regs::DepthFormat::D24: 128 case Regs::DepthFormat::D24:
126 return Color::DecodeD24(src_pixel); 129 return Color::DecodeD24(src_pixel);
127 case Regs::DepthFormat::D24S8: 130 case Regs::DepthFormat::D24S8:
128 return Color::DecodeD24S8(src_pixel).x; 131 return Color::DecodeD24S8(src_pixel).x;
129 default: 132 default:
130 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); 133 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
131 UNIMPLEMENTED(); 134 UNIMPLEMENTED();
132 return 0; 135 return 0;
133 } 136 }
134} 137}
135 138
@@ -148,12 +151,15 @@ static u8 GetStencil(int x, int y) {
148 u8* src_pixel = depth_buffer + src_offset; 151 u8* src_pixel = depth_buffer + src_offset;
149 152
150 switch (framebuffer.depth_format) { 153 switch (framebuffer.depth_format) {
151 case Regs::DepthFormat::D24S8: 154 case Regs::DepthFormat::D24S8:
152 return Color::DecodeD24S8(src_pixel).y; 155 return Color::DecodeD24S8(src_pixel).y;
153 156
154 default: 157 default:
155 LOG_WARNING(HW_GPU, "GetStencil called for function which doesn't have a stencil component (format %u)", framebuffer.depth_format); 158 LOG_WARNING(
156 return 0; 159 HW_GPU,
160 "GetStencil called for function which doesn't have a stencil component (format %u)",
161 framebuffer.depth_format);
162 return 0;
157 } 163 }
158} 164}
159 165
@@ -172,22 +178,22 @@ static void SetDepth(int x, int y, u32 value) {
172 u8* dst_pixel = depth_buffer + dst_offset; 178 u8* dst_pixel = depth_buffer + dst_offset;
173 179
174 switch (framebuffer.depth_format) { 180 switch (framebuffer.depth_format) {
175 case Regs::DepthFormat::D16: 181 case Regs::DepthFormat::D16:
176 Color::EncodeD16(value, dst_pixel); 182 Color::EncodeD16(value, dst_pixel);
177 break; 183 break;
178 184
179 case Regs::DepthFormat::D24: 185 case Regs::DepthFormat::D24:
180 Color::EncodeD24(value, dst_pixel); 186 Color::EncodeD24(value, dst_pixel);
181 break; 187 break;
182 188
183 case Regs::DepthFormat::D24S8: 189 case Regs::DepthFormat::D24S8:
184 Color::EncodeD24X8(value, dst_pixel); 190 Color::EncodeD24X8(value, dst_pixel);
185 break; 191 break;
186 192
187 default: 193 default:
188 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); 194 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
189 UNIMPLEMENTED(); 195 UNIMPLEMENTED();
190 break; 196 break;
191 } 197 }
192} 198}
193 199
@@ -206,19 +212,19 @@ static void SetStencil(int x, int y, u8 value) {
206 u8* dst_pixel = depth_buffer + dst_offset; 212 u8* dst_pixel = depth_buffer + dst_offset;
207 213
208 switch (framebuffer.depth_format) { 214 switch (framebuffer.depth_format) {
209 case Pica::Regs::DepthFormat::D16: 215 case Pica::Regs::DepthFormat::D16:
210 case Pica::Regs::DepthFormat::D24: 216 case Pica::Regs::DepthFormat::D24:
211 // Nothing to do 217 // Nothing to do
212 break; 218 break;
213 219
214 case Pica::Regs::DepthFormat::D24S8: 220 case Pica::Regs::DepthFormat::D24S8:
215 Color::EncodeX24S8(value, dst_pixel); 221 Color::EncodeX24S8(value, dst_pixel);
216 break; 222 break;
217 223
218 default: 224 default:
219 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); 225 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
220 UNIMPLEMENTED(); 226 UNIMPLEMENTED();
221 break; 227 break;
222 } 228 }
223} 229}
224 230
@@ -262,15 +268,19 @@ struct Fix12P4 {
262 Fix12P4() {} 268 Fix12P4() {}
263 Fix12P4(u16 val) : val(val) {} 269 Fix12P4(u16 val) : val(val) {}
264 270
265 static u16 FracMask() { return 0xF; } 271 static u16 FracMask() {
266 static u16 IntMask() { return (u16)~0xF; } 272 return 0xF;
273 }
274 static u16 IntMask() {
275 return (u16)~0xF;
276 }
267 277
268 operator u16() const { 278 operator u16() const {
269 return val; 279 return val;
270 } 280 }
271 281
272 bool operator < (const Fix12P4& oth) const { 282 bool operator<(const Fix12P4& oth) const {
273 return (u16)*this < (u16)oth; 283 return (u16) * this < (u16)oth;
274 } 284 }
275 285
276private: 286private:
@@ -283,9 +293,8 @@ private:
283 * 293 *
284 * @todo define orientation concretely. 294 * @todo define orientation concretely.
285 */ 295 */
286static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, 296static int SignedArea(const Math::Vec2<Fix12P4>& vtx1, const Math::Vec2<Fix12P4>& vtx2,
287 const Math::Vec2<Fix12P4>& vtx2, 297 const Math::Vec2<Fix12P4>& vtx3) {
288 const Math::Vec2<Fix12P4>& vtx3) {
289 const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); 298 const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
290 const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); 299 const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
291 // TODO: There is a very small chance this will overflow for sizeof(int) == 4 300 // TODO: There is a very small chance this will overflow for sizeof(int) == 4
@@ -298,11 +307,8 @@ MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 24
298 * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing 307 * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing
299 * culling via recursion. 308 * culling via recursion.
300 */ 309 */
301static void ProcessTriangleInternal(const Shader::OutputVertex& v0, 310static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1,
302 const Shader::OutputVertex& v1, 311 const Shader::OutputVertex& v2, bool reversed = false) {
303 const Shader::OutputVertex& v2,
304 bool reversed = false)
305{
306 const auto& regs = g_state.regs; 312 const auto& regs = g_state.regs;
307 MICROPROFILE_SCOPE(GPU_Rasterization); 313 MICROPROFILE_SCOPE(GPU_Rasterization);
308 314
@@ -316,9 +322,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
316 return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; 322 return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)};
317 }; 323 };
318 324
319 Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), 325 Math::Vec3<Fix12P4> vtxpos[3]{ScreenToRasterizerCoordinates(v0.screenpos),
320 ScreenToRasterizerCoordinates(v1.screenpos), 326 ScreenToRasterizerCoordinates(v1.screenpos),
321 ScreenToRasterizerCoordinates(v2.screenpos) }; 327 ScreenToRasterizerCoordinates(v2.screenpos)};
322 328
323 if (regs.cull_mode == Regs::CullMode::KeepAll) { 329 if (regs.cull_mode == Regs::CullMode::KeepAll) {
324 // Make sure we always end up with a triangle wound counter-clockwise 330 // Make sure we always end up with a triangle wound counter-clockwise
@@ -344,8 +350,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
344 u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); 350 u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
345 351
346 // Convert the scissor box coordinates to 12.4 fixed point 352 // Convert the scissor box coordinates to 12.4 fixed point
347 u16 scissor_x1 = (u16)( regs.scissor_test.x1 << 4); 353 u16 scissor_x1 = (u16)(regs.scissor_test.x1 << 4);
348 u16 scissor_y1 = (u16)( regs.scissor_test.y1 << 4); 354 u16 scissor_y1 = (u16)(regs.scissor_test.y1 << 4);
349 // x2,y2 have +1 added to cover the entire sub-pixel area 355 // x2,y2 have +1 added to cover the entire sub-pixel area
350 u16 scissor_x2 = (u16)((regs.scissor_test.x2 + 1) << 4); 356 u16 scissor_x2 = (u16)((regs.scissor_test.x2 + 1) << 4);
351 u16 scissor_y2 = (u16)((regs.scissor_test.y2 + 1) << 4); 357 u16 scissor_y2 = (u16)((regs.scissor_test.y2 + 1) << 4);
@@ -369,27 +375,32 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
369 // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones... 375 // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones...
370 auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx, 376 auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx,
371 const Math::Vec2<Fix12P4>& line1, 377 const Math::Vec2<Fix12P4>& line1,
372 const Math::Vec2<Fix12P4>& line2) 378 const Math::Vec2<Fix12P4>& line2) {
373 {
374 if (line1.y == line2.y) { 379 if (line1.y == line2.y) {
375 // just check if vertex is above us => bottom line parallel to x-axis 380 // just check if vertex is above us => bottom line parallel to x-axis
376 return vtx.y < line1.y; 381 return vtx.y < line1.y;
377 } else { 382 } else {
378 // check if vertex is on our left => right side 383 // check if vertex is on our left => right side
379 // TODO: Not sure how likely this is to overflow 384 // TODO: Not sure how likely this is to overflow
380 return (int)vtx.x < (int)line1.x + ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) / ((int)line2.y - (int)line1.y); 385 return (int)vtx.x < (int)line1.x +
386 ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) /
387 ((int)line2.y - (int)line1.y);
381 } 388 }
382 }; 389 };
383 int bias0 = IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0; 390 int bias0 =
384 int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; 391 IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0;
385 int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; 392 int bias1 =
393 IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0;
394 int bias2 =
395 IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0;
386 396
387 auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); 397 auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
388 398
389 auto textures = regs.GetTextures(); 399 auto textures = regs.GetTextures();
390 auto tev_stages = regs.GetTevStages(); 400 auto tev_stages = regs.GetTevStages();
391 401
392 bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; 402 bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable &&
403 g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8;
393 const auto stencil_test = g_state.regs.output_merger.stencil_test; 404 const auto stencil_test = g_state.regs.output_merger.stencil_test;
394 405
395 // Enter rasterization loop, starting at the center of the topleft bounding box corner. 406 // Enter rasterization loop, starting at the center of the topleft bounding box corner.
@@ -397,10 +408,10 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
397 for (u16 y = min_y + 8; y < max_y; y += 0x10) { 408 for (u16 y = min_y + 8; y < max_y; y += 0x10) {
398 for (u16 x = min_x + 8; x < max_x; x += 0x10) { 409 for (u16 x = min_x + 8; x < max_x; x += 0x10) {
399 410
400 // Do not process the pixel if it's inside the scissor box and the scissor mode is set to Exclude 411 // Do not process the pixel if it's inside the scissor box and the scissor mode is set
412 // to Exclude
401 if (regs.scissor_test.mode == Regs::ScissorMode::Exclude) { 413 if (regs.scissor_test.mode == Regs::ScissorMode::Exclude) {
402 if (x >= scissor_x1 && x < scissor_x2 && 414 if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2)
403 y >= scissor_y1 && y < scissor_y2)
404 continue; 415 continue;
405 } 416 }
406 417
@@ -414,15 +425,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
414 if (w0 < 0 || w1 < 0 || w2 < 0) 425 if (w0 < 0 || w1 < 0 || w2 < 0)
415 continue; 426 continue;
416 427
417 auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), 428 auto baricentric_coordinates =
418 float24::FromFloat32(static_cast<float>(w1)), 429 Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
419 float24::FromFloat32(static_cast<float>(w2))); 430 float24::FromFloat32(static_cast<float>(w1)),
420 float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates); 431 float24::FromFloat32(static_cast<float>(w2)));
432 float24 interpolated_w_inverse =
433 float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates);
421 434
422 // interpolated_z = z / w 435 // interpolated_z = z / w
423 float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 + 436 float interpolated_z_over_w =
424 v1.screenpos[2].ToFloat32() * w1 + 437 (v0.screenpos[2].ToFloat32() * w0 + v1.screenpos[2].ToFloat32() * w1 +
425 v2.screenpos[2].ToFloat32() * w2) / wsum; 438 v2.screenpos[2].ToFloat32() * w2) /
439 wsum;
426 440
427 // Not fully accurate. About 3 bits in precision are missing. 441 // Not fully accurate. About 3 bits in precision are missing.
428 // Z-Buffer (z / w * scale + offset) 442 // Z-Buffer (z / w * scale + offset)
@@ -461,10 +475,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
461 }; 475 };
462 476
463 Math::Vec4<u8> primary_color{ 477 Math::Vec4<u8> primary_color{
464 (u8)(GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * 255), 478 (u8)(
465 (u8)(GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * 255), 479 GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() *
466 (u8)(GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * 255), 480 255),
467 (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) 481 (u8)(
482 GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() *
483 255),
484 (u8)(
485 GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() *
486 255),
487 (u8)(
488 GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() *
489 255),
468 }; 490 };
469 491
470 Math::Vec2<float24> uv[3]; 492 Math::Vec2<float24> uv[3];
@@ -489,7 +511,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
489 // Only unit 0 respects the texturing type (according to 3DBrew) 511 // Only unit 0 respects the texturing type (according to 3DBrew)
490 // TODO: Refactor so cubemaps and shadowmaps can be handled 512 // TODO: Refactor so cubemaps and shadowmaps can be handled
491 if (i == 0) { 513 if (i == 0) {
492 switch(texture.config.type) { 514 switch (texture.config.type) {
493 case Regs::TextureConfig::Texture2D: 515 case Regs::TextureConfig::Texture2D:
494 break; 516 break;
495 case Regs::TextureConfig::Projection2D: { 517 case Regs::TextureConfig::Projection2D: {
@@ -506,51 +528,58 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
506 } 528 }
507 } 529 }
508 530
509 int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); 531 int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width)))
510 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); 532 .ToFloat32();
533 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height)))
534 .ToFloat32();
511 535
512 536 static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val,
513 static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { 537 unsigned size) {
514 switch (mode) { 538 switch (mode) {
515 case Regs::TextureConfig::ClampToEdge: 539 case Regs::TextureConfig::ClampToEdge:
516 val = std::max(val, 0); 540 val = std::max(val, 0);
517 val = std::min(val, (int)size - 1); 541 val = std::min(val, (int)size - 1);
518 return val; 542 return val;
519 543
520 case Regs::TextureConfig::ClampToBorder: 544 case Regs::TextureConfig::ClampToBorder:
521 return val; 545 return val;
522 546
523 case Regs::TextureConfig::Repeat: 547 case Regs::TextureConfig::Repeat:
524 return (int)((unsigned)val % size); 548 return (int)((unsigned)val % size);
525 549
526 case Regs::TextureConfig::MirroredRepeat: 550 case Regs::TextureConfig::MirroredRepeat: {
527 { 551 unsigned int coord = ((unsigned)val % (2 * size));
528 unsigned int coord = ((unsigned)val % (2 * size)); 552 if (coord >= size)
529 if (coord >= size) 553 coord = 2 * size - 1 - coord;
530 coord = 2 * size - 1 - coord; 554 return (int)coord;
531 return (int)coord; 555 }
532 } 556
533 557 default:
534 default: 558 LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode);
535 LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); 559 UNIMPLEMENTED();
536 UNIMPLEMENTED(); 560 return 0;
537 return 0;
538 } 561 }
539 }; 562 };
540 563
541 if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder && (s < 0 || s >= texture.config.width)) 564 if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder &&
542 || (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder && (t < 0 || t >= texture.config.height))) { 565 (s < 0 || s >= texture.config.width)) ||
566 (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder &&
567 (t < 0 || t >= texture.config.height))) {
543 auto border_color = texture.config.border_color; 568 auto border_color = texture.config.border_color;
544 texture_color[i] = { border_color.r, border_color.g, border_color.b, border_color.a }; 569 texture_color[i] = {border_color.r, border_color.g, border_color.b,
570 border_color.a};
545 } else { 571 } else {
546 // Textures are laid out from bottom to top, hence we invert the t coordinate. 572 // Textures are laid out from bottom to top, hence we invert the t coordinate.
547 // NOTE: This may not be the right place for the inversion. 573 // NOTE: This may not be the right place for the inversion.
548 // TODO: Check if this applies to ETC textures, too. 574 // TODO: Check if this applies to ETC textures, too.
549 s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); 575 s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);
550 t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); 576 t = texture.config.height - 1 -
577 GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
551 578
552 u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); 579 u8* texture_data =
553 auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); 580 Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
581 auto info =
582 DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
554 583
555 // TODO: Apply the min and mag filters to the texture 584 // TODO: Apply the min and mag filters to the texture
556 texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); 585 texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info);
@@ -571,10 +600,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
571 Math::Vec4<u8> combiner_buffer = {0, 0, 0, 0}; 600 Math::Vec4<u8> combiner_buffer = {0, 0, 0, 0};
572 Math::Vec4<u8> next_combiner_buffer = { 601 Math::Vec4<u8> next_combiner_buffer = {
573 regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g, 602 regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g,
574 regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a 603 regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a,
575 }; 604 };
576 605
577 for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { 606 for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size();
607 ++tev_stage_index) {
578 const auto& tev_stage = tev_stages[tev_stage_index]; 608 const auto& tev_stage = tev_stages[tev_stage_index];
579 using Source = Regs::TevStageConfig::Source; 609 using Source = Regs::TevStageConfig::Source;
580 using ColorModifier = Regs::TevStageConfig::ColorModifier; 610 using ColorModifier = Regs::TevStageConfig::ColorModifier;
@@ -606,7 +636,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
606 return combiner_buffer; 636 return combiner_buffer;
607 637
608 case Source::Constant: 638 case Source::Constant:
609 return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; 639 return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b,
640 tev_stage.const_a};
610 641
611 case Source::Previous: 642 case Source::Previous:
612 return combiner_output; 643 return combiner_output;
@@ -618,7 +649,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
618 } 649 }
619 }; 650 };
620 651
621 static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { 652 static auto GetColorModifier = [](ColorModifier factor,
653 const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
622 switch (factor) { 654 switch (factor) {
623 case ColorModifier::SourceColor: 655 case ColorModifier::SourceColor:
624 return values.rgb(); 656 return values.rgb();
@@ -652,7 +684,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
652 } 684 }
653 }; 685 };
654 686
655 static auto GetAlphaModifier = [](AlphaModifier factor, const Math::Vec4<u8>& values) -> u8 { 687 static auto GetAlphaModifier = [](AlphaModifier factor,
688 const Math::Vec4<u8>& values) -> u8 {
656 switch (factor) { 689 switch (factor) {
657 case AlphaModifier::SourceAlpha: 690 case AlphaModifier::SourceAlpha:
658 return values.a(); 691 return values.a();
@@ -680,7 +713,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
680 } 713 }
681 }; 714 };
682 715
683 static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { 716 static auto ColorCombine = [](Operation op,
717 const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
684 switch (op) { 718 switch (op) {
685 case Operation::Replace: 719 case Operation::Replace:
686 return input[0]; 720 return input[0];
@@ -688,8 +722,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
688 case Operation::Modulate: 722 case Operation::Modulate:
689 return ((input[0] * input[1]) / 255).Cast<u8>(); 723 return ((input[0] * input[1]) / 255).Cast<u8>();
690 724
691 case Operation::Add: 725 case Operation::Add: {
692 {
693 auto result = input[0] + input[1]; 726 auto result = input[0] + input[1];
694 result.r() = std::min(255, result.r()); 727 result.r() = std::min(255, result.r());
695 result.g() = std::min(255, result.g()); 728 result.g() = std::min(255, result.g());
@@ -697,10 +730,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
697 return result.Cast<u8>(); 730 return result.Cast<u8>();
698 } 731 }
699 732
700 case Operation::AddSigned: 733 case Operation::AddSigned: {
701 { 734 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
702 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct 735 // (byte) 128 is correct
703 auto result = input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128); 736 auto result = input[0].Cast<int>() + input[1].Cast<int>() -
737 Math::MakeVec<int>(128, 128, 128);
704 result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); 738 result.r() = MathUtil::Clamp<int>(result.r(), 0, 255);
705 result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); 739 result.g() = MathUtil::Clamp<int>(result.g(), 0, 255);
706 result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); 740 result.b() = MathUtil::Clamp<int>(result.b(), 0, 255);
@@ -708,10 +742,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
708 } 742 }
709 743
710 case Operation::Lerp: 744 case Operation::Lerp:
711 return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); 745 return ((input[0] * input[2] +
746 input[1] *
747 (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) /
748 255)
749 .Cast<u8>();
712 750
713 case Operation::Subtract: 751 case Operation::Subtract: {
714 {
715 auto result = input[0].Cast<int>() - input[1].Cast<int>(); 752 auto result = input[0].Cast<int>() - input[1].Cast<int>();
716 result.r() = std::max(0, result.r()); 753 result.r() = std::max(0, result.r());
717 result.g() = std::max(0, result.g()); 754 result.g() = std::max(0, result.g());
@@ -719,8 +756,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
719 return result.Cast<u8>(); 756 return result.Cast<u8>();
720 } 757 }
721 758
722 case Operation::MultiplyThenAdd: 759 case Operation::MultiplyThenAdd: {
723 {
724 auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; 760 auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255;
725 result.r() = std::min(255, result.r()); 761 result.r() = std::min(255, result.r());
726 result.g() = std::min(255, result.g()); 762 result.g() = std::min(255, result.g());
@@ -728,8 +764,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
728 return result.Cast<u8>(); 764 return result.Cast<u8>();
729 } 765 }
730 766
731 case Operation::AddThenMultiply: 767 case Operation::AddThenMultiply: {
732 {
733 auto result = input[0] + input[1]; 768 auto result = input[0] + input[1];
734 result.r() = std::min(255, result.r()); 769 result.r() = std::min(255, result.r());
735 result.g() = std::min(255, result.g()); 770 result.g() = std::min(255, result.g());
@@ -737,17 +772,19 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
737 result = (result * input[2].Cast<int>()) / 255; 772 result = (result * input[2].Cast<int>()) / 255;
738 return result.Cast<u8>(); 773 return result.Cast<u8>();
739 } 774 }
740 case Operation::Dot3_RGB: 775 case Operation::Dot3_RGB: {
741 {
742 // Not fully accurate. 776 // Not fully accurate.
743 // Worst case scenario seems to yield a +/-3 error 777 // Worst case scenario seems to yield a +/-3 error
744 // Some HW results indicate that the per-component computation can't have a higher precision than 1/256, 778 // Some HW results indicate that the per-component computation can't have a
745 // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( (0x80,g0,b0),(0x80,g1,b1) ) give different results 779 // higher precision than 1/256,
746 int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + 780 // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb(
747 ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + 781 // (0x80,g0,b0),(0x80,g1,b1) ) give different results
748 ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; 782 int result =
783 ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 +
784 ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 +
785 ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256;
749 result = std::max(0, std::min(255, result)); 786 result = std::max(0, std::min(255, result));
750 return { (u8)result, (u8)result, (u8)result }; 787 return {(u8)result, (u8)result, (u8)result};
751 } 788 }
752 default: 789 default:
753 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); 790 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op);
@@ -756,7 +793,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
756 } 793 }
757 }; 794 };
758 795
759 static auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { 796 static auto AlphaCombine = [](Operation op, const std::array<u8, 3>& input) -> u8 {
760 switch (op) { 797 switch (op) {
761 case Operation::Replace: 798 case Operation::Replace:
762 return input[0]; 799 return input[0];
@@ -767,9 +804,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
767 case Operation::Add: 804 case Operation::Add:
768 return std::min(255, input[0] + input[1]); 805 return std::min(255, input[0] + input[1]);
769 806
770 case Operation::AddSigned: 807 case Operation::AddSigned: {
771 { 808 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
772 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct 809 // (byte) 128 is correct
773 auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; 810 auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128;
774 return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); 811 return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255));
775 } 812 }
@@ -801,32 +838,38 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
801 Math::Vec3<u8> color_result[3] = { 838 Math::Vec3<u8> color_result[3] = {
802 GetColorModifier(tev_stage.color_modifier1, GetSource(tev_stage.color_source1)), 839 GetColorModifier(tev_stage.color_modifier1, GetSource(tev_stage.color_source1)),
803 GetColorModifier(tev_stage.color_modifier2, GetSource(tev_stage.color_source2)), 840 GetColorModifier(tev_stage.color_modifier2, GetSource(tev_stage.color_source2)),
804 GetColorModifier(tev_stage.color_modifier3, GetSource(tev_stage.color_source3)) 841 GetColorModifier(tev_stage.color_modifier3, GetSource(tev_stage.color_source3)),
805 }; 842 };
806 auto color_output = ColorCombine(tev_stage.color_op, color_result); 843 auto color_output = ColorCombine(tev_stage.color_op, color_result);
807 844
808 // alpha combiner 845 // alpha combiner
809 std::array<u8,3> alpha_result = {{ 846 std::array<u8, 3> alpha_result = {{
810 GetAlphaModifier(tev_stage.alpha_modifier1, GetSource(tev_stage.alpha_source1)), 847 GetAlphaModifier(tev_stage.alpha_modifier1, GetSource(tev_stage.alpha_source1)),
811 GetAlphaModifier(tev_stage.alpha_modifier2, GetSource(tev_stage.alpha_source2)), 848 GetAlphaModifier(tev_stage.alpha_modifier2, GetSource(tev_stage.alpha_source2)),
812 GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)) 849 GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)),
813 }}; 850 }};
814 auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); 851 auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
815 852
816 combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); 853 combiner_output[0] =
817 combiner_output[1] = std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); 854 std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier());
818 combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); 855 combiner_output[1] =
819 combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); 856 std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier());
857 combiner_output[2] =
858 std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier());
859 combiner_output[3] =
860 std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier());
820 861
821 combiner_buffer = next_combiner_buffer; 862 combiner_buffer = next_combiner_buffer;
822 863
823 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { 864 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(
865 tev_stage_index)) {
824 next_combiner_buffer.r() = combiner_output.r(); 866 next_combiner_buffer.r() = combiner_output.r();
825 next_combiner_buffer.g() = combiner_output.g(); 867 next_combiner_buffer.g() = combiner_output.g();
826 next_combiner_buffer.b() = combiner_output.b(); 868 next_combiner_buffer.b() = combiner_output.b();
827 } 869 }
828 870
829 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { 871 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(
872 tev_stage_index)) {
830 next_combiner_buffer.a() = combiner_output.a(); 873 next_combiner_buffer.a() = combiner_output.a();
831 } 874 }
832 } 875 }
@@ -897,21 +940,26 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
897 float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f); 940 float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f);
898 float fog_f = fog_index - fog_i; 941 float fog_f = fog_index - fog_i;
899 const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)]; 942 const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)];
900 float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) / 2047.0f; // This is signed fixed point 1.11 943 float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) /
944 2047.0f; // This is signed fixed point 1.11
901 fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f); 945 fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f);
902 946
903 // Blend the fog 947 // Blend the fog
904 for (unsigned i = 0; i < 3; i++) { 948 for (unsigned i = 0; i < 3; i++) {
905 combiner_output[i] = fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i]; 949 combiner_output[i] =
950 fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i];
906 } 951 }
907 } 952 }
908 953
909 u8 old_stencil = 0; 954 u8 old_stencil = 0;
910 955
911 auto UpdateStencil = [stencil_test, x, y, &old_stencil](Pica::Regs::StencilAction action) { 956 auto UpdateStencil = [stencil_test, x, y,
912 u8 new_stencil = PerformStencilAction(action, old_stencil, stencil_test.reference_value); 957 &old_stencil](Pica::Regs::StencilAction action) {
958 u8 new_stencil =
959 PerformStencilAction(action, old_stencil, stencil_test.reference_value);
913 if (g_state.regs.framebuffer.allow_depth_stencil_write != 0) 960 if (g_state.regs.framebuffer.allow_depth_stencil_write != 0)
914 SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); 961 SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) |
962 (old_stencil & ~stencil_test.write_mask));
915 }; 963 };
916 964
917 if (stencil_action_enable) { 965 if (stencil_action_enable) {
@@ -1030,7 +1078,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1030 static_cast<u8>(output_merger.blend_const.r), 1078 static_cast<u8>(output_merger.blend_const.r),
1031 static_cast<u8>(output_merger.blend_const.g), 1079 static_cast<u8>(output_merger.blend_const.g),
1032 static_cast<u8>(output_merger.blend_const.b), 1080 static_cast<u8>(output_merger.blend_const.b),
1033 static_cast<u8>(output_merger.blend_const.a) 1081 static_cast<u8>(output_merger.blend_const.a),
1034 }; 1082 };
1035 1083
1036 switch (factor) { 1084 switch (factor) {
@@ -1091,12 +1139,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1091 return combiner_output[channel]; 1139 return combiner_output[channel];
1092 }; 1140 };
1093 1141
1094 static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, 1142 static auto EvaluateBlendEquation = [](
1095 const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, 1143 const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
1096 Regs::BlendEquation equation) { 1144 const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
1145 Regs::BlendEquation equation) {
1097 Math::Vec4<int> result; 1146 Math::Vec4<int> result;
1098 1147
1099 auto src_result = (src * srcfactor).Cast<int>(); 1148 auto src_result = (src * srcfactor).Cast<int>();
1100 auto dst_result = (dest * destfactor).Cast<int>(); 1149 auto dst_result = (dest * destfactor).Cast<int>();
1101 1150
1102 switch (equation) { 1151 switch (equation) {
@@ -1134,10 +1183,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1134 UNIMPLEMENTED(); 1183 UNIMPLEMENTED();
1135 } 1184 }
1136 1185
1137 return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), 1186 return Math::Vec4<u8>(
1138 MathUtil::Clamp(result.g(), 0, 255), 1187 MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255),
1139 MathUtil::Clamp(result.b(), 0, 255), 1188 MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255));
1140 MathUtil::Clamp(result.a(), 0, 255));
1141 }; 1189 };
1142 1190
1143 auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), 1191 auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb),
@@ -1150,8 +1198,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1150 LookupFactor(2, params.factor_dest_rgb), 1198 LookupFactor(2, params.factor_dest_rgb),
1151 LookupFactor(3, params.factor_dest_a)); 1199 LookupFactor(3, params.factor_dest_a));
1152 1200
1153 blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); 1201 blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor,
1154 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); 1202 params.blend_equation_rgb);
1203 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest,
1204 dstfactor, params.blend_equation_a)
1205 .a();
1155 } else { 1206 } else {
1156 static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 { 1207 static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 {
1157 switch (op) { 1208 switch (op) {
@@ -1205,18 +1256,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1205 } 1256 }
1206 }; 1257 };
1207 1258
1208 blend_output = Math::MakeVec( 1259 blend_output =
1209 LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), 1260 Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op),
1210 LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), 1261 LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op),
1211 LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op), 1262 LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op),
1212 LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op)); 1263 LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op));
1213 } 1264 }
1214 1265
1215 const Math::Vec4<u8> result = { 1266 const Math::Vec4<u8> result = {
1216 output_merger.red_enable ? blend_output.r() : dest.r(), 1267 output_merger.red_enable ? blend_output.r() : dest.r(),
1217 output_merger.green_enable ? blend_output.g() : dest.g(), 1268 output_merger.green_enable ? blend_output.g() : dest.g(),
1218 output_merger.blue_enable ? blend_output.b() : dest.b(), 1269 output_merger.blue_enable ? blend_output.b() : dest.b(),
1219 output_merger.alpha_enable ? blend_output.a() : dest.a() 1270 output_merger.alpha_enable ? blend_output.a() : dest.a(),
1220 }; 1271 };
1221 1272
1222 if (regs.framebuffer.allow_color_write != 0) 1273 if (regs.framebuffer.allow_color_write != 0)
@@ -1225,8 +1276,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1225 } 1276 }
1226} 1277}
1227 1278
1228void ProcessTriangle(const Shader::OutputVertex& v0, 1279void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1,
1229 const Shader::OutputVertex& v1,
1230 const Shader::OutputVertex& v2) { 1280 const Shader::OutputVertex& v2) {
1231 ProcessTriangleInternal(v0, v1, v2); 1281 ProcessTriangleInternal(v0, v1, v2);
1232} 1282}
diff --git a/src/video_core/rasterizer.h b/src/video_core/rasterizer.h
index a6a9634b4..6cbda3067 100644
--- a/src/video_core/rasterizer.h
+++ b/src/video_core/rasterizer.h
@@ -7,13 +7,12 @@
7namespace Pica { 7namespace Pica {
8 8
9namespace Shader { 9namespace Shader {
10 struct OutputVertex; 10struct OutputVertex;
11} 11}
12 12
13namespace Rasterizer { 13namespace Rasterizer {
14 14
15void ProcessTriangle(const Shader::OutputVertex& v0, 15void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1,
16 const Shader::OutputVertex& v1,
17 const Shader::OutputVertex& v2); 16 const Shader::OutputVertex& v2);
18 17
19} // namespace Rasterizer 18} // namespace Rasterizer
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index bf7101665..71df233b5 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -5,7 +5,6 @@
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8
9#include "core/hw/gpu.h" 8#include "core/hw/gpu.h"
10 9
11struct ScreenInfo; 10struct ScreenInfo;
@@ -39,17 +38,25 @@ public:
39 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory 38 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
40 virtual void FlushRegion(PAddr addr, u32 size) = 0; 39 virtual void FlushRegion(PAddr addr, u32 size) = 0;
41 40
42 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory and invalidated 41 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
42 /// and invalidated
43 virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; 43 virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0;
44 44
45 /// Attempt to use a faster method to perform a display transfer 45 /// Attempt to use a faster method to perform a display transfer
46 virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { return false; } 46 virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
47 return false;
48 }
47 49
48 /// Attempt to use a faster method to fill a region 50 /// Attempt to use a faster method to fill a region
49 virtual bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { return false; } 51 virtual bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
52 return false;
53 }
50 54
51 /// Attempt to use a faster method to display the framebuffer to screen 55 /// Attempt to use a faster method to display the framebuffer to screen
52 virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { return false; } 56 virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config,
57 PAddr framebuffer_addr, u32 pixel_stride,
58 ScreenInfo& screen_info) {
59 return false;
60 }
53}; 61};
54
55} 62}
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 3f451e062..fd38175b3 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -4,11 +4,10 @@
4 4
5#include <atomic> 5#include <atomic>
6#include <memory> 6#include <memory>
7
8#include "video_core/renderer_base.h" 7#include "video_core/renderer_base.h"
9#include "video_core/video_core.h"
10#include "video_core/swrasterizer.h"
11#include "video_core/renderer_opengl/gl_rasterizer.h" 8#include "video_core/renderer_opengl/gl_rasterizer.h"
9#include "video_core/swrasterizer.h"
10#include "video_core/video_core.h"
12 11
13void RendererBase::RefreshRasterizerSetting() { 12void RendererBase::RefreshRasterizerSetting() {
14 bool hw_renderer_enabled = VideoCore::g_hw_renderer_enabled; 13 bool hw_renderer_enabled = VideoCore::g_hw_renderer_enabled;
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index f68091cc8..589aca857 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -5,25 +5,17 @@
5#pragma once 5#pragma once
6 6
7#include <memory> 7#include <memory>
8
9#include "common/common_types.h" 8#include "common/common_types.h"
10
11#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
12 10
13class EmuWindow; 11class EmuWindow;
14 12
15class RendererBase : NonCopyable { 13class RendererBase : NonCopyable {
16public: 14public:
17
18 /// Used to reference a framebuffer 15 /// Used to reference a framebuffer
19 enum kFramebuffer { 16 enum kFramebuffer { kFramebuffer_VirtualXFB = 0, kFramebuffer_EFB, kFramebuffer_Texture };
20 kFramebuffer_VirtualXFB = 0,
21 kFramebuffer_EFB,
22 kFramebuffer_Texture
23 };
24 17
25 virtual ~RendererBase() { 18 virtual ~RendererBase() {}
26 }
27 19
28 /// Swap buffers (render frame) 20 /// Swap buffers (render frame)
29 virtual void SwapBuffers() = 0; 21 virtual void SwapBuffers() = 0;
@@ -59,8 +51,8 @@ public:
59 51
60protected: 52protected:
61 std::unique_ptr<VideoCore::RasterizerInterface> rasterizer; 53 std::unique_ptr<VideoCore::RasterizerInterface> rasterizer;
62 f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer 54 f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
63 int m_current_frame = 0; ///< Current frame, should be set by the renderer 55 int m_current_frame = 0; ///< Current frame, should be set by the renderer
64 56
65private: 57private:
66 bool opengl_rasterizer_active = false; 58 bool opengl_rasterizer_active = false;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f8393c618..60c9d9180 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -6,17 +6,13 @@
6#include <string> 6#include <string>
7#include <tuple> 7#include <tuple>
8#include <utility> 8#include <utility>
9
10#include <glad/glad.h> 9#include <glad/glad.h>
11
12#include "common/assert.h" 10#include "common/assert.h"
13#include "common/color.h" 11#include "common/color.h"
14#include "common/logging/log.h" 12#include "common/logging/log.h"
15#include "common/math_util.h" 13#include "common/math_util.h"
16#include "common/vector_math.h" 14#include "common/vector_math.h"
17
18#include "core/hw/gpu.h" 15#include "core/hw/gpu.h"
19
20#include "video_core/pica.h" 16#include "video_core/pica.h"
21#include "video_core/pica_state.h" 17#include "video_core/pica_state.h"
22#include "video_core/renderer_opengl/gl_rasterizer.h" 18#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -32,8 +28,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) {
32 stage.alpha_source1 == Pica::Regs::TevStageConfig::Source::Previous && 28 stage.alpha_source1 == Pica::Regs::TevStageConfig::Source::Previous &&
33 stage.color_modifier1 == Pica::Regs::TevStageConfig::ColorModifier::SourceColor && 29 stage.color_modifier1 == Pica::Regs::TevStageConfig::ColorModifier::SourceColor &&
34 stage.alpha_modifier1 == Pica::Regs::TevStageConfig::AlphaModifier::SourceAlpha && 30 stage.alpha_modifier1 == Pica::Regs::TevStageConfig::AlphaModifier::SourceAlpha &&
35 stage.GetColorMultiplier() == 1 && 31 stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
36 stage.GetAlphaMultiplier() == 1);
37} 32}
38 33
39RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { 34RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
@@ -65,26 +60,34 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
65 uniform_block_data.fog_lut_dirty = true; 60 uniform_block_data.fog_lut_dirty = true;
66 61
67 // Set vertex attributes 62 // Set vertex attributes
68 glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); 63 glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE,
64 sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
69 glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION); 65 glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION);
70 66
71 glVertexAttribPointer(GLShader::ATTRIBUTE_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, color)); 67 glVertexAttribPointer(GLShader::ATTRIBUTE_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
68 (GLvoid*)offsetof(HardwareVertex, color));
72 glEnableVertexAttribArray(GLShader::ATTRIBUTE_COLOR); 69 glEnableVertexAttribArray(GLShader::ATTRIBUTE_COLOR);
73 70
74 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0)); 71 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0, 2, GL_FLOAT, GL_FALSE,
75 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD1, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord1)); 72 sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0));
76 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD2, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord2)); 73 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD1, 2, GL_FLOAT, GL_FALSE,
74 sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord1));
75 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD2, 2, GL_FLOAT, GL_FALSE,
76 sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord2));
77 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0); 77 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0);
78 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); 78 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1);
79 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); 79 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2);
80 80
81 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w)); 81 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE,
82 sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w));
82 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0_W); 83 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0_W);
83 84
84 glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); 85 glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE,
86 sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat));
85 glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); 87 glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT);
86 88
87 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view)); 89 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
90 (GLvoid*)offsetof(HardwareVertex, view));
88 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); 91 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW);
89 92
90 // Create render framebuffer 93 // Create render framebuffer
@@ -129,9 +132,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
129 SyncDepthWriteMask(); 132 SyncDepthWriteMask();
130} 133}
131 134
132RasterizerOpenGL::~RasterizerOpenGL() { 135RasterizerOpenGL::~RasterizerOpenGL() {}
133
134}
135 136
136/** 137/**
137 * This is a helper function to resolve an issue with opposite quaternions being interpolated by 138 * This is a helper function to resolve an issue with opposite quaternions being interpolated by
@@ -149,8 +150,8 @@ RasterizerOpenGL::~RasterizerOpenGL() {
149 * manually using two Lerps, and doing this correction before each Lerp. 150 * manually using two Lerps, and doing this correction before each Lerp.
150 */ 151 */
151static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) { 152static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) {
152 Math::Vec4f a{ qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32() }; 153 Math::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()};
153 Math::Vec4f b{ qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32() }; 154 Math::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()};
154 155
155 return (Math::Dot(a, b) < 0.f); 156 return (Math::Dot(a, b) < 0.f);
156} 157}
@@ -173,15 +174,20 @@ void RasterizerOpenGL::DrawTriangles() {
173 CachedSurface* color_surface; 174 CachedSurface* color_surface;
174 CachedSurface* depth_surface; 175 CachedSurface* depth_surface;
175 MathUtil::Rectangle<int> rect; 176 MathUtil::Rectangle<int> rect;
176 std::tie(color_surface, depth_surface, rect) = res_cache.GetFramebufferSurfaces(regs.framebuffer); 177 std::tie(color_surface, depth_surface, rect) =
178 res_cache.GetFramebufferSurfaces(regs.framebuffer);
177 179
178 state.draw.draw_framebuffer = framebuffer.handle; 180 state.draw.draw_framebuffer = framebuffer.handle;
179 state.Apply(); 181 state.Apply();
180 182
181 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? color_surface->texture.handle : 0, 0); 183 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
182 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_surface != nullptr ? depth_surface->texture.handle : 0, 0); 184 color_surface != nullptr ? color_surface->texture.handle : 0, 0);
185 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
186 depth_surface != nullptr ? depth_surface->texture.handle : 0, 0);
183 bool has_stencil = regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; 187 bool has_stencil = regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8;
184 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0); 188 glFramebufferTexture2D(
189 GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
190 (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0);
185 191
186 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { 192 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
187 return; 193 return;
@@ -194,7 +200,8 @@ void RasterizerOpenGL::DrawTriangles() {
194 200
195 glViewport((GLint)(rect.left + regs.viewport_corner.x * color_surface->res_scale_width), 201 glViewport((GLint)(rect.left + regs.viewport_corner.x * color_surface->res_scale_width),
196 (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height), 202 (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height),
197 (GLsizei)(viewport_width * color_surface->res_scale_width), (GLsizei)(viewport_height * color_surface->res_scale_height)); 203 (GLsizei)(viewport_width * color_surface->res_scale_width),
204 (GLsizei)(viewport_height * color_surface->res_scale_height));
198 205
199 if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width || 206 if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width ||
200 uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) { 207 uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) {
@@ -245,14 +252,16 @@ void RasterizerOpenGL::DrawTriangles() {
245 252
246 // Sync the uniform data 253 // Sync the uniform data
247 if (uniform_block_data.dirty) { 254 if (uniform_block_data.dirty) {
248 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); 255 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data,
256 GL_STATIC_DRAW);
249 uniform_block_data.dirty = false; 257 uniform_block_data.dirty = false;
250 } 258 }
251 259
252 state.Apply(); 260 state.Apply();
253 261
254 // Draw the vertex batch 262 // Draw the vertex batch
255 glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW); 263 glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(),
264 GL_STREAM_DRAW);
256 glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); 265 glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size());
257 266
258 // Mark framebuffer surfaces as dirty 267 // Mark framebuffer surfaces as dirty
@@ -278,7 +287,7 @@ void RasterizerOpenGL::DrawTriangles() {
278void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { 287void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
279 const auto& regs = Pica::g_state.regs; 288 const auto& regs = Pica::g_state.regs;
280 289
281 switch(id) { 290 switch (id) {
282 // Culling 291 // Culling
283 case PICA_REG_INDEX(cull_mode): 292 case PICA_REG_INDEX(cull_mode):
284 SyncCullMode(); 293 SyncCullMode();
@@ -548,7 +557,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
548 SyncLightAmbient(7); 557 SyncLightAmbient(7);
549 break; 558 break;
550 559
551 // Fragment lighting position 560 // Fragment lighting position
552 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].x, 0x144 + 0 * 0x10): 561 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].x, 0x144 + 0 * 0x10):
553 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].z, 0x145 + 0 * 0x10): 562 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].z, 0x145 + 0 * 0x10):
554 SyncLightPosition(0); 563 SyncLightPosition(0);
@@ -659,13 +668,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
659 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): 668 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc):
660 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): 669 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd):
661 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): 670 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce):
662 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): 671 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): {
663 {
664 auto& lut_config = regs.lighting.lut_config; 672 auto& lut_config = regs.lighting.lut_config;
665 uniform_block_data.lut_dirty[lut_config.type / 4] = true; 673 uniform_block_data.lut_dirty[lut_config.type / 4] = true;
666 break; 674 break;
667 } 675 }
668
669 } 676 }
670} 677}
671 678
@@ -699,8 +706,10 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe
699 706
700 CachedSurface dst_params; 707 CachedSurface dst_params;
701 dst_params.addr = config.GetPhysicalOutputAddress(); 708 dst_params.addr = config.GetPhysicalOutputAddress();
702 dst_params.width = config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value(); 709 dst_params.width =
703 dst_params.height = config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value(); 710 config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value();
711 dst_params.height =
712 config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value();
704 dst_params.is_tiled = config.input_linear != config.dont_swizzle; 713 dst_params.is_tiled = config.input_linear != config.dont_swizzle;
705 dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format); 714 dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format);
706 715
@@ -735,7 +744,8 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe
735 return false; 744 return false;
736 } 745 }
737 746
738 u32 dst_size = dst_params.width * dst_params.height * CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8; 747 u32 dst_size = dst_params.width * dst_params.height *
748 CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8;
739 dst_surface->dirty = true; 749 dst_surface->dirty = true;
740 res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true); 750 res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true);
741 return true; 751 return true;
@@ -757,12 +767,15 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
757 767
758 GLuint old_fb = cur_state.draw.draw_framebuffer; 768 GLuint old_fb = cur_state.draw.draw_framebuffer;
759 cur_state.draw.draw_framebuffer = framebuffer.handle; 769 cur_state.draw.draw_framebuffer = framebuffer.handle;
760 // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so Clear call isn't affected 770 // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so
771 // Clear call isn't affected
761 cur_state.Apply(); 772 cur_state.Apply();
762 773
763 if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) { 774 if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) {
764 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_surface->texture.handle, 0); 775 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
765 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); 776 dst_surface->texture.handle, 0);
777 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
778 0);
766 779
767 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { 780 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
768 return false; 781 return false;
@@ -770,8 +783,10 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
770 783
771 GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f}; 784 GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f};
772 785
773 // TODO: Handle additional pixel format and fill value size combinations to accelerate more cases 786 // TODO: Handle additional pixel format and fill value size combinations to accelerate more
774 // For instance, checking if fill value's bytes/bits repeat to allow filling I8/A8/I4/A4/... 787 // cases
788 // For instance, checking if fill value's bytes/bits repeat to allow filling
789 // I8/A8/I4/A4/...
775 // Currently only handles formats that are multiples of the fill value size 790 // Currently only handles formats that are multiples of the fill value size
776 791
777 if (config.fill_24bit) { 792 if (config.fill_24bit) {
@@ -846,7 +861,8 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
846 glClearBufferfv(GL_COLOR, 0, color_values); 861 glClearBufferfv(GL_COLOR, 0, color_values);
847 } else if (dst_type == SurfaceType::Depth) { 862 } else if (dst_type == SurfaceType::Depth) {
848 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); 863 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
849 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0); 864 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
865 dst_surface->texture.handle, 0);
850 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); 866 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
851 867
852 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { 868 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
@@ -865,7 +881,8 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
865 glClearBufferfv(GL_DEPTH, 0, &value_float); 881 glClearBufferfv(GL_DEPTH, 0, &value_float);
866 } else if (dst_type == SurfaceType::DepthStencil) { 882 } else if (dst_type == SurfaceType::DepthStencil) {
867 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); 883 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
868 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0); 884 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
885 dst_surface->texture.handle, 0);
869 886
870 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { 887 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
871 return false; 888 return false;
@@ -889,7 +906,9 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
889 return true; 906 return true;
890} 907}
891 908
892bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { 909bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config,
910 PAddr framebuffer_addr, u32 pixel_stride,
911 ScreenInfo& screen_info) {
893 if (framebuffer_addr == 0) { 912 if (framebuffer_addr == 0) {
894 return false; 913 return false;
895 } 914 }
@@ -912,10 +931,9 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
912 u32 scaled_width = src_surface->GetScaledWidth(); 931 u32 scaled_width = src_surface->GetScaledWidth();
913 u32 scaled_height = src_surface->GetScaledHeight(); 932 u32 scaled_height = src_surface->GetScaledHeight();
914 933
915 screen_info.display_texcoords = MathUtil::Rectangle<float>((float)src_rect.top / (float)scaled_height, 934 screen_info.display_texcoords = MathUtil::Rectangle<float>(
916 (float)src_rect.left / (float)scaled_width, 935 (float)src_rect.top / (float)scaled_height, (float)src_rect.left / (float)scaled_width,
917 (float)src_rect.bottom / (float)scaled_height, 936 (float)src_rect.bottom / (float)scaled_height, (float)src_rect.right / (float)scaled_width);
918 (float)src_rect.right / (float)scaled_width);
919 937
920 screen_info.display_texture = src_surface->texture.handle; 938 screen_info.display_texture = src_surface->texture.handle;
921 939
@@ -928,7 +946,8 @@ void RasterizerOpenGL::SamplerInfo::Create() {
928 wrap_s = wrap_t = TextureConfig::Repeat; 946 wrap_s = wrap_t = TextureConfig::Repeat;
929 border_color = 0; 947 border_color = 0;
930 948
931 glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); // default is GL_LINEAR_MIPMAP_LINEAR 949 glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER,
950 GL_LINEAR); // default is GL_LINEAR_MIPMAP_LINEAR
932 // Other attributes have correct defaults 951 // Other attributes have correct defaults
933} 952}
934 953
@@ -976,41 +995,64 @@ void RasterizerOpenGL::SetShader() {
976 } else { 995 } else {
977 LOG_DEBUG(Render_OpenGL, "Creating new shader"); 996 LOG_DEBUG(Render_OpenGL, "Creating new shader");
978 997
979 shader->shader.Create(GLShader::GenerateVertexShader().c_str(), GLShader::GenerateFragmentShader(config).c_str()); 998 shader->shader.Create(GLShader::GenerateVertexShader().c_str(),
999 GLShader::GenerateFragmentShader(config).c_str());
980 1000
981 state.draw.shader_program = shader->shader.handle; 1001 state.draw.shader_program = shader->shader.handle;
982 state.Apply(); 1002 state.Apply();
983 1003
984 // Set the texture samplers to correspond to different texture units 1004 // Set the texture samplers to correspond to different texture units
985 GLuint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]"); 1005 GLuint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]");
986 if (uniform_tex != -1) { glUniform1i(uniform_tex, 0); } 1006 if (uniform_tex != -1) {
1007 glUniform1i(uniform_tex, 0);
1008 }
987 uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[1]"); 1009 uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[1]");
988 if (uniform_tex != -1) { glUniform1i(uniform_tex, 1); } 1010 if (uniform_tex != -1) {
1011 glUniform1i(uniform_tex, 1);
1012 }
989 uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]"); 1013 uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]");
990 if (uniform_tex != -1) { glUniform1i(uniform_tex, 2); } 1014 if (uniform_tex != -1) {
1015 glUniform1i(uniform_tex, 2);
1016 }
991 1017
992 // Set the texture samplers to correspond to different lookup table texture units 1018 // Set the texture samplers to correspond to different lookup table texture units
993 GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]"); 1019 GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]");
994 if (uniform_lut != -1) { glUniform1i(uniform_lut, 3); } 1020 if (uniform_lut != -1) {
1021 glUniform1i(uniform_lut, 3);
1022 }
995 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]"); 1023 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]");
996 if (uniform_lut != -1) { glUniform1i(uniform_lut, 4); } 1024 if (uniform_lut != -1) {
1025 glUniform1i(uniform_lut, 4);
1026 }
997 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]"); 1027 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]");
998 if (uniform_lut != -1) { glUniform1i(uniform_lut, 5); } 1028 if (uniform_lut != -1) {
1029 glUniform1i(uniform_lut, 5);
1030 }
999 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]"); 1031 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]");
1000 if (uniform_lut != -1) { glUniform1i(uniform_lut, 6); } 1032 if (uniform_lut != -1) {
1033 glUniform1i(uniform_lut, 6);
1034 }
1001 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]"); 1035 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]");
1002 if (uniform_lut != -1) { glUniform1i(uniform_lut, 7); } 1036 if (uniform_lut != -1) {
1037 glUniform1i(uniform_lut, 7);
1038 }
1003 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]"); 1039 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]");
1004 if (uniform_lut != -1) { glUniform1i(uniform_lut, 8); } 1040 if (uniform_lut != -1) {
1041 glUniform1i(uniform_lut, 8);
1042 }
1005 1043
1006 GLuint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut"); 1044 GLuint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut");
1007 if (uniform_fog_lut != -1) { glUniform1i(uniform_fog_lut, 9); } 1045 if (uniform_fog_lut != -1) {
1046 glUniform1i(uniform_fog_lut, 9);
1047 }
1008 1048
1009 current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); 1049 current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get();
1010 1050
1011 unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); 1051 unsigned int block_index =
1052 glGetUniformBlockIndex(current_shader->shader.handle, "shader_data");
1012 GLint block_size; 1053 GLint block_size;
1013 glGetActiveUniformBlockiv(current_shader->shader.handle, block_index, GL_UNIFORM_BLOCK_DATA_SIZE, &block_size); 1054 glGetActiveUniformBlockiv(current_shader->shader.handle, block_index,
1055 GL_UNIFORM_BLOCK_DATA_SIZE, &block_size);
1014 ASSERT_MSG(block_size == sizeof(UniformData), "Uniform block size did not match!"); 1056 ASSERT_MSG(block_size == sizeof(UniformData), "Uniform block size did not match!");
1015 glUniformBlockBinding(current_shader->shader.handle, block_index, 0); 1057 glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
1016 1058
@@ -1073,7 +1115,8 @@ void RasterizerOpenGL::SyncDepthScale() {
1073} 1115}
1074 1116
1075void RasterizerOpenGL::SyncDepthOffset() { 1117void RasterizerOpenGL::SyncDepthOffset() {
1076 float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); 1118 float depth_offset =
1119 Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32();
1077 if (depth_offset != uniform_block_data.data.depth_offset) { 1120 if (depth_offset != uniform_block_data.data.depth_offset) {
1078 uniform_block_data.data.depth_offset = depth_offset; 1121 uniform_block_data.data.depth_offset = depth_offset;
1079 uniform_block_data.dirty = true; 1122 uniform_block_data.dirty = true;
@@ -1086,10 +1129,14 @@ void RasterizerOpenGL::SyncBlendEnabled() {
1086 1129
1087void RasterizerOpenGL::SyncBlendFuncs() { 1130void RasterizerOpenGL::SyncBlendFuncs() {
1088 const auto& regs = Pica::g_state.regs; 1131 const auto& regs = Pica::g_state.regs;
1089 state.blend.rgb_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_rgb); 1132 state.blend.rgb_equation =
1090 state.blend.a_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_a); 1133 PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_rgb);
1091 state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb); 1134 state.blend.a_equation =
1092 state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb); 1135 PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_a);
1136 state.blend.src_rgb_func =
1137 PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb);
1138 state.blend.dst_rgb_func =
1139 PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb);
1093 state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a); 1140 state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a);
1094 state.blend.dst_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_a); 1141 state.blend.dst_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_a);
1095} 1142}
@@ -1105,9 +1152,8 @@ void RasterizerOpenGL::SyncBlendColor() {
1105void RasterizerOpenGL::SyncFogColor() { 1152void RasterizerOpenGL::SyncFogColor() {
1106 const auto& regs = Pica::g_state.regs; 1153 const auto& regs = Pica::g_state.regs;
1107 uniform_block_data.data.fog_color = { 1154 uniform_block_data.data.fog_color = {
1108 regs.fog_color.r.Value() / 255.0f, 1155 regs.fog_color.r.Value() / 255.0f, regs.fog_color.g.Value() / 255.0f,
1109 regs.fog_color.g.Value() / 255.0f, 1156 regs.fog_color.b.Value() / 255.0f,
1110 regs.fog_color.b.Value() / 255.0f
1111 }; 1157 };
1112 uniform_block_data.dirty = true; 1158 uniform_block_data.dirty = true;
1113} 1159}
@@ -1115,14 +1161,14 @@ void RasterizerOpenGL::SyncFogColor() {
1115void RasterizerOpenGL::SyncFogLUT() { 1161void RasterizerOpenGL::SyncFogLUT() {
1116 std::array<GLuint, 128> new_data; 1162 std::array<GLuint, 128> new_data;
1117 1163
1118 std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), [](const auto& entry) { 1164 std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(),
1119 return entry.raw; 1165 [](const auto& entry) { return entry.raw; });
1120 });
1121 1166
1122 if (new_data != fog_lut_data) { 1167 if (new_data != fog_lut_data) {
1123 fog_lut_data = new_data; 1168 fog_lut_data = new_data;
1124 glActiveTexture(GL_TEXTURE9); 1169 glActiveTexture(GL_TEXTURE9);
1125 glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RED_INTEGER, GL_UNSIGNED_INT, fog_lut_data.data()); 1170 glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RED_INTEGER, GL_UNSIGNED_INT,
1171 fog_lut_data.data());
1126 } 1172 }
1127} 1173}
1128 1174
@@ -1154,34 +1200,40 @@ void RasterizerOpenGL::SyncColorWriteMask() {
1154void RasterizerOpenGL::SyncStencilWriteMask() { 1200void RasterizerOpenGL::SyncStencilWriteMask() {
1155 const auto& regs = Pica::g_state.regs; 1201 const auto& regs = Pica::g_state.regs;
1156 state.stencil.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0) 1202 state.stencil.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0)
1157 ? static_cast<GLuint>(regs.output_merger.stencil_test.write_mask) 1203 ? static_cast<GLuint>(regs.output_merger.stencil_test.write_mask)
1158 : 0; 1204 : 0;
1159} 1205}
1160 1206
1161void RasterizerOpenGL::SyncDepthWriteMask() { 1207void RasterizerOpenGL::SyncDepthWriteMask() {
1162 const auto& regs = Pica::g_state.regs; 1208 const auto& regs = Pica::g_state.regs;
1163 state.depth.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0 && regs.output_merger.depth_write_enable) 1209 state.depth.write_mask =
1164 ? GL_TRUE 1210 (regs.framebuffer.allow_depth_stencil_write != 0 && regs.output_merger.depth_write_enable)
1165 : GL_FALSE; 1211 ? GL_TRUE
1212 : GL_FALSE;
1166} 1213}
1167 1214
1168void RasterizerOpenGL::SyncStencilTest() { 1215void RasterizerOpenGL::SyncStencilTest() {
1169 const auto& regs = Pica::g_state.regs; 1216 const auto& regs = Pica::g_state.regs;
1170 state.stencil.test_enabled = regs.output_merger.stencil_test.enable && regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; 1217 state.stencil.test_enabled = regs.output_merger.stencil_test.enable &&
1218 regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8;
1171 state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func); 1219 state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func);
1172 state.stencil.test_ref = regs.output_merger.stencil_test.reference_value; 1220 state.stencil.test_ref = regs.output_merger.stencil_test.reference_value;
1173 state.stencil.test_mask = regs.output_merger.stencil_test.input_mask; 1221 state.stencil.test_mask = regs.output_merger.stencil_test.input_mask;
1174 state.stencil.action_stencil_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail); 1222 state.stencil.action_stencil_fail =
1175 state.stencil.action_depth_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail); 1223 PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail);
1176 state.stencil.action_depth_pass = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass); 1224 state.stencil.action_depth_fail =
1225 PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail);
1226 state.stencil.action_depth_pass =
1227 PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass);
1177} 1228}
1178 1229
1179void RasterizerOpenGL::SyncDepthTest() { 1230void RasterizerOpenGL::SyncDepthTest() {
1180 const auto& regs = Pica::g_state.regs; 1231 const auto& regs = Pica::g_state.regs;
1181 state.depth.test_enabled = regs.output_merger.depth_test_enable == 1 || 1232 state.depth.test_enabled =
1182 regs.output_merger.depth_write_enable == 1; 1233 regs.output_merger.depth_test_enable == 1 || regs.output_merger.depth_write_enable == 1;
1183 state.depth.test_func = regs.output_merger.depth_test_enable == 1 ? 1234 state.depth.test_func = regs.output_merger.depth_test_enable == 1
1184 PicaToGL::CompareFunc(regs.output_merger.depth_test_func) : GL_ALWAYS; 1235 ? PicaToGL::CompareFunc(regs.output_merger.depth_test_func)
1236 : GL_ALWAYS;
1185} 1237}
1186 1238
1187void RasterizerOpenGL::SyncScissorTest() { 1239void RasterizerOpenGL::SyncScissorTest() {
@@ -1208,7 +1260,8 @@ void RasterizerOpenGL::SyncCombinerColor() {
1208 } 1260 }
1209} 1261}
1210 1262
1211void RasterizerOpenGL::SyncTevConstColor(int stage_index, const Pica::Regs::TevStageConfig& tev_stage) { 1263void RasterizerOpenGL::SyncTevConstColor(int stage_index,
1264 const Pica::Regs::TevStageConfig& tev_stage) {
1212 auto const_color = PicaToGL::ColorRGBA8(tev_stage.const_color); 1265 auto const_color = PicaToGL::ColorRGBA8(tev_stage.const_color);
1213 if (const_color != uniform_block_data.data.const_color[stage_index]) { 1266 if (const_color != uniform_block_data.data.const_color[stage_index]) {
1214 uniform_block_data.data.const_color[stage_index] = const_color; 1267 uniform_block_data.data.const_color[stage_index] = const_color;
@@ -1237,7 +1290,8 @@ void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) {
1237 if (new_data != lighting_lut_data[lut_index]) { 1290 if (new_data != lighting_lut_data[lut_index]) {
1238 lighting_lut_data[lut_index] = new_data; 1291 lighting_lut_data[lut_index] = new_data;
1239 glActiveTexture(GL_TEXTURE3 + lut_index); 1292 glActiveTexture(GL_TEXTURE3 + lut_index);
1240 glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, lighting_lut_data[lut_index].data()); 1293 glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT,
1294 lighting_lut_data[lut_index].data());
1241 } 1295 }
1242} 1296}
1243 1297
@@ -1277,7 +1331,7 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) {
1277 GLvec3 position = { 1331 GLvec3 position = {
1278 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), 1332 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(),
1279 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), 1333 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(),
1280 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() }; 1334 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32()};
1281 1335
1282 if (position != uniform_block_data.data.light_src[light_index].position) { 1336 if (position != uniform_block_data.data.light_src[light_index].position) {
1283 uniform_block_data.data.light_src[light_index].position = position; 1337 uniform_block_data.data.light_src[light_index].position = position;
@@ -1286,7 +1340,9 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) {
1286} 1340}
1287 1341
1288void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) { 1342void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) {
1289 GLfloat dist_atten_bias = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias).ToFloat32(); 1343 GLfloat dist_atten_bias =
1344 Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias)
1345 .ToFloat32();
1290 1346
1291 if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) { 1347 if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) {
1292 uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias; 1348 uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias;
@@ -1295,7 +1351,9 @@ void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) {
1295} 1351}
1296 1352
1297void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) { 1353void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) {
1298 GLfloat dist_atten_scale = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale).ToFloat32(); 1354 GLfloat dist_atten_scale =
1355 Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale)
1356 .ToFloat32();
1299 1357
1300 if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) { 1358 if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) {
1301 uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale; 1359 uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index c5029432b..24fefed1b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -8,18 +8,14 @@
8#include <cstddef> 8#include <cstddef>
9#include <cstring> 9#include <cstring>
10#include <memory> 10#include <memory>
11#include <vector>
12#include <unordered_map> 11#include <unordered_map>
13 12#include <vector>
14#include <glad/glad.h> 13#include <glad/glad.h>
15
16#include "common/bit_field.h" 14#include "common/bit_field.h"
17#include "common/common_types.h" 15#include "common/common_types.h"
18#include "common/hash.h" 16#include "common/hash.h"
19#include "common/vector_math.h" 17#include "common/vector_math.h"
20
21#include "core/hw/gpu.h" 18#include "core/hw/gpu.h"
22
23#include "video_core/pica.h" 19#include "video_core/pica.h"
24#include "video_core/pica_state.h" 20#include "video_core/pica_state.h"
25#include "video_core/pica_types.h" 21#include "video_core/pica_types.h"
@@ -40,10 +36,10 @@ struct ScreenInfo;
40 * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) 36 * Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
41 * two separate shaders sharing the same key. 37 * two separate shaders sharing the same key.
42 * 38 *
43 * We use a union because "implicitly-defined copy/move constructor for a union X copies the object representation of X." 39 * We use a union because "implicitly-defined copy/move constructor for a union X copies the object
44 * and "implicitly-defined copy assignment operator for a union X copies the object representation (3.9) of X." 40 * representation of X." and "implicitly-defined copy assignment operator for a union X copies the
45 * = Bytewise copy instead of memberwise copy. 41 * object representation (3.9) of X." = Bytewise copy instead of memberwise copy. This is important
46 * This is important because the padding bytes are included in the hash and comparison between objects. 42 * because the padding bytes are included in the hash and comparison between objects.
47 */ 43 */
48union PicaShaderConfig { 44union PicaShaderConfig {
49 45
@@ -60,8 +56,9 @@ union PicaShaderConfig {
60 56
61 state.depthmap_enable = regs.depthmap_enable; 57 state.depthmap_enable = regs.depthmap_enable;
62 58
63 state.alpha_test_func = regs.output_merger.alpha_test.enable ? 59 state.alpha_test_func = regs.output_merger.alpha_test.enable
64 regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; 60 ? regs.output_merger.alpha_test.func.Value()
61 : Pica::Regs::CompareFunc::Always;
65 62
66 state.texture0_type = regs.texture0.type; 63 state.texture0_type = regs.texture0.type;
67 64
@@ -81,9 +78,8 @@ union PicaShaderConfig {
81 state.fog_mode = regs.fog_mode; 78 state.fog_mode = regs.fog_mode;
82 state.fog_flip = regs.fog_flip; 79 state.fog_flip = regs.fog_flip;
83 80
84 state.combiner_buffer_input = 81 state.combiner_buffer_input = regs.tev_combiner_buffer_input.update_mask_rgb.Value() |
85 regs.tev_combiner_buffer_input.update_mask_rgb.Value() | 82 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
86 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
87 83
88 // Fragment lighting 84 // Fragment lighting
89 85
@@ -95,8 +91,10 @@ union PicaShaderConfig {
95 const auto& light = regs.lighting.light[num]; 91 const auto& light = regs.lighting.light[num];
96 state.lighting.light[light_index].num = num; 92 state.lighting.light[light_index].num = num;
97 state.lighting.light[light_index].directional = light.config.directional != 0; 93 state.lighting.light[light_index].directional = light.config.directional != 0;
98 state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0; 94 state.lighting.light[light_index].two_sided_diffuse =
99 state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); 95 light.config.two_sided_diffuse != 0;
96 state.lighting.light[light_index].dist_atten_enable =
97 !regs.lighting.IsDistAttenDisabled(num);
100 } 98 }
101 99
102 state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0; 100 state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0;
@@ -147,7 +145,7 @@ union PicaShaderConfig {
147 return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); 145 return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));
148 } 146 }
149 147
150 bool operator ==(const PicaShaderConfig& o) const { 148 bool operator==(const PicaShaderConfig& o) const {
151 return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0; 149 return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0;
152 }; 150 };
153 151
@@ -212,7 +210,8 @@ union PicaShaderConfig {
212 } state; 210 } state;
213}; 211};
214#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) 212#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
215static_assert(std::is_trivially_copyable<PicaShaderConfig::State>::value, "PicaShaderConfig::State must be trivially copyable"); 213static_assert(std::is_trivially_copyable<PicaShaderConfig::State>::value,
214 "PicaShaderConfig::State must be trivially copyable");
216#endif 215#endif
217 216
218namespace std { 217namespace std {
@@ -228,12 +227,10 @@ struct hash<PicaShaderConfig> {
228 227
229class RasterizerOpenGL : public VideoCore::RasterizerInterface { 228class RasterizerOpenGL : public VideoCore::RasterizerInterface {
230public: 229public:
231
232 RasterizerOpenGL(); 230 RasterizerOpenGL();
233 ~RasterizerOpenGL() override; 231 ~RasterizerOpenGL() override;
234 232
235 void AddTriangle(const Pica::Shader::OutputVertex& v0, 233 void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
236 const Pica::Shader::OutputVertex& v1,
237 const Pica::Shader::OutputVertex& v2) override; 234 const Pica::Shader::OutputVertex& v2) override;
238 void DrawTriangles() override; 235 void DrawTriangles() override;
239 void NotifyPicaRegisterChanged(u32 id) override; 236 void NotifyPicaRegisterChanged(u32 id) override;
@@ -242,7 +239,8 @@ public:
242 void FlushAndInvalidateRegion(PAddr addr, u32 size) override; 239 void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
243 bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; 240 bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
244 bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; 241 bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
245 bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) override; 242 bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
243 u32 pixel_stride, ScreenInfo& screen_info) override;
246 244
247 /// OpenGL shader generated for a given Pica register state 245 /// OpenGL shader generated for a given Pica register state
248 struct PicaShader { 246 struct PicaShader {
@@ -251,13 +249,13 @@ public:
251 }; 249 };
252 250
253private: 251private:
254
255 struct SamplerInfo { 252 struct SamplerInfo {
256 using TextureConfig = Pica::Regs::TextureConfig; 253 using TextureConfig = Pica::Regs::TextureConfig;
257 254
258 OGLSampler sampler; 255 OGLSampler sampler;
259 256
260 /// Creates the sampler object, initializing its state so that it's in sync with the SamplerInfo struct. 257 /// Creates the sampler object, initializing its state so that it's in sync with the
258 /// SamplerInfo struct.
261 void Create(); 259 void Create();
262 /// Syncs the sampler object with the config, updating any necessary state. 260 /// Syncs the sampler object with the config, updating any necessary state.
263 void SyncWithConfig(const TextureConfig& config); 261 void SyncWithConfig(const TextureConfig& config);
@@ -343,8 +341,11 @@ private:
343 alignas(16) GLvec4 tev_combiner_buffer_color; 341 alignas(16) GLvec4 tev_combiner_buffer_color;
344 }; 342 };
345 343
346 static_assert(sizeof(UniformData) == 0x3C0, "The size of the UniformData structure has changed, update the structure in the shader"); 344 static_assert(
347 static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); 345 sizeof(UniformData) == 0x3C0,
346 "The size of the UniformData structure has changed, update the structure in the shader");
347 static_assert(sizeof(UniformData) < 16384,
348 "UniformData structure must be less than 16kb as per the OpenGL spec");
348 349
349 /// Sets the OpenGL shader in accordance with the current PICA register state 350 /// Sets the OpenGL shader in accordance with the current PICA register state
350 void SetShader(); 351 void SetShader();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 7efd0038a..5cbad9b43 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -9,18 +9,14 @@
9#include <unordered_set> 9#include <unordered_set>
10#include <utility> 10#include <utility>
11#include <vector> 11#include <vector>
12
13#include <glad/glad.h> 12#include <glad/glad.h>
14
15#include "common/bit_field.h" 13#include "common/bit_field.h"
16#include "common/emu_window.h" 14#include "common/emu_window.h"
17#include "common/logging/log.h" 15#include "common/logging/log.h"
18#include "common/math_util.h" 16#include "common/math_util.h"
19#include "common/microprofile.h" 17#include "common/microprofile.h"
20#include "common/vector_math.h" 18#include "common/vector_math.h"
21
22#include "core/memory.h" 19#include "core/memory.h"
23
24#include "video_core/debug_utils/debug_utils.h" 20#include "video_core/debug_utils/debug_utils.h"
25#include "video_core/pica_state.h" 21#include "video_core/pica_state.h"
26#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 22#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
@@ -35,18 +31,18 @@ struct FormatTuple {
35}; 31};
36 32
37static const std::array<FormatTuple, 5> fb_format_tuples = {{ 33static const std::array<FormatTuple, 5> fb_format_tuples = {{
38 { GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8 }, // RGBA8 34 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8
39 { GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE }, // RGB8 35 {GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8
40 { GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1 }, // RGB5A1 36 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1
41 { GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5 }, // RGB565 37 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565
42 { GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4 }, // RGBA4 38 {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4
43}}; 39}};
44 40
45static const std::array<FormatTuple, 4> depth_format_tuples = {{ 41static const std::array<FormatTuple, 4> depth_format_tuples = {{
46 { GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT }, // D16 42 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16
47 {}, 43 {},
48 { GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT }, // D24 44 {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24
49 { GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8 }, // D24S8 45 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8
50}}; 46}};
51 47
52RasterizerCacheOpenGL::RasterizerCacheOpenGL() { 48RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
@@ -58,7 +54,9 @@ RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
58 FlushAll(); 54 FlushAll();
59} 55}
60 56
61static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data, bool morton_to_gl) { 57static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height,
58 u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data,
59 u8* gl_data, bool morton_to_gl) {
62 using PixelFormat = CachedSurface::PixelFormat; 60 using PixelFormat = CachedSurface::PixelFormat;
63 61
64 u8* data_ptrs[2]; 62 u8* data_ptrs[2];
@@ -72,7 +70,8 @@ static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width,
72 for (unsigned y = 0; y < height; ++y) { 70 for (unsigned y = 0; y < height; ++y) {
73 for (unsigned x = 0; x < width; ++x) { 71 for (unsigned x = 0; x < width; ++x) {
74 const u32 coarse_y = y & ~7; 72 const u32 coarse_y = y & ~7;
75 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; 73 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
74 coarse_y * width * bytes_per_pixel;
76 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; 75 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
77 76
78 data_ptrs[morton_to_gl] = morton_data + morton_offset; 77 data_ptrs[morton_to_gl] = morton_data + morton_offset;
@@ -81,7 +80,8 @@ static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width,
81 // Swap depth and stencil value ordering since 3DS does not match OpenGL 80 // Swap depth and stencil value ordering since 3DS does not match OpenGL
82 u32 depth_stencil; 81 u32 depth_stencil;
83 memcpy(&depth_stencil, data_ptrs[1], sizeof(u32)); 82 memcpy(&depth_stencil, data_ptrs[1], sizeof(u32));
84 depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | (depth_stencil >> depth_stencil_shifts[1]); 83 depth_stencil = (depth_stencil << depth_stencil_shifts[0]) |
84 (depth_stencil >> depth_stencil_shifts[1]);
85 85
86 memcpy(data_ptrs[0], &depth_stencil, sizeof(u32)); 86 memcpy(data_ptrs[0], &depth_stencil, sizeof(u32));
87 } 87 }
@@ -90,7 +90,8 @@ static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width,
90 for (unsigned y = 0; y < height; ++y) { 90 for (unsigned y = 0; y < height; ++y) {
91 for (unsigned x = 0; x < width; ++x) { 91 for (unsigned x = 0; x < width; ++x) {
92 const u32 coarse_y = y & ~7; 92 const u32 coarse_y = y & ~7;
93 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; 93 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
94 coarse_y * width * bytes_per_pixel;
94 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; 95 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
95 96
96 data_ptrs[morton_to_gl] = morton_data + morton_offset; 97 data_ptrs[morton_to_gl] = morton_data + morton_offset;
@@ -102,17 +103,21 @@ static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width,
102 } 103 }
103} 104}
104 105
105bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect) { 106bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex,
107 CachedSurface::SurfaceType type,
108 const MathUtil::Rectangle<int>& src_rect,
109 const MathUtil::Rectangle<int>& dst_rect) {
106 using SurfaceType = CachedSurface::SurfaceType; 110 using SurfaceType = CachedSurface::SurfaceType;
107 111
108 OpenGLState cur_state = OpenGLState::GetCurState(); 112 OpenGLState cur_state = OpenGLState::GetCurState();
109 113
110 // Make sure textures aren't bound to texture units, since going to bind them to framebuffer components 114 // Make sure textures aren't bound to texture units, since going to bind them to framebuffer
115 // components
111 OpenGLState::ResetTexture(src_tex); 116 OpenGLState::ResetTexture(src_tex);
112 OpenGLState::ResetTexture(dst_tex); 117 OpenGLState::ResetTexture(dst_tex);
113 118
114 // Keep track of previous framebuffer bindings 119 // Keep track of previous framebuffer bindings
115 GLuint old_fbs[2] = { cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer }; 120 GLuint old_fbs[2] = {cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer};
116 cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle; 121 cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle;
117 cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle; 122 cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle;
118 cur_state.Apply(); 123 cur_state.Apply();
@@ -120,11 +125,15 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedS
120 u32 buffers = 0; 125 u32 buffers = 0;
121 126
122 if (type == SurfaceType::Color || type == SurfaceType::Texture) { 127 if (type == SurfaceType::Color || type == SurfaceType::Texture) {
123 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, 0); 128 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
124 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); 129 0);
130 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
131 0);
125 132
126 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0); 133 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
127 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); 134 0);
135 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
136 0);
128 137
129 buffers = GL_COLOR_BUFFER_BIT; 138 buffers = GL_COLOR_BUFFER_BIT;
130 } else if (type == SurfaceType::Depth) { 139 } else if (type == SurfaceType::Depth) {
@@ -139,10 +148,12 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedS
139 buffers = GL_DEPTH_BUFFER_BIT; 148 buffers = GL_DEPTH_BUFFER_BIT;
140 } else if (type == SurfaceType::DepthStencil) { 149 } else if (type == SurfaceType::DepthStencil) {
141 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); 150 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
142 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0); 151 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
152 src_tex, 0);
143 153
144 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); 154 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
145 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0); 155 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
156 dst_tex, 0);
146 157
147 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; 158 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
148 } 159 }
@@ -155,9 +166,9 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedS
155 return false; 166 return false;
156 } 167 }
157 168
158 glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, 169 glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
159 dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, 170 dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
160 buffers, buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); 171 buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
161 172
162 // Restore previous framebuffer bindings 173 // Restore previous framebuffer bindings
163 cur_state.draw.read_framebuffer = old_fbs[0]; 174 cur_state.draw.read_framebuffer = old_fbs[0];
@@ -167,17 +178,24 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedS
167 return true; 178 return true;
168} 179}
169 180
170bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect) { 181bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface,
182 const MathUtil::Rectangle<int>& src_rect,
183 CachedSurface* dst_surface,
184 const MathUtil::Rectangle<int>& dst_rect) {
171 using SurfaceType = CachedSurface::SurfaceType; 185 using SurfaceType = CachedSurface::SurfaceType;
172 186
173 if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) { 187 if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format,
188 dst_surface->pixel_format)) {
174 return false; 189 return false;
175 } 190 }
176 191
177 return BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, dst_rect); 192 return BlitTextures(src_surface->texture.handle, dst_surface->texture.handle,
193 CachedSurface::GetFormatType(src_surface->pixel_format), src_rect,
194 dst_rect);
178} 195}
179 196
180static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, u32 width, u32 height) { 197static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format,
198 u32 width, u32 height) {
181 // Allocate an uninitialized texture of appropriate size and format for the surface 199 // Allocate an uninitialized texture of appropriate size and format for the surface
182 using SurfaceType = CachedSurface::SurfaceType; 200 using SurfaceType = CachedSurface::SurfaceType;
183 201
@@ -200,11 +218,11 @@ static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pi
200 ASSERT(tuple_idx < depth_format_tuples.size()); 218 ASSERT(tuple_idx < depth_format_tuples.size());
201 tuple = depth_format_tuples[tuple_idx]; 219 tuple = depth_format_tuples[tuple_idx];
202 } else { 220 } else {
203 tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE }; 221 tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE};
204 } 222 }
205 223
206 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, 224 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, tuple.format,
207 tuple.format, tuple.type, nullptr); 225 tuple.type, nullptr);
208 226
209 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); 227 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
210 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 228 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
@@ -217,7 +235,8 @@ static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pi
217} 235}
218 236
219MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192)); 237MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192));
220CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create) { 238CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale,
239 bool load_if_create) {
221 using PixelFormat = CachedSurface::PixelFormat; 240 using PixelFormat = CachedSurface::PixelFormat;
222 using SurfaceType = CachedSurface::SurfaceType; 241 using SurfaceType = CachedSurface::SurfaceType;
223 242
@@ -225,29 +244,31 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
225 return nullptr; 244 return nullptr;
226 } 245 }
227 246
228 u32 params_size = params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8; 247 u32 params_size =
248 params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
229 249
230 // Check for an exact match in existing surfaces 250 // Check for an exact match in existing surfaces
231 CachedSurface* best_exact_surface = nullptr; 251 CachedSurface* best_exact_surface = nullptr;
232 float exact_surface_goodness = -1.f; 252 float exact_surface_goodness = -1.f;
233 253
234 auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size); 254 auto surface_interval =
255 boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
235 auto range = surface_cache.equal_range(surface_interval); 256 auto range = surface_cache.equal_range(surface_interval);
236 for (auto it = range.first; it != range.second; ++it) { 257 for (auto it = range.first; it != range.second; ++it) {
237 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { 258 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
238 CachedSurface* surface = it2->get(); 259 CachedSurface* surface = it2->get();
239 260
240 // Check if the request matches the surface exactly 261 // Check if the request matches the surface exactly
241 if (params.addr == surface->addr && 262 if (params.addr == surface->addr && params.width == surface->width &&
242 params.width == surface->width && params.height == surface->height && 263 params.height == surface->height && params.pixel_format == surface->pixel_format) {
243 params.pixel_format == surface->pixel_format)
244 {
245 // Make sure optional param-matching criteria are fulfilled 264 // Make sure optional param-matching criteria are fulfilled
246 bool tiling_match = (params.is_tiled == surface->is_tiled); 265 bool tiling_match = (params.is_tiled == surface->is_tiled);
247 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height); 266 bool res_scale_match = (params.res_scale_width == surface->res_scale_width &&
267 params.res_scale_height == surface->res_scale_height);
248 if (!match_res_scale || res_scale_match) { 268 if (!match_res_scale || res_scale_match) {
249 // Prioritize same-tiling and highest resolution surfaces 269 // Prioritize same-tiling and highest resolution surfaces
250 float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height; 270 float match_goodness =
271 (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
251 if (match_goodness > exact_surface_goodness || surface->dirty) { 272 if (match_goodness > exact_surface_goodness || surface->dirty) {
252 exact_surface_goodness = match_goodness; 273 exact_surface_goodness = match_goodness;
253 best_exact_surface = surface; 274 best_exact_surface = surface;
@@ -288,9 +309,11 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
288 309
289 if (!load_if_create) { 310 if (!load_if_create) {
290 // Don't load any data; just allocate the surface's texture 311 // Don't load any data; just allocate the surface's texture
291 AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); 312 AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format,
313 new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
292 } else { 314 } else {
293 // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead of memory upload below if that's a common scenario in some game 315 // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead
316 // of memory upload below if that's a common scenario in some game
294 317
295 Memory::RasterizerFlushRegion(params.addr, params_size); 318 Memory::RasterizerFlushRegion(params.addr, params_size);
296 319
@@ -318,7 +341,7 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
318 tuple = fb_format_tuples[(unsigned int)params.pixel_format]; 341 tuple = fb_format_tuples[(unsigned int)params.pixel_format];
319 } else { 342 } else {
320 // Texture 343 // Texture
321 tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE }; 344 tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE};
322 } 345 }
323 346
324 std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height); 347 std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height);
@@ -326,19 +349,23 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
326 Pica::DebugUtils::TextureInfo tex_info; 349 Pica::DebugUtils::TextureInfo tex_info;
327 tex_info.width = params.width; 350 tex_info.width = params.width;
328 tex_info.height = params.height; 351 tex_info.height = params.height;
329 tex_info.stride = params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8; 352 tex_info.stride =
353 params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
330 tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format; 354 tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format;
331 tex_info.physical_address = params.addr; 355 tex_info.physical_address = params.addr;
332 356
333 for (unsigned y = 0; y < params.height; ++y) { 357 for (unsigned y = 0; y < params.height; ++y) {
334 for (unsigned x = 0; x < params.width; ++x) { 358 for (unsigned x = 0; x < params.width; ++x) {
335 tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, params.height - 1 - y, tex_info); 359 tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture(
360 texture_src_data, x, params.height - 1 - y, tex_info);
336 } 361 }
337 } 362 }
338 363
339 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data()); 364 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height,
365 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data());
340 } else { 366 } else {
341 // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format 367 // Depth/Stencil formats need special treatment since they aren't sampleable using
368 // LookupTexture and can't use RGBA format
342 size_t tuple_idx = (size_t)params.pixel_format - 14; 369 size_t tuple_idx = (size_t)params.pixel_format - 14;
343 ASSERT(tuple_idx < depth_format_tuples.size()); 370 ASSERT(tuple_idx < depth_format_tuples.size());
344 const FormatTuple& tuple = depth_format_tuples[tuple_idx]; 371 const FormatTuple& tuple = depth_format_tuples[tuple_idx];
@@ -350,14 +377,18 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
350 377
351 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel; 378 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
352 379
353 std::vector<u8> temp_fb_depth_buffer(params.width * params.height * gl_bytes_per_pixel); 380 std::vector<u8> temp_fb_depth_buffer(params.width * params.height *
381 gl_bytes_per_pixel);
354 382
355 u8* temp_fb_depth_buffer_ptr = use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data(); 383 u8* temp_fb_depth_buffer_ptr =
384 use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data();
356 385
357 MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, true); 386 MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel,
387 gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr,
388 true);
358 389
359 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, 390 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height,
360 tuple.format, tuple.type, temp_fb_depth_buffer.data()); 391 0, tuple.format, tuple.type, temp_fb_depth_buffer.data());
361 } 392 }
362 } 393 }
363 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 394 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
@@ -367,10 +398,13 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
367 OGLTexture scaled_texture; 398 OGLTexture scaled_texture;
368 scaled_texture.Create(); 399 scaled_texture.Create();
369 400
370 AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); 401 AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format,
371 BlitTextures(new_surface->texture.handle, scaled_texture.handle, CachedSurface::GetFormatType(new_surface->pixel_format), 402 new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
372 MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height), 403 BlitTextures(new_surface->texture.handle, scaled_texture.handle,
373 MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(), new_surface->GetScaledHeight())); 404 CachedSurface::GetFormatType(new_surface->pixel_format),
405 MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height),
406 MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(),
407 new_surface->GetScaledHeight()));
374 408
375 new_surface->texture.Release(); 409 new_surface->texture.Release();
376 new_surface->texture.handle = scaled_texture.handle; 410 new_surface->texture.handle = scaled_texture.handle;
@@ -389,11 +423,15 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
389 } 423 }
390 424
391 Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1); 425 Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1);
392 surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open(new_surface->addr, new_surface->addr + new_surface->size), std::set<std::shared_ptr<CachedSurface>>({ new_surface }))); 426 surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open(
427 new_surface->addr, new_surface->addr + new_surface->size),
428 std::set<std::shared_ptr<CachedSurface>>({new_surface})));
393 return new_surface.get(); 429 return new_surface.get();
394} 430}
395 431
396CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect) { 432CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params,
433 bool match_res_scale, bool load_if_create,
434 MathUtil::Rectangle<int>& out_rect) {
397 if (params.addr == 0) { 435 if (params.addr == 0) {
398 return nullptr; 436 return nullptr;
399 } 437 }
@@ -405,7 +443,8 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params
405 CachedSurface* best_subrect_surface = nullptr; 443 CachedSurface* best_subrect_surface = nullptr;
406 float subrect_surface_goodness = -1.f; 444 float subrect_surface_goodness = -1.f;
407 445
408 auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size); 446 auto surface_interval =
447 boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
409 auto cache_upper_bound = surface_cache.upper_bound(surface_interval); 448 auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
410 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { 449 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
411 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { 450 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
@@ -414,14 +453,15 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params
414 // Check if the request is contained in the surface 453 // Check if the request is contained in the surface
415 if (params.addr >= surface->addr && 454 if (params.addr >= surface->addr &&
416 params.addr + params_size - 1 <= surface->addr + surface->size - 1 && 455 params.addr + params_size - 1 <= surface->addr + surface->size - 1 &&
417 params.pixel_format == surface->pixel_format) 456 params.pixel_format == surface->pixel_format) {
418 {
419 // Make sure optional param-matching criteria are fulfilled 457 // Make sure optional param-matching criteria are fulfilled
420 bool tiling_match = (params.is_tiled == surface->is_tiled); 458 bool tiling_match = (params.is_tiled == surface->is_tiled);
421 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height); 459 bool res_scale_match = (params.res_scale_width == surface->res_scale_width &&
460 params.res_scale_height == surface->res_scale_height);
422 if (!match_res_scale || res_scale_match) { 461 if (!match_res_scale || res_scale_match) {
423 // Prioritize same-tiling and highest resolution surfaces 462 // Prioritize same-tiling and highest resolution surfaces
424 float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height; 463 float match_goodness =
464 (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
425 if (match_goodness > subrect_surface_goodness || surface->dirty) { 465 if (match_goodness > subrect_surface_goodness || surface->dirty) {
426 subrect_surface_goodness = match_goodness; 466 subrect_surface_goodness = match_goodness;
427 best_subrect_surface = surface; 467 best_subrect_surface = surface;
@@ -433,7 +473,8 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params
433 473
434 // Return the best subrect surface if found 474 // Return the best subrect surface if found
435 if (best_subrect_surface != nullptr) { 475 if (best_subrect_surface != nullptr) {
436 unsigned int bytes_per_pixel = (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8); 476 unsigned int bytes_per_pixel =
477 (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8);
437 478
438 int x0, y0; 479 int x0, y0;
439 480
@@ -452,7 +493,9 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params
452 y0 = begin_tile_index / tiles_per_row * 8; 493 y0 = begin_tile_index / tiles_per_row * 8;
453 494
454 // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory. 495 // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory.
455 out_rect = MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width, best_subrect_surface->height - (y0 + params.height)); 496 out_rect =
497 MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width,
498 best_subrect_surface->height - (y0 + params.height));
456 } 499 }
457 500
458 out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width); 501 out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width);
@@ -465,16 +508,20 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params
465 508
466 // No subrect found - create and return a new surface 509 // No subrect found - create and return a new surface
467 if (!params.is_tiled) { 510 if (!params.is_tiled) {
468 out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width), (int)(params.height * params.res_scale_height)); 511 out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width),
512 (int)(params.height * params.res_scale_height));
469 } else { 513 } else {
470 out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height), (int)(params.width * params.res_scale_width), 0); 514 out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height),
515 (int)(params.width * params.res_scale_width), 0);
471 } 516 }
472 517
473 return GetSurface(params, match_res_scale, load_if_create); 518 return GetSurface(params, match_res_scale, load_if_create);
474} 519}
475 520
476CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTextureConfig& config) { 521CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(
477 Pica::DebugUtils::TextureInfo info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format); 522 const Pica::Regs::FullTextureConfig& config) {
523 Pica::DebugUtils::TextureInfo info =
524 Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format);
478 525
479 CachedSurface params; 526 CachedSurface params;
480 params.addr = info.physical_address; 527 params.addr = info.physical_address;
@@ -485,20 +532,28 @@ CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTe
485 return GetSurface(params, false, true); 532 return GetSurface(params, false, true);
486} 533}
487 534
488std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) { 535std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>>
536RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) {
489 const auto& regs = Pica::g_state.regs; 537 const auto& regs = Pica::g_state.regs;
490 538
491 // Make sur that framebuffers don't overlap if both color and depth are being used 539 // Make sur that framebuffers don't overlap if both color and depth are being used
492 u32 fb_area = config.GetWidth() * config.GetHeight(); 540 u32 fb_area = config.GetWidth() * config.GetHeight();
493 bool framebuffers_overlap = config.GetColorBufferPhysicalAddress() != 0 && 541 bool framebuffers_overlap =
494 config.GetDepthBufferPhysicalAddress() != 0 && 542 config.GetColorBufferPhysicalAddress() != 0 &&
495 MathUtil::IntervalsIntersect(config.GetColorBufferPhysicalAddress(), fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())), 543 config.GetDepthBufferPhysicalAddress() != 0 &&
496 config.GetDepthBufferPhysicalAddress(), fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format)); 544 MathUtil::IntervalsIntersect(
545 config.GetColorBufferPhysicalAddress(),
546 fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())),
547 config.GetDepthBufferPhysicalAddress(),
548 fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format));
497 bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0; 549 bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0;
498 bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && (regs.output_merger.depth_test_enable || regs.output_merger.depth_write_enable || !framebuffers_overlap); 550 bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 &&
551 (regs.output_merger.depth_test_enable ||
552 regs.output_merger.depth_write_enable || !framebuffers_overlap);
499 553
500 if (framebuffers_overlap && using_color_fb && using_depth_fb) { 554 if (framebuffers_overlap && using_color_fb && using_depth_fb) {
501 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!"); 555 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; "
556 "overlapping framebuffers not supported!");
502 using_depth_fb = false; 557 using_depth_fb = false;
503 } 558 }
504 559
@@ -512,8 +567,10 @@ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerC
512 auto layout = VideoCore::g_emu_window->GetFramebufferLayout(); 567 auto layout = VideoCore::g_emu_window->GetFramebufferLayout();
513 568
514 // Assume same scaling factor for top and bottom screens 569 // Assume same scaling factor for top and bottom screens
515 color_params.res_scale_width = depth_params.res_scale_width = (float)layout.top_screen.GetWidth() / VideoCore::kScreenTopWidth; 570 color_params.res_scale_width = depth_params.res_scale_width =
516 color_params.res_scale_height = depth_params.res_scale_height = (float)layout.top_screen.GetHeight() / VideoCore::kScreenTopHeight; 571 (float)layout.top_screen.GetWidth() / VideoCore::kScreenTopWidth;
572 color_params.res_scale_height = depth_params.res_scale_height =
573 (float)layout.top_screen.GetHeight() / VideoCore::kScreenTopHeight;
517 } 574 }
518 575
519 color_params.addr = config.GetColorBufferPhysicalAddress(); 576 color_params.addr = config.GetColorBufferPhysicalAddress();
@@ -523,22 +580,28 @@ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerC
523 depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format); 580 depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format);
524 581
525 MathUtil::Rectangle<int> color_rect; 582 MathUtil::Rectangle<int> color_rect;
526 CachedSurface* color_surface = using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr; 583 CachedSurface* color_surface =
584 using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr;
527 585
528 MathUtil::Rectangle<int> depth_rect; 586 MathUtil::Rectangle<int> depth_rect;
529 CachedSurface* depth_surface = using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr; 587 CachedSurface* depth_surface =
588 using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr;
530 589
531 // Sanity check to make sure found surfaces aren't the same 590 // Sanity check to make sure found surfaces aren't the same
532 if (using_depth_fb && using_color_fb && color_surface == depth_surface) { 591 if (using_depth_fb && using_color_fb && color_surface == depth_surface) {
533 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!"); 592 LOG_CRITICAL(
593 Render_OpenGL,
594 "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!");
534 using_depth_fb = false; 595 using_depth_fb = false;
535 depth_surface = nullptr; 596 depth_surface = nullptr;
536 } 597 }
537 598
538 MathUtil::Rectangle<int> rect; 599 MathUtil::Rectangle<int> rect;
539 600
540 if (color_surface != nullptr && depth_surface != nullptr && (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) { 601 if (color_surface != nullptr && depth_surface != nullptr &&
541 // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if they don't match 602 (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) {
603 // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if
604 // they don't match
542 if (color_rect.left != 0 || color_rect.top != 0) { 605 if (color_rect.left != 0 || color_rect.top != 0) {
543 color_surface = GetSurface(color_params, true, true); 606 color_surface = GetSurface(color_params, true, true);
544 } 607 }
@@ -548,9 +611,13 @@ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerC
548 } 611 }
549 612
550 if (!color_surface->is_tiled) { 613 if (!color_surface->is_tiled) {
551 rect = MathUtil::Rectangle<int>(0, 0, (int)(color_params.width * color_params.res_scale_width), (int)(color_params.height * color_params.res_scale_height)); 614 rect = MathUtil::Rectangle<int>(
615 0, 0, (int)(color_params.width * color_params.res_scale_width),
616 (int)(color_params.height * color_params.res_scale_height));
552 } else { 617 } else {
553 rect = MathUtil::Rectangle<int>(0, (int)(color_params.height * color_params.res_scale_height), (int)(color_params.width * color_params.res_scale_width), 0); 618 rect = MathUtil::Rectangle<int>(
619 0, (int)(color_params.height * color_params.res_scale_height),
620 (int)(color_params.width * color_params.res_scale_width), 0);
554 } 621 }
555 } else if (color_surface != nullptr) { 622 } else if (color_surface != nullptr) {
556 rect = color_rect; 623 rect = color_rect;
@@ -564,7 +631,8 @@ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerC
564} 631}
565 632
566CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) { 633CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) {
567 auto surface_interval = boost::icl::interval<PAddr>::right_open(config.GetStartAddress(), config.GetEndAddress()); 634 auto surface_interval =
635 boost::icl::interval<PAddr>::right_open(config.GetStartAddress(), config.GetEndAddress());
568 auto range = surface_cache.equal_range(surface_interval); 636 auto range = surface_cache.equal_range(surface_interval);
569 for (auto it = range.first; it != range.second; ++it) { 637 for (auto it = range.first; it != range.second; ++it) {
570 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { 638 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
@@ -581,8 +649,9 @@ CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryF
581 649
582 if (surface->addr == config.GetStartAddress() && 650 if (surface->addr == config.GetStartAddress() &&
583 CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value && 651 CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value &&
584 (surface->width * surface->height * CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == (config.GetEndAddress() - config.GetStartAddress())) 652 (surface->width * surface->height *
585 { 653 CachedSurface::GetFormatBpp(surface->pixel_format) / 8) ==
654 (config.GetEndAddress() - config.GetStartAddress())) {
586 return surface; 655 return surface;
587 } 656 }
588 } 657 }
@@ -617,8 +686,11 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
617 if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) { 686 if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) {
618 unscaled_tex.Create(); 687 unscaled_tex.Create();
619 688
620 AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, surface->height); 689 AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width,
621 BlitTextures(surface->texture.handle, unscaled_tex.handle, CachedSurface::GetFormatType(surface->pixel_format), 690 surface->height);
691 BlitTextures(
692 surface->texture.handle, unscaled_tex.handle,
693 CachedSurface::GetFormatType(surface->pixel_format),
622 MathUtil::Rectangle<int>(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()), 694 MathUtil::Rectangle<int>(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()),
623 MathUtil::Rectangle<int>(0, 0, surface->width, surface->height)); 695 MathUtil::Rectangle<int>(0, 0, surface->width, surface->height));
624 696
@@ -648,10 +720,14 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
648 720
649 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); 721 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
650 722
651 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary. 723 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion
652 MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), false); 724 // is necessary.
725 MortonCopyPixels(surface->pixel_format, surface->width, surface->height,
726 bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(),
727 false);
653 } else { 728 } else {
654 // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format 729 // Depth/Stencil formats need special treatment since they aren't sampleable using
730 // LookupTexture and can't use RGBA format
655 size_t tuple_idx = (size_t)surface->pixel_format - 14; 731 size_t tuple_idx = (size_t)surface->pixel_format - 14;
656 ASSERT(tuple_idx < depth_format_tuples.size()); 732 ASSERT(tuple_idx < depth_format_tuples.size());
657 const FormatTuple& tuple = depth_format_tuples[tuple_idx]; 733 const FormatTuple& tuple = depth_format_tuples[tuple_idx];
@@ -669,7 +745,9 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
669 745
670 u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data(); 746 u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data();
671 747
672 MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, false); 748 MortonCopyPixels(surface->pixel_format, surface->width, surface->height,
749 bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr,
750 false);
673 } 751 }
674 } 752 }
675 glPixelStorei(GL_PACK_ROW_LENGTH, 0); 753 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
@@ -680,7 +758,8 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
680 cur_state.Apply(); 758 cur_state.Apply();
681} 759}
682 760
683void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate) { 761void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface,
762 bool invalidate) {
684 if (size == 0) { 763 if (size == 0) {
685 return; 764 return;
686 } 765 }
@@ -691,8 +770,11 @@ void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurfac
691 auto surface_interval = boost::icl::interval<PAddr>::right_open(addr, addr + size); 770 auto surface_interval = boost::icl::interval<PAddr>::right_open(addr, addr + size);
692 auto cache_upper_bound = surface_cache.upper_bound(surface_interval); 771 auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
693 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { 772 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
694 std::copy_if(it->second.begin(), it->second.end(), std::inserter(touching_surfaces, touching_surfaces.end()), 773 std::copy_if(it->second.begin(), it->second.end(),
695 [skip_surface](std::shared_ptr<CachedSurface> surface) { return (surface.get() != skip_surface); }); 774 std::inserter(touching_surfaces, touching_surfaces.end()),
775 [skip_surface](std::shared_ptr<CachedSurface> surface) {
776 return (surface.get() != skip_surface);
777 });
696 } 778 }
697 779
698 // Flush and invalidate surfaces 780 // Flush and invalidate surfaces
@@ -700,7 +782,10 @@ void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurfac
700 FlushSurface(surface.get()); 782 FlushSurface(surface.get());
701 if (invalidate) { 783 if (invalidate) {
702 Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1); 784 Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1);
703 surface_cache.subtract(std::make_pair(boost::icl::interval<PAddr>::right_open(surface->addr, surface->addr + surface->size), std::set<std::shared_ptr<CachedSurface>>({ surface }))); 785 surface_cache.subtract(
786 std::make_pair(boost::icl::interval<PAddr>::right_open(
787 surface->addr, surface->addr + surface->size),
788 std::set<std::shared_ptr<CachedSurface>>({surface})));
704 } 789 }
705 } 790 }
706} 791}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 225596415..849530d86 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -8,21 +8,18 @@
8#include <memory> 8#include <memory>
9#include <set> 9#include <set>
10#include <tuple> 10#include <tuple>
11
12#include <boost/icl/interval_map.hpp> 11#include <boost/icl/interval_map.hpp>
13#include <glad/glad.h> 12#include <glad/glad.h>
14
15#include "common/assert.h" 13#include "common/assert.h"
16#include "common/common_funcs.h" 14#include "common/common_funcs.h"
17#include "common/common_types.h" 15#include "common/common_types.h"
18
19#include "core/hw/gpu.h" 16#include "core/hw/gpu.h"
20
21#include "video_core/pica.h" 17#include "video_core/pica.h"
22#include "video_core/renderer_opengl/gl_resource_manager.h" 18#include "video_core/renderer_opengl/gl_resource_manager.h"
23 19
24namespace MathUtil { 20namespace MathUtil {
25template <class T> struct Rectangle; 21template <class T>
22struct Rectangle;
26} 23}
27 24
28struct CachedSurface; 25struct CachedSurface;
@@ -32,38 +29,38 @@ using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<Ca
32struct CachedSurface { 29struct CachedSurface {
33 enum class PixelFormat { 30 enum class PixelFormat {
34 // First 5 formats are shared between textures and color buffers 31 // First 5 formats are shared between textures and color buffers
35 RGBA8 = 0, 32 RGBA8 = 0,
36 RGB8 = 1, 33 RGB8 = 1,
37 RGB5A1 = 2, 34 RGB5A1 = 2,
38 RGB565 = 3, 35 RGB565 = 3,
39 RGBA4 = 4, 36 RGBA4 = 4,
40 37
41 // Texture-only formats 38 // Texture-only formats
42 IA8 = 5, 39 IA8 = 5,
43 RG8 = 6, 40 RG8 = 6,
44 I8 = 7, 41 I8 = 7,
45 A8 = 8, 42 A8 = 8,
46 IA4 = 9, 43 IA4 = 9,
47 I4 = 10, 44 I4 = 10,
48 A4 = 11, 45 A4 = 11,
49 ETC1 = 12, 46 ETC1 = 12,
50 ETC1A4 = 13, 47 ETC1A4 = 13,
51 48
52 // Depth buffer-only formats 49 // Depth buffer-only formats
53 D16 = 14, 50 D16 = 14,
54 // gap 51 // gap
55 D24 = 16, 52 D24 = 16,
56 D24S8 = 17, 53 D24S8 = 17,
57 54
58 Invalid = 255, 55 Invalid = 255,
59 }; 56 };
60 57
61 enum class SurfaceType { 58 enum class SurfaceType {
62 Color = 0, 59 Color = 0,
63 Texture = 1, 60 Texture = 1,
64 Depth = 2, 61 Depth = 2,
65 DepthStencil = 3, 62 DepthStencil = 3,
66 Invalid = 4, 63 Invalid = 4,
67 }; 64 };
68 65
69 static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { 66 static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) {
@@ -101,7 +98,8 @@ struct CachedSurface {
101 } 98 }
102 99
103 static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) { 100 static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) {
104 return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14) : PixelFormat::Invalid; 101 return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14)
102 : PixelFormat::Invalid;
105 } 103 }
106 104
107 static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) { 105 static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) {
@@ -120,7 +118,8 @@ struct CachedSurface {
120 SurfaceType a_type = GetFormatType(pixel_format_a); 118 SurfaceType a_type = GetFormatType(pixel_format_a);
121 SurfaceType b_type = GetFormatType(pixel_format_b); 119 SurfaceType b_type = GetFormatType(pixel_format_b);
122 120
123 if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) { 121 if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) &&
122 (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
124 return true; 123 return true;
125 } 124 }
126 125
@@ -187,22 +186,30 @@ public:
187 ~RasterizerCacheOpenGL(); 186 ~RasterizerCacheOpenGL();
188 187
189 /// Blits one texture to another 188 /// Blits one texture to another
190 bool BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect); 189 bool BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type,
190 const MathUtil::Rectangle<int>& src_rect,
191 const MathUtil::Rectangle<int>& dst_rect);
191 192
192 /// Attempt to blit one surface's texture to another 193 /// Attempt to blit one surface's texture to another
193 bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect); 194 bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect,
195 CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect);
194 196
195 /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) 197 /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
196 CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create); 198 CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale,
199 bool load_if_create);
197 200
198 /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from 3DS memory to OpenGL and caches it (if not already cached) 201 /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
199 CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect); 202 /// 3DS memory to OpenGL and caches it (if not already cached)
203 CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale,
204 bool load_if_create, MathUtil::Rectangle<int>& out_rect);
200 205
201 /// Gets a surface based on the texture configuration 206 /// Gets a surface based on the texture configuration
202 CachedSurface* GetTextureSurface(const Pica::Regs::FullTextureConfig& config); 207 CachedSurface* GetTextureSurface(const Pica::Regs::FullTextureConfig& config);
203 208
204 /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer configuration 209 /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer
205 std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config); 210 /// configuration
211 std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces(
212 const Pica::Regs::FramebufferConfig& config);
206 213
207 /// Attempt to get a surface that exactly matches the fill region and format 214 /// Attempt to get a surface that exactly matches the fill region and format
208 CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config); 215 CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config);
@@ -210,7 +217,8 @@ public:
210 /// Write the surface back to memory 217 /// Write the surface back to memory
211 void FlushSurface(CachedSurface* surface); 218 void FlushSurface(CachedSurface* surface);
212 219
213 /// Write any cached resources overlapping the region back to memory (if dirty) and optionally invalidate them in the cache 220 /// Write any cached resources overlapping the region back to memory (if dirty) and optionally
221 /// invalidate them in the cache
214 void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate); 222 void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate);
215 223
216 /// Flush all cached resources tracked by this cache manager 224 /// Flush all cached resources tracked by this cache manager
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index eb128966c..13301ec9f 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -5,30 +5,36 @@
5#pragma once 5#pragma once
6 6
7#include <utility> 7#include <utility>
8
9#include <glad/glad.h> 8#include <glad/glad.h>
10
11#include "common/common_types.h" 9#include "common/common_types.h"
12
13#include "video_core/renderer_opengl/gl_shader_util.h" 10#include "video_core/renderer_opengl/gl_shader_util.h"
14#include "video_core/renderer_opengl/gl_state.h" 11#include "video_core/renderer_opengl/gl_state.h"
15 12
16class OGLTexture : private NonCopyable { 13class OGLTexture : private NonCopyable {
17public: 14public:
18 OGLTexture() = default; 15 OGLTexture() = default;
19 OGLTexture(OGLTexture&& o) { std::swap(handle, o.handle); } 16 OGLTexture(OGLTexture&& o) {
20 ~OGLTexture() { Release(); } 17 std::swap(handle, o.handle);
21 OGLTexture& operator=(OGLTexture&& o) { std::swap(handle, o.handle); return *this; } 18 }
19 ~OGLTexture() {
20 Release();
21 }
22 OGLTexture& operator=(OGLTexture&& o) {
23 std::swap(handle, o.handle);
24 return *this;
25 }
22 26
23 /// Creates a new internal OpenGL resource and stores the handle 27 /// Creates a new internal OpenGL resource and stores the handle
24 void Create() { 28 void Create() {
25 if (handle != 0) return; 29 if (handle != 0)
30 return;
26 glGenTextures(1, &handle); 31 glGenTextures(1, &handle);
27 } 32 }
28 33
29 /// Deletes the internal OpenGL resource 34 /// Deletes the internal OpenGL resource
30 void Release() { 35 void Release() {
31 if (handle == 0) return; 36 if (handle == 0)
37 return;
32 glDeleteTextures(1, &handle); 38 glDeleteTextures(1, &handle);
33 OpenGLState::ResetTexture(handle); 39 OpenGLState::ResetTexture(handle);
34 handle = 0; 40 handle = 0;
@@ -40,19 +46,28 @@ public:
40class OGLSampler : private NonCopyable { 46class OGLSampler : private NonCopyable {
41public: 47public:
42 OGLSampler() = default; 48 OGLSampler() = default;
43 OGLSampler(OGLSampler&& o) { std::swap(handle, o.handle); } 49 OGLSampler(OGLSampler&& o) {
44 ~OGLSampler() { Release(); } 50 std::swap(handle, o.handle);
45 OGLSampler& operator=(OGLSampler&& o) { std::swap(handle, o.handle); return *this; } 51 }
52 ~OGLSampler() {
53 Release();
54 }
55 OGLSampler& operator=(OGLSampler&& o) {
56 std::swap(handle, o.handle);
57 return *this;
58 }
46 59
47 /// Creates a new internal OpenGL resource and stores the handle 60 /// Creates a new internal OpenGL resource and stores the handle
48 void Create() { 61 void Create() {
49 if (handle != 0) return; 62 if (handle != 0)
63 return;
50 glGenSamplers(1, &handle); 64 glGenSamplers(1, &handle);
51 } 65 }
52 66
53 /// Deletes the internal OpenGL resource 67 /// Deletes the internal OpenGL resource
54 void Release() { 68 void Release() {
55 if (handle == 0) return; 69 if (handle == 0)
70 return;
56 glDeleteSamplers(1, &handle); 71 glDeleteSamplers(1, &handle);
57 OpenGLState::ResetSampler(handle); 72 OpenGLState::ResetSampler(handle);
58 handle = 0; 73 handle = 0;
@@ -64,19 +79,28 @@ public:
64class OGLShader : private NonCopyable { 79class OGLShader : private NonCopyable {
65public: 80public:
66 OGLShader() = default; 81 OGLShader() = default;
67 OGLShader(OGLShader&& o) { std::swap(handle, o.handle); } 82 OGLShader(OGLShader&& o) {
68 ~OGLShader() { Release(); } 83 std::swap(handle, o.handle);
69 OGLShader& operator=(OGLShader&& o) { std::swap(handle, o.handle); return *this; } 84 }
85 ~OGLShader() {
86 Release();
87 }
88 OGLShader& operator=(OGLShader&& o) {
89 std::swap(handle, o.handle);
90 return *this;
91 }
70 92
71 /// Creates a new internal OpenGL resource and stores the handle 93 /// Creates a new internal OpenGL resource and stores the handle
72 void Create(const char* vert_shader, const char* frag_shader) { 94 void Create(const char* vert_shader, const char* frag_shader) {
73 if (handle != 0) return; 95 if (handle != 0)
96 return;
74 handle = GLShader::LoadProgram(vert_shader, frag_shader); 97 handle = GLShader::LoadProgram(vert_shader, frag_shader);
75 } 98 }
76 99
77 /// Deletes the internal OpenGL resource 100 /// Deletes the internal OpenGL resource
78 void Release() { 101 void Release() {
79 if (handle == 0) return; 102 if (handle == 0)
103 return;
80 glDeleteProgram(handle); 104 glDeleteProgram(handle);
81 OpenGLState::ResetProgram(handle); 105 OpenGLState::ResetProgram(handle);
82 handle = 0; 106 handle = 0;
@@ -88,19 +112,28 @@ public:
88class OGLBuffer : private NonCopyable { 112class OGLBuffer : private NonCopyable {
89public: 113public:
90 OGLBuffer() = default; 114 OGLBuffer() = default;
91 OGLBuffer(OGLBuffer&& o) { std::swap(handle, o.handle); } 115 OGLBuffer(OGLBuffer&& o) {
92 ~OGLBuffer() { Release(); } 116 std::swap(handle, o.handle);
93 OGLBuffer& operator=(OGLBuffer&& o) { std::swap(handle, o.handle); return *this; } 117 }
118 ~OGLBuffer() {
119 Release();
120 }
121 OGLBuffer& operator=(OGLBuffer&& o) {
122 std::swap(handle, o.handle);
123 return *this;
124 }
94 125
95 /// Creates a new internal OpenGL resource and stores the handle 126 /// Creates a new internal OpenGL resource and stores the handle
96 void Create() { 127 void Create() {
97 if (handle != 0) return; 128 if (handle != 0)
129 return;
98 glGenBuffers(1, &handle); 130 glGenBuffers(1, &handle);
99 } 131 }
100 132
101 /// Deletes the internal OpenGL resource 133 /// Deletes the internal OpenGL resource
102 void Release() { 134 void Release() {
103 if (handle == 0) return; 135 if (handle == 0)
136 return;
104 glDeleteBuffers(1, &handle); 137 glDeleteBuffers(1, &handle);
105 OpenGLState::ResetBuffer(handle); 138 OpenGLState::ResetBuffer(handle);
106 handle = 0; 139 handle = 0;
@@ -112,19 +145,28 @@ public:
112class OGLVertexArray : private NonCopyable { 145class OGLVertexArray : private NonCopyable {
113public: 146public:
114 OGLVertexArray() = default; 147 OGLVertexArray() = default;
115 OGLVertexArray(OGLVertexArray&& o) { std::swap(handle, o.handle); } 148 OGLVertexArray(OGLVertexArray&& o) {
116 ~OGLVertexArray() { Release(); } 149 std::swap(handle, o.handle);
117 OGLVertexArray& operator=(OGLVertexArray&& o) { std::swap(handle, o.handle); return *this; } 150 }
151 ~OGLVertexArray() {
152 Release();
153 }
154 OGLVertexArray& operator=(OGLVertexArray&& o) {
155 std::swap(handle, o.handle);
156 return *this;
157 }
118 158
119 /// Creates a new internal OpenGL resource and stores the handle 159 /// Creates a new internal OpenGL resource and stores the handle
120 void Create() { 160 void Create() {
121 if (handle != 0) return; 161 if (handle != 0)
162 return;
122 glGenVertexArrays(1, &handle); 163 glGenVertexArrays(1, &handle);
123 } 164 }
124 165
125 /// Deletes the internal OpenGL resource 166 /// Deletes the internal OpenGL resource
126 void Release() { 167 void Release() {
127 if (handle == 0) return; 168 if (handle == 0)
169 return;
128 glDeleteVertexArrays(1, &handle); 170 glDeleteVertexArrays(1, &handle);
129 OpenGLState::ResetVertexArray(handle); 171 OpenGLState::ResetVertexArray(handle);
130 handle = 0; 172 handle = 0;
@@ -136,19 +178,28 @@ public:
136class OGLFramebuffer : private NonCopyable { 178class OGLFramebuffer : private NonCopyable {
137public: 179public:
138 OGLFramebuffer() = default; 180 OGLFramebuffer() = default;
139 OGLFramebuffer(OGLFramebuffer&& o) { std::swap(handle, o.handle); } 181 OGLFramebuffer(OGLFramebuffer&& o) {
140 ~OGLFramebuffer() { Release(); } 182 std::swap(handle, o.handle);
141 OGLFramebuffer& operator=(OGLFramebuffer&& o) { std::swap(handle, o.handle); return *this; } 183 }
184 ~OGLFramebuffer() {
185 Release();
186 }
187 OGLFramebuffer& operator=(OGLFramebuffer&& o) {
188 std::swap(handle, o.handle);
189 return *this;
190 }
142 191
143 /// Creates a new internal OpenGL resource and stores the handle 192 /// Creates a new internal OpenGL resource and stores the handle
144 void Create() { 193 void Create() {
145 if (handle != 0) return; 194 if (handle != 0)
195 return;
146 glGenFramebuffers(1, &handle); 196 glGenFramebuffers(1, &handle);
147 } 197 }
148 198
149 /// Deletes the internal OpenGL resource 199 /// Deletes the internal OpenGL resource
150 void Release() { 200 void Release() {
151 if (handle == 0) return; 201 if (handle == 0)
202 return;
152 glDeleteFramebuffers(1, &handle); 203 glDeleteFramebuffers(1, &handle);
153 OpenGLState::ResetFramebuffer(handle); 204 OpenGLState::ResetFramebuffer(handle);
154 handle = 0; 205 handle = 0;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 3de372f67..1808ee0a9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -4,11 +4,9 @@
4 4
5#include <array> 5#include <array>
6#include <cstddef> 6#include <cstddef>
7
8#include "common/assert.h" 7#include "common/assert.h"
9#include "common/bit_field.h" 8#include "common/bit_field.h"
10#include "common/logging/log.h" 9#include "common/logging/log.h"
11
12#include "video_core/pica.h" 10#include "video_core/pica.h"
13#include "video_core/renderer_opengl/gl_rasterizer.h" 11#include "video_core/renderer_opengl/gl_rasterizer.h"
14#include "video_core/renderer_opengl/gl_shader_gen.h" 12#include "video_core/renderer_opengl/gl_shader_gen.h"
@@ -21,19 +19,18 @@ namespace GLShader {
21 19
22/// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code) 20/// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code)
23static bool IsPassThroughTevStage(const TevStageConfig& stage) { 21static bool IsPassThroughTevStage(const TevStageConfig& stage) {
24 return (stage.color_op == TevStageConfig::Operation::Replace && 22 return (stage.color_op == TevStageConfig::Operation::Replace &&
25 stage.alpha_op == TevStageConfig::Operation::Replace && 23 stage.alpha_op == TevStageConfig::Operation::Replace &&
26 stage.color_source1 == TevStageConfig::Source::Previous && 24 stage.color_source1 == TevStageConfig::Source::Previous &&
27 stage.alpha_source1 == TevStageConfig::Source::Previous && 25 stage.alpha_source1 == TevStageConfig::Source::Previous &&
28 stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && 26 stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor &&
29 stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && 27 stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha &&
30 stage.GetColorMultiplier() == 1 && 28 stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
31 stage.GetAlphaMultiplier() == 1);
32} 29}
33 30
34/// Writes the specified TEV stage source component(s) 31/// Writes the specified TEV stage source component(s)
35static void AppendSource(std::string& out, const PicaShaderConfig& config, TevStageConfig::Source source, 32static void AppendSource(std::string& out, const PicaShaderConfig& config,
36 const std::string& index_name) { 33 TevStageConfig::Source source, const std::string& index_name) {
37 const auto& state = config.state; 34 const auto& state = config.state;
38 using Source = TevStageConfig::Source; 35 using Source = TevStageConfig::Source;
39 switch (source) { 36 switch (source) {
@@ -48,7 +45,7 @@ static void AppendSource(std::string& out, const PicaShaderConfig& config, TevSt
48 break; 45 break;
49 case Source::Texture0: 46 case Source::Texture0:
50 // Only unit 0 respects the texturing type (according to 3DBrew) 47 // Only unit 0 respects the texturing type (according to 3DBrew)
51 switch(state.texture0_type) { 48 switch (state.texture0_type) {
52 case Pica::Regs::TextureConfig::Texture2D: 49 case Pica::Regs::TextureConfig::Texture2D:
53 out += "texture(tex[0], texcoord[0])"; 50 out += "texture(tex[0], texcoord[0])";
54 break; 51 break;
@@ -57,7 +54,8 @@ static void AppendSource(std::string& out, const PicaShaderConfig& config, TevSt
57 break; 54 break;
58 default: 55 default:
59 out += "texture(tex[0], texcoord[0])"; 56 out += "texture(tex[0], texcoord[0])";
60 LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", static_cast<int>(state.texture0_type)); 57 LOG_CRITICAL(HW_GPU, "Unhandled texture type %x",
58 static_cast<int>(state.texture0_type));
61 UNIMPLEMENTED(); 59 UNIMPLEMENTED();
62 break; 60 break;
63 } 61 }
@@ -85,8 +83,9 @@ static void AppendSource(std::string& out, const PicaShaderConfig& config, TevSt
85} 83}
86 84
87/// Writes the color components to use for the specified TEV stage color modifier 85/// Writes the color components to use for the specified TEV stage color modifier
88static void AppendColorModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::ColorModifier modifier, 86static void AppendColorModifier(std::string& out, const PicaShaderConfig& config,
89 TevStageConfig::Source source, const std::string& index_name) { 87 TevStageConfig::ColorModifier modifier,
88 TevStageConfig::Source source, const std::string& index_name) {
90 using ColorModifier = TevStageConfig::ColorModifier; 89 using ColorModifier = TevStageConfig::ColorModifier;
91 switch (modifier) { 90 switch (modifier) {
92 case ColorModifier::SourceColor: 91 case ColorModifier::SourceColor:
@@ -142,8 +141,9 @@ static void AppendColorModifier(std::string& out, const PicaShaderConfig& config
142} 141}
143 142
144/// Writes the alpha component to use for the specified TEV stage alpha modifier 143/// Writes the alpha component to use for the specified TEV stage alpha modifier
145static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::AlphaModifier modifier, 144static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config,
146 TevStageConfig::Source source, const std::string& index_name) { 145 TevStageConfig::AlphaModifier modifier,
146 TevStageConfig::Source source, const std::string& index_name) {
147 using AlphaModifier = TevStageConfig::AlphaModifier; 147 using AlphaModifier = TevStageConfig::AlphaModifier;
148 switch (modifier) { 148 switch (modifier) {
149 case AlphaModifier::SourceAlpha: 149 case AlphaModifier::SourceAlpha:
@@ -191,7 +191,7 @@ static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config
191 191
192/// Writes the combiner function for the color components for the specified TEV stage operation 192/// Writes the combiner function for the color components for the specified TEV stage operation
193static void AppendColorCombiner(std::string& out, TevStageConfig::Operation operation, 193static void AppendColorCombiner(std::string& out, TevStageConfig::Operation operation,
194 const std::string& variable_name) { 194 const std::string& variable_name) {
195 out += "clamp("; 195 out += "clamp(";
196 using Operation = TevStageConfig::Operation; 196 using Operation = TevStageConfig::Operation;
197 switch (operation) { 197 switch (operation) {
@@ -208,8 +208,10 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
208 out += variable_name + "[0] + " + variable_name + "[1] - vec3(0.5)"; 208 out += variable_name + "[0] + " + variable_name + "[1] - vec3(0.5)";
209 break; 209 break;
210 case Operation::Lerp: 210 case Operation::Lerp:
211 // TODO(bunnei): Verify if HW actually does this per-component, otherwise we can just use builtin lerp 211 // TODO(bunnei): Verify if HW actually does this per-component, otherwise we can just use
212 out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name + "[1] * (vec3(1.0) - " + variable_name + "[2])"; 212 // builtin lerp
213 out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name +
214 "[1] * (vec3(1.0) - " + variable_name + "[2])";
213 break; 215 break;
214 case Operation::Subtract: 216 case Operation::Subtract:
215 out += variable_name + "[0] - " + variable_name + "[1]"; 217 out += variable_name + "[0] - " + variable_name + "[1]";
@@ -218,10 +220,12 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
218 out += variable_name + "[0] * " + variable_name + "[1] + " + variable_name + "[2]"; 220 out += variable_name + "[0] * " + variable_name + "[1] + " + variable_name + "[2]";
219 break; 221 break;
220 case Operation::AddThenMultiply: 222 case Operation::AddThenMultiply:
221 out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + variable_name + "[2]"; 223 out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " +
224 variable_name + "[2]";
222 break; 225 break;
223 case Operation::Dot3_RGB: 226 case Operation::Dot3_RGB:
224 out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name + "[1] - vec3(0.5)) * 4.0)"; 227 out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name +
228 "[1] - vec3(0.5)) * 4.0)";
225 break; 229 break;
226 default: 230 default:
227 out += "vec3(0.0)"; 231 out += "vec3(0.0)";
@@ -233,7 +237,7 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
233 237
234/// Writes the combiner function for the alpha component for the specified TEV stage operation 238/// Writes the combiner function for the alpha component for the specified TEV stage operation
235static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation operation, 239static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation operation,
236 const std::string& variable_name) { 240 const std::string& variable_name) {
237 out += "clamp("; 241 out += "clamp(";
238 using Operation = TevStageConfig::Operation; 242 using Operation = TevStageConfig::Operation;
239 switch (operation) { 243 switch (operation) {
@@ -250,7 +254,8 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper
250 out += variable_name + "[0] + " + variable_name + "[1] - 0.5"; 254 out += variable_name + "[0] + " + variable_name + "[1] - 0.5";
251 break; 255 break;
252 case Operation::Lerp: 256 case Operation::Lerp:
253 out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name + "[1] * (1.0 - " + variable_name + "[2])"; 257 out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name +
258 "[1] * (1.0 - " + variable_name + "[2])";
254 break; 259 break;
255 case Operation::Subtract: 260 case Operation::Subtract:
256 out += variable_name + "[0] - " + variable_name + "[1]"; 261 out += variable_name + "[0] - " + variable_name + "[1]";
@@ -259,7 +264,8 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper
259 out += variable_name + "[0] * " + variable_name + "[1] + " + variable_name + "[2]"; 264 out += variable_name + "[0] * " + variable_name + "[1] + " + variable_name + "[2]";
260 break; 265 break;
261 case Operation::AddThenMultiply: 266 case Operation::AddThenMultiply:
262 out += "min(" + variable_name + "[0] + " + variable_name + "[1], 1.0) * " + variable_name + "[2]"; 267 out += "min(" + variable_name + "[0] + " + variable_name + "[1], 1.0) * " + variable_name +
268 "[2]";
263 break; 269 break;
264 default: 270 default:
265 out += "0.0"; 271 out += "0.0";
@@ -284,9 +290,8 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) {
284 case CompareFunc::LessThan: 290 case CompareFunc::LessThan:
285 case CompareFunc::LessThanOrEqual: 291 case CompareFunc::LessThanOrEqual:
286 case CompareFunc::GreaterThan: 292 case CompareFunc::GreaterThan:
287 case CompareFunc::GreaterThanOrEqual: 293 case CompareFunc::GreaterThanOrEqual: {
288 { 294 static const char* op[] = {"!=", "==", ">=", ">", "<=", "<"};
289 static const char* op[] = { "!=", "==", ">=", ">", "<=", "<", };
290 unsigned index = (unsigned)func - (unsigned)CompareFunc::Equal; 295 unsigned index = (unsigned)func - (unsigned)CompareFunc::Equal;
291 out += "int(last_tex_env_out.a * 255.0f) " + std::string(op[index]) + " alphatest_ref"; 296 out += "int(last_tex_env_out.a * 255.0f) " + std::string(op[index]) + " alphatest_ref";
292 break; 297 break;
@@ -301,7 +306,8 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) {
301 306
302/// Writes the code to emulate the specified TEV stage 307/// Writes the code to emulate the specified TEV stage
303static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) { 308static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) {
304 const auto stage = static_cast<const Pica::Regs::TevStageConfig>(config.state.tev_stages[index]); 309 const auto stage =
310 static_cast<const Pica::Regs::TevStageConfig>(config.state.tev_stages[index]);
305 if (!IsPassThroughTevStage(stage)) { 311 if (!IsPassThroughTevStage(stage)) {
306 std::string index_name = std::to_string(index); 312 std::string index_name = std::to_string(index);
307 313
@@ -330,8 +336,12 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
330 out += ";\n"; 336 out += ";\n";
331 337
332 out += "last_tex_env_out = vec4(" 338 out += "last_tex_env_out = vec4("
333 "clamp(color_output_" + index_name + " * " + std::to_string(stage.GetColorMultiplier()) + ".0, vec3(0.0), vec3(1.0))," 339 "clamp(color_output_" +
334 "clamp(alpha_output_" + index_name + " * " + std::to_string(stage.GetAlphaMultiplier()) + ".0, 0.0, 1.0));\n"; 340 index_name + " * " + std::to_string(stage.GetColorMultiplier()) +
341 ".0, vec3(0.0), vec3(1.0)),"
342 "clamp(alpha_output_" +
343 index_name + " * " + std::to_string(stage.GetAlphaMultiplier()) +
344 ".0, 0.0, 1.0));\n";
335 } 345 }
336 346
337 out += "combiner_buffer = next_combiner_buffer;\n"; 347 out += "combiner_buffer = next_combiner_buffer;\n";
@@ -355,13 +365,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
355 365
356 // Compute fragment normals 366 // Compute fragment normals
357 if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { 367 if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) {
358 // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture 368 // Bump mapping is enabled using a normal map, read perturbation vector from the selected
369 // texture
359 std::string bump_selector = std::to_string(lighting.bump_selector); 370 std::string bump_selector = std::to_string(lighting.bump_selector);
360 out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; 371 out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" +
372 bump_selector + "]).rgb - 1.0;\n";
361 373
362 // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result 374 // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher
375 // precision result
363 if (lighting.bump_renorm) { 376 if (lighting.bump_renorm) {
364 std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; 377 std::string val =
378 "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))";
365 out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; 379 out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n";
366 } 380 }
367 } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { 381 } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) {
@@ -373,7 +387,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
373 out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; 387 out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n";
374 } 388 }
375 389
376 // Rotate the surface-local normal by the interpolated normal quaternion to convert it to eyespace 390 // Rotate the surface-local normal by the interpolated normal quaternion to convert it to
391 // eyespace
377 out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; 392 out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n";
378 393
379 // Gets the index into the specified lookup table for specular lighting 394 // Gets the index into the specified lookup table for specular lighting
@@ -406,12 +421,14 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
406 421
407 if (abs) { 422 if (abs) {
408 // LUT index is in the range of (0.0, 1.0) 423 // LUT index is in the range of (0.0, 1.0)
409 index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; 424 index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")"
425 : "max(" + index + ", 0.f)";
410 return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; 426 return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))";
411 } else { 427 } else {
412 // LUT index is in the range of (-1.0, 1.0) 428 // LUT index is in the range of (-1.0, 1.0)
413 index = "clamp(" + index + ", -1.0, 1.0)"; 429 index = "clamp(" + index + ", -1.0, 1.0)";
414 return "(FLOAT_255 * ((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0)"; 430 return "(FLOAT_255 * ((" + index + " < 0) ? " + index + " + 2.0 : " + index +
431 ") / 2.0)";
415 } 432 }
416 433
417 return std::string(); 434 return std::string();
@@ -434,52 +451,74 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
434 else 451 else
435 out += "light_vector = normalize(" + light_src + ".position + view);\n"; 452 out += "light_vector = normalize(" + light_src + ".position + view);\n";
436 453
437 // Compute dot product of light_vector and normal, adjust if lighting is one-sided or two-sided 454 // Compute dot product of light_vector and normal, adjust if lighting is one-sided or
438 std::string dot_product = light_config.two_sided_diffuse ? "abs(dot(light_vector, normal))" : "max(dot(light_vector, normal), 0.0)"; 455 // two-sided
456 std::string dot_product = light_config.two_sided_diffuse
457 ? "abs(dot(light_vector, normal))"
458 : "max(dot(light_vector, normal), 0.0)";
439 459
440 // If enabled, compute distance attenuation value 460 // If enabled, compute distance attenuation value
441 std::string dist_atten = "1.0"; 461 std::string dist_atten = "1.0";
442 if (light_config.dist_atten_enable) { 462 if (light_config.dist_atten_enable) {
443 std::string index = "(" + light_src + ".dist_atten_scale * length(-view - " + light_src + ".position) + " + light_src + ".dist_atten_bias)"; 463 std::string index = "(" + light_src + ".dist_atten_scale * length(-view - " +
464 light_src + ".position) + " + light_src + ".dist_atten_bias)";
444 index = "((clamp(" + index + ", 0.0, FLOAT_255)))"; 465 index = "((clamp(" + index + ", 0.0, FLOAT_255)))";
445 const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num); 466 const unsigned lut_num =
467 ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num);
446 dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index); 468 dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index);
447 } 469 }
448 470
449 // If enabled, clamp specular component if lighting result is negative 471 // If enabled, clamp specular component if lighting result is negative
450 std::string clamp_highlights = lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; 472 std::string clamp_highlights =
473 lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
451 474
452 // Specular 0 component 475 // Specular 0 component
453 std::string d0_lut_value = "1.0"; 476 std::string d0_lut_value = "1.0";
454 if (lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution0)) { 477 if (lighting.lut_d0.enable &&
478 Pica::Regs::IsLightingSamplerSupported(lighting.config,
479 Pica::Regs::LightingSampler::Distribution0)) {
455 // Lookup specular "distribution 0" LUT value 480 // Lookup specular "distribution 0" LUT value
456 std::string index = GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input); 481 std::string index =
457 d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; 482 GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input);
483 d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " +
484 GetLutValue(Regs::LightingSampler::Distribution0, index) + ")";
458 } 485 }
459 std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; 486 std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)";
460 487
461 // If enabled, lookup ReflectRed value, otherwise, 1.0 is used 488 // If enabled, lookup ReflectRed value, otherwise, 1.0 is used
462 if (lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { 489 if (lighting.lut_rr.enable &&
463 std::string index = GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input); 490 Pica::Regs::IsLightingSamplerSupported(lighting.config,
464 std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; 491 Pica::Regs::LightingSampler::ReflectRed)) {
492 std::string index =
493 GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input);
494 std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " +
495 GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")";
465 out += "refl_value.r = " + value + ";\n"; 496 out += "refl_value.r = " + value + ";\n";
466 } else { 497 } else {
467 out += "refl_value.r = 1.0;\n"; 498 out += "refl_value.r = 1.0;\n";
468 } 499 }
469 500
470 // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used 501 // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used
471 if (lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { 502 if (lighting.lut_rg.enable &&
472 std::string index = GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input); 503 Pica::Regs::IsLightingSamplerSupported(lighting.config,
473 std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; 504 Pica::Regs::LightingSampler::ReflectGreen)) {
505 std::string index =
506 GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input);
507 std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " +
508 GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")";
474 out += "refl_value.g = " + value + ";\n"; 509 out += "refl_value.g = " + value + ";\n";
475 } else { 510 } else {
476 out += "refl_value.g = refl_value.r;\n"; 511 out += "refl_value.g = refl_value.r;\n";
477 } 512 }
478 513
479 // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used 514 // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used
480 if (lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { 515 if (lighting.lut_rb.enable &&
481 std::string index = GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input); 516 Pica::Regs::IsLightingSamplerSupported(lighting.config,
482 std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; 517 Pica::Regs::LightingSampler::ReflectBlue)) {
518 std::string index =
519 GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input);
520 std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " +
521 GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")";
483 out += "refl_value.b = " + value + ";\n"; 522 out += "refl_value.b = " + value + ";\n";
484 } else { 523 } else {
485 out += "refl_value.b = refl_value.r;\n"; 524 out += "refl_value.b = refl_value.r;\n";
@@ -487,18 +526,26 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
487 526
488 // Specular 1 component 527 // Specular 1 component
489 std::string d1_lut_value = "1.0"; 528 std::string d1_lut_value = "1.0";
490 if (lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution1)) { 529 if (lighting.lut_d1.enable &&
530 Pica::Regs::IsLightingSamplerSupported(lighting.config,
531 Pica::Regs::LightingSampler::Distribution1)) {
491 // Lookup specular "distribution 1" LUT value 532 // Lookup specular "distribution 1" LUT value
492 std::string index = GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input); 533 std::string index =
493 d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; 534 GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input);
535 d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " +
536 GetLutValue(Regs::LightingSampler::Distribution1, index) + ")";
494 } 537 }
495 std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; 538 std::string specular_1 =
539 "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)";
496 540
497 // Fresnel 541 // Fresnel
498 if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Fresnel)) { 542 if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(
543 lighting.config, Pica::Regs::LightingSampler::Fresnel)) {
499 // Lookup fresnel LUT value 544 // Lookup fresnel LUT value
500 std::string index = GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input); 545 std::string index =
501 std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; 546 GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input);
547 std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " +
548 GetLutValue(Regs::LightingSampler::Fresnel, index) + ")";
502 549
503 // Enabled for difffuse lighting alpha component 550 // Enabled for difffuse lighting alpha component
504 if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || 551 if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha ||
@@ -512,10 +559,12 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
512 } 559 }
513 560
514 // Compute primary fragment color (diffuse lighting) function 561 // Compute primary fragment color (diffuse lighting) function
515 out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n"; 562 out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " +
563 light_src + ".ambient) * " + dist_atten + ";\n";
516 564
517 // Compute secondary fragment color (specular lighting) function 565 // Compute secondary fragment color (specular lighting) function
518 out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " + clamp_highlights + " * " + dist_atten + ";\n"; 566 out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " +
567 clamp_highlights + " * " + dist_atten + ";\n";
519 } 568 }
520 569
521 // Sum final lighting result 570 // Sum final lighting result
@@ -598,9 +647,9 @@ vec4 secondary_fragment_color = vec4(0.0);
598 out += "!"; 647 out += "!";
599 // x2,y2 have +1 added to cover the entire pixel area 648 // x2,y2 have +1 added to cover the entire pixel area
600 out += "(gl_FragCoord.x >= scissor_x1 * framebuffer_scale.x && " 649 out += "(gl_FragCoord.x >= scissor_x1 * framebuffer_scale.x && "
601 "gl_FragCoord.y >= scissor_y1 * framebuffer_scale.y && " 650 "gl_FragCoord.y >= scissor_y1 * framebuffer_scale.y && "
602 "gl_FragCoord.x < (scissor_x2 + 1) * framebuffer_scale.x && " 651 "gl_FragCoord.x < (scissor_x2 + 1) * framebuffer_scale.x && "
603 "gl_FragCoord.y < (scissor_y2 + 1) * framebuffer_scale.y)) discard;\n"; 652 "gl_FragCoord.y < (scissor_y2 + 1) * framebuffer_scale.y)) discard;\n";
604 } 653 }
605 654
606 out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n"; 655 out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n";
@@ -638,9 +687,11 @@ vec4 secondary_fragment_color = vec4(0.0);
638 out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n"; 687 out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n";
639 out += "float fog_f = fog_index - fog_i;\n"; 688 out += "float fog_f = fog_index - fog_i;\n";
640 out += "uint fog_lut_entry = texelFetch(fog_lut, int(fog_i), 0).r;\n"; 689 out += "uint fog_lut_entry = texelFetch(fog_lut, int(fog_i), 0).r;\n";
641 out += "float fog_lut_entry_difference = float(int((fog_lut_entry & 0x1FFFU) << 19U) >> 19);\n"; // Extract signed difference 690 out += "float fog_lut_entry_difference = float(int((fog_lut_entry & 0x1FFFU) << 19U) >> "
691 "19);\n"; // Extract signed difference
642 out += "float fog_lut_entry_value = float((fog_lut_entry >> 13U) & 0x7FFU);\n"; 692 out += "float fog_lut_entry_value = float((fog_lut_entry >> 13U) & 0x7FFU);\n";
643 out += "float fog_factor = (fog_lut_entry_value + fog_lut_entry_difference * fog_f) / 2047.0;\n"; 693 out += "float fog_factor = (fog_lut_entry_value + fog_lut_entry_difference * fog_f) / "
694 "2047.0;\n";
644 out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; 695 out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n";
645 696
646 // Blend the fog 697 // Blend the fog
@@ -658,14 +709,20 @@ vec4 secondary_fragment_color = vec4(0.0);
658std::string GenerateVertexShader() { 709std::string GenerateVertexShader() {
659 std::string out = "#version 330 core\n"; 710 std::string out = "#version 330 core\n";
660 711
661 out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; 712 out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) +
662 out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; 713 ") in vec4 vert_position;\n";
663 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; 714 out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n";
664 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; 715 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) +
665 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; 716 ") in vec2 vert_texcoord0;\n";
666 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) + ") in float vert_texcoord0_w;\n"; 717 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) +
667 out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; 718 ") in vec2 vert_texcoord1;\n";
668 out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; 719 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) +
720 ") in vec2 vert_texcoord2;\n";
721 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) +
722 ") in float vert_texcoord0_w;\n";
723 out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) +
724 ") in vec4 vert_normquat;\n";
725 out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n";
669 726
670 out += R"( 727 out += R"(
671out vec4 primary_color; 728out vec4 primary_color;
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index dded3db46..fe07aa6eb 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -3,9 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <vector> 5#include <vector>
6
7#include <glad/glad.h> 6#include <glad/glad.h>
8
9#include "common/logging/log.h" 7#include "common/logging/log.h"
10#include "video_core/renderer_opengl/gl_shader_util.h" 8#include "video_core/renderer_opengl/gl_shader_util.h"
11 9
@@ -56,7 +54,8 @@ GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) {
56 if (result) { 54 if (result) {
57 LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); 55 LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]);
58 } else { 56 } else {
59 LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s", &fragment_shader_error[0]); 57 LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s",
58 &fragment_shader_error[0]);
60 } 59 }
61 } 60 }
62 61
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 13ee986b9..ed84cadea 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -3,10 +3,8 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <glad/glad.h> 5#include <glad/glad.h>
6
7#include "common/common_funcs.h" 6#include "common/common_funcs.h"
8#include "common/logging/log.h" 7#include "common/logging/log.h"
9
10#include "video_core/renderer_opengl/gl_state.h" 8#include "video_core/renderer_opengl/gl_state.h"
11 9
12OpenGLState OpenGLState::cur_state; 10OpenGLState OpenGLState::cur_state;
@@ -106,11 +104,11 @@ void OpenGLState::Apply() const {
106 104
107 // Color mask 105 // Color mask
108 if (color_mask.red_enabled != cur_state.color_mask.red_enabled || 106 if (color_mask.red_enabled != cur_state.color_mask.red_enabled ||
109 color_mask.green_enabled != cur_state.color_mask.green_enabled || 107 color_mask.green_enabled != cur_state.color_mask.green_enabled ||
110 color_mask.blue_enabled != cur_state.color_mask.blue_enabled || 108 color_mask.blue_enabled != cur_state.color_mask.blue_enabled ||
111 color_mask.alpha_enabled != cur_state.color_mask.alpha_enabled) { 109 color_mask.alpha_enabled != cur_state.color_mask.alpha_enabled) {
112 glColorMask(color_mask.red_enabled, color_mask.green_enabled, 110 glColorMask(color_mask.red_enabled, color_mask.green_enabled, color_mask.blue_enabled,
113 color_mask.blue_enabled, color_mask.alpha_enabled); 111 color_mask.alpha_enabled);
114 } 112 }
115 113
116 // Stencil test 114 // Stencil test
@@ -123,15 +121,16 @@ void OpenGLState::Apply() const {
123 } 121 }
124 122
125 if (stencil.test_func != cur_state.stencil.test_func || 123 if (stencil.test_func != cur_state.stencil.test_func ||
126 stencil.test_ref != cur_state.stencil.test_ref || 124 stencil.test_ref != cur_state.stencil.test_ref ||
127 stencil.test_mask != cur_state.stencil.test_mask) { 125 stencil.test_mask != cur_state.stencil.test_mask) {
128 glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask); 126 glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask);
129 } 127 }
130 128
131 if (stencil.action_depth_fail != cur_state.stencil.action_depth_fail || 129 if (stencil.action_depth_fail != cur_state.stencil.action_depth_fail ||
132 stencil.action_depth_pass != cur_state.stencil.action_depth_pass || 130 stencil.action_depth_pass != cur_state.stencil.action_depth_pass ||
133 stencil.action_stencil_fail != cur_state.stencil.action_stencil_fail) { 131 stencil.action_stencil_fail != cur_state.stencil.action_stencil_fail) {
134 glStencilOp(stencil.action_stencil_fail, stencil.action_depth_fail, stencil.action_depth_pass); 132 glStencilOp(stencil.action_stencil_fail, stencil.action_depth_fail,
133 stencil.action_depth_pass);
135 } 134 }
136 135
137 // Stencil mask 136 // Stencil mask
@@ -154,23 +153,22 @@ void OpenGLState::Apply() const {
154 } 153 }
155 154
156 if (blend.color.red != cur_state.blend.color.red || 155 if (blend.color.red != cur_state.blend.color.red ||
157 blend.color.green != cur_state.blend.color.green || 156 blend.color.green != cur_state.blend.color.green ||
158 blend.color.blue != cur_state.blend.color.blue || 157 blend.color.blue != cur_state.blend.color.blue ||
159 blend.color.alpha != cur_state.blend.color.alpha) { 158 blend.color.alpha != cur_state.blend.color.alpha) {
160 glBlendColor(blend.color.red, blend.color.green, 159 glBlendColor(blend.color.red, blend.color.green, blend.color.blue, blend.color.alpha);
161 blend.color.blue, blend.color.alpha);
162 } 160 }
163 161
164 if (blend.src_rgb_func != cur_state.blend.src_rgb_func || 162 if (blend.src_rgb_func != cur_state.blend.src_rgb_func ||
165 blend.dst_rgb_func != cur_state.blend.dst_rgb_func || 163 blend.dst_rgb_func != cur_state.blend.dst_rgb_func ||
166 blend.src_a_func != cur_state.blend.src_a_func || 164 blend.src_a_func != cur_state.blend.src_a_func ||
167 blend.dst_a_func != cur_state.blend.dst_a_func) { 165 blend.dst_a_func != cur_state.blend.dst_a_func) {
168 glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, 166 glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func,
169 blend.src_a_func, blend.dst_a_func); 167 blend.dst_a_func);
170 } 168 }
171 169
172 if (blend.rgb_equation != cur_state.blend.rgb_equation || 170 if (blend.rgb_equation != cur_state.blend.rgb_equation ||
173 blend.a_equation != cur_state.blend.a_equation) { 171 blend.a_equation != cur_state.blend.a_equation) {
174 glBlendEquationSeparate(blend.rgb_equation, blend.a_equation); 172 glBlendEquationSeparate(blend.rgb_equation, blend.a_equation);
175 } 173 }
176 174
@@ -237,8 +235,11 @@ void OpenGLState::Apply() const {
237GLenum OpenGLState::CheckFBStatus(GLenum target) { 235GLenum OpenGLState::CheckFBStatus(GLenum target) {
238 GLenum fb_status = glCheckFramebufferStatus(target); 236 GLenum fb_status = glCheckFramebufferStatus(target);
239 if (fb_status != GL_FRAMEBUFFER_COMPLETE) { 237 if (fb_status != GL_FRAMEBUFFER_COMPLETE) {
240 const char* fb_description = (target == GL_READ_FRAMEBUFFER ? "READ" : (target == GL_DRAW_FRAMEBUFFER ? "DRAW" : "UNK")); 238 const char* fb_description =
241 LOG_CRITICAL(Render_OpenGL, "OpenGL %s framebuffer check failed, status %X", fb_description, fb_status); 239 (target == GL_READ_FRAMEBUFFER ? "READ"
240 : (target == GL_DRAW_FRAMEBUFFER ? "DRAW" : "UNK"));
241 LOG_CRITICAL(Render_OpenGL, "OpenGL %s framebuffer check failed, status %X", fb_description,
242 fb_status);
242 } 243 }
243 244
244 return fb_status; 245 return fb_status;
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 13c71b0a6..01dead883 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -9,14 +9,14 @@
9class OpenGLState { 9class OpenGLState {
10public: 10public:
11 struct { 11 struct {
12 bool enabled; // GL_CULL_FACE 12 bool enabled; // GL_CULL_FACE
13 GLenum mode; // GL_CULL_FACE_MODE 13 GLenum mode; // GL_CULL_FACE_MODE
14 GLenum front_face; // GL_FRONT_FACE 14 GLenum front_face; // GL_FRONT_FACE
15 } cull; 15 } cull;
16 16
17 struct { 17 struct {
18 bool test_enabled; // GL_DEPTH_TEST 18 bool test_enabled; // GL_DEPTH_TEST
19 GLenum test_func; // GL_DEPTH_FUNC 19 GLenum test_func; // GL_DEPTH_FUNC
20 GLboolean write_mask; // GL_DEPTH_WRITEMASK 20 GLboolean write_mask; // GL_DEPTH_WRITEMASK
21 } depth; 21 } depth;
22 22
@@ -28,24 +28,24 @@ public:
28 } color_mask; // GL_COLOR_WRITEMASK 28 } color_mask; // GL_COLOR_WRITEMASK
29 29
30 struct { 30 struct {
31 bool test_enabled; // GL_STENCIL_TEST 31 bool test_enabled; // GL_STENCIL_TEST
32 GLenum test_func; // GL_STENCIL_FUNC 32 GLenum test_func; // GL_STENCIL_FUNC
33 GLint test_ref; // GL_STENCIL_REF 33 GLint test_ref; // GL_STENCIL_REF
34 GLuint test_mask; // GL_STENCIL_VALUE_MASK 34 GLuint test_mask; // GL_STENCIL_VALUE_MASK
35 GLuint write_mask; // GL_STENCIL_WRITEMASK 35 GLuint write_mask; // GL_STENCIL_WRITEMASK
36 GLenum action_stencil_fail; // GL_STENCIL_FAIL 36 GLenum action_stencil_fail; // GL_STENCIL_FAIL
37 GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL 37 GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL
38 GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS 38 GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS
39 } stencil; 39 } stencil;
40 40
41 struct { 41 struct {
42 bool enabled; // GL_BLEND 42 bool enabled; // GL_BLEND
43 GLenum rgb_equation; // GL_BLEND_EQUATION_RGB 43 GLenum rgb_equation; // GL_BLEND_EQUATION_RGB
44 GLenum a_equation; // GL_BLEND_EQUATION_ALPHA 44 GLenum a_equation; // GL_BLEND_EQUATION_ALPHA
45 GLenum src_rgb_func; // GL_BLEND_SRC_RGB 45 GLenum src_rgb_func; // GL_BLEND_SRC_RGB
46 GLenum dst_rgb_func; // GL_BLEND_DST_RGB 46 GLenum dst_rgb_func; // GL_BLEND_DST_RGB
47 GLenum src_a_func; // GL_BLEND_SRC_ALPHA 47 GLenum src_a_func; // GL_BLEND_SRC_ALPHA
48 GLenum dst_a_func; // GL_BLEND_DST_ALPHA 48 GLenum dst_a_func; // GL_BLEND_DST_ALPHA
49 49
50 struct { 50 struct {
51 GLclampf red; 51 GLclampf red;
@@ -60,7 +60,7 @@ public:
60 // 3 texture units - one for each that is used in PICA fragment shader emulation 60 // 3 texture units - one for each that is used in PICA fragment shader emulation
61 struct { 61 struct {
62 GLuint texture_2d; // GL_TEXTURE_BINDING_2D 62 GLuint texture_2d; // GL_TEXTURE_BINDING_2D
63 GLuint sampler; // GL_SAMPLER_BINDING 63 GLuint sampler; // GL_SAMPLER_BINDING
64 } texture_units[3]; 64 } texture_units[3];
65 65
66 struct { 66 struct {
@@ -74,10 +74,10 @@ public:
74 struct { 74 struct {
75 GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING 75 GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
76 GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING 76 GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
77 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING 77 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
78 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING 78 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING
79 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING 79 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
80 GLuint shader_program; // GL_CURRENT_PROGRAM 80 GLuint shader_program; // GL_CURRENT_PROGRAM
81 } draw; 81 } draw;
82 82
83 OpenGLState(); 83 OpenGLState();
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index d9b9c9cc2..cc49867c8 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -6,15 +6,12 @@
6 6
7#include <array> 7#include <array>
8#include <cstddef> 8#include <cstddef>
9
10#include <glad/glad.h> 9#include <glad/glad.h>
11
12#include "common/assert.h" 10#include "common/assert.h"
13#include "common/bit_field.h" 11#include "common/bit_field.h"
14#include "common/common_funcs.h" 12#include "common/common_funcs.h"
15#include "common/common_types.h" 13#include "common/common_types.h"
16#include "common/logging/log.h" 14#include "common/logging/log.h"
17
18#include "video_core/pica.h" 15#include "video_core/pica.h"
19 16
20using GLvec2 = std::array<GLfloat, 2>; 17using GLvec2 = std::array<GLfloat, 2>;
@@ -25,8 +22,8 @@ namespace PicaToGL {
25 22
26inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) { 23inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) {
27 static const GLenum filter_mode_table[] = { 24 static const GLenum filter_mode_table[] = {
28 GL_NEAREST, // TextureFilter::Nearest 25 GL_NEAREST, // TextureFilter::Nearest
29 GL_LINEAR // TextureFilter::Linear 26 GL_LINEAR, // TextureFilter::Linear
30 }; 27 };
31 28
32 // Range check table for input 29 // Range check table for input
@@ -52,10 +49,10 @@ inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) {
52 49
53inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) { 50inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) {
54 static const GLenum wrap_mode_table[] = { 51 static const GLenum wrap_mode_table[] = {
55 GL_CLAMP_TO_EDGE, // WrapMode::ClampToEdge 52 GL_CLAMP_TO_EDGE, // WrapMode::ClampToEdge
56 GL_CLAMP_TO_BORDER,// WrapMode::ClampToBorder 53 GL_CLAMP_TO_BORDER, // WrapMode::ClampToBorder
57 GL_REPEAT, // WrapMode::Repeat 54 GL_REPEAT, // WrapMode::Repeat
58 GL_MIRRORED_REPEAT // WrapMode::MirroredRepeat 55 GL_MIRRORED_REPEAT, // WrapMode::MirroredRepeat
59 }; 56 };
60 57
61 // Range check table for input 58 // Range check table for input
@@ -131,22 +128,22 @@ inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) {
131 128
132inline GLenum LogicOp(Pica::Regs::LogicOp op) { 129inline GLenum LogicOp(Pica::Regs::LogicOp op) {
133 static const GLenum logic_op_table[] = { 130 static const GLenum logic_op_table[] = {
134 GL_CLEAR, // Clear 131 GL_CLEAR, // Clear
135 GL_AND, // And 132 GL_AND, // And
136 GL_AND_REVERSE, // AndReverse 133 GL_AND_REVERSE, // AndReverse
137 GL_COPY, // Copy 134 GL_COPY, // Copy
138 GL_SET, // Set 135 GL_SET, // Set
139 GL_COPY_INVERTED, // CopyInverted 136 GL_COPY_INVERTED, // CopyInverted
140 GL_NOOP, // NoOp 137 GL_NOOP, // NoOp
141 GL_INVERT, // Invert 138 GL_INVERT, // Invert
142 GL_NAND, // Nand 139 GL_NAND, // Nand
143 GL_OR, // Or 140 GL_OR, // Or
144 GL_NOR, // Nor 141 GL_NOR, // Nor
145 GL_XOR, // Xor 142 GL_XOR, // Xor
146 GL_EQUIV, // Equiv 143 GL_EQUIV, // Equiv
147 GL_AND_INVERTED, // AndInverted 144 GL_AND_INVERTED, // AndInverted
148 GL_OR_REVERSE, // OrReverse 145 GL_OR_REVERSE, // OrReverse
149 GL_OR_INVERTED, // OrInverted 146 GL_OR_INVERTED, // OrInverted
150 }; 147 };
151 148
152 // Range check table for input 149 // Range check table for input
@@ -185,14 +182,14 @@ inline GLenum CompareFunc(Pica::Regs::CompareFunc func) {
185 182
186inline GLenum StencilOp(Pica::Regs::StencilAction action) { 183inline GLenum StencilOp(Pica::Regs::StencilAction action) {
187 static const GLenum stencil_op_table[] = { 184 static const GLenum stencil_op_table[] = {
188 GL_KEEP, // StencilAction::Keep 185 GL_KEEP, // StencilAction::Keep
189 GL_ZERO, // StencilAction::Zero 186 GL_ZERO, // StencilAction::Zero
190 GL_REPLACE, // StencilAction::Replace 187 GL_REPLACE, // StencilAction::Replace
191 GL_INCR, // StencilAction::Increment 188 GL_INCR, // StencilAction::Increment
192 GL_DECR, // StencilAction::Decrement 189 GL_DECR, // StencilAction::Decrement
193 GL_INVERT, // StencilAction::Invert 190 GL_INVERT, // StencilAction::Invert
194 GL_INCR_WRAP, // StencilAction::IncrementWrap 191 GL_INCR_WRAP, // StencilAction::IncrementWrap
195 GL_DECR_WRAP // StencilAction::DecrementWrap 192 GL_DECR_WRAP, // StencilAction::DecrementWrap
196 }; 193 };
197 194
198 // Range check table for input 195 // Range check table for input
@@ -207,18 +204,16 @@ inline GLenum StencilOp(Pica::Regs::StencilAction action) {
207} 204}
208 205
209inline GLvec4 ColorRGBA8(const u32 color) { 206inline GLvec4 ColorRGBA8(const u32 color) {
210 return { { (color >> 0 & 0xFF) / 255.0f, 207 return {{
211 (color >> 8 & 0xFF) / 255.0f, 208 (color >> 0 & 0xFF) / 255.0f, (color >> 8 & 0xFF) / 255.0f, (color >> 16 & 0xFF) / 255.0f,
212 (color >> 16 & 0xFF) / 255.0f, 209 (color >> 24 & 0xFF) / 255.0f,
213 (color >> 24 & 0xFF) / 255.0f 210 }};
214 } };
215} 211}
216 212
217inline std::array<GLfloat, 3> LightColor(const Pica::Regs::LightColor& color) { 213inline std::array<GLfloat, 3> LightColor(const Pica::Regs::LightColor& color) {
218 return { { color.r / 255.0f, 214 return {{
219 color.g / 255.0f, 215 color.r / 255.0f, color.g / 255.0f, color.b / 255.0f,
220 color.b / 255.0f 216 }};
221 } };
222} 217}
223 218
224} // namespace 219} // namespace
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 8410e0a64..03a588364 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -6,23 +6,19 @@
6#include <cstddef> 6#include <cstddef>
7#include <cstdlib> 7#include <cstdlib>
8#include <memory> 8#include <memory>
9
10#include <glad/glad.h> 9#include <glad/glad.h>
11
12#include "common/assert.h" 10#include "common/assert.h"
13#include "common/bit_field.h" 11#include "common/bit_field.h"
14#include "common/emu_window.h" 12#include "common/emu_window.h"
15#include "common/logging/log.h" 13#include "common/logging/log.h"
16#include "common/profiler_reporting.h" 14#include "common/profiler_reporting.h"
17#include "common/synchronized_wrapper.h" 15#include "common/synchronized_wrapper.h"
18
19#include "core/hw/gpu.h" 16#include "core/hw/gpu.h"
20#include "core/hw/hw.h" 17#include "core/hw/hw.h"
21#include "core/hw/lcd.h" 18#include "core/hw/lcd.h"
22#include "core/memory.h" 19#include "core/memory.h"
23#include "core/settings.h" 20#include "core/settings.h"
24#include "core/tracer/recorder.h" 21#include "core/tracer/recorder.h"
25
26#include "video_core/debug_utils/debug_utils.h" 22#include "video_core/debug_utils/debug_utils.h"
27#include "video_core/rasterizer_interface.h" 23#include "video_core/rasterizer_interface.h"
28#include "video_core/renderer_opengl/renderer_opengl.h" 24#include "video_core/renderer_opengl/renderer_opengl.h"
@@ -87,24 +83,25 @@ struct ScreenRectVertex {
87 * by a 3x2 matrix. 83 * by a 3x2 matrix.
88 */ 84 */
89static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) { 85static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) {
90 std::array<GLfloat, 3 * 2> matrix; 86 std::array<GLfloat, 3 * 2> matrix; // Laid out in column-major order
91 87
88 // clang-format off
92 matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; 89 matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f;
93 matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; 90 matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f;
94 // Last matrix row is implicitly assumed to be [0, 0, 1]. 91 // Last matrix row is implicitly assumed to be [0, 0, 1].
92 // clang-format on
95 93
96 return matrix; 94 return matrix;
97} 95}
98 96
99/// RendererOpenGL constructor 97/// RendererOpenGL constructor
100RendererOpenGL::RendererOpenGL() { 98RendererOpenGL::RendererOpenGL() {
101 resolution_width = std::max(VideoCore::kScreenTopWidth, VideoCore::kScreenBottomWidth); 99 resolution_width = std::max(VideoCore::kScreenTopWidth, VideoCore::kScreenBottomWidth);
102 resolution_height = VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight; 100 resolution_height = VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight;
103} 101}
104 102
105/// RendererOpenGL destructor 103/// RendererOpenGL destructor
106RendererOpenGL::~RendererOpenGL() { 104RendererOpenGL::~RendererOpenGL() {}
107}
108 105
109/// Swap buffers (render frame) 106/// Swap buffers (render frame)
110void RendererOpenGL::SwapBuffers() { 107void RendererOpenGL::SwapBuffers() {
@@ -116,13 +113,15 @@ void RendererOpenGL::SwapBuffers() {
116 const auto& framebuffer = GPU::g_regs.framebuffer_config[i]; 113 const auto& framebuffer = GPU::g_regs.framebuffer_config[i];
117 114
118 // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 115 // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04
119 u32 lcd_color_addr = (i == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom); 116 u32 lcd_color_addr =
117 (i == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom);
120 lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr; 118 lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr;
121 LCD::Regs::ColorFill color_fill = {0}; 119 LCD::Regs::ColorFill color_fill = {0};
122 LCD::Read(color_fill.raw, lcd_color_addr); 120 LCD::Read(color_fill.raw, lcd_color_addr);
123 121
124 if (color_fill.is_enabled) { 122 if (color_fill.is_enabled) {
125 LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, screen_infos[i].texture); 123 LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b,
124 screen_infos[i].texture);
126 125
127 // Resize the texture in case the framebuffer size has changed 126 // Resize the texture in case the framebuffer size has changed
128 screen_infos[i].texture.width = 1; 127 screen_infos[i].texture.width = 1;
@@ -172,15 +171,14 @@ void RendererOpenGL::SwapBuffers() {
172 * Loads framebuffer from emulated memory into the active OpenGL texture. 171 * Loads framebuffer from emulated memory into the active OpenGL texture.
173 */ 172 */
174void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, 173void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
175 ScreenInfo& screen_info) { 174 ScreenInfo& screen_info) {
176 175
177 const PAddr framebuffer_addr = framebuffer.active_fb == 0 ? 176 const PAddr framebuffer_addr =
178 framebuffer.address_left1 : framebuffer.address_left2; 177 framebuffer.active_fb == 0 ? framebuffer.address_left1 : framebuffer.address_left2;
179 178
180 LOG_TRACE(Render_OpenGL, "0x%08x bytes from 0x%08x(%dx%d), fmt %x", 179 LOG_TRACE(Render_OpenGL, "0x%08x bytes from 0x%08x(%dx%d), fmt %x",
181 framebuffer.stride * framebuffer.height, 180 framebuffer.stride * framebuffer.height, framebuffer_addr, (int)framebuffer.width,
182 framebuffer_addr, (int)framebuffer.width, 181 (int)framebuffer.height, (int)framebuffer.format);
183 (int)framebuffer.height, (int)framebuffer.format);
184 182
185 int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); 183 int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format);
186 size_t pixel_stride = framebuffer.stride / bpp; 184 size_t pixel_stride = framebuffer.stride / bpp;
@@ -192,7 +190,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram
192 // only allows rows to have a memory alignement of 4. 190 // only allows rows to have a memory alignement of 4.
193 ASSERT(pixel_stride % 4 == 0); 191 ASSERT(pixel_stride % 4 == 0);
194 192
195 if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, static_cast<u32>(pixel_stride), screen_info)) { 193 if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr,
194 static_cast<u32>(pixel_stride), screen_info)) {
196 // Reset the screen info's display texture to its own permanent texture 195 // Reset the screen info's display texture to its own permanent texture
197 screen_info.display_texture = screen_info.texture.resource.handle; 196 screen_info.display_texture = screen_info.texture.resource.handle;
198 screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f); 197 screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
@@ -208,12 +207,13 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram
208 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride); 207 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
209 208
210 // Update existing texture 209 // Update existing texture
211 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they 210 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
212 // differ from the LCD resolution. 211 // they differ from the LCD resolution.
213 // TODO: Applications could theoretically crash Citra here by specifying too large 212 // TODO: Applications could theoretically crash Citra here by specifying too large
214 // framebuffer sizes. We should make sure that this cannot happen. 213 // framebuffer sizes. We should make sure that this cannot happen.
215 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height, 214 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
216 screen_info.texture.gl_format, screen_info.texture.gl_type, framebuffer_data); 215 screen_info.texture.gl_format, screen_info.texture.gl_type,
216 framebuffer_data);
217 217
218 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 218 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
219 219
@@ -223,9 +223,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram
223} 223}
224 224
225/** 225/**
226 * Fills active OpenGL texture with the given RGB color. 226 * Fills active OpenGL texture with the given RGB color. Since the color is solid, the texture can
227 * Since the color is solid, the texture can be 1x1 but will stretch across whatever it's rendered on. 227 * be 1x1 but will stretch across whatever it's rendered on.
228 * This has the added benefit of being *really fast*.
229 */ 228 */
230void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, 229void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
231 const TextureInfo& texture) { 230 const TextureInfo& texture) {
@@ -233,7 +232,7 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
233 state.Apply(); 232 state.Apply();
234 233
235 glActiveTexture(GL_TEXTURE0); 234 glActiveTexture(GL_TEXTURE0);
236 u8 framebuffer_data[3] = { color_r, color_g, color_b }; 235 u8 framebuffer_data[3] = {color_r, color_g, color_b};
237 236
238 // Update existing texture 237 // Update existing texture
239 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data); 238 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data);
@@ -246,7 +245,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
246 * Initializes the OpenGL state and creates persistent objects. 245 * Initializes the OpenGL state and creates persistent objects.
247 */ 246 */
248void RendererOpenGL::InitOpenGLObjects() { 247void RendererOpenGL::InitOpenGLObjects() {
249 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); 248 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
249 0.0f);
250 250
251 // Link shaders and get variable locations 251 // Link shaders and get variable locations
252 shader.Create(vertex_shader, fragment_shader); 252 shader.Create(vertex_shader, fragment_shader);
@@ -270,8 +270,10 @@ void RendererOpenGL::InitOpenGLObjects() {
270 270
271 // Attach vertex data to VAO 271 // Attach vertex data to VAO
272 glBufferData(GL_ARRAY_BUFFER, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); 272 glBufferData(GL_ARRAY_BUFFER, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
273 glVertexAttribPointer(attrib_position, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), (GLvoid*)offsetof(ScreenRectVertex, position)); 273 glVertexAttribPointer(attrib_position, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex),
274 glVertexAttribPointer(attrib_tex_coord, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), (GLvoid*)offsetof(ScreenRectVertex, tex_coord)); 274 (GLvoid*)offsetof(ScreenRectVertex, position));
275 glVertexAttribPointer(attrib_tex_coord, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex),
276 (GLvoid*)offsetof(ScreenRectVertex, tex_coord));
275 glEnableVertexAttribArray(attrib_position); 277 glEnableVertexAttribArray(attrib_position);
276 glEnableVertexAttribArray(attrib_tex_coord); 278 glEnableVertexAttribArray(attrib_tex_coord);
277 279
@@ -352,23 +354,25 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
352 354
353 glActiveTexture(GL_TEXTURE0); 355 glActiveTexture(GL_TEXTURE0);
354 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, 356 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
355 texture.gl_format, texture.gl_type, nullptr); 357 texture.gl_format, texture.gl_type, nullptr);
356 358
357 state.texture_units[0].texture_2d = 0; 359 state.texture_units[0].texture_2d = 0;
358 state.Apply(); 360 state.Apply();
359} 361}
360 362
361/** 363/**
362 * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation. 364 * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD
365 * rotation.
363 */ 366 */
364void RendererOpenGL::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h) { 367void RendererOpenGL::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y,
368 float w, float h) {
365 auto& texcoords = screen_info.display_texcoords; 369 auto& texcoords = screen_info.display_texcoords;
366 370
367 std::array<ScreenRectVertex, 4> vertices = {{ 371 std::array<ScreenRectVertex, 4> vertices = {{
368 ScreenRectVertex(x, y, texcoords.bottom, texcoords.left), 372 ScreenRectVertex(x, y, texcoords.bottom, texcoords.left),
369 ScreenRectVertex(x+w, y, texcoords.bottom, texcoords.right), 373 ScreenRectVertex(x + w, y, texcoords.bottom, texcoords.right),
370 ScreenRectVertex(x, y+h, texcoords.top, texcoords.left), 374 ScreenRectVertex(x, y + h, texcoords.top, texcoords.left),
371 ScreenRectVertex(x+w, y+h, texcoords.top, texcoords.right), 375 ScreenRectVertex(x + w, y + h, texcoords.top, texcoords.right),
372 }}; 376 }};
373 377
374 state.texture_units[0].texture_2d = screen_info.display_texture; 378 state.texture_units[0].texture_2d = screen_info.display_texture;
@@ -391,25 +395,26 @@ void RendererOpenGL::DrawScreens() {
391 glClear(GL_COLOR_BUFFER_BIT); 395 glClear(GL_COLOR_BUFFER_BIT);
392 396
393 // Set projection matrix 397 // Set projection matrix
394 std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width, 398 std::array<GLfloat, 3 * 2> ortho_matrix =
395 (float)layout.height); 399 MakeOrthographicMatrix((float)layout.width, (float)layout.height);
396 glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data()); 400 glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data());
397 401
398 // Bind texture in Texture Unit 0 402 // Bind texture in Texture Unit 0
399 glActiveTexture(GL_TEXTURE0); 403 glActiveTexture(GL_TEXTURE0);
400 glUniform1i(uniform_color_texture, 0); 404 glUniform1i(uniform_color_texture, 0);
401 405
402 DrawSingleScreenRotated(screen_infos[0], (float)layout.top_screen.left, (float)layout.top_screen.top, 406 DrawSingleScreenRotated(screen_infos[0], (float)layout.top_screen.left,
403 (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight()); 407 (float)layout.top_screen.top, (float)layout.top_screen.GetWidth(),
404 DrawSingleScreenRotated(screen_infos[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top, 408 (float)layout.top_screen.GetHeight());
405 (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight()); 409 DrawSingleScreenRotated(screen_infos[1], (float)layout.bottom_screen.left,
410 (float)layout.bottom_screen.top, (float)layout.bottom_screen.GetWidth(),
411 (float)layout.bottom_screen.GetHeight());
406 412
407 m_current_frame++; 413 m_current_frame++;
408} 414}
409 415
410/// Updates the framerate 416/// Updates the framerate
411void RendererOpenGL::UpdateFramerate() { 417void RendererOpenGL::UpdateFramerate() {}
412}
413 418
414/** 419/**
415 * Set the emulator window to use for renderer 420 * Set the emulator window to use for renderer
@@ -420,14 +425,16 @@ void RendererOpenGL::SetWindow(EmuWindow* window) {
420} 425}
421 426
422static const char* GetSource(GLenum source) { 427static const char* GetSource(GLenum source) {
423#define RET(s) case GL_DEBUG_SOURCE_##s: return #s 428#define RET(s) \
429 case GL_DEBUG_SOURCE_##s: \
430 return #s
424 switch (source) { 431 switch (source) {
425 RET(API); 432 RET(API);
426 RET(WINDOW_SYSTEM); 433 RET(WINDOW_SYSTEM);
427 RET(SHADER_COMPILER); 434 RET(SHADER_COMPILER);
428 RET(THIRD_PARTY); 435 RET(THIRD_PARTY);
429 RET(APPLICATION); 436 RET(APPLICATION);
430 RET(OTHER); 437 RET(OTHER);
431 default: 438 default:
432 UNREACHABLE(); 439 UNREACHABLE();
433 } 440 }
@@ -435,23 +442,25 @@ static const char* GetSource(GLenum source) {
435} 442}
436 443
437static const char* GetType(GLenum type) { 444static const char* GetType(GLenum type) {
438#define RET(t) case GL_DEBUG_TYPE_##t: return #t 445#define RET(t) \
446 case GL_DEBUG_TYPE_##t: \
447 return #t
439 switch (type) { 448 switch (type) {
440 RET(ERROR); 449 RET(ERROR);
441 RET(DEPRECATED_BEHAVIOR); 450 RET(DEPRECATED_BEHAVIOR);
442 RET(UNDEFINED_BEHAVIOR); 451 RET(UNDEFINED_BEHAVIOR);
443 RET(PORTABILITY); 452 RET(PORTABILITY);
444 RET(PERFORMANCE); 453 RET(PERFORMANCE);
445 RET(OTHER); 454 RET(OTHER);
446 RET(MARKER); 455 RET(MARKER);
447 default: 456 default:
448 UNREACHABLE(); 457 UNREACHABLE();
449 } 458 }
450#undef RET 459#undef RET
451} 460}
452 461
453static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, 462static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity,
454 const GLchar* message, const void* user_param) { 463 GLsizei length, const GLchar* message, const void* user_param) {
455 Log::Level level; 464 Log::Level level;
456 switch (severity) { 465 switch (severity) {
457 case GL_DEBUG_SEVERITY_HIGH: 466 case GL_DEBUG_SEVERITY_HIGH:
@@ -465,8 +474,8 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
465 level = Log::Level::Debug; 474 level = Log::Level::Debug;
466 break; 475 break;
467 } 476 }
468 LOG_GENERIC(Log::Class::Render_OpenGL, level, "%s %s %d: %s", 477 LOG_GENERIC(Log::Class::Render_OpenGL, level, "%s %s %d: %s", GetSource(source), GetType(type),
469 GetSource(source), GetType(type), id, message); 478 id, message);
470} 479}
471 480
472/// Initialize the renderer 481/// Initialize the renderer
@@ -493,5 +502,4 @@ bool RendererOpenGL::Init() {
493} 502}
494 503
495/// Shutdown the renderer 504/// Shutdown the renderer
496void RendererOpenGL::ShutDown() { 505void RendererOpenGL::ShutDown() {}
497}
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 00e1044ab..87c556cff 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -5,14 +5,10 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8
9#include <glad/glad.h> 8#include <glad/glad.h>
10
11#include "common/common_types.h" 9#include "common/common_types.h"
12#include "common/math_util.h" 10#include "common/math_util.h"
13
14#include "core/hw/gpu.h" 11#include "core/hw/gpu.h"
15
16#include "video_core/renderer_base.h" 12#include "video_core/renderer_base.h"
17#include "video_core/renderer_opengl/gl_resource_manager.h" 13#include "video_core/renderer_opengl/gl_resource_manager.h"
18#include "video_core/renderer_opengl/gl_state.h" 14#include "video_core/renderer_opengl/gl_state.h"
@@ -38,7 +34,6 @@ struct ScreenInfo {
38 34
39class RendererOpenGL : public RendererBase { 35class RendererOpenGL : public RendererBase {
40public: 36public:
41
42 RendererOpenGL(); 37 RendererOpenGL();
43 ~RendererOpenGL() override; 38 ~RendererOpenGL() override;
44 39
@@ -67,15 +62,14 @@ private:
67 62
68 // Loads framebuffer from emulated memory into the display information structure 63 // Loads framebuffer from emulated memory into the display information structure
69 void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, 64 void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
70 ScreenInfo& screen_info); 65 ScreenInfo& screen_info);
71 // Fills active OpenGL texture with the given RGB color. 66 // Fills active OpenGL texture with the given RGB color.
72 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, 67 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture);
73 const TextureInfo& texture);
74 68
75 EmuWindow* render_window; ///< Handle to render window 69 EmuWindow* render_window; ///< Handle to render window
76 70
77 int resolution_width; ///< Current resolution width 71 int resolution_width; ///< Current resolution width
78 int resolution_height; ///< Current resolution height 72 int resolution_height; ///< Current resolution height
79 73
80 OpenGLState state; 74 OpenGLState state;
81 75
@@ -83,10 +77,14 @@ private:
83 OGLVertexArray vertex_array; 77 OGLVertexArray vertex_array;
84 OGLBuffer vertex_buffer; 78 OGLBuffer vertex_buffer;
85 OGLShader shader; 79 OGLShader shader;
86 std::array<ScreenInfo, 2> screen_infos; ///< Display information for top and bottom screens respectively 80
81 /// Display information for top and bottom screens respectively
82 std::array<ScreenInfo, 2> screen_infos;
83
87 // Shader uniform location indices 84 // Shader uniform location indices
88 GLuint uniform_modelview_matrix; 85 GLuint uniform_modelview_matrix;
89 GLuint uniform_color_texture; 86 GLuint uniform_color_texture;
87
90 // Shader attribute input indices 88 // Shader attribute input indices
91 GLuint attrib_position; 89 GLuint attrib_position;
92 GLuint attrib_tex_coord; 90 GLuint attrib_tex_coord;
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index f565e2c91..272f3ffe1 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -7,23 +7,18 @@
7#include <cstring> 7#include <cstring>
8#include <unordered_map> 8#include <unordered_map>
9#include <utility> 9#include <utility>
10
11#include <boost/range/algorithm/fill.hpp> 10#include <boost/range/algorithm/fill.hpp>
12
13#include "common/bit_field.h" 11#include "common/bit_field.h"
14#include "common/hash.h" 12#include "common/hash.h"
15#include "common/logging/log.h" 13#include "common/logging/log.h"
16#include "common/microprofile.h" 14#include "common/microprofile.h"
17
18#include "video_core/pica.h" 15#include "video_core/pica.h"
19#include "video_core/pica_state.h" 16#include "video_core/pica_state.h"
20#include "video_core/shader/shader.h" 17#include "video_core/shader/shader.h"
21#include "video_core/shader/shader_interpreter.h" 18#include "video_core/shader/shader_interpreter.h"
22
23#ifdef ARCHITECTURE_x86_64 19#ifdef ARCHITECTURE_x86_64
24#include "video_core/shader/shader_jit_x64.h" 20#include "video_core/shader/shader_jit_x64.h"
25#endif // ARCHITECTURE_x86_64 21#endif // ARCHITECTURE_x86_64
26
27#include "video_core/video_core.h" 22#include "video_core/video_core.h"
28 23
29namespace Pica { 24namespace Pica {
@@ -46,10 +41,8 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) {
46 41
47 const auto& output_register_map = g_state.regs.vs_output_attributes[index]; 42 const auto& output_register_map = g_state.regs.vs_output_attributes[index];
48 43
49 u32 semantics[4] = { 44 u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y,
50 output_register_map.map_x, output_register_map.map_y, 45 output_register_map.map_z, output_register_map.map_w};
51 output_register_map.map_z, output_register_map.map_w
52 };
53 46
54 for (unsigned comp = 0; comp < 4; ++comp) { 47 for (unsigned comp = 0; comp < 4; ++comp) {
55 float24* out = ((float24*)&ret) + semantics[comp]; 48 float24* out = ((float24*)&ret) + semantics[comp];
@@ -65,19 +58,20 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) {
65 index++; 58 index++;
66 } 59 }
67 60
68 // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation 61 // The hardware takes the absolute and saturates vertex colors like this, *before* doing
62 // interpolation
69 for (unsigned i = 0; i < 4; ++i) { 63 for (unsigned i = 0; i < 4; ++i) {
70 ret.color[i] = float24::FromFloat32( 64 ret.color[i] = float24::FromFloat32(std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
71 std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
72 } 65 }
73 66
74 LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " 67 LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
75 "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", 68 "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
76 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), 69 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(),
77 ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), 70 ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(),
78 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), 71 ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), ret.color.x.ToFloat32(),
79 ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), 72 ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
80 ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); 73 ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), ret.view.x.ToFloat32(),
74 ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
81 75
82 return ret; 76 return ret;
83} 77}
@@ -96,8 +90,9 @@ void ClearCache() {
96void ShaderSetup::Setup() { 90void ShaderSetup::Setup() {
97#ifdef ARCHITECTURE_x86_64 91#ifdef ARCHITECTURE_x86_64
98 if (VideoCore::g_shader_jit_enabled) { 92 if (VideoCore::g_shader_jit_enabled) {
99 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ 93 u64 cache_key =
100 Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data))); 94 Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
95 Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data));
101 96
102 auto iter = shader_map.find(cache_key); 97 auto iter = shader_map.find(cache_key);
103 if (iter != shader_map.end()) { 98 if (iter != shader_map.end()) {
@@ -127,7 +122,7 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num
127 const auto& attribute_register_map = config.input_register_map; 122 const auto& attribute_register_map = config.input_register_map;
128 123
129 for (unsigned i = 0; i < num_attributes; i++) 124 for (unsigned i = 0; i < num_attributes; i++)
130 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; 125 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
131 126
132 state.conditional_code[0] = false; 127 state.conditional_code[0] = false;
133 state.conditional_code[1] = false; 128 state.conditional_code[1] = false;
@@ -140,10 +135,11 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num
140#else 135#else
141 RunInterpreter(setup, state, config.main_offset); 136 RunInterpreter(setup, state, config.main_offset);
142#endif // ARCHITECTURE_x86_64 137#endif // ARCHITECTURE_x86_64
143
144} 138}
145 139
146DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { 140DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes,
141 const Regs::ShaderConfig& config,
142 const ShaderSetup& setup) {
147 UnitState<true> state; 143 UnitState<true> state;
148 144
149 state.debug.max_offset = 0; 145 state.debug.max_offset = 0;
@@ -155,7 +151,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_
155 boost::fill(state.registers.input, &dummy_register); 151 boost::fill(state.registers.input, &dummy_register);
156 152
157 for (unsigned i = 0; i < num_attributes; i++) 153 for (unsigned i = 0; i < num_attributes; i++)
158 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; 154 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
159 155
160 state.conditional_code[0] = false; 156 state.conditional_code[0] = false;
161 state.conditional_code[1] = false; 157 state.conditional_code[1] = false;
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index fee16df62..8858d67f8 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -9,16 +9,12 @@
9#include <memory> 9#include <memory>
10#include <type_traits> 10#include <type_traits>
11#include <vector> 11#include <vector>
12
13#include <boost/container/static_vector.hpp> 12#include <boost/container/static_vector.hpp>
14
15#include <nihstro/shader_bytecode.h> 13#include <nihstro/shader_bytecode.h>
16
17#include "common/assert.h" 14#include "common/assert.h"
18#include "common/common_funcs.h" 15#include "common/common_funcs.h"
19#include "common/common_types.h" 16#include "common/common_types.h"
20#include "common/vector_math.h" 17#include "common/vector_math.h"
21
22#include "video_core/pica.h" 18#include "video_core/pica.h"
23#include "video_core/pica_types.h" 19#include "video_core/pica_types.h"
24 20
@@ -94,46 +90,46 @@ struct OutputRegisters {
94static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD"); 90static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD");
95 91
96// Helper structure used to keep track of data useful for inspection of shader emulation 92// Helper structure used to keep track of data useful for inspection of shader emulation
97template<bool full_debugging> 93template <bool full_debugging>
98struct DebugData; 94struct DebugData;
99 95
100template<> 96template <>
101struct DebugData<false> { 97struct DebugData<false> {
102 // TODO: Hide these behind and interface and move them to DebugData<true> 98 // TODO: Hide these behind and interface and move them to DebugData<true>
103 u32 max_offset; // maximum program counter ever reached 99 u32 max_offset; // maximum program counter ever reached
104 u32 max_opdesc_id; // maximum swizzle pattern index ever used 100 u32 max_opdesc_id; // maximum swizzle pattern index ever used
105}; 101};
106 102
107template<> 103template <>
108struct DebugData<true> { 104struct DebugData<true> {
109 // Records store the input and output operands of a particular instruction. 105 // Records store the input and output operands of a particular instruction.
110 struct Record { 106 struct Record {
111 enum Type { 107 enum Type {
112 // Floating point arithmetic operands 108 // Floating point arithmetic operands
113 SRC1 = 0x1, 109 SRC1 = 0x1,
114 SRC2 = 0x2, 110 SRC2 = 0x2,
115 SRC3 = 0x4, 111 SRC3 = 0x4,
116 112
117 // Initial and final output operand value 113 // Initial and final output operand value
118 DEST_IN = 0x8, 114 DEST_IN = 0x8,
119 DEST_OUT = 0x10, 115 DEST_OUT = 0x10,
120 116
121 // Current and next instruction offset (in words) 117 // Current and next instruction offset (in words)
122 CUR_INSTR = 0x20, 118 CUR_INSTR = 0x20,
123 NEXT_INSTR = 0x40, 119 NEXT_INSTR = 0x40,
124 120
125 // Output address register value 121 // Output address register value
126 ADDR_REG_OUT = 0x80, 122 ADDR_REG_OUT = 0x80,
127 123
128 // Result of a comparison instruction 124 // Result of a comparison instruction
129 CMP_RESULT = 0x100, 125 CMP_RESULT = 0x100,
130 126
131 // Input values for conditional flow control instructions 127 // Input values for conditional flow control instructions
132 COND_BOOL_IN = 0x200, 128 COND_BOOL_IN = 0x200,
133 COND_CMP_IN = 0x400, 129 COND_CMP_IN = 0x400,
134 130
135 // Input values for a loop 131 // Input values for a loop
136 LOOP_INT_IN = 0x800, 132 LOOP_INT_IN = 0x800,
137 }; 133 };
138 134
139 Math::Vec4<float24> src1; 135 Math::Vec4<float24> src1;
@@ -156,7 +152,7 @@ struct DebugData<true> {
156 unsigned mask = 0; 152 unsigned mask = 0;
157 }; 153 };
158 154
159 u32 max_offset; // maximum program counter ever reached 155 u32 max_offset; // maximum program counter ever reached
160 u32 max_opdesc_id; // maximum swizzle pattern index ever used 156 u32 max_opdesc_id; // maximum swizzle pattern index ever used
161 157
162 // List of records for each executed shader instruction 158 // List of records for each executed shader instruction
@@ -167,10 +163,10 @@ struct DebugData<true> {
167using DebugDataRecord = DebugData<true>::Record; 163using DebugDataRecord = DebugData<true>::Record;
168 164
169// Helper function to set a DebugData<true>::Record field based on the template enum parameter. 165// Helper function to set a DebugData<true>::Record field based on the template enum parameter.
170template<DebugDataRecord::Type type, typename ValueType> 166template <DebugDataRecord::Type type, typename ValueType>
171inline void SetField(DebugDataRecord& record, ValueType value); 167inline void SetField(DebugDataRecord& record, ValueType value);
172 168
173template<> 169template <>
174inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { 170inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) {
175 record.src1.x = value[0]; 171 record.src1.x = value[0];
176 record.src1.y = value[1]; 172 record.src1.y = value[1];
@@ -178,7 +174,7 @@ inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* va
178 record.src1.w = value[3]; 174 record.src1.w = value[3];
179} 175}
180 176
181template<> 177template <>
182inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { 178inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) {
183 record.src2.x = value[0]; 179 record.src2.x = value[0];
184 record.src2.y = value[1]; 180 record.src2.y = value[1];
@@ -186,7 +182,7 @@ inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* va
186 record.src2.w = value[3]; 182 record.src2.w = value[3];
187} 183}
188 184
189template<> 185template <>
190inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { 186inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) {
191 record.src3.x = value[0]; 187 record.src3.x = value[0];
192 record.src3.y = value[1]; 188 record.src3.y = value[1];
@@ -194,7 +190,7 @@ inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* va
194 record.src3.w = value[3]; 190 record.src3.w = value[3];
195} 191}
196 192
197template<> 193template <>
198inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { 194inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) {
199 record.dest_in.x = value[0]; 195 record.dest_in.x = value[0];
200 record.dest_in.y = value[1]; 196 record.dest_in.y = value[1];
@@ -202,7 +198,7 @@ inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24*
202 record.dest_in.w = value[3]; 198 record.dest_in.w = value[3];
203} 199}
204 200
205template<> 201template <>
206inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { 202inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) {
207 record.dest_out.x = value[0]; 203 record.dest_out.x = value[0];
208 record.dest_out.y = value[1]; 204 record.dest_out.y = value[1];
@@ -210,67 +206,66 @@ inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24
210 record.dest_out.w = value[3]; 206 record.dest_out.w = value[3];
211} 207}
212 208
213template<> 209template <>
214inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { 210inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) {
215 record.address_registers[0] = value[0]; 211 record.address_registers[0] = value[0];
216 record.address_registers[1] = value[1]; 212 record.address_registers[1] = value[1];
217} 213}
218 214
219template<> 215template <>
220inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { 216inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) {
221 record.conditional_code[0] = value[0]; 217 record.conditional_code[0] = value[0];
222 record.conditional_code[1] = value[1]; 218 record.conditional_code[1] = value[1];
223} 219}
224 220
225template<> 221template <>
226inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { 222inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) {
227 record.cond_bool = value; 223 record.cond_bool = value;
228} 224}
229 225
230template<> 226template <>
231inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { 227inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) {
232 record.cond_cmp[0] = value[0]; 228 record.cond_cmp[0] = value[0];
233 record.cond_cmp[1] = value[1]; 229 record.cond_cmp[1] = value[1];
234} 230}
235 231
236template<> 232template <>
237inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { 233inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) {
238 record.loop_int = value; 234 record.loop_int = value;
239} 235}
240 236
241template<> 237template <>
242inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { 238inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) {
243 record.instruction_offset = value; 239 record.instruction_offset = value;
244} 240}
245 241
246template<> 242template <>
247inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { 243inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) {
248 record.next_instruction = value; 244 record.next_instruction = value;
249} 245}
250 246
251// Helper function to set debug information on the current shader iteration. 247// Helper function to set debug information on the current shader iteration.
252template<DebugDataRecord::Type type, typename ValueType> 248template <DebugDataRecord::Type type, typename ValueType>
253inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { 249inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) {
254 // Debugging disabled => nothing to do 250 // Debugging disabled => nothing to do
255} 251}
256 252
257template<DebugDataRecord::Type type, typename ValueType> 253template <DebugDataRecord::Type type, typename ValueType>
258inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { 254inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) {
259 if (offset >= debug_data.records.size()) 255 if (offset >= debug_data.records.size())
260 debug_data.records.resize(offset + 1); 256 debug_data.records.resize(offset + 1);
261 257
262 SetField<type, ValueType>(debug_data.records[offset], value); 258 SetField<type, ValueType>(debug_data.records[offset], value);
263 debug_data.records[offset].mask |= type; 259 debug_data.records[offset].mask |= type;
264} 260}
265 261
266
267/** 262/**
268 * This structure contains the state information that needs to be unique for a shader unit. The 3DS 263 * This structure contains the state information that needs to be unique for a shader unit. The 3DS
269 * has four shader units that process shaders in parallel. At the present, Citra only implements a 264 * has four shader units that process shaders in parallel. At the present, Citra only implements a
270 * single shader unit that processes all shaders serially. Putting the state information in a struct 265 * single shader unit that processes all shaders serially. Putting the state information in a struct
271 * here will make it easier for us to parallelize the shader processing later. 266 * here will make it easier for us to parallelize the shader processing later.
272 */ 267 */
273template<bool Debug> 268template <bool Debug>
274struct UnitState { 269struct UnitState {
275 struct Registers { 270 struct Registers {
276 // The registers are accessed by the shader JIT using SSE instructions, and are therefore 271 // The registers are accessed by the shader JIT using SSE instructions, and are therefore
@@ -293,10 +288,12 @@ struct UnitState {
293 static size_t InputOffset(const SourceRegister& reg) { 288 static size_t InputOffset(const SourceRegister& reg) {
294 switch (reg.GetRegisterType()) { 289 switch (reg.GetRegisterType()) {
295 case RegisterType::Input: 290 case RegisterType::Input:
296 return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 291 return offsetof(UnitState, registers.input) +
292 reg.GetIndex() * sizeof(Math::Vec4<float24>);
297 293
298 case RegisterType::Temporary: 294 case RegisterType::Temporary:
299 return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 295 return offsetof(UnitState, registers.temporary) +
296 reg.GetIndex() * sizeof(Math::Vec4<float24>);
300 297
301 default: 298 default:
302 UNREACHABLE(); 299 UNREACHABLE();
@@ -307,10 +304,12 @@ struct UnitState {
307 static size_t OutputOffset(const DestRegister& reg) { 304 static size_t OutputOffset(const DestRegister& reg) {
308 switch (reg.GetRegisterType()) { 305 switch (reg.GetRegisterType()) {
309 case RegisterType::Output: 306 case RegisterType::Output:
310 return offsetof(UnitState, output_registers.value) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 307 return offsetof(UnitState, output_registers.value) +
308 reg.GetIndex() * sizeof(Math::Vec4<float24>);
311 309
312 case RegisterType::Temporary: 310 case RegisterType::Temporary:
313 return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 311 return offsetof(UnitState, registers.temporary) +
312 reg.GetIndex() * sizeof(Math::Vec4<float24>);
314 313
315 default: 314 default:
316 UNREACHABLE(); 315 UNREACHABLE();
@@ -336,13 +335,13 @@ struct ShaderSetup {
336 static size_t UniformOffset(RegisterType type, unsigned index) { 335 static size_t UniformOffset(RegisterType type, unsigned index) {
337 switch (type) { 336 switch (type) {
338 case RegisterType::FloatUniform: 337 case RegisterType::FloatUniform:
339 return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>); 338 return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>);
340 339
341 case RegisterType::BoolUniform: 340 case RegisterType::BoolUniform:
342 return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool); 341 return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool);
343 342
344 case RegisterType::IntUniform: 343 case RegisterType::IntUniform:
345 return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>); 344 return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>);
346 345
347 default: 346 default:
348 UNREACHABLE(); 347 UNREACHABLE();
@@ -354,8 +353,8 @@ struct ShaderSetup {
354 std::array<u32, 1024> swizzle_data; 353 std::array<u32, 1024> swizzle_data;
355 354
356 /** 355 /**
357 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per 356 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once
358 * vertex, which would happen within the `Run` function). 357 * per vertex, which would happen within the `Run` function).
359 */ 358 */
360 void Setup(); 359 void Setup();
361 360
@@ -375,8 +374,8 @@ struct ShaderSetup {
375 * @param setup Setup object for the shader pipeline 374 * @param setup Setup object for the shader pipeline
376 * @return Debug information for this shader with regards to the given vertex 375 * @return Debug information for this shader with regards to the given vertex
377 */ 376 */
378 DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); 377 DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
379 378 const Regs::ShaderConfig& config, const ShaderSetup& setup);
380}; 379};
381 380
382} // namespace Shader 381} // namespace Shader
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index f6c86a759..501d00b6b 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -6,14 +6,11 @@
6#include <array> 6#include <array>
7#include <cmath> 7#include <cmath>
8#include <numeric> 8#include <numeric>
9
10#include <nihstro/shader_bytecode.h> 9#include <nihstro/shader_bytecode.h>
11
12#include "common/assert.h" 10#include "common/assert.h"
13#include "common/common_types.h" 11#include "common/common_types.h"
14#include "common/logging/log.h" 12#include "common/logging/log.h"
15#include "common/vector_math.h" 13#include "common/vector_math.h"
16
17#include "video_core/pica_state.h" 14#include "video_core/pica_state.h"
18#include "video_core/pica_types.h" 15#include "video_core/pica_types.h"
19#include "video_core/shader/shader.h" 16#include "video_core/shader/shader.h"
@@ -40,7 +37,7 @@ struct CallStackElement {
40 u32 loop_address; // The address where we'll return to after each loop iteration 37 u32 loop_address; // The address where we'll return to after each loop iteration
41}; 38};
42 39
43template<bool Debug> 40template <bool Debug>
44void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) { 41void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) {
45 // TODO: Is there a maximal size for this? 42 // TODO: Is there a maximal size for this?
46 boost::container::static_vector<CallStackElement, 16> call_stack; 43 boost::container::static_vector<CallStackElement, 16> call_stack;
@@ -74,14 +71,17 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
74 } 71 }
75 } 72 }
76 73
77 const Instruction instr = { program_code[program_counter] }; 74 const Instruction instr = {program_code[program_counter]};
78 const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; 75 const SwizzlePattern swizzle = {swizzle_data[instr.common.operand_desc_id]};
79 76
80 auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions, 77 auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset,
81 u32 return_offset, u8 repeat_count, u8 loop_increment) { 78 u32 num_instructions, u32 return_offset,
82 program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset 79 u8 repeat_count, u8 loop_increment) {
80 // -1 to make sure when incrementing the PC we end up at the correct offset
81 program_counter = offset - 1;
83 ASSERT(call_stack.size() < call_stack.capacity()); 82 ASSERT(call_stack.size() < call_stack.capacity());
84 call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); 83 call_stack.push_back(
84 {offset + num_instructions, return_offset, repeat_count, loop_increment, offset});
85 }; 85 };
86 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter); 86 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter);
87 if (iteration > 0) 87 if (iteration > 0)
@@ -106,24 +106,26 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
106 }; 106 };
107 107
108 switch (instr.opcode.Value().GetInfo().type) { 108 switch (instr.opcode.Value().GetInfo().type) {
109 case OpCode::Type::Arithmetic: 109 case OpCode::Type::Arithmetic: {
110 { 110 const bool is_inverted =
111 const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); 111 (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
112 112
113 const int address_offset = (instr.common.address_register_index == 0) 113 const int address_offset =
114 ? 0 : state.address_registers[instr.common.address_register_index - 1]; 114 (instr.common.address_register_index == 0)
115 ? 0
116 : state.address_registers[instr.common.address_register_index - 1];
115 117
116 const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset)); 118 const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) +
117 const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset)); 119 (!is_inverted * address_offset));
120 const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) +
121 (is_inverted * address_offset));
118 122
119 const bool negate_src1 = ((bool)swizzle.negate_src1 != false); 123 const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
120 const bool negate_src2 = ((bool)swizzle.negate_src2 != false); 124 const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
121 125
122 float24 src1[4] = { 126 float24 src1[4] = {
123 src1_[(int)swizzle.GetSelectorSrc1(0)], 127 src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)],
124 src1_[(int)swizzle.GetSelectorSrc1(1)], 128 src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)],
125 src1_[(int)swizzle.GetSelectorSrc1(2)],
126 src1_[(int)swizzle.GetSelectorSrc1(3)],
127 }; 129 };
128 if (negate_src1) { 130 if (negate_src1) {
129 src1[0] = src1[0] * float24::FromFloat32(-1); 131 src1[0] = src1[0] * float24::FromFloat32(-1);
@@ -132,10 +134,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
132 src1[3] = src1[3] * float24::FromFloat32(-1); 134 src1[3] = src1[3] * float24::FromFloat32(-1);
133 } 135 }
134 float24 src2[4] = { 136 float24 src2[4] = {
135 src2_[(int)swizzle.GetSelectorSrc2(0)], 137 src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)],
136 src2_[(int)swizzle.GetSelectorSrc2(1)], 138 src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)],
137 src2_[(int)swizzle.GetSelectorSrc2(2)],
138 src2_[(int)swizzle.GetSelectorSrc2(3)],
139 }; 139 };
140 if (negate_src2) { 140 if (negate_src2) {
141 src2[0] = src2[0] * float24::FromFloat32(-1); 141 src2[0] = src2[0] * float24::FromFloat32(-1);
@@ -144,15 +144,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
144 src2[3] = src2[3] * float24::FromFloat32(-1); 144 src2[3] = src2[3] * float24::FromFloat32(-1);
145 } 145 }
146 146
147 float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] 147 float24* dest =
148 : (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] 148 (instr.common.dest.Value() < 0x10)
149 : dummy_vec4_float24; 149 ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0]
150 : (instr.common.dest.Value() < 0x20)
151 ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
152 : dummy_vec4_float24;
150 153
151 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); 154 state.debug.max_opdesc_id =
155 std::max<u32>(state.debug.max_opdesc_id, 1 + instr.common.operand_desc_id);
152 156
153 switch (instr.opcode.Value().EffectiveOpCode()) { 157 switch (instr.opcode.Value().EffectiveOpCode()) {
154 case OpCode::Id::ADD: 158 case OpCode::Id::ADD: {
155 {
156 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 159 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
157 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); 160 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
158 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 161 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@@ -166,8 +169,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
166 break; 169 break;
167 } 170 }
168 171
169 case OpCode::Id::MUL: 172 case OpCode::Id::MUL: {
170 {
171 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 173 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
172 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); 174 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
173 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 175 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@@ -228,8 +230,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
228 case OpCode::Id::DP3: 230 case OpCode::Id::DP3:
229 case OpCode::Id::DP4: 231 case OpCode::Id::DP4:
230 case OpCode::Id::DPH: 232 case OpCode::Id::DPH:
231 case OpCode::Id::DPHI: 233 case OpCode::Id::DPHI: {
232 {
233 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 234 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
234 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); 235 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
235 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 236 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@@ -239,7 +240,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
239 src1[3] = float24::FromFloat32(1.0f); 240 src1[3] = float24::FromFloat32(1.0f);
240 241
241 int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4; 242 int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4;
242 float24 dot = std::inner_product(src1, src1 + num_components, src2, float24::FromFloat32(0.f)); 243 float24 dot = std::inner_product(src1, src1 + num_components, src2,
244 float24::FromFloat32(0.f));
243 245
244 for (int i = 0; i < 4; ++i) { 246 for (int i = 0; i < 4; ++i) {
245 if (!swizzle.DestComponentEnabled(i)) 247 if (!swizzle.DestComponentEnabled(i))
@@ -252,8 +254,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
252 } 254 }
253 255
254 // Reciprocal 256 // Reciprocal
255 case OpCode::Id::RCP: 257 case OpCode::Id::RCP: {
256 {
257 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 258 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
258 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 259 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
259 float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32()); 260 float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32());
@@ -268,8 +269,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
268 } 269 }
269 270
270 // Reciprocal Square Root 271 // Reciprocal Square Root
271 case OpCode::Id::RSQ: 272 case OpCode::Id::RSQ: {
272 {
273 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 273 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
274 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 274 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
275 float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32())); 275 float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32()));
@@ -283,8 +283,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
283 break; 283 break;
284 } 284 }
285 285
286 case OpCode::Id::MOVA: 286 case OpCode::Id::MOVA: {
287 {
288 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 287 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
289 for (int i = 0; i < 2; ++i) { 288 for (int i = 0; i < 2; ++i) {
290 if (!swizzle.DestComponentEnabled(i)) 289 if (!swizzle.DestComponentEnabled(i))
@@ -293,12 +292,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
293 // TODO: Figure out how the rounding is done on hardware 292 // TODO: Figure out how the rounding is done on hardware
294 state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); 293 state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32());
295 } 294 }
296 Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, state.address_registers); 295 Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration,
296 state.address_registers);
297 break; 297 break;
298 } 298 }
299 299
300 case OpCode::Id::MOV: 300 case OpCode::Id::MOV: {
301 {
302 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 301 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
303 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 302 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
304 for (int i = 0; i < 4; ++i) { 303 for (int i = 0; i < 4; ++i) {
@@ -320,7 +319,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
320 if (!swizzle.DestComponentEnabled(i)) 319 if (!swizzle.DestComponentEnabled(i))
321 continue; 320 continue;
322 321
323 dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); 322 dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f)
323 : float24::FromFloat32(0.0f);
324 } 324 }
325 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); 325 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
326 break; 326 break;
@@ -334,7 +334,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
334 if (!swizzle.DestComponentEnabled(i)) 334 if (!swizzle.DestComponentEnabled(i))
335 continue; 335 continue;
336 336
337 dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); 337 dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f)
338 : float24::FromFloat32(0.0f);
338 } 339 }
339 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); 340 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
340 break; 341 break;
@@ -349,40 +350,39 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
349 auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); 350 auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value();
350 351
351 switch (op) { 352 switch (op) {
352 case Instruction::Common::CompareOpType::Equal: 353 case Instruction::Common::CompareOpType::Equal:
353 state.conditional_code[i] = (src1[i] == src2[i]); 354 state.conditional_code[i] = (src1[i] == src2[i]);
354 break; 355 break;
355 356
356 case Instruction::Common::CompareOpType::NotEqual: 357 case Instruction::Common::CompareOpType::NotEqual:
357 state.conditional_code[i] = (src1[i] != src2[i]); 358 state.conditional_code[i] = (src1[i] != src2[i]);
358 break; 359 break;
359 360
360 case Instruction::Common::CompareOpType::LessThan: 361 case Instruction::Common::CompareOpType::LessThan:
361 state.conditional_code[i] = (src1[i] < src2[i]); 362 state.conditional_code[i] = (src1[i] < src2[i]);
362 break; 363 break;
363 364
364 case Instruction::Common::CompareOpType::LessEqual: 365 case Instruction::Common::CompareOpType::LessEqual:
365 state.conditional_code[i] = (src1[i] <= src2[i]); 366 state.conditional_code[i] = (src1[i] <= src2[i]);
366 break; 367 break;
367 368
368 case Instruction::Common::CompareOpType::GreaterThan: 369 case Instruction::Common::CompareOpType::GreaterThan:
369 state.conditional_code[i] = (src1[i] > src2[i]); 370 state.conditional_code[i] = (src1[i] > src2[i]);
370 break; 371 break;
371 372
372 case Instruction::Common::CompareOpType::GreaterEqual: 373 case Instruction::Common::CompareOpType::GreaterEqual:
373 state.conditional_code[i] = (src1[i] >= src2[i]); 374 state.conditional_code[i] = (src1[i] >= src2[i]);
374 break; 375 break;
375 376
376 default: 377 default:
377 LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op)); 378 LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op));
378 break; 379 break;
379 } 380 }
380 } 381 }
381 Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); 382 Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code);
382 break; 383 break;
383 384
384 case OpCode::Id::EX2: 385 case OpCode::Id::EX2: {
385 {
386 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 386 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
387 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 387 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
388 388
@@ -399,8 +399,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
399 break; 399 break;
400 } 400 }
401 401
402 case OpCode::Id::LG2: 402 case OpCode::Id::LG2: {
403 {
404 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 403 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
405 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 404 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
406 405
@@ -419,7 +418,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
419 418
420 default: 419 default:
421 LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", 420 LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
422 (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); 421 (int)instr.opcode.Value().EffectiveOpCode(),
422 instr.opcode.Value().GetInfo().name, instr.hex);
423 DEBUG_ASSERT(false); 423 DEBUG_ASSERT(false);
424 break; 424 break;
425 } 425 }
@@ -427,30 +427,32 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
427 break; 427 break;
428 } 428 }
429 429
430 case OpCode::Type::MultiplyAdd: 430 case OpCode::Type::MultiplyAdd: {
431 {
432 if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) || 431 if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||
433 (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) { 432 (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) {
434 const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(&swizzle_data[instr.mad.operand_desc_id]); 433 const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(
434 &swizzle_data[instr.mad.operand_desc_id]);
435 435
436 bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI); 436 bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI);
437 437
438 const int address_offset = (instr.mad.address_register_index == 0) 438 const int address_offset =
439 ? 0 : state.address_registers[instr.mad.address_register_index - 1]; 439 (instr.mad.address_register_index == 0)
440 ? 0
441 : state.address_registers[instr.mad.address_register_index - 1];
440 442
441 const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); 443 const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted));
442 const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + (!is_inverted * address_offset)); 444 const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) +
443 const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + ( is_inverted * address_offset)); 445 (!is_inverted * address_offset));
446 const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) +
447 (is_inverted * address_offset));
444 448
445 const bool negate_src1 = ((bool)swizzle.negate_src1 != false); 449 const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
446 const bool negate_src2 = ((bool)swizzle.negate_src2 != false); 450 const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
447 const bool negate_src3 = ((bool)swizzle.negate_src3 != false); 451 const bool negate_src3 = ((bool)swizzle.negate_src3 != false);
448 452
449 float24 src1[4] = { 453 float24 src1[4] = {
450 src1_[(int)swizzle.GetSelectorSrc1(0)], 454 src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)],
451 src1_[(int)swizzle.GetSelectorSrc1(1)], 455 src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)],
452 src1_[(int)swizzle.GetSelectorSrc1(2)],
453 src1_[(int)swizzle.GetSelectorSrc1(3)],
454 }; 456 };
455 if (negate_src1) { 457 if (negate_src1) {
456 src1[0] = src1[0] * float24::FromFloat32(-1); 458 src1[0] = src1[0] * float24::FromFloat32(-1);
@@ -459,10 +461,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
459 src1[3] = src1[3] * float24::FromFloat32(-1); 461 src1[3] = src1[3] * float24::FromFloat32(-1);
460 } 462 }
461 float24 src2[4] = { 463 float24 src2[4] = {
462 src2_[(int)swizzle.GetSelectorSrc2(0)], 464 src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)],
463 src2_[(int)swizzle.GetSelectorSrc2(1)], 465 src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)],
464 src2_[(int)swizzle.GetSelectorSrc2(2)],
465 src2_[(int)swizzle.GetSelectorSrc2(3)],
466 }; 466 };
467 if (negate_src2) { 467 if (negate_src2) {
468 src2[0] = src2[0] * float24::FromFloat32(-1); 468 src2[0] = src2[0] * float24::FromFloat32(-1);
@@ -471,10 +471,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
471 src2[3] = src2[3] * float24::FromFloat32(-1); 471 src2[3] = src2[3] * float24::FromFloat32(-1);
472 } 472 }
473 float24 src3[4] = { 473 float24 src3[4] = {
474 src3_[(int)swizzle.GetSelectorSrc3(0)], 474 src3_[(int)swizzle.GetSelectorSrc3(0)], src3_[(int)swizzle.GetSelectorSrc3(1)],
475 src3_[(int)swizzle.GetSelectorSrc3(1)], 475 src3_[(int)swizzle.GetSelectorSrc3(2)], src3_[(int)swizzle.GetSelectorSrc3(3)],
476 src3_[(int)swizzle.GetSelectorSrc3(2)],
477 src3_[(int)swizzle.GetSelectorSrc3(3)],
478 }; 476 };
479 if (negate_src3) { 477 if (negate_src3) {
480 src3[0] = src3[0] * float24::FromFloat32(-1); 478 src3[0] = src3[0] * float24::FromFloat32(-1);
@@ -483,9 +481,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
483 src3[3] = src3[3] * float24::FromFloat32(-1); 481 src3[3] = src3[3] * float24::FromFloat32(-1);
484 } 482 }
485 483
486 float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] 484 float24* dest =
487 : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] 485 (instr.mad.dest.Value() < 0x10)
488 : dummy_vec4_float24; 486 ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0]
487 : (instr.mad.dest.Value() < 0x20)
488 ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
489 : dummy_vec4_float24;
489 490
490 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 491 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
491 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); 492 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
@@ -500,16 +501,17 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
500 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); 501 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
501 } else { 502 } else {
502 LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", 503 LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x",
503 (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); 504 (int)instr.opcode.Value().EffectiveOpCode(),
505 instr.opcode.Value().GetInfo().name, instr.hex);
504 } 506 }
505 break; 507 break;
506 } 508 }
507 509
508 default: 510 default: {
509 { 511 static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy,
510 static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { 512 Instruction::FlowControlType flow_control) {
511 bool results[2] = { refx == state.conditional_code[0], 513 bool results[2] = {refx == state.conditional_code[0],
512 refy == state.conditional_code[1] }; 514 refy == state.conditional_code[1]};
513 515
514 switch (flow_control.op) { 516 switch (flow_control.op) {
515 case flow_control.Or: 517 case flow_control.Or:
@@ -533,44 +535,45 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
533 break; 535 break;
534 536
535 case OpCode::Id::JMPC: 537 case OpCode::Id::JMPC:
536 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 538 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
537 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 539 state.conditional_code);
540 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
541 instr.flow_control)) {
538 program_counter = instr.flow_control.dest_offset - 1; 542 program_counter = instr.flow_control.dest_offset - 1;
539 } 543 }
540 break; 544 break;
541 545
542 case OpCode::Id::JMPU: 546 case OpCode::Id::JMPU:
543 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 547 Record<DebugDataRecord::COND_BOOL_IN>(
548 state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
544 549
545 if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { 550 if (uniforms.b[instr.flow_control.bool_uniform_id] ==
551 !(instr.flow_control.num_instructions & 1)) {
546 program_counter = instr.flow_control.dest_offset - 1; 552 program_counter = instr.flow_control.dest_offset - 1;
547 } 553 }
548 break; 554 break;
549 555
550 case OpCode::Id::CALL: 556 case OpCode::Id::CALL:
551 call(state, 557 call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
552 instr.flow_control.dest_offset,
553 instr.flow_control.num_instructions,
554 program_counter + 1, 0, 0); 558 program_counter + 1, 0, 0);
555 break; 559 break;
556 560
557 case OpCode::Id::CALLU: 561 case OpCode::Id::CALLU:
558 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 562 Record<DebugDataRecord::COND_BOOL_IN>(
563 state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
559 if (uniforms.b[instr.flow_control.bool_uniform_id]) { 564 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
560 call(state, 565 call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
561 instr.flow_control.dest_offset, 566 program_counter + 1, 0, 0);
562 instr.flow_control.num_instructions,
563 program_counter + 1, 0, 0);
564 } 567 }
565 break; 568 break;
566 569
567 case OpCode::Id::CALLC: 570 case OpCode::Id::CALLC:
568 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 571 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
569 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 572 state.conditional_code);
570 call(state, 573 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
571 instr.flow_control.dest_offset, 574 instr.flow_control)) {
572 instr.flow_control.num_instructions, 575 call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
573 program_counter + 1, 0, 0); 576 program_counter + 1, 0, 0);
574 } 577 }
575 break; 578 break;
576 579
@@ -578,43 +581,42 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
578 break; 581 break;
579 582
580 case OpCode::Id::IFU: 583 case OpCode::Id::IFU:
581 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 584 Record<DebugDataRecord::COND_BOOL_IN>(
585 state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
582 if (uniforms.b[instr.flow_control.bool_uniform_id]) { 586 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
583 call(state, 587 call(state, program_counter + 1,
584 program_counter + 1,
585 instr.flow_control.dest_offset - program_counter - 1, 588 instr.flow_control.dest_offset - program_counter - 1,
586 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); 589 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
590 0);
587 } else { 591 } else {
588 call(state, 592 call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
589 instr.flow_control.dest_offset, 593 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
590 instr.flow_control.num_instructions, 594 0);
591 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
592 } 595 }
593 596
594 break; 597 break;
595 598
596 case OpCode::Id::IFC: 599 case OpCode::Id::IFC: {
597 {
598 // TODO: Do we need to consider swizzlers here? 600 // TODO: Do we need to consider swizzlers here?
599 601
600 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 602 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
601 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 603 state.conditional_code);
602 call(state, 604 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
603 program_counter + 1, 605 instr.flow_control)) {
606 call(state, program_counter + 1,
604 instr.flow_control.dest_offset - program_counter - 1, 607 instr.flow_control.dest_offset - program_counter - 1,
605 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); 608 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
609 0);
606 } else { 610 } else {
607 call(state, 611 call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
608 instr.flow_control.dest_offset, 612 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
609 instr.flow_control.num_instructions, 613 0);
610 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
611 } 614 }
612 615
613 break; 616 break;
614 } 617 }
615 618
616 case OpCode::Id::LOOP: 619 case OpCode::Id::LOOP: {
617 {
618 Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x, 620 Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x,
619 uniforms.i[instr.flow_control.int_uniform_id].y, 621 uniforms.i[instr.flow_control.int_uniform_id].y,
620 uniforms.i[instr.flow_control.int_uniform_id].z, 622 uniforms.i[instr.flow_control.int_uniform_id].z,
@@ -622,18 +624,16 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
622 state.address_registers[2] = loop_param.y; 624 state.address_registers[2] = loop_param.y;
623 625
624 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); 626 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param);
625 call(state, 627 call(state, program_counter + 1,
626 program_counter + 1,
627 instr.flow_control.dest_offset - program_counter + 1, 628 instr.flow_control.dest_offset - program_counter + 1,
628 instr.flow_control.dest_offset + 1, 629 instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z);
629 loop_param.x,
630 loop_param.z);
631 break; 630 break;
632 } 631 }
633 632
634 default: 633 default:
635 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", 634 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
636 (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); 635 (int)instr.opcode.Value().EffectiveOpCode(),
636 instr.opcode.Value().GetInfo().name, instr.hex);
637 break; 637 break;
638 } 638 }
639 639
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h
index bb3ce1c6e..48ede0a2e 100644
--- a/src/video_core/shader/shader_interpreter.h
+++ b/src/video_core/shader/shader_interpreter.h
@@ -8,9 +8,10 @@ namespace Pica {
8 8
9namespace Shader { 9namespace Shader {
10 10
11template <bool Debug> struct UnitState; 11template <bool Debug>
12struct UnitState;
12 13
13template<bool Debug> 14template <bool Debug>
14void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset); 15void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset);
15 16
16} // namespace 17} // namespace
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 43e7e6b4c..211c703ab 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -5,20 +5,16 @@
5#include <algorithm> 5#include <algorithm>
6#include <cmath> 6#include <cmath>
7#include <cstdint> 7#include <cstdint>
8#include <xmmintrin.h>
9
10#include <nihstro/shader_bytecode.h> 8#include <nihstro/shader_bytecode.h>
11 9#include <xmmintrin.h>
12#include "common/assert.h" 10#include "common/assert.h"
13#include "common/logging/log.h" 11#include "common/logging/log.h"
14#include "common/vector_math.h" 12#include "common/vector_math.h"
15#include "common/x64/abi.h" 13#include "common/x64/abi.h"
16#include "common/x64/cpu_detect.h" 14#include "common/x64/cpu_detect.h"
17#include "common/x64/emitter.h" 15#include "common/x64/emitter.h"
18
19#include "shader.h" 16#include "shader.h"
20#include "shader_jit_x64.h" 17#include "shader_jit_x64.h"
21
22#include "video_core/pica_state.h" 18#include "video_core/pica_state.h"
23#include "video_core/pica_types.h" 19#include "video_core/pica_types.h"
24 20
@@ -31,70 +27,70 @@ using namespace Gen;
31typedef void (JitShader::*JitFunction)(Instruction instr); 27typedef void (JitShader::*JitFunction)(Instruction instr);
32 28
33const JitFunction instr_table[64] = { 29const JitFunction instr_table[64] = {
34 &JitShader::Compile_ADD, // add 30 &JitShader::Compile_ADD, // add
35 &JitShader::Compile_DP3, // dp3 31 &JitShader::Compile_DP3, // dp3
36 &JitShader::Compile_DP4, // dp4 32 &JitShader::Compile_DP4, // dp4
37 &JitShader::Compile_DPH, // dph 33 &JitShader::Compile_DPH, // dph
38 nullptr, // unknown 34 nullptr, // unknown
39 &JitShader::Compile_EX2, // ex2 35 &JitShader::Compile_EX2, // ex2
40 &JitShader::Compile_LG2, // lg2 36 &JitShader::Compile_LG2, // lg2
41 nullptr, // unknown 37 nullptr, // unknown
42 &JitShader::Compile_MUL, // mul 38 &JitShader::Compile_MUL, // mul
43 &JitShader::Compile_SGE, // sge 39 &JitShader::Compile_SGE, // sge
44 &JitShader::Compile_SLT, // slt 40 &JitShader::Compile_SLT, // slt
45 &JitShader::Compile_FLR, // flr 41 &JitShader::Compile_FLR, // flr
46 &JitShader::Compile_MAX, // max 42 &JitShader::Compile_MAX, // max
47 &JitShader::Compile_MIN, // min 43 &JitShader::Compile_MIN, // min
48 &JitShader::Compile_RCP, // rcp 44 &JitShader::Compile_RCP, // rcp
49 &JitShader::Compile_RSQ, // rsq 45 &JitShader::Compile_RSQ, // rsq
50 nullptr, // unknown 46 nullptr, // unknown
51 nullptr, // unknown 47 nullptr, // unknown
52 &JitShader::Compile_MOVA, // mova 48 &JitShader::Compile_MOVA, // mova
53 &JitShader::Compile_MOV, // mov 49 &JitShader::Compile_MOV, // mov
54 nullptr, // unknown 50 nullptr, // unknown
55 nullptr, // unknown 51 nullptr, // unknown
56 nullptr, // unknown 52 nullptr, // unknown
57 nullptr, // unknown 53 nullptr, // unknown
58 &JitShader::Compile_DPH, // dphi 54 &JitShader::Compile_DPH, // dphi
59 nullptr, // unknown 55 nullptr, // unknown
60 &JitShader::Compile_SGE, // sgei 56 &JitShader::Compile_SGE, // sgei
61 &JitShader::Compile_SLT, // slti 57 &JitShader::Compile_SLT, // slti
62 nullptr, // unknown 58 nullptr, // unknown
63 nullptr, // unknown 59 nullptr, // unknown
64 nullptr, // unknown 60 nullptr, // unknown
65 nullptr, // unknown 61 nullptr, // unknown
66 nullptr, // unknown 62 nullptr, // unknown
67 &JitShader::Compile_NOP, // nop 63 &JitShader::Compile_NOP, // nop
68 &JitShader::Compile_END, // end 64 &JitShader::Compile_END, // end
69 nullptr, // break 65 nullptr, // break
70 &JitShader::Compile_CALL, // call 66 &JitShader::Compile_CALL, // call
71 &JitShader::Compile_CALLC, // callc 67 &JitShader::Compile_CALLC, // callc
72 &JitShader::Compile_CALLU, // callu 68 &JitShader::Compile_CALLU, // callu
73 &JitShader::Compile_IF, // ifu 69 &JitShader::Compile_IF, // ifu
74 &JitShader::Compile_IF, // ifc 70 &JitShader::Compile_IF, // ifc
75 &JitShader::Compile_LOOP, // loop 71 &JitShader::Compile_LOOP, // loop
76 nullptr, // emit 72 nullptr, // emit
77 nullptr, // sete 73 nullptr, // sete
78 &JitShader::Compile_JMP, // jmpc 74 &JitShader::Compile_JMP, // jmpc
79 &JitShader::Compile_JMP, // jmpu 75 &JitShader::Compile_JMP, // jmpu
80 &JitShader::Compile_CMP, // cmp 76 &JitShader::Compile_CMP, // cmp
81 &JitShader::Compile_CMP, // cmp 77 &JitShader::Compile_CMP, // cmp
82 &JitShader::Compile_MAD, // madi 78 &JitShader::Compile_MAD, // madi
83 &JitShader::Compile_MAD, // madi 79 &JitShader::Compile_MAD, // madi
84 &JitShader::Compile_MAD, // madi 80 &JitShader::Compile_MAD, // madi
85 &JitShader::Compile_MAD, // madi 81 &JitShader::Compile_MAD, // madi
86 &JitShader::Compile_MAD, // madi 82 &JitShader::Compile_MAD, // madi
87 &JitShader::Compile_MAD, // madi 83 &JitShader::Compile_MAD, // madi
88 &JitShader::Compile_MAD, // madi 84 &JitShader::Compile_MAD, // madi
89 &JitShader::Compile_MAD, // madi 85 &JitShader::Compile_MAD, // madi
90 &JitShader::Compile_MAD, // mad 86 &JitShader::Compile_MAD, // mad
91 &JitShader::Compile_MAD, // mad 87 &JitShader::Compile_MAD, // mad
92 &JitShader::Compile_MAD, // mad 88 &JitShader::Compile_MAD, // mad
93 &JitShader::Compile_MAD, // mad 89 &JitShader::Compile_MAD, // mad
94 &JitShader::Compile_MAD, // mad 90 &JitShader::Compile_MAD, // mad
95 &JitShader::Compile_MAD, // mad 91 &JitShader::Compile_MAD, // mad
96 &JitShader::Compile_MAD, // mad 92 &JitShader::Compile_MAD, // mad
97 &JitShader::Compile_MAD, // mad 93 &JitShader::Compile_MAD, // mad
98}; 94};
99 95
100// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can 96// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
@@ -136,9 +132,9 @@ static const X64Reg NEGBIT = XMM15;
136// State registers that must not be modified by external functions calls 132// State registers that must not be modified by external functions calls
137// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed 133// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
138static const BitSet32 persistent_regs = { 134static const BitSet32 persistent_regs = {
139 SETUP, STATE, // Pointers to register blocks 135 SETUP, STATE, // Pointers to register blocks
140 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers 136 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
141 ONE+16, NEGBIT+16, // Constants 137 ONE + 16, NEGBIT + 16, // Constants
142}; 138};
143 139
144/// Raw constant for the source register selector that indicates no swizzling is performed 140/// Raw constant for the source register selector that indicates no swizzling is performed
@@ -152,7 +148,7 @@ static const u8 NO_DEST_REG_MASK = 0xf;
152 * @return Instruction at the specified offset 148 * @return Instruction at the specified offset
153 */ 149 */
154static Instruction GetVertexShaderInstruction(size_t offset) { 150static Instruction GetVertexShaderInstruction(size_t offset) {
155 return { g_state.vs.program_code[offset] }; 151 return {g_state.vs.program_code[offset]};
156} 152}
157 153
158static void LogCritical(const char* msg) { 154static void LogCritical(const char* msg) {
@@ -172,7 +168,8 @@ void JitShader::Compile_Assert(bool condition, const char* msg) {
172 * @param src_reg SourceRegister object corresponding to the source register to load 168 * @param src_reg SourceRegister object corresponding to the source register to load
173 * @param dest Destination XMM register to store the loaded, swizzled source register 169 * @param dest Destination XMM register to store the loaded, swizzled source register
174 */ 170 */
175void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { 171void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
172 X64Reg dest) {
176 X64Reg src_ptr; 173 X64Reg src_ptr;
177 size_t src_offset; 174 size_t src_offset;
178 175
@@ -189,7 +186,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
189 186
190 unsigned operand_desc_id; 187 unsigned operand_desc_id;
191 188
192 const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); 189 const bool is_inverted =
190 (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
193 191
194 unsigned address_register_index; 192 unsigned address_register_index;
195 unsigned offset_src; 193 unsigned offset_src;
@@ -225,7 +223,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
225 MOVAPS(dest, MDisp(src_ptr, src_offset_disp)); 223 MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
226 } 224 }
227 225
228 SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] }; 226 SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]};
229 227
230 // Generate instructions for source register swizzling as needed 228 // Generate instructions for source register swizzling as needed
231 u8 sel = swiz.GetRawSelector(src_num); 229 u8 sel = swiz.GetRawSelector(src_num);
@@ -238,13 +236,13 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
238 } 236 }
239 237
240 // If the source register should be negated, flip the negative bit using XOR 238 // If the source register should be negated, flip the negative bit using XOR
241 const bool negate[] = { swiz.negate_src1, swiz.negate_src2, swiz.negate_src3 }; 239 const bool negate[] = {swiz.negate_src1, swiz.negate_src2, swiz.negate_src3};
242 if (negate[src_num - 1]) { 240 if (negate[src_num - 1]) {
243 XORPS(dest, R(NEGBIT)); 241 XORPS(dest, R(NEGBIT));
244 } 242 }
245} 243}
246 244
247void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { 245void JitShader::Compile_DestEnable(Instruction instr, X64Reg src) {
248 DestRegister dest; 246 DestRegister dest;
249 unsigned operand_desc_id; 247 unsigned operand_desc_id;
250 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || 248 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
@@ -256,10 +254,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
256 dest = instr.common.dest.Value(); 254 dest = instr.common.dest.Value();
257 } 255 }
258 256
259 SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] }; 257 SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]};
260 258
261 int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest); 259 int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest);
262 ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), "Destinaton offset too large for int type"); 260 ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest),
261 "Destinaton offset too large for int type");
263 262
264 // If all components are enabled, write the result to the destination register 263 // If all components are enabled, write the result to the destination register
265 if (swiz.dest_mask == NO_DEST_REG_MASK) { 264 if (swiz.dest_mask == NO_DEST_REG_MASK) {
@@ -267,18 +266,21 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
267 MOVAPS(MDisp(STATE, dest_offset_disp), src); 266 MOVAPS(MDisp(STATE, dest_offset_disp), src);
268 267
269 } else { 268 } else {
270 // Not all components are enabled, so mask the result when storing to the destination register... 269 // Not all components are enabled, so mask the result when storing to the destination
270 // register...
271 MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp)); 271 MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp));
272 272
273 if (Common::GetCPUCaps().sse4_1) { 273 if (Common::GetCPUCaps().sse4_1) {
274 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); 274 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) |
275 ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
275 BLENDPS(SCRATCH, R(src), mask); 276 BLENDPS(SCRATCH, R(src), mask);
276 } else { 277 } else {
277 MOVAPS(SCRATCH2, R(src)); 278 MOVAPS(SCRATCH2, R(src));
278 UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination 279 UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination
279 UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination 280 UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination
280 281
281 // Compute selector to selectively copy source components to destination for SHUFPS instruction 282 // Compute selector to selectively copy source components to destination for SHUFPS
283 // instruction
282 u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) | 284 u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) |
283 ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) | 285 ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) |
284 ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) | 286 ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) |
@@ -336,7 +338,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
336} 338}
337 339
338void JitShader::Compile_UniformCondition(Instruction instr) { 340void JitShader::Compile_UniformCondition(Instruction instr) {
339 int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); 341 int offset =
342 ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
340 CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0)); 343 CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0));
341} 344}
342 345
@@ -512,7 +515,7 @@ void JitShader::Compile_MIN(Instruction instr) {
512} 515}
513 516
514void JitShader::Compile_MOVA(Instruction instr) { 517void JitShader::Compile_MOVA(Instruction instr) {
515 SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] }; 518 SwizzlePattern swiz = {g_state.vs.swizzle_data[instr.common.operand_desc_id]};
516 519
517 if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { 520 if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
518 return; // NoOp 521 return; // NoOp
@@ -583,8 +586,7 @@ void JitShader::Compile_RSQ(Instruction instr) {
583 Compile_DestEnable(instr, SRC1); 586 Compile_DestEnable(instr, SRC1);
584} 587}
585 588
586void JitShader::Compile_NOP(Instruction instr) { 589void JitShader::Compile_NOP(Instruction instr) {}
587}
588 590
589void JitShader::Compile_END(Instruction instr) { 591void JitShader::Compile_END(Instruction instr) {
590 ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); 592 ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
@@ -597,7 +599,7 @@ void JitShader::Compile_CALL(Instruction instr) {
597 599
598 // Call the subroutine 600 // Call the subroutine
599 FixupBranch b = CALL(); 601 FixupBranch b = CALL();
600 fixup_branches.push_back({ b, instr.flow_control.dest_offset }); 602 fixup_branches.push_back({b, instr.flow_control.dest_offset});
601 603
602 // Skip over the return offset that's on the stack 604 // Skip over the return offset that's on the stack
603 ADD(64, R(RSP), Imm32(8)); 605 ADD(64, R(RSP), Imm32(8));
@@ -628,7 +630,7 @@ void JitShader::Compile_CMP(Instruction instr) {
628 // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to 630 // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to
629 // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here 631 // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here
630 // because they don't match when used with NaNs. 632 // because they don't match when used with NaNs.
631 static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE }; 633 static const u8 cmp[] = {CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE};
632 634
633 bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual); 635 bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual);
634 Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1; 636 Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1;
@@ -678,7 +680,8 @@ void JitShader::Compile_MAD(Instruction instr) {
678} 680}
679 681
680void JitShader::Compile_IF(Instruction instr) { 682void JitShader::Compile_IF(Instruction instr) {
681 Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported"); 683 Compile_Assert(instr.flow_control.dest_offset >= program_counter,
684 "Backwards if-statements not supported");
682 685
683 // Evaluate the "IF" condition 686 // Evaluate the "IF" condition
684 if (instr.opcode.Value() == OpCode::Id::IFU) { 687 if (instr.opcode.Value() == OpCode::Id::IFU) {
@@ -709,29 +712,31 @@ void JitShader::Compile_IF(Instruction instr) {
709} 712}
710 713
711void JitShader::Compile_LOOP(Instruction instr) { 714void JitShader::Compile_LOOP(Instruction instr) {
712 Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported"); 715 Compile_Assert(instr.flow_control.dest_offset >= program_counter,
716 "Backwards loops not supported");
713 Compile_Assert(!looping, "Nested loops not supported"); 717 Compile_Assert(!looping, "Nested loops not supported");
714 718
715 looping = true; 719 looping = true;
716 720
717 int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); 721 int offset =
722 ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
718 MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); 723 MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
719 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); 724 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
720 SHR(32, R(LOOPCOUNT_REG), Imm8(8)); 725 SHR(32, R(LOOPCOUNT_REG), Imm8(8));
721 AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start 726 AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
722 MOV(32, R(LOOPINC), R(LOOPCOUNT)); 727 MOV(32, R(LOOPINC), R(LOOPCOUNT));
723 SHR(32, R(LOOPINC), Imm8(16)); 728 SHR(32, R(LOOPINC), Imm8(16));
724 MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer 729 MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer
725 MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count 730 MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count
726 ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1 731 ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1
727 732
728 auto loop_start = GetCodePtr(); 733 auto loop_start = GetCodePtr();
729 734
730 Compile_Block(instr.flow_control.dest_offset + 1); 735 Compile_Block(instr.flow_control.dest_offset + 1);
731 736
732 ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component 737 ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component
733 SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1 738 SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1
734 J_CC(CC_NZ, loop_start); // Loop if not equal 739 J_CC(CC_NZ, loop_start); // Loop if not equal
735 740
736 looping = false; 741 looping = false;
737} 742}
@@ -744,11 +749,11 @@ void JitShader::Compile_JMP(Instruction instr) {
744 else 749 else
745 UNREACHABLE(); 750 UNREACHABLE();
746 751
747 bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && 752 bool inverted_condition =
748 (instr.flow_control.num_instructions & 1); 753 (instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1);
749 754
750 FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); 755 FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true);
751 fixup_branches.push_back({ b, instr.flow_control.dest_offset }); 756 fixup_branches.push_back({b, instr.flow_control.dest_offset});
752} 757}
753 758
754void JitShader::Compile_Block(unsigned end) { 759void JitShader::Compile_Block(unsigned end) {
@@ -773,7 +778,8 @@ void JitShader::Compile_NextInstr() {
773 Compile_Return(); 778 Compile_Return();
774 } 779 }
775 780
776 ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!"); 781 ASSERT_MSG(code_ptr[program_counter] == nullptr,
782 "Tried to compile already compiled shader location!");
777 code_ptr[program_counter] = GetCodePtr(); 783 code_ptr[program_counter] = GetCodePtr();
778 784
779 Instruction instr = GetVertexShaderInstruction(program_counter++); 785 Instruction instr = GetVertexShaderInstruction(program_counter++);
@@ -787,7 +793,7 @@ void JitShader::Compile_NextInstr() {
787 } else { 793 } else {
788 // Unhandled instruction 794 // Unhandled instruction
789 LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)", 795 LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)",
790 instr.opcode.Value().EffectiveOpCode(), instr.hex); 796 instr.opcode.Value().EffectiveOpCode(), instr.hex);
791 } 797 }
792} 798}
793 799
@@ -801,7 +807,8 @@ void JitShader::FindReturnOffsets() {
801 case OpCode::Id::CALL: 807 case OpCode::Id::CALL:
802 case OpCode::Id::CALLC: 808 case OpCode::Id::CALLC:
803 case OpCode::Id::CALLU: 809 case OpCode::Id::CALLU:
804 return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions); 810 return_offsets.push_back(instr.flow_control.dest_offset +
811 instr.flow_control.num_instructions);
805 break; 812 break;
806 default: 813 default:
807 break; 814 break;
@@ -835,12 +842,12 @@ void JitShader::Compile() {
835 XOR(64, R(LOOPCOUNT_REG), R(LOOPCOUNT_REG)); 842 XOR(64, R(LOOPCOUNT_REG), R(LOOPCOUNT_REG));
836 843
837 // Used to set a register to one 844 // Used to set a register to one
838 static const __m128 one = { 1.f, 1.f, 1.f, 1.f }; 845 static const __m128 one = {1.f, 1.f, 1.f, 1.f};
839 MOV(PTRBITS, R(RAX), ImmPtr(&one)); 846 MOV(PTRBITS, R(RAX), ImmPtr(&one));
840 MOVAPS(ONE, MatR(RAX)); 847 MOVAPS(ONE, MatR(RAX));
841 848
842 // Used to negate registers 849 // Used to negate registers
843 static const __m128 neg = { -0.f, -0.f, -0.f, -0.f }; 850 static const __m128 neg = {-0.f, -0.f, -0.f, -0.f};
844 MOV(PTRBITS, R(RAX), ImmPtr(&neg)); 851 MOV(PTRBITS, R(RAX), ImmPtr(&neg));
845 MOVAPS(NEGBIT, MatR(RAX)); 852 MOVAPS(NEGBIT, MatR(RAX));
846 853
@@ -850,7 +857,8 @@ void JitShader::Compile() {
850 // Compile entire program 857 // Compile entire program
851 Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); 858 Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
852 859
853 // Set the target for any incomplete branches now that the entire shader program has been emitted 860 // Set the target for any incomplete branches now that the entire shader program has been
861 // emitted
854 for (const auto& branch : fixup_branches) { 862 for (const auto& branch : fixup_branches) {
855 SetJumpTarget(branch.first, code_ptr[branch.second]); 863 SetJumpTarget(branch.first, code_ptr[branch.second]);
856 } 864 }
@@ -861,7 +869,8 @@ void JitShader::Compile() {
861 fixup_branches.clear(); 869 fixup_branches.clear();
862 fixup_branches.shrink_to_fit(); 870 fixup_branches.shrink_to_fit();
863 871
864 uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program); 872 uintptr_t size =
873 reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
865 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); 874 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
866 875
867 LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size); 876 LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size);
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 5468459d4..98de5ecef 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -8,13 +8,10 @@
8#include <cstddef> 8#include <cstddef>
9#include <utility> 9#include <utility>
10#include <vector> 10#include <vector>
11
12#include <nihstro/shader_bytecode.h> 11#include <nihstro/shader_bytecode.h>
13
14#include "common/bit_set.h" 12#include "common/bit_set.h"
15#include "common/common_types.h" 13#include "common/common_types.h"
16#include "common/x64/emitter.h" 14#include "common/x64/emitter.h"
17
18#include "video_core/shader/shader.h" 15#include "video_core/shader/shader.h"
19 16
20using nihstro::Instruction; 17using nihstro::Instruction;
@@ -70,11 +67,11 @@ public:
70 void Compile_MAD(Instruction instr); 67 void Compile_MAD(Instruction instr);
71 68
72private: 69private:
73
74 void Compile_Block(unsigned end); 70 void Compile_Block(unsigned end);
75 void Compile_NextInstr(); 71 void Compile_NextInstr();
76 72
77 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); 73 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
74 Gen::X64Reg dest);
78 void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); 75 void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
79 76
80 /** 77 /**
@@ -111,8 +108,8 @@ private:
111 /// Offsets in code where a return needs to be inserted 108 /// Offsets in code where a return needs to be inserted
112 std::vector<unsigned> return_offsets; 109 std::vector<unsigned> return_offsets;
113 110
114 unsigned program_counter = 0; ///< Offset of the next instruction to decode 111 unsigned program_counter = 0; ///< Offset of the next instruction to decode
115 bool looping = false; ///< True if compiling a loop, used to check for nested loops 112 bool looping = false; ///< True if compiling a loop, used to check for nested loops
116 113
117 /// Branches that need to be fixed up once the entire shader program is compiled 114 /// Branches that need to be fixed up once the entire shader program is compiled
118 std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; 115 std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
diff --git a/src/video_core/swrasterizer.cpp b/src/video_core/swrasterizer.cpp
index 03df15b01..9cd21f72b 100644
--- a/src/video_core/swrasterizer.cpp
+++ b/src/video_core/swrasterizer.cpp
@@ -8,9 +8,8 @@
8namespace VideoCore { 8namespace VideoCore {
9 9
10void SWRasterizer::AddTriangle(const Pica::Shader::OutputVertex& v0, 10void SWRasterizer::AddTriangle(const Pica::Shader::OutputVertex& v0,
11 const Pica::Shader::OutputVertex& v1, 11 const Pica::Shader::OutputVertex& v1,
12 const Pica::Shader::OutputVertex& v2) { 12 const Pica::Shader::OutputVertex& v2) {
13 Pica::Clipper::ProcessTriangle(v0, v1, v2); 13 Pica::Clipper::ProcessTriangle(v0, v1, v2);
14} 14}
15
16} 15}
diff --git a/src/video_core/swrasterizer.h b/src/video_core/swrasterizer.h
index 0a028b774..6d42d7409 100644
--- a/src/video_core/swrasterizer.h
+++ b/src/video_core/swrasterizer.h
@@ -5,7 +5,6 @@
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8
9#include "video_core/rasterizer_interface.h" 8#include "video_core/rasterizer_interface.h"
10 9
11namespace Pica { 10namespace Pica {
@@ -17,14 +16,12 @@ struct OutputVertex;
17namespace VideoCore { 16namespace VideoCore {
18 17
19class SWRasterizer : public RasterizerInterface { 18class SWRasterizer : public RasterizerInterface {
20 void AddTriangle(const Pica::Shader::OutputVertex& v0, 19 void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
21 const Pica::Shader::OutputVertex& v1, 20 const Pica::Shader::OutputVertex& v2) override;
22 const Pica::Shader::OutputVertex& v2) override;
23 void DrawTriangles() override {} 21 void DrawTriangles() override {}
24 void NotifyPicaRegisterChanged(u32 id) override {} 22 void NotifyPicaRegisterChanged(u32 id) override {}
25 void FlushAll() override {} 23 void FlushAll() override {}
26 void FlushRegion(PAddr addr, u32 size) override {} 24 void FlushRegion(PAddr addr, u32 size) override {}
27 void FlushAndInvalidateRegion(PAddr addr, u32 size) override {} 25 void FlushAndInvalidateRegion(PAddr addr, u32 size) override {}
28}; 26};
29
30} 27}
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
index e40f0f1ee..2b8ef7018 100644
--- a/src/video_core/vertex_loader.cpp
+++ b/src/video_core/vertex_loader.cpp
@@ -1,16 +1,12 @@
1#include <memory> 1#include <memory>
2
3#include <boost/range/algorithm/fill.hpp> 2#include <boost/range/algorithm/fill.hpp>
4
5#include "common/alignment.h" 3#include "common/alignment.h"
6#include "common/assert.h" 4#include "common/assert.h"
7#include "common/bit_field.h" 5#include "common/bit_field.h"
8#include "common/common_types.h" 6#include "common/common_types.h"
9#include "common/logging/log.h" 7#include "common/logging/log.h"
10#include "common/vector_math.h" 8#include "common/vector_math.h"
11
12#include "core/memory.h" 9#include "core/memory.h"
13
14#include "video_core/debug_utils/debug_utils.h" 10#include "video_core/debug_utils/debug_utils.h"
15#include "video_core/pica.h" 11#include "video_core/pica.h"
16#include "video_core/pica_state.h" 12#include "video_core/pica_state.h"
@@ -41,24 +37,32 @@ void VertexLoader::Setup(const Pica::Regs& regs) {
41 // TODO: What happens if a loader overwrites a previous one's data? 37 // TODO: What happens if a loader overwrites a previous one's data?
42 for (unsigned component = 0; component < loader_config.component_count; ++component) { 38 for (unsigned component = 0; component < loader_config.component_count; ++component) {
43 if (component >= 12) { 39 if (component >= 12) {
44 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); 40 LOG_ERROR(HW_GPU,
41 "Overflow in the vertex attribute loader %u trying to load component %u",
42 loader, component);
45 continue; 43 continue;
46 } 44 }
47 45
48 u32 attribute_index = loader_config.GetComponent(component); 46 u32 attribute_index = loader_config.GetComponent(component);
49 if (attribute_index < 12) { 47 if (attribute_index < 12) {
50 offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index)); 48 offset = Common::AlignUp(offset,
49 attribute_config.GetElementSizeInBytes(attribute_index));
51 vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset; 50 vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset;
52 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); 51 vertex_attribute_strides[attribute_index] =
53 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); 52 static_cast<u32>(loader_config.byte_count);
54 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); 53 vertex_attribute_formats[attribute_index] =
54 attribute_config.GetFormat(attribute_index);
55 vertex_attribute_elements[attribute_index] =
56 attribute_config.GetNumElements(attribute_index);
55 offset += attribute_config.GetStride(attribute_index); 57 offset += attribute_config.GetStride(attribute_index);
56 } else if (attribute_index < 16) { 58 } else if (attribute_index < 16) {
57 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively 59 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings,
60 // respectively
58 offset = Common::AlignUp(offset, 4); 61 offset = Common::AlignUp(offset, 4);
59 offset += (attribute_index - 11) * 4; 62 offset += (attribute_index - 11) * 4;
60 } else { 63 } else {
61 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component 64 UNREACHABLE(); // This is truly unreachable due to the number of bits for each
65 // component
62 } 66 }
63 } 67 }
64 } 68 }
@@ -66,48 +70,55 @@ void VertexLoader::Setup(const Pica::Regs& regs) {
66 is_setup = true; 70 is_setup = true;
67} 71}
68 72
69void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) { 73void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input,
74 DebugUtils::MemoryAccessTracker& memory_accesses) {
70 ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices."); 75 ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices.");
71 76
72 for (int i = 0; i < num_total_attributes; ++i) { 77 for (int i = 0; i < num_total_attributes; ++i) {
73 if (vertex_attribute_elements[i] != 0) { 78 if (vertex_attribute_elements[i] != 0) {
74 // Load per-vertex data from the loader arrays 79 // Load per-vertex data from the loader arrays
75 u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex; 80 u32 source_addr =
81 base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex;
76 82
77 if (g_debug_context && Pica::g_debug_context->recorder) { 83 if (g_debug_context && Pica::g_debug_context->recorder) {
78 memory_accesses.AddAccess(source_addr, vertex_attribute_elements[i] * ( 84 memory_accesses.AddAccess(
79 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 85 source_addr,
80 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1)); 86 vertex_attribute_elements[i] *
87 ((vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT)
88 ? 4
89 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT)
90 ? 2
91 : 1));
81 } 92 }
82 93
83 switch (vertex_attribute_formats[i]) { 94 switch (vertex_attribute_formats[i]) {
84 case Regs::VertexAttributeFormat::BYTE: 95 case Regs::VertexAttributeFormat::BYTE: {
85 { 96 const s8* srcdata =
86 const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr)); 97 reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr));
87 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { 98 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
88 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); 99 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
89 } 100 }
90 break; 101 break;
91 } 102 }
92 case Regs::VertexAttributeFormat::UBYTE: 103 case Regs::VertexAttributeFormat::UBYTE: {
93 { 104 const u8* srcdata =
94 const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr)); 105 reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr));
95 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { 106 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
96 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); 107 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
97 } 108 }
98 break; 109 break;
99 } 110 }
100 case Regs::VertexAttributeFormat::SHORT: 111 case Regs::VertexAttributeFormat::SHORT: {
101 { 112 const s16* srcdata =
102 const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr)); 113 reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr));
103 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { 114 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
104 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); 115 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
105 } 116 }
106 break; 117 break;
107 } 118 }
108 case Regs::VertexAttributeFormat::FLOAT: 119 case Regs::VertexAttributeFormat::FLOAT: {
109 { 120 const float* srcdata =
110 const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr)); 121 reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr));
111 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { 122 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
112 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); 123 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
113 } 124 }
@@ -119,22 +130,23 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I
119 // is *not* carried over from the default attribute settings even if they're 130 // is *not* carried over from the default attribute settings even if they're
120 // enabled for this attribute. 131 // enabled for this attribute.
121 for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) { 132 for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) {
122 input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); 133 input.attr[i][comp] =
134 comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
123 } 135 }
124 136
125 LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f", 137 LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from "
126 vertex_attribute_elements[i], i, vertex, index, 138 "0x%08x + 0x%08x + 0x%04x: %f %f %f %f",
127 base_address, 139 vertex_attribute_elements[i], i, vertex, index, base_address,
128 vertex_attribute_sources[i], 140 vertex_attribute_sources[i], vertex_attribute_strides[i] * vertex,
129 vertex_attribute_strides[i] * vertex, 141 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
130 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); 142 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
131 } else if (vertex_attribute_is_default[i]) { 143 } else if (vertex_attribute_is_default[i]) {
132 // Load the default attribute if we're configured to do so 144 // Load the default attribute if we're configured to do so
133 input.attr[i] = g_state.vs_default_attributes[i]; 145 input.attr[i] = g_state.vs_default_attributes[i];
134 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", 146 LOG_TRACE(HW_GPU,
135 i, vertex, index, 147 "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", i,
136 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), 148 vertex, index, input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
137 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); 149 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
138 } else { 150 } else {
139 // TODO(yuriks): In this case, no data gets loaded and the vertex 151 // TODO(yuriks): In this case, no data gets loaded and the vertex
140 // remains with the last value it had. This isn't currently maintained 152 // remains with the last value it had. This isn't currently maintained
@@ -143,4 +155,4 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I
143 } 155 }
144} 156}
145 157
146} // namespace Pica 158} // namespace Pica
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h
index ac162c254..9f2098bb2 100644
--- a/src/video_core/vertex_loader.h
+++ b/src/video_core/vertex_loader.h
@@ -1,7 +1,6 @@
1#pragma once 1#pragma once
2 2
3#include <array> 3#include <array>
4
5#include "common/common_types.h" 4#include "common/common_types.h"
6#include "video_core/pica.h" 5#include "video_core/pica.h"
7 6
@@ -23,9 +22,12 @@ public:
23 } 22 }
24 23
25 void Setup(const Pica::Regs& regs); 24 void Setup(const Pica::Regs& regs);
26 void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses); 25 void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input,
26 DebugUtils::MemoryAccessTracker& memory_accesses);
27 27
28 int GetNumTotalAttributes() const { return num_total_attributes; } 28 int GetNumTotalAttributes() const {
29 return num_total_attributes;
30 }
29 31
30private: 32private:
31 std::array<u32, 16> vertex_attribute_sources; 33 std::array<u32, 16> vertex_attribute_sources;
@@ -37,4 +39,4 @@ private:
37 bool is_setup = false; 39 bool is_setup = false;
38}; 40};
39 41
40} // namespace Pica 42} // namespace Pica
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index bd6e5eb6b..83e33dfc2 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -3,21 +3,19 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory> 5#include <memory>
6
7#include "common/logging/log.h" 6#include "common/logging/log.h"
8
9#include "video_core/pica.h" 7#include "video_core/pica.h"
10#include "video_core/renderer_base.h" 8#include "video_core/renderer_base.h"
11#include "video_core/video_core.h"
12#include "video_core/renderer_opengl/renderer_opengl.h" 9#include "video_core/renderer_opengl/renderer_opengl.h"
10#include "video_core/video_core.h"
13 11
14//////////////////////////////////////////////////////////////////////////////////////////////////// 12////////////////////////////////////////////////////////////////////////////////////////////////////
15// Video Core namespace 13// Video Core namespace
16 14
17namespace VideoCore { 15namespace VideoCore {
18 16
19EmuWindow* g_emu_window = nullptr; ///< Frontend emulator window 17EmuWindow* g_emu_window = nullptr; ///< Frontend emulator window
20std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin 18std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
21 19
22std::atomic<bool> g_hw_renderer_enabled; 20std::atomic<bool> g_hw_renderer_enabled;
23std::atomic<bool> g_shader_jit_enabled; 21std::atomic<bool> g_shader_jit_enabled;
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index 30267489e..e2d725ab1 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -22,18 +22,19 @@ namespace VideoCore {
22// framebuffers in video memory are stored in column-major order and rendered sideways, causing 22// framebuffers in video memory are stored in column-major order and rendered sideways, causing
23// the widths and heights of the framebuffers read by the LCD to be switched compared to the 23// the widths and heights of the framebuffers read by the LCD to be switched compared to the
24// heights and widths of the screens listed here. 24// heights and widths of the screens listed here.
25static const int kScreenTopWidth = 400; ///< 3DS top screen width 25static const int kScreenTopWidth = 400; ///< 3DS top screen width
26static const int kScreenTopHeight = 240; ///< 3DS top screen height 26static const int kScreenTopHeight = 240; ///< 3DS top screen height
27static const int kScreenBottomWidth = 320; ///< 3DS bottom screen width 27static const int kScreenBottomWidth = 320; ///< 3DS bottom screen width
28static const int kScreenBottomHeight = 240; ///< 3DS bottom screen height 28static const int kScreenBottomHeight = 240; ///< 3DS bottom screen height
29 29
30// Video core renderer 30// Video core renderer
31// --------------------- 31// ---------------------
32 32
33extern std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin 33extern std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
34extern EmuWindow* g_emu_window; ///< Emu window 34extern EmuWindow* g_emu_window; ///< Emu window
35 35
36// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui) 36// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from
37// qt ui)
37extern std::atomic<bool> g_hw_renderer_enabled; 38extern std::atomic<bool> g_hw_renderer_enabled;
38extern std::atomic<bool> g_shader_jit_enabled; 39extern std::atomic<bool> g_shader_jit_enabled;
39extern std::atomic<bool> g_scaled_resolution_enabled; 40extern std::atomic<bool> g_scaled_resolution_enabled;